/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "URIHelper.h"

// NOTE(review): the targets of the six includes below were missing from the
// text under review.  They are reconstructed from the six project types this
// file uses (Integer, Character, NumberFormatException, StringTokenizer,
// URISyntaxException, URIEncoderDecoder) -- confirm the exact paths and order
// against the repository before committing.
#include <decaf/lang/Integer.h>
#include <decaf/lang/Character.h>
#include <decaf/lang/exceptions/NumberFormatException.h>
#include <decaf/util/StringTokenizer.h>
#include <decaf/net/URISyntaxException.h>
#include <decaf/internal/net/URIEncoderDecoder.h>

using namespace decaf;
using namespace decaf::lang;
using namespace decaf::lang::exceptions;
using namespace decaf::util;
using namespace decaf::net;
using namespace decaf::internal;
using namespace decaf::internal::net;

////////////////////////////////////////////////////////////////////////////////
// Creates a helper that uses caller supplied character classes for the
// validation routines below.
URIHelper::URIHelper( const std::string& unreserved,
                      const std::string& punct,
                      const std::string& reserved,
                      const std::string& someLegal,
                      const std::string& allLegal ) :
    unreserved( unreserved ),
    punct( punct ),
    reserved( reserved ),
    someLegal( someLegal ),
    allLegal( allLegal ) {
}

////////////////////////////////////////////////////////////////////////////////
// Creates a helper with the built-in character classes.
URIHelper::URIHelper() :
    unreserved( "_-!.~\'()*" ),
    punct( ",;:$&+=" ),
    reserved( ",;:$&+=?/[]@" ),
    someLegal( "_-!.~\'()*,;:$&+=" ),
    allLegal( "_-!.~\'()*,;:$&+=?/[]@" ) {
}

////////////////////////////////////////////////////////////////////////////////
// Splits the given URI string into fragment, scheme, scheme specific part,
// query, authority and path, validating each piece as it is extracted, and
// then attempts to parse the authority as a server based authority.
//
// @param uri          the URI text to parse.
// @param forceServer  forwarded to parseAuthority; when true an authority that
//                     is not a valid server authority raises an exception
//                     instead of being silently left unparsed.
//
// @returns a URIType populated with every component that was found.
//
// @throws URISyntaxException if the scheme or the scheme specific part is
//         empty, or if any of the validate methods rejects a component.
URIType URIHelper::parseURI( const std::string& uri, bool forceServer ) {

    URIType result( uri );
    std::string temp = uri;
    std::size_t index, index1, index2, index3;

    // parse into Fragment, Scheme, and SchemeSpecificPart
    // then parse SchemeSpecificPart if necessary

    // Fragment
    index = temp.find( '#' );
    if( index != std::string::npos ) {
        // remove the fragment from the end
        result.setFragment( temp.substr( index + 1, std::string::npos ) );
        validateFragment( uri, result.getFragment(), index + 1 );
        temp = temp.substr( 0, index );
    }

    // Scheme and SchemeSpecificPart
    index = index1 = temp.find( ':' );
    index2 = temp.find( '/' );
    index3 = temp.find( '?' );

    // if a '/' or '?' occurs before the first ':' the uri has no
    // specified scheme, and is therefore not absolute
    if( index != std::string::npos &&
        ( index2 >= index || index2 == std::string::npos ) &&
        ( index3 >= index || index3 == std::string::npos ) ) {

        // the characters up to the first ':' comprise the scheme
        result.setAbsolute( true );
        result.setScheme( temp.substr( 0, index ) );

        if( result.getScheme() == "" ) {
            throw URISyntaxException(
                __FILE__, __LINE__, uri, "Scheme not specified.", (int)index );
        }

        validateScheme( uri, result.getScheme(), 0 );
        result.setSchemeSpecificPart( temp.substr( index + 1, std::string::npos ) );

        if( result.getSchemeSpecificPart() == "" ) {
            throw URISyntaxException(
                __FILE__, __LINE__, uri, "Scheme specific part is invalid..", (int)index + 1 );
        }

    } else {
        result.setAbsolute( false );
        result.setSchemeSpecificPart( temp );
    }

    if( result.getScheme() == "" ||
        ( !result.getSchemeSpecificPart().empty() &&
          result.getSchemeSpecificPart().at( 0 ) == '/' ) ) {

        // the URI is hierarchical
        result.setOpaque( false );

        // Query
        temp = result.getSchemeSpecificPart();
        index = temp.find( '?' );
        if( index != std::string::npos ) {
            result.setQuery( temp.substr( index + 1, std::string::npos ) );
            temp = temp.substr( 0, index );
            // When index2 is npos the unsigned sum wraps so that
            // index2 + 1 contributes zero to the reported error offset.
            validateQuery( uri, result.getQuery(), index2 + 1 + index );
        }

        // Authority and Path
        if( temp.size() >= 2 && temp.at(0) == '/' && temp.at(1) == '/' ) {

            index = temp.find( '/', 2 );
            if( index != std::string::npos ) {
                result.setAuthority( temp.substr( 2, index - 2 ) );
                result.setPath( temp.substr( index, std::string::npos ) );
            } else {
                result.setAuthority( temp.substr( 2, std::string::npos ) );

                if( result.getAuthority() == "" &&
                    result.getQuery() == "" &&
                    result.getFragment() == "" ) {

                    throw URISyntaxException(
                        __FILE__, __LINE__, uri,
                        "Scheme specific part is invalid..", (int)uri.length() );
                }
            }

            if( result.getAuthority() != "" ) {
                validateAuthority( uri, result.getAuthority(), index1 + 3 );
            }

        } else {
            // no authority specified
            result.setPath( temp );
        }

        // Only positions that were actually found contribute to the offset
        // that is reported when the path turns out to be invalid.
        std::size_t pathIndex = 0;
        if( index2 != std::string::npos ) {
            pathIndex += index2;
        }
        if( index != std::string::npos ) {
            pathIndex += index;
        }

        validatePath( uri, result.getPath(), pathIndex );

    } else {
        // if not hierarchical, URI is opaque
        result.setOpaque( true );
        validateSsp( uri, result.getSchemeSpecificPart(), index2 + 2 + index );
    }

    URIType authority = parseAuthority( forceServer, result.getAuthority() );

    // Authority was valid, so we capture the results
    if( authority.isValid() ) {
        result.setUserInfo( authority.getUserInfo() );
        result.setHost( authority.getHost() );
        result.setPort( authority.getPort() );
        result.setServerAuthority( true );
    }

    return result;
}

////////////////////////////////////////////////////////////////////////////////
// Checks that the scheme starts with a letter and otherwise passes
// URIEncoderDecoder::validateSimple with "+-." as the extra legal characters.
//
// @param uri     the complete URI, used only for error reporting.
// @param scheme  the scheme text; must not be empty since its first character
//                is read unconditionally (parseURI checks this before calling).
// @param index   offset of the scheme within uri, added to reported indexes.
//
// @throws URISyntaxException if the scheme is not valid.
void URIHelper::validateScheme( const std::string& uri, const std::string& scheme, int index ) {

    // first char needs to be an alpha char
    if( !Character::isLetter( scheme.at(0) ) ) {
        throw URISyntaxException(
            __FILE__, __LINE__, uri, "Schema must start with a Letter.", (int)index );
    }

    try {
        URIEncoderDecoder::validateSimple( scheme, "+-." );
    } catch( URISyntaxException& e ) {
        throw URISyntaxException(
            __FILE__, __LINE__, uri, "Invalid Schema", (int)index + e.getIndex() );
    }
}

////////////////////////////////////////////////////////////////////////////////
// Validates the scheme specific part against the allLegal character class.
// Any failure is rethrown against the full uri with index added to the
// position reported by the validator.
//
// @throws URISyntaxException if the scheme specific part is not valid.
void URIHelper::validateSsp( const std::string& uri, const std::string& ssp, std::size_t index ) {

    try {
        URIEncoderDecoder::validate( ssp, allLegal );
    } catch( URISyntaxException& e ) {
        throw URISyntaxException(
            __FILE__, __LINE__, uri, "Invalid URI Ssp", (int)index + e.getIndex() );
    }
}

////////////////////////////////////////////////////////////////////////////////
// Validates the authority against "@[]" plus the someLegal character class.
// Any failure is rethrown against the full uri with index added to the
// position reported by the validator.
//
// @throws URISyntaxException if the authority is not valid.
void URIHelper::validateAuthority( const std::string& uri, const std::string& authority, std::size_t index ) {

    try {
        // "@[]" + someLegal
        URIEncoderDecoder::validate( authority, "@[]" + someLegal );
    } catch( URISyntaxException& e ) {
        throw URISyntaxException(
            __FILE__, __LINE__, uri, "Invalid URI Authority", (int)index + e.getIndex() );
    }
}

////////////////////////////////////////////////////////////////////////////////
// Validates the path against "/@" plus the someLegal character class.
// Any failure is rethrown against the full uri with index added to the
// position reported by the validator.
//
// @throws URISyntaxException if the path is not valid.
void URIHelper::validatePath( const std::string& uri, const std::string& path, std::size_t index ) {

    try {
        // "/@" + someLegal
        URIEncoderDecoder::validate( path, "/@" + someLegal );
    } catch( URISyntaxException& e ) {
        throw URISyntaxException(
            __FILE__, __LINE__, uri, "Invalid URI Path", (int)index + e.getIndex() );
    }
}

////////////////////////////////////////////////////////////////////////////////
// Validates the query against the allLegal character class.
// Any failure is rethrown against the full uri with index added to the
// position reported by the validator.
//
// @throws URISyntaxException if the query is not valid.
void URIHelper::validateQuery( const std::string& uri, const std::string& query, std::size_t index ) {

    try {
        URIEncoderDecoder::validate( query, allLegal );
    } catch( URISyntaxException& e ) {
        throw URISyntaxException(
            __FILE__, __LINE__, uri, "Invalid URI Query", (int)index + e.getIndex() );
    }
}

////////////////////////////////////////////////////////////////////////////////
// Validates the fragment against the allLegal character class.
// Any failure is rethrown against the full uri with index added to the
// position reported by the validator.
//
// @throws URISyntaxException if the fragment is not valid.
void URIHelper::validateFragment( const std::string& uri, const std::string& fragment, std::size_t index ) {

    try {
        URIEncoderDecoder::validate( fragment, allLegal );
    } catch( URISyntaxException& e ) {
        throw URISyntaxException(
            __FILE__, __LINE__, uri, "Invalid URI Fragment", (int)index + e.getIndex() );
    }
}

////////////////////////////////////////////////////////////////////////////////
// Attempts to parse the authority as [userinfo@]host[:port].
//
// @param forceServer  when true, a malformed port, an empty host or an invalid
//                     host raises a URISyntaxException; when false the
//                     untouched result (not marked valid) is returned instead.
// @param authority    the authority text; an empty string returns immediately.
//
// @returns a URIType that is marked valid, with user info, host and port set,
//          only when the authority parsed as a server based authority.
//
// @throws URISyntaxException if the user info contains '[' or ']', or if
//         forceServer is set and the authority is not server based.
URIType URIHelper::parseAuthority( bool forceServer, const std::string& authority ) {

    try{

        URIType result( authority );

        if( authority == "" ) {
            return result;
        }

        std::string temp, tempUserinfo = "", tempHost = "";
        std::size_t index, hostindex = 0;
        int tempPort = -1;

        temp = authority;
        index = temp.find( '@' );
        if( index != std::string::npos ) {
            // remove user info
            tempUserinfo = temp.substr( 0, index );
            validateUserinfo( authority, tempUserinfo, 0 );
            // host[:port] is left
            temp = temp.substr( index + 1, std::string::npos );
            hostindex = index + 1;
        }

        index = temp.rfind( ':' );
        std::size_t endindex = temp.find( ']' );

        // The last ':' only separates a port when it is located after any
        // closing ']' of a bracketed address.
        if( index != std::string::npos &&
            ( endindex < index || endindex == std::string::npos ) ){

            // determine port and host
            tempHost = temp.substr( 0, index );

            if( index < ( temp.length() - 1 ) ) {
                // port part is not empty
                try {

                    tempPort = Integer::parseInt( temp.substr( index + 1, std::string::npos ) );
                    if( tempPort < 0 ) {

                        if( forceServer ) {
                            throw URISyntaxException(
                                __FILE__, __LINE__, authority,
                                "Port number is missing", (int)hostindex + (int)index + 1 );
                        }

                        return result;
                    }

                } catch( NumberFormatException& ) {

                    if( forceServer ) {
                        throw URISyntaxException(
                            __FILE__, __LINE__, authority,
                            "Port number is malformed.", (int)hostindex + (int)index + 1 );
                    }

                    return result;
                }
            }

        } else {
            tempHost = temp;
        }

        if( tempHost == "" ) {
            if( forceServer ) {
                throw URISyntaxException(
                    __FILE__, __LINE__, authority,
                    "Host name is empty", (int)hostindex );
            }
            return result;
        }

        if( !isValidHost( forceServer, tempHost ) ) {
            return result;
        }

        // this is a server based uri,
        // fill in the userinfo, host and port fields
        result.setUserInfo( tempUserinfo );
        result.setHost( tempHost );
        result.setPort( tempPort );
        result.setServerAuthority( true );

        // We know its valid now so tag it.
        result.setValid( true );

        return result;
    }
    DECAF_CATCH_RETHROW( URISyntaxException )
    DECAF_CATCH_EXCEPTION_CONVERT( Exception, URISyntaxException )
    DECAF_CATCHALL_THROW( URISyntaxException )
}

////////////////////////////////////////////////////////////////////////////////
// Rejects user info that contains a '[' or ']' character.
//
// @param uri       the text reported in the exception.
// @param userinfo  the user info portion to scan.
// @param index     offset added to the position of the offending character.
//
// @throws URISyntaxException if a bracket is found.
void URIHelper::validateUserinfo( const std::string& uri, const std::string& userinfo, std::size_t index ) {

    for( std::size_t i = 0; i < userinfo.length(); i++ ) {

        char ch = userinfo.at( i );
        if( ch == ']' || ch == '[' ) {
            throw URISyntaxException(
                __FILE__, __LINE__, uri,
                "User Info cannot contain '[' or ']'", (int)( index + i ) );
        }
    }
}

////////////////////////////////////////////////////////////////////////////////
// Decides whether host is a bracketed IPv6 literal, a domain name or a dotted
// IPv4 address and validates it accordingly.
//
// @param forceServer  when true an invalid domain name or IPv4 address raises
//                     a URISyntaxException instead of returning false.
// @param host         the host text; the only caller in this file
//                     (parseAuthority) never passes an empty string.
//
// @returns true if the host is valid, false otherwise.
//
// @throws URISyntaxException for a malformed bracketed address, for stray
//         brackets, or for any invalid host when forceServer is set.
bool URIHelper::isValidHost( bool forceServer, const std::string& host ) {

    try{

        if( host.at( 0 ) == '[' ) {

            // ipv6 address
            if( host.at( host.length() - 1 ) != ']' ) {
                throw URISyntaxException(
                    __FILE__, __LINE__, host,
                    "Host address does not end in ']'", 0 );
            }

            if( !isValidIP6Address( host ) ) {
                throw URISyntaxException(
                    __FILE__, __LINE__, host,
                    "Host IPv6 address is not valid" );
            }

            return true;
        }

        // '[' and ']' can only be the first char and last char
        // of the host name
        if( host.find( '[' ) != std::string::npos ||
            host.find( ']' ) != std::string::npos ) {

            throw URISyntaxException(
                __FILE__, __LINE__, host,
                "Unexpected '[' or ']' found in address" );
        }

        // A host whose final label does not start with a digit is treated as
        // a domain name, anything else as a dotted IPv4 address.
        std::size_t index = host.find_last_of( '.' );
        if( index == std::string::npos ||
            index == host.length() - 1 ||
            !Character::isDigit( host.at( index + 1 ) ) ) {

            // domain name
            if( isValidDomainName( host ) ) {
                return true;
            }

            if( forceServer ) {
                throw URISyntaxException(
                    __FILE__, __LINE__, host,
                    "Host address is not valid" );
            }

            return false;
        }

        // IPv4 address
        if( isValidIPv4Address( host ) ) {
            return true;
        }

        if( forceServer ) {
            throw URISyntaxException(
                __FILE__, __LINE__, host,
                "Host IPv4 address is not valid" );
        }

        return false;
    }
    DECAF_CATCH_RETHROW( URISyntaxException )
    DECAF_CATCH_EXCEPTION_CONVERT( Exception, URISyntaxException )
    DECAF_CATCHALL_THROW( URISyntaxException )
}

////////////////////////////////////////////////////////////////////////////////
// Checks that host passes URIEncoderDecoder::validateSimple with "-." as the
// extra legal characters, that no '.' separated label starts or ends with
// '-', and that the last label of a multi-label name does not start with a
// digit.
//
// @returns true if host is an acceptable domain name, false otherwise.
bool URIHelper::isValidDomainName( const std::string& host ) {

    try {
        URIEncoderDecoder::validateSimple( host, "-." );
    } catch( URISyntaxException& ) {
        return false;
    }

    std::string label = "";
    StringTokenizer st( host, "." );

    while( st.hasMoreTokens() ) {
        label = st.nextToken();
        // The empty() test only protects the dereferences below should the
        // tokenizer ever hand back an empty token.
        if( label.empty() || *(label.begin()) == '-' || *(label.rbegin()) == '-' ) {
            return false;
        }
    }

    if( label != host ) {
        // A host such as "." produces no label at all; reject it here rather
        // than letting label.at( 0 ) throw std::out_of_range.
        if( label.empty() || Character::isDigit( label.at( 0 ) ) ) {
            return false;
        }
    }

    return true;
}

////////////////////////////////////////////////////////////////////////////////
// Checks that host consists of exactly four '.' separated integers, each in
// the range 0 to 255.
//
// @returns true if host is a valid dotted IPv4 address, false otherwise.
bool URIHelper::isValidIPv4Address( const std::string& host ) {

    std::size_t index;
    std::size_t index2;

    try {

        int num;

        // Each find() result is checked before it is used in any arithmetic.
        // Without the checks std::string::npos wraps around in the offset
        // computations and inputs such as "1.2" were accepted as valid.
        index = host.find( '.' );
        if( index == std::string::npos ) {
            return false;
        }

        num = Integer::parseInt( host.substr( 0, index ) );
        if( num < 0 || num > 255 ) {
            return false;
        }

        index2 = host.find( '.', index + 1 );
        if( index2 == std::string::npos ) {
            return false;
        }

        num = Integer::parseInt( host.substr( index + 1, index2 - index - 1 ) );
        if( num < 0 || num > 255 ) {
            return false;
        }

        index = host.find( '.', index2 + 1 );
        if( index == std::string::npos ) {
            return false;
        }

        num = Integer::parseInt( host.substr( index2 + 1, index - index2 - 1 ) );
        if( num < 0 || num > 255 ) {
            return false;
        }

        // Whatever follows the third dot must parse as the last number, so
        // any additional dot makes parseInt fail and the address is rejected.
        num = Integer::parseInt( host.substr( index + 1, std::string::npos ) );
        if( num < 0 || num > 255 ) {
            return false;
        }

    } catch( Exception& ) {
        return false;
    }

    return true;
}

////////////////////////////////////////////////////////////////////////////////
// Checks whether ipAddress is a textual IPv6 address, optionally enclosed in
// square brackets and optionally ending in a dotted IPv4 quad.
//
// @returns true if the address is valid, false otherwise.
bool URIHelper::isValidIP6Address( const std::string& ipAddress ) {

    std::size_t length = ipAddress.length();
    bool doubleColon = false;
    int numberOfColons = 0;
    int numberOfPeriods = 0;
    std::string word = "";
    char c = 0;
    char prevChar = 0;
    int offset = 0; // offset for [] ip addresses

    if( length < 2 ) {
        return false;
    }

    for( std::size_t i = 0; i < length; i++ ) {

        prevChar = c;
        c = ipAddress.at( i );

        switch( c ) {

            // case for an open bracket [x:x:x:...x]
            case '[':
                if( i != 0 ) {
                    return false; // must be first character
                }
                if( ipAddress.at( length - 1 ) != ']' ) {
                    return false; // must have a close ]
                }
                if( ( ipAddress.at( 1 ) == ':' ) && ( ipAddress.at( 2 ) != ':' ) ) {
                    return false;
                }
                offset = 1;
                if( length < 4 ) {
                    return false;
                }
                break;

            // case for a closed bracket at end of IP [x:x:x:...x]
            case ']':
                if( i != length - 1 ) {
                    return false; // must be last character
                }
                if( ipAddress.at( 0 ) != '[' ) {
                    return false; // must have a open [
                }
                break;

            // case for the last 32-bits represented as IPv4
            // x:x:x:x:x:x:d.d.d.d
            case '.':
                numberOfPeriods++;
                if( numberOfPeriods > 3 ) {
                    return false;
                }
                if( !isValidIP4Word( word ) ) {
                    return false;
                }
                if( numberOfColons != 6 && !doubleColon ) {
                    return false;
                }
                // a special case ::1:2:3:4:5:d.d.d.d allows 7 colons with
                // an IPv4 ending, otherwise 7 :'s is bad
                if( numberOfColons == 7 &&
                    ipAddress.at( 0 + offset ) != ':' &&
                    ipAddress.at( 1 + offset ) != ':' ) {

                    return false;
                }
                word = "";
                break;

            case ':':
                numberOfColons++;
                if( numberOfColons > 7 ) {
                    return false;
                }
                if( numberOfPeriods > 0 ) {
                    return false;
                }
                if( prevChar == ':' ) {
                    if( doubleColon ) {
                        return false;
                    }
                    doubleColon = true;
                }
                word = "";
                break;

            default:
                // The length is tested before the character is appended, so
                // a word can grow to at most four characters.
                if( word.length() > 3 ) {
                    return false;
                }
                if( !isValidHexChar( c ) ) {
                    return false;
                }
                word += c;
        }
    }

    // Check if we have an IPv4 ending
    if( numberOfPeriods > 0 ) {
        if( numberOfPeriods != 3 || !isValidIP4Word( word ) ) {
            return false;
        }
    } else {

        // If we're at then end and we haven't had 7 colons then there
        // is a problem unless we encountered a doubleColon
        if( numberOfColons != 7 && !doubleColon ) {
            return false;
        }

        // If we have an empty word at the end, it means we ended in
        // either a : or a .
        // If we did not end in :: then this is invalid
        if( word == "" &&
            ipAddress.at( length - 1 - offset ) != ':' &&
            ipAddress.at( length - 2 - offset ) != ':' ) {

            return false;
        }
    }

    return true;
}

////////////////////////////////////////////////////////////////////////////////
// Checks that word is one to three decimal digits whose value is at most 255.
//
// @returns true if word is a valid IPv4 address component, false otherwise.
bool URIHelper::isValidIP4Word( const std::string& word ) {

    if( word.length() < 1 || word.length() > 3 ) {
        return false;
    }

    for( std::size_t i = 0; i < word.length(); i++ ) {
        if( !Character::isDigit( word.at( i ) ) ) {
            return false;
        }
    }

    if( Integer::parseInt( word ) > 255 ) {
        return false;
    }

    return true;
}

////////////////////////////////////////////////////////////////////////////////
// Returns true if c is one of 0-9, A-F or a-f.
bool URIHelper::isValidHexChar( char c ) {
    return (c >= '0' && c <= '9') ||
           (c >= 'A' && c <= 'F') ||
           (c >= 'a' && c <= 'f');
}