URI.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406
  1. //
  2. // URI.h
  3. //
  4. // Library: Foundation
  5. // Package: URI
  6. // Module: URI
  7. //
  8. // Definition of the URI class.
  9. //
  10. // Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
  11. // and Contributors.
  12. //
  13. // SPDX-License-Identifier: BSL-1.0
  14. //
  15. #ifndef Foundation_URI_INCLUDED
  16. #define Foundation_URI_INCLUDED
  17. #include "Poco/Foundation.h"
  18. #include <vector>
  19. #include <utility>
  20. namespace Poco {
  21. class Path;
  22. class Foundation_API URI
  23. /// A Uniform Resource Identifier, as specified in RFC 3986.
  24. ///
  25. /// The URI class provides methods for building URIs from their
  26. /// parts, as well as for splitting URIs into their parts.
  27. /// Furthermore, the class provides methods for resolving
  28. /// relative URIs against base URIs.
  29. ///
  30. /// The class automatically performs a few normalizations on
  31. /// all URIs and URI parts passed to it:
  32. /// * scheme identifiers are converted to lower case
  33. /// * percent-encoded characters are decoded (except for the query string)
  34. /// * optionally, dot segments are removed from paths (see normalize())
  35. ///
  36. /// Note that dealing with query strings requires some precautions, as, internally,
  37. /// query strings are stored in percent-encoded form, while all other parts of the URI
  38. /// are stored in decoded form. While parsing query strings from properly encoded URLs
  39. /// generally works, explicitly setting query strings with setQuery() or extracting
  40. /// query strings with getQuery() may lead to ambiguities. See the descriptions of
  41. /// setQuery(), setRawQuery(), getQuery() and getRawQuery() for more information.
  42. {
  43. public:
  44. typedef std::vector<std::pair<std::string, std::string> > QueryParameters;
  45. URI();
  46. /// Creates an empty URI.
  47. explicit URI(const std::string& uri);
  48. /// Parses a URI from the given string. Throws a
  49. /// SyntaxException if the uri is not valid.
  50. explicit URI(const char* uri);
  51. /// Parses a URI from the given string. Throws a
  52. /// SyntaxException if the uri is not valid.
  53. URI(const std::string& scheme, const std::string& pathEtc);
  54. /// Creates a URI from its parts.
  55. URI(const std::string& scheme, const std::string& authority, const std::string& pathEtc);
  56. /// Creates a URI from its parts.
  57. URI(const std::string& scheme, const std::string& authority, const std::string& path, const std::string& query);
  58. /// Creates a URI from its parts.
  59. URI(const std::string& scheme, const std::string& authority, const std::string& path, const std::string& query, const std::string& fragment);
  60. /// Creates a URI from its parts.
  61. URI(const URI& uri);
  62. /// Copy constructor. Creates a URI from another one.
  63. URI(const URI& baseURI, const std::string& relativeURI);
  64. /// Creates a URI from a base URI and a relative URI, according to
  65. /// the algorithm in section 5.2 of RFC 3986.
  66. explicit URI(const Path& path);
  67. /// Creates a URI from a path.
  68. ///
  69. /// The path will be made absolute, and a file:// URI
  70. /// will be built from it.
  71. ~URI();
  72. /// Destroys the URI.
  73. URI& operator = (const URI& uri);
  74. /// Assignment operator.
  75. URI& operator = (const std::string& uri);
  76. /// Parses and assigns a URI from the given string. Throws a
  77. /// SyntaxException if the uri is not valid.
  78. URI& operator = (const char* uri);
  79. /// Parses and assigns a URI from the given string. Throws a
  80. /// SyntaxException if the uri is not valid.
  81. void swap(URI& uri);
  82. /// Swaps the URI with another one.
  83. void clear();
  84. /// Clears all parts of the URI.
  85. std::string toString() const;
  86. /// Returns a string representation of the URI.
  87. ///
  88. /// Characters in the path, query and fragment parts will be
  89. /// percent-encoded as necessary.
  90. const std::string& getScheme() const;
  91. /// Returns the scheme part of the URI.
  92. void setScheme(const std::string& scheme);
  93. /// Sets the scheme part of the URI. The given scheme
  94. /// is converted to lower-case.
  95. ///
  96. /// A list of registered URI schemes can be found
  97. /// at <http://www.iana.org/assignments/uri-schemes>.
  98. const std::string& getUserInfo() const;
  99. /// Returns the user-info part of the URI.
  100. void setUserInfo(const std::string& userInfo);
  101. /// Sets the user-info part of the URI.
  102. const std::string& getHost() const;
  103. /// Returns the host part of the URI.
  104. void setHost(const std::string& host);
  105. /// Sets the host part of the URI.
  106. unsigned short getPort() const;
  107. /// Returns the port number part of the URI.
  108. ///
  109. /// If no port number (0) has been specified, the
  110. /// well-known port number (e.g., 80 for http) for
  111. /// the given scheme is returned if it is known.
  112. /// Otherwise, 0 is returned.
  113. void setPort(unsigned short port);
  114. /// Sets the port number part of the URI.
  115. std::string getAuthority() const;
  116. /// Returns the authority part (userInfo, host and port)
  117. /// of the URI.
  118. ///
  119. /// If the port number is a well-known port
  120. /// number for the given scheme (e.g., 80 for http), it
  121. /// is not included in the authority.
  122. void setAuthority(const std::string& authority);
  123. /// Parses the given authority part for the URI and sets
  124. /// the user-info, host, port components accordingly.
  125. const std::string& getPath() const;
  126. /// Returns the decoded path part of the URI.
  127. void setPath(const std::string& path);
  128. /// Sets the path part of the URI.
  129. std::string getQuery() const;
  130. /// Returns the decoded query part of the URI.
  131. ///
  132. /// Note that encoded ampersand characters ('&', "%26")
  133. /// will be decoded, which could cause ambiguities if the query
  134. /// string contains multiple parameters and a parameter name
  135. /// or value contains an ampersand as well.
  136. /// In such a case it's better to use getRawQuery() or
  137. /// getQueryParameters().
  138. void setQuery(const std::string& query);
  139. /// Sets the query part of the URI.
  140. ///
  141. /// The query string will be percent-encoded. If the query
  142. /// already contains percent-encoded characters, these
  143. /// will be double-encoded, which is probably not what's
  144. /// intended by the caller. Furthermore, ampersand ('&')
  145. /// characters in the query will not be encoded. This could
  146. /// lead to ambiguity issues if the query string contains multiple
  147. /// name-value parameters separated by ampersand, and if any
  148. /// name or value also contains an ampersand. In such a
  149. /// case, it's better to use setRawQuery() with a properly
  150. /// percent-encoded query string, or use addQueryParameter()
  151. /// or setQueryParameters(), which take care of appropriate
  152. /// percent encoding of parameter names and values.
  153. void addQueryParameter(const std::string& param, const std::string& val = "");
  154. /// Adds "param=val" to the query; "param" may not be empty.
  155. /// If val is empty, only '=' is appended to the parameter.
  156. ///
  157. /// In addition to regular encoding, this function also encodes '&' and '=',
  158. /// if found in param or val.
  159. const std::string& getRawQuery() const;
  160. /// Returns the query string in raw form, which usually
  161. /// means percent encoded.
  162. void setRawQuery(const std::string& query);
  163. /// Sets the query part of the URI.
  164. ///
  165. /// The given query string must be properly percent-encoded.
  166. QueryParameters getQueryParameters() const;
  167. /// Returns the decoded query string parameters as a vector
  168. /// of name-value pairs.
  169. void setQueryParameters(const QueryParameters& params);
  170. /// Sets the query part of the URI from a vector
  171. /// of query parameters.
  172. ///
  173. /// Calls addQueryParameter() for each parameter name and value.
  174. const std::string& getFragment() const;
  175. /// Returns the fragment part of the URI.
  176. void setFragment(const std::string& fragment);
  177. /// Sets the fragment part of the URI.
  178. void setPathEtc(const std::string& pathEtc);
  179. /// Sets the path, query and fragment parts of the URI.
  180. std::string getPathEtc() const;
  181. /// Returns the encoded path, query and fragment parts of the URI.
  182. std::string getPathAndQuery() const;
  183. /// Returns the encoded path and query parts of the URI.
  184. void resolve(const std::string& relativeURI);
  185. /// Resolves the given relative URI against the base URI.
  186. /// See section 5.2 of RFC 3986 for the algorithm used.
  187. void resolve(const URI& relativeURI);
  188. /// Resolves the given relative URI against the base URI.
  189. /// See section 5.2 of RFC 3986 for the algorithm used.
  190. bool isRelative() const;
  191. /// Returns true if the URI is a relative reference, false otherwise.
  192. ///
  193. /// A relative reference does not contain a scheme identifier.
  194. /// Relative references are usually resolved against an absolute
  195. /// base reference.
  196. bool empty() const;
  197. /// Returns true if the URI is empty, false otherwise.
  198. bool operator == (const URI& uri) const;
  199. /// Returns true if both URIs are identical, false otherwise.
  200. ///
  201. /// Two URIs are identical if their scheme, authority,
  202. /// path, query and fragment part are identical.
  203. bool operator == (const std::string& uri) const;
  204. /// Parses the given URI and returns true if both URIs are identical,
  205. /// false otherwise.
  206. bool operator != (const URI& uri) const;
  207. /// Returns true if both URIs are not identical, false otherwise.
  208. bool operator != (const std::string& uri) const;
  209. /// Parses the given URI and returns true if both URIs are not identical,
  210. /// false otherwise.
  211. void normalize();
  212. /// Normalizes the URI by removing all but leading . and .. segments from the path.
  213. ///
  214. /// If the first path segment in a relative path contains a colon (:),
  215. /// such as in a Windows path containing a drive letter, a dot segment (./)
  216. /// is prepended in accordance with section 3.3 of RFC 3986.
  217. void getPathSegments(std::vector<std::string>& segments);
  218. /// Places the single path segments (delimited by slashes) into the
  219. /// given vector. NOTE(review): declared non-const, so it cannot be called on a const URI -- confirm whether that is intended.
  220. static void encode(const std::string& str, const std::string& reserved, std::string& encodedStr);
  221. /// URI-encodes the given string by escaping reserved and non-ASCII
  222. /// characters. The encoded string is appended to encodedStr.
  223. static void decode(const std::string& str, std::string& decodedStr, bool plusAsSpace = false);
  224. /// URI-decodes the given string by replacing percent-encoded
  225. /// characters with the actual character. The decoded string
  226. /// is appended to decodedStr.
  227. ///
  228. /// When plusAsSpace is true, non-encoded plus signs in the query are decoded as spaces.
  229. /// (http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.1)
  230. protected:
  231. bool equals(const URI& uri) const;
  232. /// Returns true if both URIs are equivalent.
  233. bool isWellKnownPort() const;
  234. /// Returns true if the URI's port number is a well-known one
  235. /// (for example, 80, if the scheme is http).
  236. unsigned short getWellKnownPort() const;
  237. /// Returns the well-known port number for the URI's scheme,
  238. /// or 0 if the port number is not known.
  239. void parse(const std::string& uri);
  240. /// Parses and assigns a URI from the given string. Throws a
  241. /// SyntaxException if the uri is not valid.
  242. void parseAuthority(std::string::const_iterator& it, const std::string::const_iterator& end);
  243. /// Parses and sets the user-info, host and port from the given data.
  244. void parseHostAndPort(std::string::const_iterator& it, const std::string::const_iterator& end);
  245. /// Parses and sets the host and port from the given data.
  246. void parsePath(std::string::const_iterator& it, const std::string::const_iterator& end);
  247. /// Parses and sets the path from the given data.
  248. void parsePathEtc(std::string::const_iterator& it, const std::string::const_iterator& end);
  249. /// Parses and sets the path, query and fragment from the given data.
  250. void parseQuery(std::string::const_iterator& it, const std::string::const_iterator& end);
  251. /// Parses and sets the query from the given data.
  252. void parseFragment(std::string::const_iterator& it, const std::string::const_iterator& end);
  253. /// Parses and sets the fragment from the given data.
  254. void mergePath(const std::string& path);
  255. /// Appends a path to the URI's path.
  256. void removeDotSegments(bool removeLeading = true);
  257. /// Removes all dot segments from the path. Presumably leading dot segments are kept when removeLeading is false (cf. normalize()) -- TODO confirm against the implementation.
  258. static void getPathSegments(const std::string& path, std::vector<std::string>& segments);
  259. /// Places the single path segments (delimited by slashes) into the
  260. /// given vector.
  261. void buildPath(const std::vector<std::string>& segments, bool leadingSlash, bool trailingSlash);
  262. /// Builds the path from the given segments.
  263. static const std::string RESERVED_PATH;
  264. static const std::string RESERVED_QUERY;
  265. static const std::string RESERVED_QUERY_PARAM;
  266. static const std::string RESERVED_FRAGMENT;
  267. static const std::string ILLEGAL;
  268. private:
  269. std::string _scheme;
  270. std::string _userInfo;
  271. std::string _host;
  272. unsigned short _port;
  273. std::string _path;
  274. std::string _query;
  275. std::string _fragment;
  276. };
  277. //
  278. // inlines
  279. //
  280. inline const std::string& URI::getScheme() const // Inline accessor for the scheme part of the URI.
  281. {
  282. return _scheme; // const reference to the private member; no copy is made
  283. }
  284. inline const std::string& URI::getUserInfo() const // Inline accessor for the user-info part of the URI.
  285. {
  286. return _userInfo; // const reference to the private member; no copy is made
  287. }
  288. inline const std::string& URI::getHost() const // Inline accessor for the host part of the URI.
  289. {
  290. return _host; // const reference to the private member; no copy is made
  291. }
  292. inline const std::string& URI::getPath() const // Inline accessor for the path part (documented in the class as the decoded path).
  293. {
  294. return _path; // const reference to the private member; no copy is made
  295. }
  296. inline const std::string& URI::getRawQuery() const // Inline accessor for the query exactly as stored, without decoding.
  297. {
  298. return _query; // const reference to the private member; the class description says the query is kept percent-encoded
  299. }
  300. inline const std::string& URI::getFragment() const // Inline accessor for the fragment part of the URI.
  301. {
  302. return _fragment; // const reference to the private member; no copy is made
  303. }
  304. inline void swap(URI& u1, URI& u2) // Namespace-level swap overload for two URI objects.
  305. {
  306. u1.swap(u2); // forwards to the member function URI::swap()
  307. }
  308. } // namespace Poco
  309. #endif // Foundation_URI_INCLUDED