From 5044bcf5564f23e31724d25f40ffddc94525fd9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1t=C3=A9=20Kocsis?= Date: Sun, 25 May 2025 22:37:05 +0200 Subject: [PATCH 01/10] Add ext/uri skeleton along with uriparser Relates to https://github.com/php/php-src/pull/14461/ and https://wiki.php.net/rfc/url_parsing_api --- .gitattributes | 1 + .github/labeler.yml | 5 + EXTENSIONS | 6 + README.REDIST.BINS | 1 + codecov.yml | 2 + ext/uri/CREDITS | 2 + ext/uri/config.m4 | 23 + ext/uri/config.w32 | 8 + ext/uri/php_uri.c | 99 + ext/uri/php_uri.h | 27 + ext/uri/php_uri.stub.php | 23 + ext/uri/php_uri_arginfo.h | 38 + ext/uri/uriparser/COPYING | 36 + ext/uri/uriparser/include/uriparser/Uri.h | 1150 ++++++++ ext/uri/uriparser/include/uriparser/UriBase.h | 373 +++ .../uriparser/include/uriparser/UriDefsAnsi.h | 82 + .../include/uriparser/UriDefsConfig.h | 101 + .../include/uriparser/UriDefsUnicode.h | 82 + ext/uri/uriparser/include/uriparser/UriIp4.h | 110 + ext/uri/uriparser/src/UriCommon.c | 616 +++++ ext/uri/uriparser/src/UriCommon.h | 104 + ext/uri/uriparser/src/UriCompare.c | 168 ++ ext/uri/uriparser/src/UriConfig.h | 51 + ext/uri/uriparser/src/UriConfig.h.in | 51 + ext/uri/uriparser/src/UriEscape.c | 453 ++++ ext/uri/uriparser/src/UriFile.c | 242 ++ ext/uri/uriparser/src/UriIp4.c | 329 +++ ext/uri/uriparser/src/UriIp4Base.c | 96 + ext/uri/uriparser/src/UriIp4Base.h | 59 + ext/uri/uriparser/src/UriMemory.c | 471 ++++ ext/uri/uriparser/src/UriMemory.h | 78 + ext/uri/uriparser/src/UriNormalize.c | 827 ++++++ ext/uri/uriparser/src/UriNormalizeBase.c | 119 + ext/uri/uriparser/src/UriNormalizeBase.h | 53 + ext/uri/uriparser/src/UriParse.c | 2409 +++++++++++++++++ ext/uri/uriparser/src/UriParseBase.c | 90 + ext/uri/uriparser/src/UriParseBase.h | 55 + ext/uri/uriparser/src/UriQuery.c | 505 ++++ ext/uri/uriparser/src/UriRecompose.c | 577 ++++ ext/uri/uriparser/src/UriResolve.c | 329 +++ ext/uri/uriparser/src/UriShorten.c | 324 +++ scripts/dev/tidy.php | 1 + 42 files changed, 10176 insertions(+) create mode 100644 ext/uri/CREDITS create mode 100644 ext/uri/config.m4 create mode 100644 ext/uri/config.w32 create mode 100644 ext/uri/php_uri.c create mode 100644 ext/uri/php_uri.h create mode 100644 ext/uri/php_uri.stub.php create mode 100644 ext/uri/php_uri_arginfo.h create mode 100644 ext/uri/uriparser/COPYING create mode 100644 ext/uri/uriparser/include/uriparser/Uri.h create mode 100644 ext/uri/uriparser/include/uriparser/UriBase.h create mode 100644 ext/uri/uriparser/include/uriparser/UriDefsAnsi.h create mode 100644 ext/uri/uriparser/include/uriparser/UriDefsConfig.h create mode 100644 ext/uri/uriparser/include/uriparser/UriDefsUnicode.h create mode 100644 ext/uri/uriparser/include/uriparser/UriIp4.h create mode 100644 ext/uri/uriparser/src/UriCommon.c create mode 100644 ext/uri/uriparser/src/UriCommon.h create mode 100644 ext/uri/uriparser/src/UriCompare.c create mode 100644 ext/uri/uriparser/src/UriConfig.h create mode 100644 ext/uri/uriparser/src/UriConfig.h.in create mode 100644 ext/uri/uriparser/src/UriEscape.c create mode 100644 ext/uri/uriparser/src/UriFile.c create mode 100644 ext/uri/uriparser/src/UriIp4.c create mode 100644 ext/uri/uriparser/src/UriIp4Base.c create mode 100644 ext/uri/uriparser/src/UriIp4Base.h create mode 100644 ext/uri/uriparser/src/UriMemory.c create mode 100644 ext/uri/uriparser/src/UriMemory.h create mode 100644 ext/uri/uriparser/src/UriNormalize.c create mode 100644 ext/uri/uriparser/src/UriNormalizeBase.c create mode 100644 ext/uri/uriparser/src/UriNormalizeBase.h create mode 100644 ext/uri/uriparser/src/UriParse.c create mode 100644 ext/uri/uriparser/src/UriParseBase.c create mode 100644 ext/uri/uriparser/src/UriParseBase.h create mode 100644 ext/uri/uriparser/src/UriQuery.c create mode 100644 ext/uri/uriparser/src/UriRecompose.c create mode 100644 ext/uri/uriparser/src/UriResolve.c create mode 100644 ext/uri/uriparser/src/UriShorten.c diff --git a/.gitattributes b/.gitattributes index 953363407e0d3..8dea3f8bbafbd 100644 --- a/.gitattributes +++ b/.gitattributes @@ -31,3 +31,4 @@ # Vendored libraries /ext/lexbor/lexbor linguist-vendored +/ext/uri/uriparser linguist-vendored diff --git a/.github/labeler.yml b/.github/labeler.yml index 00807db1d135b..40ac864fd56c5 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -320,6 +320,11 @@ - any-glob-to-any-file: - ext/tokenizer/**/* +"Extension: uri": + - changed-files: + - any-glob-to-any-file: + - ext/uri/**/* + "Extension: xml": - changed-files: - any-glob-to-any-file: diff --git a/EXTENSIONS b/EXTENSIONS index 5a2345a90b914..34e289ffcae2b 100644 --- a/EXTENSIONS +++ b/EXTENSIONS @@ -503,6 +503,12 @@ PRIMARY MAINTAINER: Andrei Zmievski (2002 - 2002) MAINTENANCE: Maintained STATUS: Working ------------------------------------------------------------------------------- +EXTENSION: uri +PRIMARY MAINTAINER Máté Kocsis (2025 - 2025) +MAINTENANCE: Maintained +STATUS: Working +SINCE: 8.5.0 +------------------------------------------------------------------------------- EXTENSION: zip PRIMARY MAINTAINER: Pierre-Alain Joye (2006 - 2011) Remi Collet (2013-2020) diff --git a/README.REDIST.BINS b/README.REDIST.BINS index 4ba325830e2ca..37f41fdf78183 100644 --- a/README.REDIST.BINS +++ b/README.REDIST.BINS @@ -19,6 +19,7 @@ 19. xxHash (ext/hash/xxhash) 20. Lexbor (ext/lexbor/lexbor) see ext/lexbor/LICENSE 21. Portions of libcperciva (ext/hash/hash_sha_{ni,sse2}.c) see the header in the source file +22. uriparser (ext/uri/uriparser) see ext/uri/uriparser/COPYING 3. pcre2lib (ext/pcre) diff --git a/codecov.yml b/codecov.yml index 65ffec2475ae8..a489bffa656ab 100644 --- a/codecov.yml +++ b/codecov.yml @@ -9,3 +9,5 @@ ignore: - "ext/lexbor/lexbor/ports" - "ext/lexbor/lexbor/tag" - "ext/pcre/pcre2lib" + - "ext/uri/uriparser/include" + - "ext/uri/uriparser/src" diff --git a/ext/uri/CREDITS b/ext/uri/CREDITS new file mode 100644 index 0000000000000..07147e007507f --- /dev/null +++ b/ext/uri/CREDITS @@ -0,0 +1,2 @@ +uri +Máté Kocsis, Tim Düsterhus, Ignace Nyamagana Butera, Arnaud Le Blanc, Niels Dossche, Nicolas Grekas diff --git a/ext/uri/config.m4 b/ext/uri/config.m4 new file mode 100644 index 0000000000000..794a31d21d208 --- /dev/null +++ b/ext/uri/config.m4 @@ -0,0 +1,23 @@ +dnl Configure options +dnl + +PHP_INSTALL_HEADERS([ext/uri], m4_normalize([ + php_lexbor.h + php_uri.h + php_uri_common.h + php_uriparser.h +])) + +AC_DEFINE([URI_ENABLE_ANSI], [1], [Define to 1 for enabling ANSI support of uriparser.]) +AC_DEFINE([URI_NO_UNICODE], [1], [Define to 1 for disabling unicode support of uriparser.]) + +URIPARSER_DIR="uriparser" +URIPARSER_SOURCES="$URIPARSER_DIR/src/UriCommon.c $URIPARSER_DIR/src/UriCompare.c $URIPARSER_DIR/src/UriEscape.c \ +$URIPARSER_DIR/src/UriFile.c $URIPARSER_DIR/src/UriIp4.c $URIPARSER_DIR/src/UriIp4Base.c \ +$URIPARSER_DIR/src/UriMemory.c $URIPARSER_DIR/src/UriNormalize.c $URIPARSER_DIR/src/UriNormalizeBase.c \ +$URIPARSER_DIR/src/UriParse.c $URIPARSER_DIR/src/UriParseBase.c $URIPARSER_DIR/src/UriQuery.c \ +$URIPARSER_DIR/src/UriRecompose.c $URIPARSER_DIR/src/UriResolve.c $URIPARSER_DIR/src/UriShorten.c" + +PHP_NEW_EXTENSION(uri, [php_uri.c php_uri_common.c php_lexbor.c php_uriparser.c $URIPARSER_SOURCES], [no],,[-I$ext_builddir/$URIPARSER_DIR/include -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1]) +PHP_ADD_EXTENSION_DEP(uri, lexbor) +PHP_ADD_BUILD_DIR($ext_builddir/$URIPARSER_DIR/src $ext_builddir/$URIPARSER_DIR/include) diff --git a/ext/uri/config.w32 b/ext/uri/config.w32 new file mode 100644 index 0000000000000..3d5a0fc44102f --- /dev/null +++ b/ext/uri/config.w32 @@ -0,0 +1,8 @@ +EXTENSION("uri", "php_lexbor.c php_uri.c php_uri_common.c php_uriparser.c", false /* never shared */, "/I ext/lexbor /DZEND_ENABLE_STATIC_TSRMLS_CACHE=1"); + +AC_DEFINE("URI_ENABLE_ANSI", 1, "Define to 1 for enabling ANSI support of uriparser.") +AC_DEFINE("URI_NO_UNICODE", 1, "Define to 1 for disabling unicode support of uriparser.") + +ADD_EXTENSION_DEP('uri', 'lexbor'); +ADD_SOURCES("ext/uri/uriparser/src", "UriCommon.c UriCompare.c UriEscape.c UriFile.c UriIp4.c UriIp4Base.c UriMemory.c UriNormalize.c UriNormalizeBase.c UriParse.c UriQuery.c UriRecompose.c UriShorten.c", "lexbor"); +PHP_INSTALL_HEADERS("ext/uri", "php_lexbor.h php_uri.h php_uri_common.h php_uriparser.h uriparser/src uriparser/include"); diff --git a/ext/uri/php_uri.c b/ext/uri/php_uri.c new file mode 100644 index 0000000000000..5bf1cfbf2adb3 --- /dev/null +++ b/ext/uri/php_uri.c @@ -0,0 +1,99 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | https://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Máté Kocsis | + +----------------------------------------------------------------------+ +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "php.h" +#include "Zend/zend_interfaces.h" +#include "Zend/zend_exceptions.h" +#include "Zend/zend_attributes.h" +#include "main/php_ini.h" + +#include "php_uri.h" +#include "php_uri_arginfo.h" + +zend_class_entry *uri_exception_ce; +zend_class_entry *invalid_uri_exception_ce; +zend_class_entry *whatwg_invalid_url_exception_ce; + +#define URIPARSER_VERSION "0.9.8" + +static const zend_module_dep uri_deps[] = { + ZEND_MOD_REQUIRED("lexbor") + ZEND_MOD_END +}; + + +static PHP_MINIT_FUNCTION(uri) +{ + uri_exception_ce = register_class_Uri_UriException(zend_ce_exception); + invalid_uri_exception_ce = register_class_Uri_InvalidUriException(uri_exception_ce); + whatwg_invalid_url_exception_ce = register_class_Uri_WhatWg_InvalidUrlException(invalid_uri_exception_ce); + + return SUCCESS; +} + +static PHP_MINFO_FUNCTION(uri) +{ + php_info_print_table_start(); + php_info_print_table_row(2, "uri support", "active"); + php_info_print_table_row(2, "uriparser version", URIPARSER_VERSION); + php_info_print_table_end(); +} + +static PHP_MSHUTDOWN_FUNCTION(uri) +{ + UNREGISTER_INI_ENTRIES(); + + return SUCCESS; +} + +PHP_RINIT_FUNCTION(uri) +{ +#if defined(COMPILE_DL_URI) && defined(ZTS) + ZEND_TSRMLS_CACHE_UPDATE(); +#endif + + return SUCCESS; +} + +PHP_RSHUTDOWN_FUNCTION(uri) +{ + return SUCCESS; +} + +zend_module_entry uri_module_entry = { + STANDARD_MODULE_HEADER_EX, NULL, + uri_deps, + "uri", /* Extension name */ + NULL, /* zend_function_entry */ + PHP_MINIT(uri), /* PHP_MINIT - Module initialization */ + PHP_MSHUTDOWN(uri), /* PHP_MSHUTDOWN - Module shutdown */ + PHP_RINIT(uri), /* PHP_RINIT - Request initialization */ + PHP_RSHUTDOWN(uri), /* PHP_RSHUTDOWN - Request shutdown */ + PHP_MINFO(uri), /* PHP_MINFO - Module info */ + PHP_VERSION, /* Version */ + STANDARD_MODULE_PROPERTIES +}; + +#ifdef COMPILE_DL_URI +#ifdef ZTS +ZEND_TSRMLS_CACHE_DEFINE() +#endif +ZEND_GET_MODULE(uri) +#endif diff --git a/ext/uri/php_uri.h b/ext/uri/php_uri.h new file mode 100644 index 0000000000000..d988495eac04d --- /dev/null +++ b/ext/uri/php_uri.h @@ -0,0 +1,27 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | https://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Máté Kocsis | + +----------------------------------------------------------------------+ +*/ + +#ifndef PHP_URI_H +#define PHP_URI_H + +extern zend_module_entry uri_module_entry; +#define phpext_uri_ptr &uri_module_entry + +#if defined(ZTS) && defined(COMPILE_DL_URI) +ZEND_TSRMLS_CACHE_EXTERN() +#endif + +#endif diff --git a/ext/uri/php_uri.stub.php b/ext/uri/php_uri.stub.php new file mode 100644 index 0000000000000..926be1fbb8267 --- /dev/null +++ b/ext/uri/php_uri.stub.php @@ -0,0 +1,23 @@ + +Copyright (C) 2007, Sebastian Pipping +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + 1. Redistributions of source code must retain the above + copyright notice, this list of conditions and the following + disclaimer. + + 2. Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials + provided with the distribution. + + 3. Neither the name of the copyright holder nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/ext/uri/uriparser/include/uriparser/Uri.h b/ext/uri/uriparser/include/uriparser/Uri.h new file mode 100644 index 0000000000000..d6a85f3d9ac57 --- /dev/null +++ b/ext/uri/uriparser/include/uriparser/Uri.h @@ -0,0 +1,1150 @@ +/* e8e2c75d033ddfe256fe87c3fd5a330a6f2c9cbb376ebd83a1b3263e804c766a (0.9.8+) + * + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file Uri.h + * Holds the RFC 3986 %URI parser interface. + * NOTE: This header includes itself twice. + */ + +#if (defined(URI_PASS_ANSI) && !defined(URI_H_ANSI)) \ + || (defined(URI_PASS_UNICODE) && !defined(URI_H_UNICODE)) \ + || (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* What encodings are enabled? */ +#include "UriDefsConfig.h" +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "Uri.h" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "Uri.h" +# undef URI_PASS_UNICODE +# endif +/* Only one pass for each encoding */ +#elif (defined(URI_PASS_ANSI) && !defined(URI_H_ANSI) \ + && defined(URI_ENABLE_ANSI)) || (defined(URI_PASS_UNICODE) \ + && !defined(URI_H_UNICODE) && defined(URI_ENABLE_UNICODE)) +# ifdef URI_PASS_ANSI +# define URI_H_ANSI 1 +# include "UriDefsAnsi.h" +# else +# define URI_H_UNICODE 1 +# include "UriDefsUnicode.h" +# endif + + + +#ifdef __cplusplus +extern "C" { +#endif + + + +#ifndef URI_DOXYGEN +# include "UriBase.h" +#endif + +extern const URI_CHAR * const URI_FUNC(SafeToPointTo); + +/** + * Specifies a range of characters within a string. + * The range includes all characters from first + * to one before afterLast. So if both are + * non-NULL the difference is the length of the text range. + * + * @see UriUriA + * @see UriPathSegmentA + * @see UriHostDataA + * @since 0.3.0 + */ +typedef struct URI_TYPE(TextRangeStruct) { + const URI_CHAR * first; /**< Pointer to first character */ + const URI_CHAR * afterLast; /**< Pointer to character after the last one still in */ +} URI_TYPE(TextRange); /**< @copydoc UriTextRangeStructA */ + + + +/** + * Represents a path segment within a %URI path. + * More precisely it is a node in a linked + * list of path segments. + * + * @see UriUriA + * @since 0.3.0 + */ +typedef struct URI_TYPE(PathSegmentStruct) { + URI_TYPE(TextRange) text; /**< Path segment name */ + struct URI_TYPE(PathSegmentStruct) * next; /**< Pointer to the next path segment in the list, can be NULL if last already */ + + void * reserved; /**< Reserved to the parser */ +} URI_TYPE(PathSegment); /**< @copydoc UriPathSegmentStructA */ + + + +/** + * Holds structured host information. + * This is either a IPv4, IPv6, plain + * text for IPvFuture or all zero for + * a registered name. + * + * @see UriUriA + * @since 0.3.0 + */ +typedef struct URI_TYPE(HostDataStruct) { + UriIp4 * ip4; /**< IPv4 address */ + UriIp6 * ip6; /**< IPv6 address */ + URI_TYPE(TextRange) ipFuture; /**< IPvFuture address */ +} URI_TYPE(HostData); /**< @copydoc UriHostDataStructA */ + + + +/** + * Represents an RFC 3986 %URI. + * Missing components can be {NULL, NULL} ranges. + * + * @see uriFreeUriMembersA + * @see uriFreeUriMembersMmA + * @see UriParserStateA + * @since 0.3.0 + */ +typedef struct URI_TYPE(UriStruct) { + URI_TYPE(TextRange) scheme; /**< Scheme (e.g. "http") */ + URI_TYPE(TextRange) userInfo; /**< User info (e.g. "user:pass") */ + URI_TYPE(TextRange) hostText; /**< Host text (set for all hosts, excluding square brackets) */ + URI_TYPE(HostData) hostData; /**< Structured host type specific data */ + URI_TYPE(TextRange) portText; /**< Port (e.g. "80") */ + URI_TYPE(PathSegment) * pathHead; /**< Head of a linked list of path segments */ + URI_TYPE(PathSegment) * pathTail; /**< Tail of the list behind pathHead */ + URI_TYPE(TextRange) query; /**< Query without leading "?" */ + URI_TYPE(TextRange) fragment; /**< Query without leading "#" */ + UriBool absolutePath; /**< Absolute path flag, distincting "a" and "/a"; + always URI_FALSE for URIs with host */ + UriBool owner; /**< Memory owner flag */ + + void * reserved; /**< Reserved to the parser */ +} URI_TYPE(Uri); /**< @copydoc UriUriStructA */ + + + +/** + * Represents a state of the %URI parser. + * Missing components can be NULL to reflect + * a components absence. + * + * @see uriFreeUriMembersA + * @see uriFreeUriMembersMmA + * @since 0.3.0 + */ +typedef struct URI_TYPE(ParserStateStruct) { + URI_TYPE(Uri) * uri; /**< Plug in the %URI structure to be filled while parsing here */ + int errorCode; /**< Code identifying the error which occurred */ + const URI_CHAR * errorPos; /**< Pointer to position in case of a syntax error */ + + void * reserved; /**< Reserved to the parser */ +} URI_TYPE(ParserState); /**< @copydoc UriParserStateStructA */ + + + +/** + * Represents a query element. + * More precisely it is a node in a linked + * list of query elements. + * + * @since 0.7.0 + */ +typedef struct URI_TYPE(QueryListStruct) { + const URI_CHAR * key; /**< Key of the query element */ + const URI_CHAR * value; /**< Value of the query element, can be NULL */ + + struct URI_TYPE(QueryListStruct) * next; /**< Pointer to the next key/value pair in the list, can be NULL if last already */ +} URI_TYPE(QueryList); /**< @copydoc UriQueryListStructA */ + +URI_PUBLIC UriBool URI_FUNC(IsHostSet)(const URI_TYPE(Uri) * uri); + +URI_PUBLIC UriBool URI_FUNC(PushPathSegment)(URI_TYPE(ParserState) * state, + const URI_CHAR * first, const URI_CHAR * afterLast, + UriMemoryManager * memory); + +/** + * Parses a RFC 3986 %URI. + * Uses default libc-based memory manager. + * + * @param state INOUT: Parser state with set output %URI, must not be NULL + * @param first IN: Pointer to the first character to parse, must not be NULL + * @param afterLast IN: Pointer to the character after the last to parse, must not be NULL + * @return 0 on success, error code otherwise + * + * @see uriParseUriA + * @see uriParseSingleUriA + * @see uriParseSingleUriExA + * @see uriToStringA + * @since 0.3.0 + * @deprecated Deprecated since 0.9.0, please migrate to uriParseSingleUriExA (with "Single"). + */ +URI_PUBLIC int URI_FUNC(ParseUriEx)(URI_TYPE(ParserState) * state, + const URI_CHAR * first, const URI_CHAR * afterLast); + + + +/** + * Parses a RFC 3986 %URI. + * Uses default libc-based memory manager. + * + * @param state INOUT: Parser state with set output %URI, must not be NULL + * @param text IN: Text to parse, must not be NULL + * @return 0 on success, error code otherwise + * + * @see uriParseUriExA + * @see uriParseSingleUriA + * @see uriParseSingleUriExA + * @see uriToStringA + * @since 0.3.0 + * @deprecated Deprecated since 0.9.0, please migrate to uriParseSingleUriA (with "Single"). + */ +URI_PUBLIC int URI_FUNC(ParseUri)(URI_TYPE(ParserState) * state, + const URI_CHAR * text); + + + +/** + * Parses a single RFC 3986 %URI. + * Uses default libc-based memory manager. + * + * @param uri OUT: Output %URI, must not be NULL + * @param text IN: Pointer to the first character to parse, + * must not be NULL + * @param errorPos OUT: Pointer to a pointer to the first character + * causing a syntax error, can be NULL; + * only set when URI_ERROR_SYNTAX was returned + * @return 0 on success, error code otherwise + * + * @see uriParseSingleUriExA + * @see uriParseSingleUriExMmA + * @see uriToStringA + * @since 0.9.0 + */ +URI_PUBLIC int URI_FUNC(ParseSingleUri)(URI_TYPE(Uri) * uri, + const URI_CHAR * text, const URI_CHAR ** errorPos); + + + +/** + * Parses a single RFC 3986 %URI. + * Uses default libc-based memory manager. + * + * @param uri OUT: Output %URI, must not be NULL + * @param first IN: Pointer to the first character to parse, + * must not be NULL + * @param afterLast IN: Pointer to the character after the last to + * parse, can be NULL + * (to use first + strlen(first)) + * @param errorPos OUT: Pointer to a pointer to the first character + * causing a syntax error, can be NULL; + * only set when URI_ERROR_SYNTAX was returned + * @return 0 on success, error code otherwise + * + * @see uriParseSingleUriA + * @see uriParseSingleUriExMmA + * @see uriToStringA + * @since 0.9.0 + */ +URI_PUBLIC int URI_FUNC(ParseSingleUriEx)(URI_TYPE(Uri) * uri, + const URI_CHAR * first, const URI_CHAR * afterLast, + const URI_CHAR ** errorPos); + + + +/** + * Parses a single RFC 3986 %URI. + * + * @param uri OUT: Output %URI, must not be NULL + * @param first IN: Pointer to the first character to parse, + * must not be NULL + * @param afterLast IN: Pointer to the character after the last to + * parse, can be NULL + * (to use first + strlen(first)) + * @param errorPos OUT: Pointer to a pointer to the first character + * causing a syntax error, can be NULL; + * only set when URI_ERROR_SYNTAX was returned + * @param memory IN: Memory manager to use, NULL for default libc + * @return 0 on success, error code otherwise + * + * @see uriParseSingleUriA + * @see uriParseSingleUriExA + * @see uriToStringA + * @since 0.9.0 + */ +URI_PUBLIC int URI_FUNC(ParseSingleUriExMm)(URI_TYPE(Uri) * uri, + const URI_CHAR * first, const URI_CHAR * afterLast, + const URI_CHAR ** errorPos, UriMemoryManager * memory); + + + +/** + * Frees all memory associated with the members + * of the %URI structure. Note that the structure + * itself is not freed, only its members. + * Uses default libc-based memory manager. + * + * @param uri INOUT: %URI structure whose members should be freed + * + * @see uriFreeUriMembersMmA + * @since 0.3.0 + */ +URI_PUBLIC void URI_FUNC(FreeUriMembers)(URI_TYPE(Uri) * uri); + + + +/** + * Frees all memory associated with the members + * of the %URI structure. Note that the structure + * itself is not freed, only its members. + * + * @param uri INOUT: %URI structure whose members should be freed + * @param memory IN: Memory manager to use, NULL for default libc + * @return 0 on success, error code otherwise + * + * @see uriFreeUriMembersA + * @since 0.9.0 + */ +URI_PUBLIC int URI_FUNC(FreeUriMembersMm)(URI_TYPE(Uri) * uri, + UriMemoryManager * memory); + + + +/** + * Percent-encodes all unreserved characters from the input string and + * writes the encoded version to the output string. + * + * NOTE: Be sure to allocate 3 times the space of the input buffer for + * the output buffer for normalizeBreaks == URI_FALSE and 6 times + * the space for normalizeBreaks == URI_TRUE + * (since e.g. "\x0d" becomes "%0D%0A" in that case). + * + * NOTE: The implementation treats (both char and) wchar_t units + * as code point integers, which works well for code points U+0001 to U+00ff + * in host-native endianness but nothing more; + * in particular, using uriEscapeExW with arbitrary Unicode input will + * not produce healthy results. + * Passing UTF-8 input to uriEscapeExA may be useful in some scenarios. + * Keep in mind that uriparser is about %URI (RFC 3986) not %IRI (RFC 3987). + * + * @param inFirst IN: Pointer to first character of the input text + * @param inAfterLast IN: Pointer after the last character of the input text + * @param out OUT: Encoded text destination + * @param spaceToPlus IN: Whether to convert ' ' to '+' or not + * @param normalizeBreaks IN: Whether to convert CR and LF to CR-LF or not. + * @return Position of terminator in output string + * + * @see uriEscapeA + * @see uriUnescapeInPlaceExA + * @since 0.5.2 + */ +URI_PUBLIC URI_CHAR * URI_FUNC(EscapeEx)(const URI_CHAR * inFirst, + const URI_CHAR * inAfterLast, URI_CHAR * out, + UriBool spaceToPlus, UriBool normalizeBreaks); + + + +/** + * Percent-encodes all unreserved characters from the input string and + * writes the encoded version to the output string. + * + * NOTE: Be sure to allocate 3 times the space of the input buffer for + * the output buffer for normalizeBreaks == URI_FALSE and 6 times + * the space for normalizeBreaks == URI_TRUE + * (since e.g. "\x0d" becomes "%0D%0A" in that case). + * + * NOTE: The implementation treats (both char and) wchar_t units + * as code point integers, which works well for code points U+0001 to U+00ff + * in host-native endianness but nothing more; + * in particular, using uriEscapeW with arbitrary Unicode input will + * not produce healthy results. + * Passing UTF-8 input to uriEscapeA may be useful in some scenarios. + * Keep in mind that uriparser is about %URI (RFC 3986) not %IRI (RFC 3987). + * + * @param in IN: Text source + * @param out OUT: Encoded text destination + * @param spaceToPlus IN: Whether to convert ' ' to '+' or not + * @param normalizeBreaks IN: Whether to convert CR and LF to CR-LF or not. + * @return Position of terminator in output string + * + * @see uriEscapeExA + * @see uriUnescapeInPlaceA + * @since 0.5.0 + */ +URI_PUBLIC URI_CHAR * URI_FUNC(Escape)(const URI_CHAR * in, URI_CHAR * out, + UriBool spaceToPlus, UriBool normalizeBreaks); + + + +/** + * Unescapes percent-encoded groups in a given string. + * E.g. "%20" will become " ". Unescaping is done in place. + * The return value will be point to the new position + * of the terminating zero. Use this value to get the new + * length of the string. NULL is only returned if inout + * is NULL. + * + * @param inout INOUT: Text to unescape/decode + * @param plusToSpace IN: Whether to convert '+' to ' ' or not + * @param breakConversion IN: Line break conversion mode + * @return Pointer to new position of the terminating zero + * + * @see uriUnescapeInPlaceA + * @see uriEscapeExA + * @since 0.5.0 + */ +URI_PUBLIC const URI_CHAR * URI_FUNC(UnescapeInPlaceEx)(URI_CHAR * inout, + UriBool plusToSpace, UriBreakConversion breakConversion); + + + +/** + * Unescapes percent-encoded groups in a given string. + * E.g. "%20" will become " ". Unescaping is done in place. + * The return value will be point to the new position + * of the terminating zero. Use this value to get the new + * length of the string. NULL is only returned if inout + * is NULL. + * + * NOTE: '+' is not decoded to ' ' and line breaks are not converted. + * Use the more advanced UnescapeInPlaceEx for that features instead. + * + * @param inout INOUT: Text to unescape/decode + * @return Pointer to new position of the terminating zero + * + * @see uriUnescapeInPlaceExA + * @see uriEscapeA + * @since 0.3.0 + */ +URI_PUBLIC const URI_CHAR * URI_FUNC(UnescapeInPlace)(URI_CHAR * inout); + + + +/** + * Performs reference resolution as described in + * section 5.2.2 of RFC 3986. + * Uses default libc-based memory manager. + * NOTE: On success you have to call uriFreeUriMembersA on \p absoluteDest manually later. + * + * @param absoluteDest OUT: Result %URI + * @param relativeSource IN: Reference to resolve + * @param absoluteBase IN: Base %URI to apply + * @return Error code or 0 on success + * + * @see uriRemoveBaseUriA + * @see uriRemoveBaseUriMmA + * @see uriAddBaseUriExA + * @see uriAddBaseUriExMmA + * @since 0.4.0 + */ +URI_PUBLIC int URI_FUNC(AddBaseUri)(URI_TYPE(Uri) * absoluteDest, + const URI_TYPE(Uri) * relativeSource, + const URI_TYPE(Uri) * absoluteBase); + + + +/** + * Performs reference resolution as described in + * section 5.2.2 of RFC 3986. + * Uses default libc-based memory manager. + * NOTE: On success you have to call uriFreeUriMembersA on \p absoluteDest manually later. + * + * @param absoluteDest OUT: Result %URI + * @param relativeSource IN: Reference to resolve + * @param absoluteBase IN: Base %URI to apply + * @param options IN: Configuration to apply + * @return Error code or 0 on success + * + * @see uriRemoveBaseUriA + * @see uriAddBaseUriA + * @see uriAddBaseUriExMmA + * @since 0.8.1 + */ +URI_PUBLIC int URI_FUNC(AddBaseUriEx)(URI_TYPE(Uri) * absoluteDest, + const URI_TYPE(Uri) * relativeSource, + const URI_TYPE(Uri) * absoluteBase, + UriResolutionOptions options); + + + +/** + * Performs reference resolution as described in + * section 5.2.2 of RFC 3986. + * NOTE: On success you have to call uriFreeUriMembersMmA on \p absoluteDest manually later. + * + * @param absoluteDest OUT: Result %URI + * @param relativeSource IN: Reference to resolve + * @param absoluteBase IN: Base %URI to apply + * @param options IN: Configuration to apply + * @param memory IN: Memory manager to use, NULL for default libc + * @return Error code or 0 on success + * + * @see uriRemoveBaseUriA + * @see uriRemoveBaseUriMmA + * @see uriAddBaseUriA + * @see uriAddBaseUriExA + * @since 0.9.0 + */ +URI_PUBLIC int URI_FUNC(AddBaseUriExMm)(URI_TYPE(Uri) * absoluteDest, + const URI_TYPE(Uri) * relativeSource, + const URI_TYPE(Uri) * absoluteBase, + UriResolutionOptions options, UriMemoryManager * memory); + + + +/** + * Tries to make a relative %URI (a reference) from an + * absolute %URI and a given base %URI. The resulting %URI is going to be + * relative if the absolute %URI and base %UI share both scheme and authority. + * If that is not the case, the result will still be + * an absolute URI (with scheme part if necessary). + * Uses default libc-based memory manager. + * NOTE: On success you have to call uriFreeUriMembersA on + * \p dest manually later. + * + * @param dest OUT: Result %URI + * @param absoluteSource IN: Absolute %URI to make relative + * @param absoluteBase IN: Base %URI + * @param domainRootMode IN: Create %URI with path relative to domain root + * @return Error code or 0 on success + * + * @see uriRemoveBaseUriMmA + * @see uriAddBaseUriA + * @see uriAddBaseUriExA + * @see uriAddBaseUriExMmA + * @since 0.5.2 + */ +URI_PUBLIC int URI_FUNC(RemoveBaseUri)(URI_TYPE(Uri) * dest, + const URI_TYPE(Uri) * absoluteSource, + const URI_TYPE(Uri) * absoluteBase, + UriBool domainRootMode); + + + +/** + * Tries to make a relative %URI (a reference) from an + * absolute %URI and a given base %URI. The resulting %URI is going to be + * relative if the absolute %URI and base %UI share both scheme and authority. + * If that is not the case, the result will still be + * an absolute URI (with scheme part if necessary). + * NOTE: On success you have to call uriFreeUriMembersMmA on + * \p dest manually later. + * + * @param dest OUT: Result %URI + * @param absoluteSource IN: Absolute %URI to make relative + * @param absoluteBase IN: Base %URI + * @param domainRootMode IN: Create %URI with path relative to domain root + * @param memory IN: Memory manager to use, NULL for default libc + * @return Error code or 0 on success + * + * @see uriRemoveBaseUriA + * @see uriAddBaseUriA + * @see uriAddBaseUriExA + * @see uriAddBaseUriExMmA + * @since 0.9.0 + */ +URI_PUBLIC int URI_FUNC(RemoveBaseUriMm)(URI_TYPE(Uri) * dest, + const URI_TYPE(Uri) * absoluteSource, + const URI_TYPE(Uri) * absoluteBase, + UriBool domainRootMode, UriMemoryManager * memory); + + + +/** + * Checks two URIs for equivalence. Comparison is done + * the naive way, without prior normalization. + * NOTE: Two NULL URIs are equal as well. + * + * @param a IN: First %URI + * @param b IN: Second %URI + * @return URI_TRUE when equal, URI_FAlSE else + * + * @since 0.4.0 + */ +URI_PUBLIC UriBool URI_FUNC(EqualsUri)(const URI_TYPE(Uri) * a, + const URI_TYPE(Uri) * b); + + + +/** + * Calculates the number of characters needed to store the + * string representation of the given %URI excluding the + * terminator. + * + * @param uri IN: %URI to measure + * @param charsRequired OUT: Length of the string representation in characters excluding terminator + * @return Error code or 0 on success + * + * @see uriToStringA + * @since 0.5.0 + */ +URI_PUBLIC int URI_FUNC(ToStringCharsRequired)(const URI_TYPE(Uri) * uri, + int * charsRequired); + + + +/** + * Converts a %URI structure back to text as described in + * section 5.3 of RFC 3986. + * + * NOTE: Scheme-based normalization + * (section 6.2.3 of RFC 3986) + * is not applied and is considered a responsibility of the application using uriparser. + * + * @param dest OUT: Output destination + * @param uri IN: %URI to convert + * @param maxChars IN: Maximum number of characters to copy including terminator + * @param charsWritten OUT: Number of characters written, can be lower than maxChars even if the %URI is too long! + * @return Error code or 0 on success + * + * @see uriToStringCharsRequiredA + * @since 0.4.0 + */ +URI_PUBLIC int URI_FUNC(ToString)(URI_CHAR * dest, const URI_TYPE(Uri) * uri, + int maxChars, int * charsWritten); + + + +/** + * Determines the components of a %URI that are not normalized. + * + * @param uri IN: %URI to check + * @return Normalization job mask + * + * @see uriNormalizeSyntaxA + * @see uriNormalizeSyntaxExA + * @see uriNormalizeSyntaxExMmA + * @see uriNormalizeSyntaxMaskRequiredExA + * @since 0.5.0 + * @deprecated Deprecated since 0.9.0, please migrate to uriNormalizeSyntaxMaskRequiredExA (with "Ex"). + */ +URI_PUBLIC unsigned int URI_FUNC(NormalizeSyntaxMaskRequired)( + const URI_TYPE(Uri) * uri); + + + +/** + * Determines the components of a %URI that are not normalized. + * + * @param uri IN: %URI to check + * @param outMask OUT: Normalization job mask + * @return Error code or 0 on success + * + * @see uriNormalizeSyntaxA + * @see uriNormalizeSyntaxExA + * @see uriNormalizeSyntaxExMmA + * @see uriNormalizeSyntaxMaskRequiredA + * @since 0.9.0 + */ +URI_PUBLIC int URI_FUNC(NormalizeSyntaxMaskRequiredEx)( + const URI_TYPE(Uri) * uri, unsigned int * outMask); + + + +/** + * Normalizes a %URI using a normalization mask. + * The normalization mask decides what components are normalized. + * + * NOTE: If necessary the %URI becomes owner of all memory + * behind the text pointed to. Text is duplicated in that case. + * Uses default libc-based memory manager. + * + * @param uri INOUT: %URI to normalize + * @param mask IN: Normalization mask + * @return Error code or 0 on success + * + * @see uriNormalizeSyntaxA + * @see uriNormalizeSyntaxExMmA + * @see uriNormalizeSyntaxMaskRequiredA + * @since 0.5.0 + */ +URI_PUBLIC int URI_FUNC(NormalizeSyntaxEx)(URI_TYPE(Uri) * uri, + unsigned int mask); + + + +/** + * Normalizes a %URI using a normalization mask. + * The normalization mask decides what components are normalized. + * + * NOTE: If necessary the %URI becomes owner of all memory + * behind the text pointed to. Text is duplicated in that case. + * + * @param uri INOUT: %URI to normalize + * @param mask IN: Normalization mask + * @param memory IN: Memory manager to use, NULL for default libc + * @return Error code or 0 on success + * + * @see uriNormalizeSyntaxA + * @see uriNormalizeSyntaxExA + * @see uriNormalizeSyntaxMaskRequiredA + * @since 0.9.0 + */ +URI_PUBLIC int URI_FUNC(NormalizeSyntaxExMm)(URI_TYPE(Uri) * uri, + unsigned int mask, UriMemoryManager * memory); + + + +/** + * Normalizes all components of a %URI. + * + * NOTE: If necessary the %URI becomes owner of all memory + * behind the text pointed to. Text is duplicated in that case. + * Uses default libc-based memory manager. + * + * @param uri INOUT: %URI to normalize + * @return Error code or 0 on success + * + * @see uriNormalizeSyntaxExA + * @see uriNormalizeSyntaxExMmA + * @see uriNormalizeSyntaxMaskRequiredA + * @since 0.5.0 + */ +URI_PUBLIC int URI_FUNC(NormalizeSyntax)(URI_TYPE(Uri) * uri); + + + +/** + * Converts a Unix filename to a %URI string. + * The destination buffer must be large enough to hold 7 + 3 * len(filename) + 1 + * characters in case of an absolute filename or 3 * len(filename) + 1 in case + * of a relative filename. + * + * EXAMPLE + * Input: "/bin/bash" + * Output: "file:///bin/bash" + * + * @param filename IN: Unix filename to convert + * @param uriString OUT: Destination to write %URI string to + * @return Error code or 0 on success + * + * @see uriUriStringToUnixFilenameA + * @see uriWindowsFilenameToUriStringA + * @since 0.5.2 + */ +URI_PUBLIC int URI_FUNC(UnixFilenameToUriString)(const URI_CHAR * filename, + URI_CHAR * uriString); + + + +/** + * Converts a Windows filename to a %URI string. + * The destination buffer must be large enough to hold 8 + 3 * len(filename) + 1 + * characters in case of an absolute filename or 3 * len(filename) + 1 in case + * of a relative filename. + * + * EXAMPLE + * Input: "E:\\Documents and Settings" + * Output: "file:///E:/Documents%20and%20Settings" + * + * @param filename IN: Windows filename to convert + * @param uriString OUT: Destination to write %URI string to + * @return Error code or 0 on success + * + * @see uriUriStringToWindowsFilenameA + * @see uriUnixFilenameToUriStringA + * @since 0.5.2 + */ +URI_PUBLIC int URI_FUNC(WindowsFilenameToUriString)(const URI_CHAR * filename, + URI_CHAR * uriString); + + + +/** + * Extracts a Unix filename from a %URI string. + * The destination buffer must be large enough to hold len(uriString) + 1 - 5 + * characters in case of an absolute %URI or len(uriString) + 1 in case + * of a relative %URI. + * + * @param uriString IN: %URI string to convert + * @param filename OUT: Destination to write filename to + * @return Error code or 0 on success + * + * @see uriUnixFilenameToUriStringA + * @see uriUriStringToWindowsFilenameA + * @since 0.5.2 + */ +URI_PUBLIC int URI_FUNC(UriStringToUnixFilename)(const URI_CHAR * uriString, + URI_CHAR * filename); + + + +/** + * Extracts a Windows filename from a %URI string. + * The destination buffer must be large enough to hold len(uriString) + 1 - 5 + * characters in case of an absolute %URI or len(uriString) + 1 in case + * of a relative %URI. + * + * @param uriString IN: %URI string to convert + * @param filename OUT: Destination to write filename to + * @return Error code or 0 on success + * + * @see uriWindowsFilenameToUriStringA + * @see uriUriStringToUnixFilenameA + * @since 0.5.2 + */ +URI_PUBLIC int URI_FUNC(UriStringToWindowsFilename)(const URI_CHAR * uriString, + URI_CHAR * filename); + + + +/** + * Calculates the number of characters needed to store the + * string representation of the given query list excluding the + * terminator. It is assumed that line breaks are will be + * normalized to "%0D%0A". + * + * @param queryList IN: Query list to measure + * @param charsRequired OUT: Length of the string representation in characters excluding terminator + * @return Error code or 0 on success + * + * @see uriComposeQueryCharsRequiredExA + * @see uriComposeQueryA + * @since 0.7.0 + */ +URI_PUBLIC int URI_FUNC(ComposeQueryCharsRequired)( + const URI_TYPE(QueryList) * queryList, int * charsRequired); + + + +/** + * Calculates the number of characters needed to store the + * string representation of the given query list excluding the + * terminator. + * + * @param queryList IN: Query list to measure + * @param charsRequired OUT: Length of the string representation in characters excluding terminator + * @param spaceToPlus IN: Whether to convert ' ' to '+' or not + * @param normalizeBreaks IN: Whether to convert CR and LF to CR-LF or not. + * @return Error code or 0 on success + * + * @see uriComposeQueryCharsRequiredA + * @see uriComposeQueryExA + * @since 0.7.0 + */ +URI_PUBLIC int URI_FUNC(ComposeQueryCharsRequiredEx)( + const URI_TYPE(QueryList) * queryList, + int * charsRequired, UriBool spaceToPlus, UriBool normalizeBreaks); + + + +/** + * Converts a query list structure back to a query string. + * The composed string does not start with '?', + * on the way ' ' is converted to '+' and line breaks are + * normalized to "%0D%0A". + * + * @param dest OUT: Output destination + * @param queryList IN: Query list to convert + * @param maxChars IN: Maximum number of characters to copy including terminator + * @param charsWritten OUT: Number of characters written, can be lower than maxChars even if the query list is too long! + * @return Error code or 0 on success + * + * @see uriComposeQueryExA + * @see uriComposeQueryMallocA + * @see uriComposeQueryMallocExA + * @see uriComposeQueryMallocExMmA + * @see uriComposeQueryCharsRequiredA + * @see uriDissectQueryMallocA + * @see uriDissectQueryMallocExA + * @see uriDissectQueryMallocExMmA + * @since 0.7.0 + */ +URI_PUBLIC int URI_FUNC(ComposeQuery)(URI_CHAR * dest, + const URI_TYPE(QueryList) * queryList, int maxChars, int * charsWritten); + + + +/** + * Converts a query list structure back to a query string. + * The composed string does not start with '?'. + * + * @param dest OUT: Output destination + * @param queryList IN: Query list to convert + * @param maxChars IN: Maximum number of characters to copy including terminator + * @param charsWritten OUT: Number of characters written, can be lower than maxChars even if the query list is too long! + * @param spaceToPlus IN: Whether to convert ' ' to '+' or not + * @param normalizeBreaks IN: Whether to convert CR and LF to CR-LF or not. + * @return Error code or 0 on success + * + * @see uriComposeQueryA + * @see uriComposeQueryMallocA + * @see uriComposeQueryMallocExA + * @see uriComposeQueryMallocExMmA + * @see uriComposeQueryCharsRequiredExA + * @see uriDissectQueryMallocA + * @see uriDissectQueryMallocExA + * @see uriDissectQueryMallocExMmA + * @since 0.7.0 + */ +URI_PUBLIC int URI_FUNC(ComposeQueryEx)(URI_CHAR * dest, + const URI_TYPE(QueryList) * queryList, int maxChars, int * charsWritten, + UriBool spaceToPlus, UriBool normalizeBreaks); + + + +/** + * Converts a query list structure back to a query string. + * Memory for this string is allocated internally. + * The composed string does not start with '?', + * on the way ' ' is converted to '+' and line breaks are + * normalized to "%0D%0A". + * Uses default libc-based memory manager. + * + * @param dest OUT: Output destination + * @param queryList IN: Query list to convert + * @return Error code or 0 on success + * + * @see uriComposeQueryMallocExA + * @see uriComposeQueryMallocExMmA + * @see uriComposeQueryA + * @see uriDissectQueryMallocA + * @see uriDissectQueryMallocExA + * @see uriDissectQueryMallocExMmA + * @since 0.7.0 + */ +URI_PUBLIC int URI_FUNC(ComposeQueryMalloc)(URI_CHAR ** dest, + const URI_TYPE(QueryList) * queryList); + + + +/** + * Converts a query list structure back to a query string. + * Memory for this string is allocated internally. + * The composed string does not start with '?'. + * Uses default libc-based memory manager. + * + * @param dest OUT: Output destination + * @param queryList IN: Query list to convert + * @param spaceToPlus IN: Whether to convert ' ' to '+' or not + * @param normalizeBreaks IN: Whether to convert CR and LF to CR-LF or not. + * @return Error code or 0 on success + * + * @see uriComposeQueryMallocA + * @see uriComposeQueryMallocExMmA + * @see uriComposeQueryExA + * @see uriDissectQueryMallocA + * @see uriDissectQueryMallocExA + * @see uriDissectQueryMallocExMmA + * @since 0.7.0 + */ +URI_PUBLIC int URI_FUNC(ComposeQueryMallocEx)(URI_CHAR ** dest, + const URI_TYPE(QueryList) * queryList, + UriBool spaceToPlus, UriBool normalizeBreaks); + + + +/** + * Converts a query list structure back to a query string. + * Memory for this string is allocated internally. + * The composed string does not start with '?'. + * + * @param dest OUT: Output destination + * @param queryList IN: Query list to convert + * @param spaceToPlus IN: Whether to convert ' ' to '+' or not + * @param normalizeBreaks IN: Whether to convert CR and LF to CR-LF or not. + * @param memory IN: Memory manager to use, NULL for default libc + * @return Error code or 0 on success + * + * @see uriComposeQueryMallocA + * @see uriComposeQueryMallocExA + * @see uriComposeQueryExA + * @see uriDissectQueryMallocA + * @see uriDissectQueryMallocExA + * @see uriDissectQueryMallocExMmA + * @since 0.9.0 + */ +URI_PUBLIC int URI_FUNC(ComposeQueryMallocExMm)(URI_CHAR ** dest, + const URI_TYPE(QueryList) * queryList, + UriBool spaceToPlus, UriBool normalizeBreaks, + UriMemoryManager * memory); + + + +/** + * Constructs a query list from the raw query string of a given URI. + * On the way '+' is converted back to ' ', line breaks are not modified. + * Uses default libc-based memory manager. + * + * @param dest OUT: Output destination + * @param itemCount OUT: Number of items found, can be NULL + * @param first IN: Pointer to first character after '?' + * @param afterLast IN: Pointer to character after the last one still in + * @return Error code or 0 on success + * + * @see uriDissectQueryMallocExA + * @see uriDissectQueryMallocExMmA + * @see uriComposeQueryA + * @see uriFreeQueryListA + * @see uriFreeQueryListMmA + * @since 0.7.0 + */ +URI_PUBLIC int URI_FUNC(DissectQueryMalloc)(URI_TYPE(QueryList) ** dest, + int * itemCount, const URI_CHAR * first, const URI_CHAR * afterLast); + + + +/** + * Constructs a query list from the raw query string of a given URI. + * Uses default libc-based memory manager. + * + * @param dest OUT: Output destination + * @param itemCount OUT: Number of items found, can be NULL + * @param first IN: Pointer to first character after '?' + * @param afterLast IN: Pointer to character after the last one still in + * @param plusToSpace IN: Whether to convert '+' to ' ' or not + * @param breakConversion IN: Line break conversion mode + * @return Error code or 0 on success + * + * @see uriDissectQueryMallocA + * @see uriDissectQueryMallocExMmA + * @see uriComposeQueryExA + * @see uriFreeQueryListA + * @since 0.7.0 + */ +URI_PUBLIC int URI_FUNC(DissectQueryMallocEx)(URI_TYPE(QueryList) ** dest, + int * itemCount, const URI_CHAR * first, const URI_CHAR * afterLast, + UriBool plusToSpace, UriBreakConversion breakConversion); + + + +/** + * Constructs a query list from the raw query string of a given URI. + * + * @param dest OUT: Output destination + * @param itemCount OUT: Number of items found, can be NULL + * @param first IN: Pointer to first character after '?' + * @param afterLast IN: Pointer to character after the last one still in + * @param plusToSpace IN: Whether to convert '+' to ' ' or not + * @param breakConversion IN: Line break conversion mode + * @param memory IN: Memory manager to use, NULL for default libc + * @return Error code or 0 on success + * + * @see uriDissectQueryMallocA + * @see uriDissectQueryMallocExA + * @see uriComposeQueryExA + * @see uriFreeQueryListA + * @see uriFreeQueryListMmA + * @since 0.9.0 + */ +URI_PUBLIC int URI_FUNC(DissectQueryMallocExMm)(URI_TYPE(QueryList) ** dest, + int * itemCount, const URI_CHAR * first, const URI_CHAR * afterLast, + UriBool plusToSpace, UriBreakConversion breakConversion, + UriMemoryManager * memory); + + + +/** + * Frees all memory associated with the given query list. + * The structure itself is freed as well. + * + * @param queryList INOUT: Query list to free + * + * @see uriFreeQueryListMmA + * @since 0.7.0 + */ +URI_PUBLIC void URI_FUNC(FreeQueryList)(URI_TYPE(QueryList) * queryList); + + + +/** + * Frees all memory associated with the given query list. + * The structure itself is freed as well. + * + * @param queryList INOUT: Query list to free + * @param memory IN: Memory manager to use, NULL for default libc + * @return Error code or 0 on success + * + * @see uriFreeQueryListA + * @since 0.9.0 + */ +URI_PUBLIC int URI_FUNC(FreeQueryListMm)(URI_TYPE(QueryList) * queryList, + UriMemoryManager * memory); + + + +/** + * Makes the %URI hold copies of strings so that it no longer depends + * on the original %URI string. If the %URI is already owner of copies, + * this function returns URI_TRUE and does not modify the %URI further. + * + * Uses default libc-based memory manager. + * + * @param uri INOUT: %URI to make independent + * @return Error code or 0 on success + * + * @see uriMakeOwnerMmA + * @since 0.9.4 + */ +URI_PUBLIC int URI_FUNC(MakeOwner)(URI_TYPE(Uri) * uri); + + + +/** + * Makes the %URI hold copies of strings so that it no longer depends + * on the original %URI string. If the %URI is already owner of copies, + * this function returns URI_TRUE and does not modify the %URI further. + * + * @param uri INOUT: %URI to make independent + * @param memory IN: Memory manager to use, NULL for default libc + * @return Error code or 0 on success + * + * @see uriMakeOwnerA + * @since 0.9.4 + */ +URI_PUBLIC int URI_FUNC(MakeOwnerMm)(URI_TYPE(Uri) * uri, + UriMemoryManager * memory); + + + +#ifdef __cplusplus +} +#endif + + + +#endif +#endif diff --git a/ext/uri/uriparser/include/uriparser/UriBase.h b/ext/uri/uriparser/include/uriparser/UriBase.h new file mode 100644 index 0000000000000..dc3883e65167c --- /dev/null +++ b/ext/uri/uriparser/include/uriparser/UriBase.h @@ -0,0 +1,373 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file UriBase.h + * Holds definitions independent of the encoding pass. + */ + +#ifndef URI_BASE_H +#define URI_BASE_H 1 + + + +/* Version helper macro */ +#define URI_ANSI_TO_UNICODE(x) L##x + + + +/* Version */ +#define URI_VER_MAJOR 0 +#define URI_VER_MINOR 9 +#define URI_VER_RELEASE 8 +#define URI_VER_SUFFIX_ANSI "" +#define URI_VER_SUFFIX_UNICODE URI_ANSI_TO_UNICODE(URI_VER_SUFFIX_ANSI) + + + +/* More version helper macros */ +#define URI_INT_TO_ANSI_HELPER(x) #x +#define URI_INT_TO_ANSI(x) URI_INT_TO_ANSI_HELPER(x) + +#define URI_INT_TO_UNICODE_HELPER(x) URI_ANSI_TO_UNICODE(#x) +#define URI_INT_TO_UNICODE(x) URI_INT_TO_UNICODE_HELPER(x) + +#define URI_VER_ANSI_HELPER(ma, mi, r, s) \ + URI_INT_TO_ANSI(ma) "." \ + URI_INT_TO_ANSI(mi) "." \ + URI_INT_TO_ANSI(r) \ + s + +#define URI_VER_UNICODE_HELPER(ma, mi, r, s) \ + URI_INT_TO_UNICODE(ma) L"." \ + URI_INT_TO_UNICODE(mi) L"." \ + URI_INT_TO_UNICODE(r) \ + s + + + +/* Full version strings */ +#define URI_VER_ANSI URI_VER_ANSI_HELPER(URI_VER_MAJOR, URI_VER_MINOR, URI_VER_RELEASE, URI_VER_SUFFIX_ANSI) +#define URI_VER_UNICODE URI_VER_UNICODE_HELPER(URI_VER_MAJOR, URI_VER_MINOR, URI_VER_RELEASE, URI_VER_SUFFIX_UNICODE) + + + +/* Unused parameter macro */ +#ifdef __GNUC__ +# define URI_UNUSED(x) unused_##x __attribute__((unused)) +#else +# define URI_UNUSED(x) x +#endif + + + +/* Import/export decorator */ +#if defined(_MSC_VER) +# if defined(URI_STATIC_BUILD) +# define URI_PUBLIC +# elif defined(URI_LIBRARY_BUILD) +# define URI_PUBLIC __declspec(dllexport) +# else +# define URI_PUBLIC __declspec(dllimport) +# endif +#else +# if ! defined(URI_LIBRARY_BUILD) || ! defined(URI_VISIBILITY) +# define URI_PUBLIC +# else +# define URI_PUBLIC __attribute__ ((visibility("default"))) +# endif +#endif + + + +typedef int UriBool; /**< Boolean type */ + +#define URI_TRUE 1 +#define URI_FALSE 0 + + + +/* Shared errors */ +#define URI_SUCCESS 0 +#define URI_ERROR_SYNTAX 1 /* Parsed text violates expected format */ +#define URI_ERROR_NULL 2 /* One of the params passed was NULL + although it mustn't be */ +#define URI_ERROR_MALLOC 3 /* Requested memory could not be allocated */ +#define URI_ERROR_OUTPUT_TOO_LARGE 4 /* Some output is to large for the receiving buffer */ +#define URI_ERROR_NOT_IMPLEMENTED 8 /* The called function is not implemented yet */ +#define URI_ERROR_RANGE_INVALID 9 /* The parameters passed contained invalid ranges */ +#define URI_ERROR_MEMORY_MANAGER_INCOMPLETE 10 /* [>=0.9.0] The UriMemoryManager passed does not implement all needed functions */ + + +/* Errors specific to ToString */ +#define URI_ERROR_TOSTRING_TOO_LONG URI_ERROR_OUTPUT_TOO_LARGE /* Deprecated, test for URI_ERROR_OUTPUT_TOO_LARGE instead */ + +/* Errors specific to AddBaseUri */ +#define URI_ERROR_ADDBASE_REL_BASE 5 /* Given base is not absolute */ + +/* Errors specific to RemoveBaseUri */ +#define URI_ERROR_REMOVEBASE_REL_BASE 6 /* Given base is not absolute */ +#define URI_ERROR_REMOVEBASE_REL_SOURCE 7 /* Given base is not absolute */ + +/* Error specific to uriTestMemoryManager */ +#define URI_ERROR_MEMORY_MANAGER_FAULTY 11 /* [>=0.9.0] The UriMemoryManager given did not pass the test suite */ + + +#ifndef URI_DOXYGEN +# include /* For NULL, snprintf */ +# include /* For wchar_t */ +# include /* For strlen, memset, memcpy */ +# include /* For malloc */ +#endif /* URI_DOXYGEN */ + + + +/** + * Holds an IPv4 address. + */ +typedef struct UriIp4Struct { + unsigned char data[4]; /**< Each octet in one byte */ +} UriIp4; /**< @copydoc UriIp4Struct */ + + + +/** + * Holds an IPv6 address. + */ +typedef struct UriIp6Struct { + unsigned char data[16]; /**< Each quad in two bytes */ +} UriIp6; /**< @copydoc UriIp6Struct */ + + +struct UriMemoryManagerStruct; /* foward declaration to break loop */ + + +/** + * Function signature that custom malloc(3) functions must conform to + * + * @since 0.9.0 + */ +typedef void * (*UriFuncMalloc)(struct UriMemoryManagerStruct *, size_t); + +/** + * Function signature that custom calloc(3) functions must conform to + * + * @since 0.9.0 + */ +typedef void * (*UriFuncCalloc)(struct UriMemoryManagerStruct *, size_t, size_t); + +/** + * Function signature that custom realloc(3) functions must conform to + * + * @since 0.9.0 + */ +typedef void * (*UriFuncRealloc)(struct UriMemoryManagerStruct *, void *, size_t); + +/** + * Function signature that custom reallocarray(3) functions must conform to + * + * @since 0.9.0 + */ +typedef void * (*UriFuncReallocarray)(struct UriMemoryManagerStruct *, void *, size_t, size_t); + +/** + * Function signature that custom free(3) functions must conform to + * + * @since 0.9.0 + */ +typedef void (*UriFuncFree)(struct UriMemoryManagerStruct *, void *); + + +/** + * Class-like interface of custom memory managers + * + * @see uriCompleteMemoryManager + * @see uriEmulateCalloc + * @see uriEmulateReallocarray + * @see uriTestMemoryManager + * @since 0.9.0 + */ +typedef struct UriMemoryManagerStruct { + UriFuncMalloc malloc; /**< Pointer to custom malloc(3) */ + UriFuncCalloc calloc; /**< Pointer to custom calloc(3); to emulate using malloc and memset see uriEmulateCalloc */ + UriFuncRealloc realloc; /**< Pointer to custom realloc(3) */ + UriFuncReallocarray reallocarray; /**< Pointer to custom reallocarray(3); to emulate using realloc see uriEmulateReallocarray */ + UriFuncFree free; /**< Pointer to custom free(3) */ + void * userData; /**< Pointer to data that the other function members need access to */ +} UriMemoryManager; /**< @copydoc UriMemoryManagerStruct */ + + +/** + * Specifies a line break conversion mode. + */ +typedef enum UriBreakConversionEnum { + URI_BR_TO_LF, /**< Convert to Unix line breaks ("\\x0a") */ + URI_BR_TO_CRLF, /**< Convert to Windows line breaks ("\\x0d\\x0a") */ + URI_BR_TO_CR, /**< Convert to Macintosh line breaks ("\\x0d") */ + URI_BR_TO_UNIX = URI_BR_TO_LF, /**< @copydoc UriBreakConversionEnum::URI_BR_TO_LF */ + URI_BR_TO_WINDOWS = URI_BR_TO_CRLF, /**< @copydoc UriBreakConversionEnum::URI_BR_TO_CRLF */ + URI_BR_TO_MAC = URI_BR_TO_CR, /**< @copydoc UriBreakConversionEnum::URI_BR_TO_CR */ + URI_BR_DONT_TOUCH /**< Copy line breaks unmodified */ +} UriBreakConversion; /**< @copydoc UriBreakConversionEnum */ + + + +/** + * Specifies which component of a %URI has to be normalized. + */ +typedef enum UriNormalizationMaskEnum { + URI_NORMALIZED = 0, /**< Do not normalize anything */ + URI_NORMALIZE_SCHEME = 1 << 0, /**< Normalize scheme (fix uppercase letters) */ + URI_NORMALIZE_USER_INFO = 1 << 1, /**< Normalize user info (fix uppercase percent-encodings) */ + URI_NORMALIZE_HOST = 1 << 2, /**< Normalize host (fix uppercase letters) */ + URI_NORMALIZE_PATH = 1 << 3, /**< Normalize path (fix uppercase percent-encodings and redundant dot segments) */ + URI_NORMALIZE_QUERY = 1 << 4, /**< Normalize query (fix uppercase percent-encodings) */ + URI_NORMALIZE_FRAGMENT = 1 << 5 /**< Normalize fragment (fix uppercase percent-encodings) */ +} UriNormalizationMask; /**< @copydoc UriNormalizationMaskEnum */ + + + +/** + * Specifies how to resolve %URI references. + */ +typedef enum UriResolutionOptionsEnum { + URI_RESOLVE_STRICTLY = 0, /**< Full RFC conformance */ + URI_RESOLVE_IDENTICAL_SCHEME_COMPAT = 1 << 0 /**< Treat %URI to resolve with identical scheme as having no scheme */ +} UriResolutionOptions; /**< @copydoc UriResolutionOptionsEnum */ + + + +/** + * Wraps a memory manager backend that only provides malloc and free + * to make a complete memory manager ready to be used. + * + * The core feature of this wrapper is that you don't need to implement + * realloc if you don't want to. The wrapped memory manager uses + * backend->malloc, memcpy, and backend->free and soieof(size_t) extra + * bytes per allocation to emulate fallback realloc for you. + * + * memory->calloc is uriEmulateCalloc. + * memory->free uses backend->free and handles the size header. + * memory->malloc uses backend->malloc and adds a size header. + * memory->realloc uses memory->malloc, memcpy, and memory->free and reads + * the size header. + * memory->reallocarray is uriEmulateReallocarray. + * + * The internal workings behind memory->free, memory->malloc, and + * memory->realloc may change so the functions exposed by these function + * pointer sshould be consided internal and not public API. + * + * @param memory OUT: Where to write the wrapped memory manager to + * @param backend IN: Memory manager to use as a backend + * @return Error code or 0 on success + * + * @see uriEmulateCalloc + * @see uriEmulateReallocarray + * @see UriMemoryManager + * @since 0.9.0 + */ +URI_PUBLIC int uriCompleteMemoryManager(UriMemoryManager * memory, + UriMemoryManager * backend); + + + +/** + * Offers emulation of calloc(3) based on memory->malloc and memset. + * See "man 3 calloc" as well. + * + * @param memory IN: Memory manager to use, should not be NULL + * @param nmemb IN: Number of elements to allocate + * @param size IN: Size in bytes per element + * @return Pointer to allocated memory or NULL + * + * @see uriCompleteMemoryManager + * @see uriEmulateReallocarray + * @see UriMemoryManager + * @since 0.9.0 + */ +URI_PUBLIC void * uriEmulateCalloc(UriMemoryManager * memory, + size_t nmemb, size_t size); + + + +/** + * Offers emulation of reallocarray(3) based on memory->realloc. + * See "man 3 reallocarray" as well. + * + * @param memory IN: Memory manager to use, should not be NULL + * @param ptr IN: Pointer allocated using memory->malloc/... or NULL + * @param nmemb IN: Number of elements to allocate + * @param size IN: Size in bytes per element + * @return Pointer to allocated memory or NULL + * + * @see uriCompleteMemoryManager + * @see uriEmulateCalloc + * @see UriMemoryManager + * @since 0.9.0 + */ +URI_PUBLIC void * uriEmulateReallocarray(UriMemoryManager * memory, + void * ptr, size_t nmemb, size_t size); + + + +/** + * Run multiple tests against a given memory manager. + * For example, one test + * 1. allocates a small amount of memory, + * 2. writes some magic bytes to it, + * 3. reallocates it, + * 4. checks that previous values are still present, + * 5. and frees that memory. + * + * It is recommended to compile with AddressSanitizer enabled + * to take full advantage of uriTestMemoryManager. + * + * @param memory IN: Memory manager to use, should not be NULL + * @return Error code or 0 on success + * + * @see uriEmulateCalloc + * @see uriEmulateReallocarray + * @see UriMemoryManager + * @since 0.9.0 + */ +URI_PUBLIC int uriTestMemoryManager(UriMemoryManager * memory); + + + +#endif /* URI_BASE_H */ diff --git a/ext/uri/uriparser/include/uriparser/UriDefsAnsi.h b/ext/uri/uriparser/include/uriparser/UriDefsAnsi.h new file mode 100644 index 0000000000000..af581b91a2152 --- /dev/null +++ b/ext/uri/uriparser/include/uriparser/UriDefsAnsi.h @@ -0,0 +1,82 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file UriDefsAnsi.h + * Holds definitions for the ANSI pass. + * NOTE: This header is included N times, not once. + */ + +/* Allow multi inclusion */ +#include "UriDefsConfig.h" + + + +#undef URI_CHAR +#define URI_CHAR char + +#undef _UT +#define _UT(x) x + + + +#undef URI_FUNC +#define URI_FUNC(x) uri##x##A + +#undef URI_TYPE +#define URI_TYPE(x) Uri##x##A + + + +#undef URI_STRLEN +#define URI_STRLEN strlen +#undef URI_STRCPY +#define URI_STRCPY strcpy +#undef URI_STRCMP +#define URI_STRCMP strcmp +#undef URI_STRNCMP +#define URI_STRNCMP strncmp + +/* TODO Remove on next source-compatibility break */ +#undef URI_SNPRINTF +#if (defined(__WIN32__) || defined(_WIN32) || defined(WIN32)) +# define URI_SNPRINTF _snprintf +#else +# define URI_SNPRINTF snprintf +#endif diff --git a/ext/uri/uriparser/include/uriparser/UriDefsConfig.h b/ext/uri/uriparser/include/uriparser/UriDefsConfig.h new file mode 100644 index 0000000000000..51bc93eeddaae --- /dev/null +++ b/ext/uri/uriparser/include/uriparser/UriDefsConfig.h @@ -0,0 +1,101 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file UriDefsConfig.h + * Adjusts the internal configuration after processing external definitions. + */ + +#ifndef URI_DEFS_CONFIG_H +#define URI_DEFS_CONFIG_H 1 + + + +/* Deny external overriding */ +#undef URI_ENABLE_ANSI /* Internal for !URI_NO_ANSI */ +#undef URI_ENABLE_UNICODE /* Internal for !URI_NO_UNICODE */ + + + +/* Encoding */ +#ifdef URI_NO_ANSI +# ifdef URI_NO_UNICODE +/* No encoding at all */ +# error URI_NO_ANSI and URI_NO_UNICODE cannot go together. +# else +/* Wide strings only */ +# define URI_ENABLE_UNICODE 1 +# endif +#else +# ifdef URI_NO_UNICODE +/* Narrow strings only */ +# define URI_ENABLE_ANSI 1 +# else +/* Both narrow and wide strings */ +# define URI_ENABLE_ANSI 1 +# define URI_ENABLE_UNICODE 1 +# endif +#endif + + + +/* Function inlining, not ANSI/ISO C! */ +#if defined(URI_DOXYGEN) +# define URI_INLINE +#elif defined(__INTEL_COMPILER) +/* Intel C/C++ */ +/* http://predef.sourceforge.net/precomp.html#sec20 */ +/* http://www.intel.com/support/performancetools/c/windows/sb/CS-007751.htm#2 */ +# define URI_INLINE __forceinline +#elif defined(_MSC_VER) +/* Microsoft Visual C++ */ +/* http://predef.sourceforge.net/precomp.html#sec32 */ +/* http://msdn2.microsoft.com/en-us/library/ms882281.aspx */ +# define URI_INLINE __forceinline +#elif (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) +/* C99, "inline" is a keyword */ +# define URI_INLINE inline +#else +/* No inlining */ +# define URI_INLINE +#endif + + + +#endif /* URI_DEFS_CONFIG_H */ diff --git a/ext/uri/uriparser/include/uriparser/UriDefsUnicode.h b/ext/uri/uriparser/include/uriparser/UriDefsUnicode.h new file mode 100644 index 0000000000000..01421f5f861df --- /dev/null +++ b/ext/uri/uriparser/include/uriparser/UriDefsUnicode.h @@ -0,0 +1,82 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file UriDefsUnicode.h + * Holds definitions for the wide string pass. + * NOTE: This header is included N times, not once. + */ + +/* Allow multi inclusion */ +#include "UriDefsConfig.h" + + + +#undef URI_CHAR +#define URI_CHAR wchar_t + +#undef _UT +#define _UT(x) L##x + + + +#undef URI_FUNC +#define URI_FUNC(x) uri##x##W + +#undef URI_TYPE +#define URI_TYPE(x) Uri##x##W + + + +#undef URI_STRLEN +#define URI_STRLEN wcslen +#undef URI_STRCPY +#define URI_STRCPY wcscpy +#undef URI_STRCMP +#define URI_STRCMP wcscmp +#undef URI_STRNCMP +#define URI_STRNCMP wcsncmp + +/* TODO Remove on next source-compatibility break */ +#undef URI_SNPRINTF +#if (defined(__WIN32__) || defined(_WIN32) || defined(WIN32)) +# define URI_SNPRINTF _snwprintf +#else +# define URI_SNPRINTF swprintf +#endif diff --git a/ext/uri/uriparser/include/uriparser/UriIp4.h b/ext/uri/uriparser/include/uriparser/UriIp4.h new file mode 100644 index 0000000000000..c2e59a6bf2d13 --- /dev/null +++ b/ext/uri/uriparser/include/uriparser/UriIp4.h @@ -0,0 +1,110 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file UriIp4.h + * Holds the IPv4 parser interface. + * NOTE: This header includes itself twice. + */ + +#if (defined(URI_PASS_ANSI) && !defined(URI_IP4_TWICE_H_ANSI)) \ + || (defined(URI_PASS_UNICODE) && !defined(URI_IP4_TWICE_H_UNICODE)) \ + || (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* What encodings are enabled? */ +#include "UriDefsConfig.h" +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriIp4.h" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriIp4.h" +# undef URI_PASS_UNICODE +# endif +/* Only one pass for each encoding */ +#elif (defined(URI_PASS_ANSI) && !defined(URI_IP4_TWICE_H_ANSI) \ + && defined(URI_ENABLE_ANSI)) || (defined(URI_PASS_UNICODE) \ + && !defined(URI_IP4_TWICE_H_UNICODE) && defined(URI_ENABLE_UNICODE)) +# ifdef URI_PASS_ANSI +# define URI_IP4_TWICE_H_ANSI 1 +# include "UriDefsAnsi.h" +# else +# define URI_IP4_TWICE_H_UNICODE 1 +# include "UriDefsUnicode.h" +# include +# endif + + + +#ifdef __cplusplus +extern "C" { +#endif + + + +#ifndef URI_DOXYGEN +# include "UriBase.h" +#endif + + + +/** + * Converts a IPv4 text representation into four bytes. + * + * @param octetOutput Output destination + * @param first First character of IPv4 text to parse + * @param afterLast Position to stop parsing at + * @return Error code or 0 on success + */ +URI_PUBLIC int URI_FUNC(ParseIpFourAddress)(unsigned char * octetOutput, + const URI_CHAR * first, const URI_CHAR * afterLast); + + + +#ifdef __cplusplus +} +#endif + + + +#endif +#endif diff --git a/ext/uri/uriparser/src/UriCommon.c b/ext/uri/uriparser/src/UriCommon.c new file mode 100644 index 0000000000000..7ba92f2495176 --- /dev/null +++ b/ext/uri/uriparser/src/UriCommon.c @@ -0,0 +1,616 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriCommon.c" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriCommon.c" +# undef URI_PASS_UNICODE +# endif +#else +# ifdef URI_PASS_ANSI +# include +# else +# include +# include +# endif + + + +#ifndef URI_DOXYGEN +# include +# include "UriCommon.h" +#endif + + + +/*extern*/ const URI_CHAR * const URI_FUNC(SafeToPointTo) = _UT("X"); +/*extern*/ const URI_CHAR * const URI_FUNC(ConstPwd) = _UT("."); +/*extern*/ const URI_CHAR * const URI_FUNC(ConstParent) = _UT(".."); + + + +void URI_FUNC(ResetUri)(URI_TYPE(Uri) * uri) { + if (uri == NULL) { + return; + } + memset(uri, 0, sizeof(URI_TYPE(Uri))); +} + + + +/* Compares two text ranges for equal text content */ +int URI_FUNC(CompareRange)( + const URI_TYPE(TextRange) * a, + const URI_TYPE(TextRange) * b) { + int diff; + + /* NOTE: Both NULL means equal! */ + if ((a == NULL) || (b == NULL)) { + return ((a == NULL) ? 0 : 1) - ((b == NULL) ? 0 : 1); + } + + /* NOTE: Both NULL means equal! */ + if ((a->first == NULL) || (b->first == NULL)) { + return ((a->first == NULL) ? 0 : 1) - ((b->first == NULL) ? 0 : 1); + } + + diff = ((int)(a->afterLast - a->first) - (int)(b->afterLast - b->first)); + if (diff > 0) { + return 1; + } else if (diff < 0) { + return -1; + } + + diff = URI_STRNCMP(a->first, b->first, (a->afterLast - a->first)); + + if (diff > 0) { + return 1; + } else if (diff < 0) { + return -1; + } + + return diff; +} + + + +UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri, + UriBool relative, UriBool pathOwned, UriMemoryManager * memory) { + URI_TYPE(PathSegment) * walker; + if ((uri == NULL) || (uri->pathHead == NULL)) { + return URI_TRUE; + } + + walker = uri->pathHead; + walker->reserved = NULL; /* Prev pointer */ + do { + UriBool removeSegment = URI_FALSE; + int len = (int)(walker->text.afterLast - walker->text.first); + switch (len) { + case 1: + if ((walker->text.first)[0] == _UT('.')) { + /* "." segment -> remove if not essential */ + URI_TYPE(PathSegment) * const prev = walker->reserved; + URI_TYPE(PathSegment) * const nextBackup = walker->next; + + /* + * Is this dot segment essential, + * i.e. is there a chance of changing semantics by dropping this dot segment? + * + * For example, changing "./http://foo" into "http://foo" would change semantics + * and hence the dot segment is essential to that case and cannot be removed. + */ + removeSegment = URI_TRUE; + if (relative && (walker == uri->pathHead) && (walker->next != NULL)) { + const URI_CHAR * ch = walker->next->text.first; + for (; ch < walker->next->text.afterLast; ch++) { + if (*ch == _UT(':')) { + removeSegment = URI_FALSE; + break; + } + } + } + + if (removeSegment) { + /* .. then let's go remove that segment. */ + /* Last segment? */ + if (walker->next != NULL) { + /* Not last segment, i.e. first or middle segment + * OLD: (prev|NULL) <- walker <- next + * NEW: (prev|NULL) <----------- next */ + walker->next->reserved = prev; + + if (prev == NULL) { + /* First but not last segment + * OLD: head -> walker -> next + * NEW: head -----------> next */ + uri->pathHead = walker->next; + } else { + /* Middle segment + * OLD: prev -> walker -> next + * NEW: prev -----------> next */ + prev->next = walker->next; + } + + if (pathOwned && (walker->text.first != walker->text.afterLast)) { + memory->free(memory, (URI_CHAR *)walker->text.first); + } + memory->free(memory, walker); + } else { + /* Last segment */ + if (pathOwned && (walker->text.first != walker->text.afterLast)) { + memory->free(memory, (URI_CHAR *)walker->text.first); + } + + if (prev == NULL) { + /* Last and first */ + if (URI_FUNC(IsHostSet)(uri)) { + /* Replace "." with empty segment to represent trailing slash */ + walker->text.first = URI_FUNC(SafeToPointTo); + walker->text.afterLast = URI_FUNC(SafeToPointTo); + } else { + memory->free(memory, walker); + + uri->pathHead = NULL; + uri->pathTail = NULL; + } + } else { + /* Last but not first, replace "." with empty segment to represent trailing slash */ + walker->text.first = URI_FUNC(SafeToPointTo); + walker->text.afterLast = URI_FUNC(SafeToPointTo); + } + } + + walker = nextBackup; + } + } + break; + + case 2: + if (((walker->text.first)[0] == _UT('.')) + && ((walker->text.first)[1] == _UT('.'))) { + /* Path ".." -> remove this and the previous segment */ + URI_TYPE(PathSegment) * const prev = walker->reserved; + URI_TYPE(PathSegment) * prevPrev; + URI_TYPE(PathSegment) * const nextBackup = walker->next; + + removeSegment = URI_TRUE; + if (relative) { + if (prev == NULL) { + /* We cannot remove traversal beyond because the + * URI is relative and may be resolved later. + * So we can simplify "a/../b/d" to "b/d" but + * we cannot simplify "../b/d" (outside of reference resolution). */ + removeSegment = URI_FALSE; + } else if ((prev != NULL) + && ((prev->text.afterLast - prev->text.first) == 2) + && ((prev->text.first)[0] == _UT('.')) + && ((prev->text.first)[1] == _UT('.'))) { + /* We need to protect against mis-simplifying "a/../../b" to "a/b". */ + removeSegment = URI_FALSE; + } + } + + if (removeSegment) { + if (prev != NULL) { + /* Not first segment */ + prevPrev = prev->reserved; + if (prevPrev != NULL) { + /* Not even prev is the first one + * OLD: prevPrev -> prev -> walker -> (next|NULL) + * NEW: prevPrev -------------------> (next|NULL) */ + prevPrev->next = walker->next; + if (walker->next != NULL) { + /* Update parent relationship as well + * OLD: prevPrev <- prev <- walker <- next + * NEW: prevPrev <------------------- next */ + walker->next->reserved = prevPrev; + } else { + /* Last segment -> insert "" segment to represent trailing slash, update tail */ + URI_TYPE(PathSegment) * const segment = memory->calloc(memory, 1, sizeof(URI_TYPE(PathSegment))); + if (segment == NULL) { + if (pathOwned && (walker->text.first != walker->text.afterLast)) { + memory->free(memory, (URI_CHAR *)walker->text.first); + } + memory->free(memory, walker); + + if (pathOwned && (prev->text.first != prev->text.afterLast)) { + memory->free(memory, (URI_CHAR *)prev->text.first); + } + memory->free(memory, prev); + + return URI_FALSE; /* Raises malloc error */ + } + segment->text.first = URI_FUNC(SafeToPointTo); + segment->text.afterLast = URI_FUNC(SafeToPointTo); + prevPrev->next = segment; + uri->pathTail = segment; + } + + if (pathOwned && (walker->text.first != walker->text.afterLast)) { + memory->free(memory, (URI_CHAR *)walker->text.first); + } + memory->free(memory, walker); + + if (pathOwned && (prev->text.first != prev->text.afterLast)) { + memory->free(memory, (URI_CHAR *)prev->text.first); + } + memory->free(memory, prev); + + walker = nextBackup; + } else { + /* Prev is the first segment */ + if (walker->next != NULL) { + uri->pathHead = walker->next; + walker->next->reserved = NULL; + + if (pathOwned && (walker->text.first != walker->text.afterLast)) { + memory->free(memory, (URI_CHAR *)walker->text.first); + } + memory->free(memory, walker); + } else { + /* Re-use segment for "" path segment to represent trailing slash, update tail */ + URI_TYPE(PathSegment) * const segment = walker; + if (pathOwned && (segment->text.first != segment->text.afterLast)) { + memory->free(memory, (URI_CHAR *)segment->text.first); + } + segment->text.first = URI_FUNC(SafeToPointTo); + segment->text.afterLast = URI_FUNC(SafeToPointTo); + uri->pathHead = segment; + uri->pathTail = segment; + } + + if (pathOwned && (prev->text.first != prev->text.afterLast)) { + memory->free(memory, (URI_CHAR *)prev->text.first); + } + memory->free(memory, prev); + + walker = nextBackup; + } + } else { + URI_TYPE(PathSegment) * const anotherNextBackup = walker->next; + int freeWalker = URI_TRUE; + + /* First segment */ + if (walker->next != NULL) { + /* First segment of multiple -> update head + * OLD: head -> walker -> next + * NEW: head -----------> next */ + uri->pathHead = walker->next; + + /* Update parent link as well + * OLD: head <- walker <- next + * NEW: head <----------- next */ + walker->next->reserved = NULL; + } else { + if (uri->absolutePath) { + /* First and only segment -> update head + * OLD: head -> walker -> NULL + * NEW: head -----------> NULL */ + uri->pathHead = NULL; + + /* Last segment -> update tail + * OLD: tail -> walker + * NEW: tail -> NULL */ + uri->pathTail = NULL; + } else { + /* Re-use segment for "" path segment to represent trailing slash, + * then update head and tail */ + if (pathOwned && (walker->text.first != walker->text.afterLast)) { + memory->free(memory, (URI_CHAR *)walker->text.first); + } + walker->text.first = URI_FUNC(SafeToPointTo); + walker->text.afterLast = URI_FUNC(SafeToPointTo); + freeWalker = URI_FALSE; + } + } + + if (freeWalker) { + if (pathOwned && (walker->text.first != walker->text.afterLast)) { + memory->free(memory, (URI_CHAR *)walker->text.first); + } + memory->free(memory, walker); + } + + walker = anotherNextBackup; + } + } + } + break; + } /* end of switch */ + + if (!removeSegment) { + /* .. then let's move to the next element, and start again. */ + if (walker->next != NULL) { + walker->next->reserved = walker; + } else { + /* Last segment -> update tail */ + uri->pathTail = walker; + } + walker = walker->next; + } + } while (walker != NULL); + + return URI_TRUE; +} + + + +/* Properly removes "." and ".." path segments */ +UriBool URI_FUNC(RemoveDotSegmentsAbsolute)(URI_TYPE(Uri) * uri, + UriMemoryManager * memory) { + const UriBool ABSOLUTE = URI_FALSE; + if (uri == NULL) { + return URI_TRUE; + } + return URI_FUNC(RemoveDotSegmentsEx)(uri, ABSOLUTE, uri->owner, memory); +} + + + +unsigned char URI_FUNC(HexdigToInt)(URI_CHAR hexdig) { + switch (hexdig) { + case _UT('0'): + case _UT('1'): + case _UT('2'): + case _UT('3'): + case _UT('4'): + case _UT('5'): + case _UT('6'): + case _UT('7'): + case _UT('8'): + case _UT('9'): + return (unsigned char)(9 + hexdig - _UT('9')); + + case _UT('a'): + case _UT('b'): + case _UT('c'): + case _UT('d'): + case _UT('e'): + case _UT('f'): + return (unsigned char)(15 + hexdig - _UT('f')); + + case _UT('A'): + case _UT('B'): + case _UT('C'): + case _UT('D'): + case _UT('E'): + case _UT('F'): + return (unsigned char)(15 + hexdig - _UT('F')); + + default: + return 0; + } +} + + + +URI_CHAR URI_FUNC(HexToLetter)(unsigned int value) { + /* Uppercase recommended in section 2.1. of RFC 3986 * + * http://tools.ietf.org/html/rfc3986#section-2.1 */ + return URI_FUNC(HexToLetterEx)(value, URI_TRUE); +} + + + +URI_CHAR URI_FUNC(HexToLetterEx)(unsigned int value, UriBool uppercase) { + switch (value) { + case 0: return _UT('0'); + case 1: return _UT('1'); + case 2: return _UT('2'); + case 3: return _UT('3'); + case 4: return _UT('4'); + case 5: return _UT('5'); + case 6: return _UT('6'); + case 7: return _UT('7'); + case 8: return _UT('8'); + case 9: return _UT('9'); + + case 10: return (uppercase == URI_TRUE) ? _UT('A') : _UT('a'); + case 11: return (uppercase == URI_TRUE) ? _UT('B') : _UT('b'); + case 12: return (uppercase == URI_TRUE) ? _UT('C') : _UT('c'); + case 13: return (uppercase == URI_TRUE) ? _UT('D') : _UT('d'); + case 14: return (uppercase == URI_TRUE) ? _UT('E') : _UT('e'); + default: return (uppercase == URI_TRUE) ? _UT('F') : _UT('f'); + } +} + + + +/* Checks if a URI has the host component set. */ +UriBool URI_FUNC(IsHostSet)(const URI_TYPE(Uri) * uri) { + return (uri != NULL) + && ((uri->hostText.first != NULL) + || (uri->hostData.ip4 != NULL) + || (uri->hostData.ip6 != NULL) + || (uri->hostData.ipFuture.first != NULL) + ); +} + + + +/* Copies the path segment list from one URI to another. */ +UriBool URI_FUNC(CopyPath)(URI_TYPE(Uri) * dest, + const URI_TYPE(Uri) * source, UriMemoryManager * memory) { + if (source->pathHead == NULL) { + /* No path component */ + dest->pathHead = NULL; + dest->pathTail = NULL; + } else { + /* Copy list but not the text contained */ + URI_TYPE(PathSegment) * sourceWalker = source->pathHead; + URI_TYPE(PathSegment) * destPrev = NULL; + do { + URI_TYPE(PathSegment) * cur = memory->malloc(memory, sizeof(URI_TYPE(PathSegment))); + if (cur == NULL) { + /* Fix broken list */ + if (destPrev != NULL) { + destPrev->next = NULL; + } + return URI_FALSE; /* Raises malloc error */ + } + + /* From this functions usage we know that * + * the dest URI cannot be uri->owner */ + cur->text = sourceWalker->text; + if (destPrev == NULL) { + /* First segment ever */ + dest->pathHead = cur; + } else { + destPrev->next = cur; + } + destPrev = cur; + sourceWalker = sourceWalker->next; + } while (sourceWalker != NULL); + dest->pathTail = destPrev; + dest->pathTail->next = NULL; + } + + dest->absolutePath = source->absolutePath; + return URI_TRUE; +} + + + +/* Copies the authority part of an URI over to another. */ +UriBool URI_FUNC(CopyAuthority)(URI_TYPE(Uri) * dest, + const URI_TYPE(Uri) * source, UriMemoryManager * memory) { + /* From this functions usage we know that * + * the dest URI cannot be uri->owner */ + + /* Copy userInfo */ + dest->userInfo = source->userInfo; + + /* Copy hostText */ + dest->hostText = source->hostText; + + /* Copy hostData */ + if (source->hostData.ip4 != NULL) { + dest->hostData.ip4 = memory->malloc(memory, sizeof(UriIp4)); + if (dest->hostData.ip4 == NULL) { + return URI_FALSE; /* Raises malloc error */ + } + *(dest->hostData.ip4) = *(source->hostData.ip4); + dest->hostData.ip6 = NULL; + dest->hostData.ipFuture.first = NULL; + dest->hostData.ipFuture.afterLast = NULL; + } else if (source->hostData.ip6 != NULL) { + dest->hostData.ip4 = NULL; + dest->hostData.ip6 = memory->malloc(memory, sizeof(UriIp6)); + if (dest->hostData.ip6 == NULL) { + return URI_FALSE; /* Raises malloc error */ + } + *(dest->hostData.ip6) = *(source->hostData.ip6); + dest->hostData.ipFuture.first = NULL; + dest->hostData.ipFuture.afterLast = NULL; + } else { + dest->hostData.ip4 = NULL; + dest->hostData.ip6 = NULL; + dest->hostData.ipFuture = source->hostData.ipFuture; + } + + /* Copy portText */ + dest->portText = source->portText; + + return URI_TRUE; +} + + + +UriBool URI_FUNC(FixAmbiguity)(URI_TYPE(Uri) * uri, + UriMemoryManager * memory) { + URI_TYPE(PathSegment) * segment; + + if ( /* Case 1: absolute path, empty first segment */ + (uri->absolutePath + && (uri->pathHead != NULL) + && (uri->pathHead->text.afterLast == uri->pathHead->text.first)) + + /* Case 2: relative path, empty first and second segment */ + || (!uri->absolutePath + && (uri->pathHead != NULL) + && (uri->pathHead->next != NULL) + && (uri->pathHead->text.afterLast == uri->pathHead->text.first) + && (uri->pathHead->next->text.afterLast == uri->pathHead->next->text.first))) { + /* NOOP */ + } else { + return URI_TRUE; + } + + segment = memory->malloc(memory, 1 * sizeof(URI_TYPE(PathSegment))); + if (segment == NULL) { + return URI_FALSE; /* Raises malloc error */ + } + + /* Insert "." segment in front */ + segment->next = uri->pathHead; + segment->text.first = URI_FUNC(ConstPwd); + segment->text.afterLast = URI_FUNC(ConstPwd) + 1; + uri->pathHead = segment; + return URI_TRUE; +} + + + +void URI_FUNC(FixEmptyTrailSegment)(URI_TYPE(Uri) * uri, + UriMemoryManager * memory) { + /* Fix path if only one empty segment */ + if (!uri->absolutePath + && !URI_FUNC(IsHostSet)(uri) + && (uri->pathHead != NULL) + && (uri->pathHead->next == NULL) + && (uri->pathHead->text.first == uri->pathHead->text.afterLast)) { + memory->free(memory, uri->pathHead); + uri->pathHead = NULL; + uri->pathTail = NULL; + } +} + + + +#endif diff --git a/ext/uri/uriparser/src/UriCommon.h b/ext/uri/uriparser/src/UriCommon.h new file mode 100644 index 0000000000000..7937a3951ce5b --- /dev/null +++ b/ext/uri/uriparser/src/UriCommon.h @@ -0,0 +1,104 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#if (defined(URI_PASS_ANSI) && !defined(URI_COMMON_H_ANSI)) \ + || (defined(URI_PASS_UNICODE) && !defined(URI_COMMON_H_UNICODE)) \ + || (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriCommon.h" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriCommon.h" +# undef URI_PASS_UNICODE +# endif +/* Only one pass for each encoding */ +#elif (defined(URI_PASS_ANSI) && !defined(URI_COMMON_H_ANSI) \ + && defined(URI_ENABLE_ANSI)) || (defined(URI_PASS_UNICODE) \ + && !defined(URI_COMMON_H_UNICODE) && defined(URI_ENABLE_UNICODE)) +# ifdef URI_PASS_ANSI +# define URI_COMMON_H_ANSI 1 +# include +# else +# define URI_COMMON_H_UNICODE 1 +# include +# endif + + + +/* Used to point to from empty path segments. + * X.first and X.afterLast must be the same non-NULL value then. */ +extern const URI_CHAR * const URI_FUNC(ConstPwd); +extern const URI_CHAR * const URI_FUNC(ConstParent); + + + +void URI_FUNC(ResetUri)(URI_TYPE(Uri) * uri); + +int URI_FUNC(CompareRange)( + const URI_TYPE(TextRange) * a, + const URI_TYPE(TextRange) * b); + +UriBool URI_FUNC(RemoveDotSegmentsAbsolute)(URI_TYPE(Uri) * uri, + UriMemoryManager * memory); +UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri, + UriBool relative, UriBool pathOwned, UriMemoryManager * memory); + +unsigned char URI_FUNC(HexdigToInt)(URI_CHAR hexdig); +URI_CHAR URI_FUNC(HexToLetter)(unsigned int value); +URI_CHAR URI_FUNC(HexToLetterEx)(unsigned int value, UriBool uppercase); + +UriBool URI_FUNC(CopyPath)(URI_TYPE(Uri) * dest, const URI_TYPE(Uri) * source, + UriMemoryManager * memory); +UriBool URI_FUNC(CopyAuthority)(URI_TYPE(Uri) * dest, + const URI_TYPE(Uri) * source, UriMemoryManager * memory); + +UriBool URI_FUNC(FixAmbiguity)(URI_TYPE(Uri) * uri, UriMemoryManager * memory); +void URI_FUNC(FixEmptyTrailSegment)(URI_TYPE(Uri) * uri, + UriMemoryManager * memory); + + +#endif +#endif diff --git a/ext/uri/uriparser/src/UriCompare.c b/ext/uri/uriparser/src/UriCompare.c new file mode 100644 index 0000000000000..bca3db92361d9 --- /dev/null +++ b/ext/uri/uriparser/src/UriCompare.c @@ -0,0 +1,168 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriCompare.c" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriCompare.c" +# undef URI_PASS_UNICODE +# endif +#else +# ifdef URI_PASS_ANSI +# include +# else +# include +# include +# endif + + + +#ifndef URI_DOXYGEN +# include +# include +# include "UriCommon.h" +#endif + + + +UriBool URI_FUNC(EqualsUri)(const URI_TYPE(Uri) * a, + const URI_TYPE(Uri) * b) { + /* NOTE: Both NULL means equal! */ + if ((a == NULL) || (b == NULL)) { + return ((a == NULL) && (b == NULL)) ? URI_TRUE : URI_FALSE; + } + + /* scheme */ + if (URI_FUNC(CompareRange)(&(a->scheme), &(b->scheme))) { + return URI_FALSE; + } + + /* absolutePath */ + if ((a->scheme.first == NULL)&& (a->absolutePath != b->absolutePath)) { + return URI_FALSE; + } + + /* userInfo */ + if (URI_FUNC(CompareRange)(&(a->userInfo), &(b->userInfo))) { + return URI_FALSE; + } + + /* Host */ + if (((a->hostData.ip4 == NULL) != (b->hostData.ip4 == NULL)) + || ((a->hostData.ip6 == NULL) != (b->hostData.ip6 == NULL)) + || ((a->hostData.ipFuture.first == NULL) + != (b->hostData.ipFuture.first == NULL))) { + return URI_FALSE; + } + + if (a->hostData.ip4 != NULL) { + if (memcmp(a->hostData.ip4->data, b->hostData.ip4->data, 4)) { + return URI_FALSE; + } + } + + if (a->hostData.ip6 != NULL) { + if (memcmp(a->hostData.ip6->data, b->hostData.ip6->data, 16)) { + return URI_FALSE; + } + } + + if (a->hostData.ipFuture.first != NULL) { + if (URI_FUNC(CompareRange)(&(a->hostData.ipFuture), &(b->hostData.ipFuture))) { + return URI_FALSE; + } + } + + if ((a->hostData.ip4 == NULL) + && (a->hostData.ip6 == NULL) + && (a->hostData.ipFuture.first == NULL)) { + if (URI_FUNC(CompareRange)(&(a->hostText), &(b->hostText))) { + return URI_FALSE; + } + } + + /* portText */ + if (URI_FUNC(CompareRange)(&(a->portText), &(b->portText))) { + return URI_FALSE; + } + + /* Path */ + if ((a->pathHead == NULL) != (b->pathHead == NULL)) { + return URI_FALSE; + } + + if (a->pathHead != NULL) { + URI_TYPE(PathSegment) * walkA = a->pathHead; + URI_TYPE(PathSegment) * walkB = b->pathHead; + do { + if (URI_FUNC(CompareRange)(&(walkA->text), &(walkB->text))) { + return URI_FALSE; + } + if ((walkA->next == NULL) != (walkB->next == NULL)) { + return URI_FALSE; + } + walkA = walkA->next; + walkB = walkB->next; + } while (walkA != NULL); + } + + /* query */ + if (URI_FUNC(CompareRange)(&(a->query), &(b->query))) { + return URI_FALSE; + } + + /* fragment */ + if (URI_FUNC(CompareRange)(&(a->fragment), &(b->fragment))) { + return URI_FALSE; + } + + return URI_TRUE; /* Equal*/ +} + + + +#endif diff --git a/ext/uri/uriparser/src/UriConfig.h b/ext/uri/uriparser/src/UriConfig.h new file mode 100644 index 0000000000000..e799cec33d697 --- /dev/null +++ b/ext/uri/uriparser/src/UriConfig.h @@ -0,0 +1,51 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2018, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#if !defined(URI_CONFIG_H) +# define URI_CONFIG_H 1 + + + +#define PACKAGE_VERSION "0.9.8" + +//#define HAVE_WPRINTF +//#define HAVE_REALLOCARRAY + + + +#endif /* !defined(URI_CONFIG_H) */ diff --git a/ext/uri/uriparser/src/UriConfig.h.in b/ext/uri/uriparser/src/UriConfig.h.in new file mode 100644 index 0000000000000..a16a93381c102 --- /dev/null +++ b/ext/uri/uriparser/src/UriConfig.h.in @@ -0,0 +1,51 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2018, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#if !defined(URI_CONFIG_H) +# define URI_CONFIG_H 1 + + + +#define PACKAGE_VERSION "@PROJECT_VERSION@" + +#cmakedefine HAVE_WPRINTF +#cmakedefine HAVE_REALLOCARRAY + + + +#endif /* !defined(URI_CONFIG_H) */ diff --git a/ext/uri/uriparser/src/UriEscape.c b/ext/uri/uriparser/src/UriEscape.c new file mode 100644 index 0000000000000..ab8305fa9735c --- /dev/null +++ b/ext/uri/uriparser/src/UriEscape.c @@ -0,0 +1,453 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriEscape.c" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriEscape.c" +# undef URI_PASS_UNICODE +# endif +#else +# ifdef URI_PASS_ANSI +# include +# else +# include +# include +# endif + + + +#ifndef URI_DOXYGEN +# include +# include "UriCommon.h" +#endif + + + +URI_CHAR * URI_FUNC(Escape)(const URI_CHAR * in, URI_CHAR * out, + UriBool spaceToPlus, UriBool normalizeBreaks) { + return URI_FUNC(EscapeEx)(in, NULL, out, spaceToPlus, normalizeBreaks); +} + + + +URI_CHAR * URI_FUNC(EscapeEx)(const URI_CHAR * inFirst, + const URI_CHAR * inAfterLast, URI_CHAR * out, + UriBool spaceToPlus, UriBool normalizeBreaks) { + const URI_CHAR * read = inFirst; + URI_CHAR * write = out; + UriBool prevWasCr = URI_FALSE; + if ((out == NULL) || (inFirst == out)) { + return NULL; + } else if (inFirst == NULL) { + if (out != NULL) { + out[0] = _UT('\0'); + } + return out; + } + + for (;;) { + if ((inAfterLast != NULL) && (read >= inAfterLast)) { + write[0] = _UT('\0'); + return write; + } + + switch (read[0]) { + case _UT('\0'): + write[0] = _UT('\0'); + return write; + + case _UT(' '): + if (spaceToPlus) { + write[0] = _UT('+'); + write++; + } else { + write[0] = _UT('%'); + write[1] = _UT('2'); + write[2] = _UT('0'); + write += 3; + } + prevWasCr = URI_FALSE; + break; + + case _UT('a'): /* ALPHA */ + case _UT('A'): + case _UT('b'): + case _UT('B'): + case _UT('c'): + case _UT('C'): + case _UT('d'): + case _UT('D'): + case _UT('e'): + case _UT('E'): + case _UT('f'): + case _UT('F'): + case _UT('g'): + case _UT('G'): + case _UT('h'): + case _UT('H'): + case _UT('i'): + case _UT('I'): + case _UT('j'): + case _UT('J'): + case _UT('k'): + case _UT('K'): + case _UT('l'): + case _UT('L'): + case _UT('m'): + case _UT('M'): + case _UT('n'): + case _UT('N'): + case _UT('o'): + case _UT('O'): + case _UT('p'): + case _UT('P'): + case _UT('q'): + case _UT('Q'): + case _UT('r'): + case _UT('R'): + case _UT('s'): + case _UT('S'): + case _UT('t'): + case _UT('T'): + case _UT('u'): + case _UT('U'): + case _UT('v'): + case _UT('V'): + case _UT('w'): + case _UT('W'): + case _UT('x'): + case _UT('X'): + case _UT('y'): + case _UT('Y'): + case _UT('z'): + case _UT('Z'): + case _UT('0'): /* DIGIT */ + case _UT('1'): + case _UT('2'): + case _UT('3'): + case _UT('4'): + case _UT('5'): + case _UT('6'): + case _UT('7'): + case _UT('8'): + case _UT('9'): + case _UT('-'): /* "-" / "." / "_" / "~" */ + case _UT('.'): + case _UT('_'): + case _UT('~'): + /* Copy unmodified */ + write[0] = read[0]; + write++; + + prevWasCr = URI_FALSE; + break; + + case _UT('\x0a'): + if (normalizeBreaks) { + if (!prevWasCr) { + write[0] = _UT('%'); + write[1] = _UT('0'); + write[2] = _UT('D'); + write[3] = _UT('%'); + write[4] = _UT('0'); + write[5] = _UT('A'); + write += 6; + } + } else { + write[0] = _UT('%'); + write[1] = _UT('0'); + write[2] = _UT('A'); + write += 3; + } + prevWasCr = URI_FALSE; + break; + + case _UT('\x0d'): + if (normalizeBreaks) { + write[0] = _UT('%'); + write[1] = _UT('0'); + write[2] = _UT('D'); + write[3] = _UT('%'); + write[4] = _UT('0'); + write[5] = _UT('A'); + write += 6; + } else { + write[0] = _UT('%'); + write[1] = _UT('0'); + write[2] = _UT('D'); + write += 3; + } + prevWasCr = URI_TRUE; + break; + + default: + /* Percent encode */ + { + const unsigned char code = (unsigned char)read[0]; + write[0] = _UT('%'); + write[1] = URI_FUNC(HexToLetter)(code >> 4); + write[2] = URI_FUNC(HexToLetter)(code & 0x0f); + write += 3; + } + prevWasCr = URI_FALSE; + break; + } + + read++; + } +} + + + +const URI_CHAR * URI_FUNC(UnescapeInPlace)(URI_CHAR * inout) { + return URI_FUNC(UnescapeInPlaceEx)(inout, URI_FALSE, URI_BR_DONT_TOUCH); +} + + + +const URI_CHAR * URI_FUNC(UnescapeInPlaceEx)(URI_CHAR * inout, + UriBool plusToSpace, UriBreakConversion breakConversion) { + URI_CHAR * read = inout; + URI_CHAR * write = inout; + UriBool prevWasCr = URI_FALSE; + + if (inout == NULL) { + return NULL; + } + + for (;;) { + switch (read[0]) { + case _UT('\0'): + if (read > write) { + write[0] = _UT('\0'); + } + return write; + + case _UT('%'): + switch (read[1]) { + case _UT('0'): + case _UT('1'): + case _UT('2'): + case _UT('3'): + case _UT('4'): + case _UT('5'): + case _UT('6'): + case _UT('7'): + case _UT('8'): + case _UT('9'): + case _UT('a'): + case _UT('b'): + case _UT('c'): + case _UT('d'): + case _UT('e'): + case _UT('f'): + case _UT('A'): + case _UT('B'): + case _UT('C'): + case _UT('D'): + case _UT('E'): + case _UT('F'): + switch (read[2]) { + case _UT('0'): + case _UT('1'): + case _UT('2'): + case _UT('3'): + case _UT('4'): + case _UT('5'): + case _UT('6'): + case _UT('7'): + case _UT('8'): + case _UT('9'): + case _UT('a'): + case _UT('b'): + case _UT('c'): + case _UT('d'): + case _UT('e'): + case _UT('f'): + case _UT('A'): + case _UT('B'): + case _UT('C'): + case _UT('D'): + case _UT('E'): + case _UT('F'): + { + /* Percent group found */ + const unsigned char left = URI_FUNC(HexdigToInt)(read[1]); + const unsigned char right = URI_FUNC(HexdigToInt)(read[2]); + const int code = 16 * left + right; + switch (code) { + case 10: + switch (breakConversion) { + case URI_BR_TO_LF: + if (!prevWasCr) { + write[0] = (URI_CHAR)10; + write++; + } + break; + + case URI_BR_TO_CRLF: + if (!prevWasCr) { + write[0] = (URI_CHAR)13; + write[1] = (URI_CHAR)10; + write += 2; + } + break; + + case URI_BR_TO_CR: + if (!prevWasCr) { + write[0] = (URI_CHAR)13; + write++; + } + break; + + case URI_BR_DONT_TOUCH: + default: + write[0] = (URI_CHAR)10; + write++; + + } + prevWasCr = URI_FALSE; + break; + + case 13: + switch (breakConversion) { + case URI_BR_TO_LF: + write[0] = (URI_CHAR)10; + write++; + break; + + case URI_BR_TO_CRLF: + write[0] = (URI_CHAR)13; + write[1] = (URI_CHAR)10; + write += 2; + break; + + case URI_BR_TO_CR: + write[0] = (URI_CHAR)13; + write++; + break; + + case URI_BR_DONT_TOUCH: + default: + write[0] = (URI_CHAR)13; + write++; + + } + prevWasCr = URI_TRUE; + break; + + default: + write[0] = (URI_CHAR)(code); + write++; + + prevWasCr = URI_FALSE; + + } + read += 3; + } + break; + + default: + /* Copy two chars unmodified and */ + /* look at this char again */ + if (read > write) { + write[0] = read[0]; + write[1] = read[1]; + } + read += 2; + write += 2; + + prevWasCr = URI_FALSE; + } + break; + + default: + /* Copy one char unmodified and */ + /* look at this char again */ + if (read > write) { + write[0] = read[0]; + } + read++; + write++; + + prevWasCr = URI_FALSE; + } + break; + + case _UT('+'): + if (plusToSpace) { + /* Convert '+' to ' ' */ + write[0] = _UT(' '); + } else { + /* Copy one char unmodified */ + if (read > write) { + write[0] = read[0]; + } + } + read++; + write++; + + prevWasCr = URI_FALSE; + break; + + default: + /* Copy one char unmodified */ + if (read > write) { + write[0] = read[0]; + } + read++; + write++; + + prevWasCr = URI_FALSE; + } + } +} + + + +#endif diff --git a/ext/uri/uriparser/src/UriFile.c b/ext/uri/uriparser/src/UriFile.c new file mode 100644 index 0000000000000..232957d3c8079 --- /dev/null +++ b/ext/uri/uriparser/src/UriFile.c @@ -0,0 +1,242 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriFile.c" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriFile.c" +# undef URI_PASS_UNICODE +# endif +#else +# ifdef URI_PASS_ANSI +# include +# else +# include +# include +# endif + + + +#ifndef URI_DOXYGEN +# include +#endif + + + +#include /* for size_t, avoiding stddef.h for older MSVCs */ + + + +static URI_INLINE int URI_FUNC(FilenameToUriString)(const URI_CHAR * filename, + URI_CHAR * uriString, UriBool fromUnix) { + const URI_CHAR * input = filename; + const URI_CHAR * lastSep = input - 1; + UriBool firstSegment = URI_TRUE; + URI_CHAR * output = uriString; + UriBool absolute; + UriBool is_windows_network; + + if ((filename == NULL) || (uriString == NULL)) { + return URI_ERROR_NULL; + } + + is_windows_network = (filename[0] == _UT('\\')) && (filename[1] == _UT('\\')); + absolute = fromUnix + ? (filename[0] == _UT('/')) + : (((filename[0] != _UT('\0')) && (filename[1] == _UT(':'))) + || is_windows_network); + + if (absolute) { + const URI_CHAR * const prefix = fromUnix + ? _UT("file://") + : is_windows_network + ? _UT("file:") + : _UT("file:///"); + const size_t prefixLen = URI_STRLEN(prefix); + + /* Copy prefix */ + memcpy(uriString, prefix, prefixLen * sizeof(URI_CHAR)); + output += prefixLen; + } + + /* Copy and escape on the fly */ + for (;;) { + if ((input[0] == _UT('\0')) + || (fromUnix && input[0] == _UT('/')) + || (!fromUnix && input[0] == _UT('\\'))) { + /* Copy text after last separator */ + if (lastSep + 1 < input) { + if (!fromUnix && absolute && (firstSegment == URI_TRUE)) { + /* Quick hack to not convert "C:" to "C%3A" */ + const int charsToCopy = (int)(input - (lastSep + 1)); + memcpy(output, lastSep + 1, charsToCopy * sizeof(URI_CHAR)); + output += charsToCopy; + } else { + output = URI_FUNC(EscapeEx)(lastSep + 1, input, output, + URI_FALSE, URI_FALSE); + } + } + firstSegment = URI_FALSE; + } + + if (input[0] == _UT('\0')) { + output[0] = _UT('\0'); + break; + } else if (fromUnix && (input[0] == _UT('/'))) { + /* Copy separators unmodified */ + output[0] = _UT('/'); + output++; + lastSep = input; + } else if (!fromUnix && (input[0] == _UT('\\'))) { + /* Convert backslashes to forward slashes */ + output[0] = _UT('/'); + output++; + lastSep = input; + } + input++; + } + + return URI_SUCCESS; +} + + + +static URI_INLINE int URI_FUNC(UriStringToFilename)(const URI_CHAR * uriString, + URI_CHAR * filename, UriBool toUnix) { + if ((uriString == NULL) || (filename == NULL)) { + return URI_ERROR_NULL; + } + + { + const UriBool file_unknown_slashes = + URI_STRNCMP(uriString, _UT("file:"), URI_STRLEN(_UT("file:"))) == 0; + const UriBool file_one_or_more_slashes = file_unknown_slashes + && (URI_STRNCMP(uriString, _UT("file:/"), URI_STRLEN(_UT("file:/"))) == 0); + const UriBool file_two_or_more_slashes = file_one_or_more_slashes + && (URI_STRNCMP(uriString, _UT("file://"), URI_STRLEN(_UT("file://"))) == 0); + const UriBool file_three_or_more_slashes = file_two_or_more_slashes + && (URI_STRNCMP(uriString, _UT("file:///"), URI_STRLEN(_UT("file:///"))) == 0); + + const size_t charsToSkip = file_two_or_more_slashes + ? file_three_or_more_slashes + ? toUnix + /* file:///bin/bash */ + ? URI_STRLEN(_UT("file://")) + /* file:///E:/Documents%20and%20Settings */ + : URI_STRLEN(_UT("file:///")) + /* file://Server01/Letter.txt */ + : URI_STRLEN(_UT("file://")) + : ((file_one_or_more_slashes && toUnix) + /* file:/bin/bash */ + /* https://tools.ietf.org/html/rfc8089#appendix-B */ + ? URI_STRLEN(_UT("file:")) + : ((! toUnix && file_unknown_slashes && ! file_one_or_more_slashes) + /* file:c:/path/to/file */ + /* https://tools.ietf.org/html/rfc8089#appendix-E.2 */ + ? URI_STRLEN(_UT("file:")) + : 0)); + const size_t charsToCopy = URI_STRLEN(uriString + charsToSkip) + 1; + + const UriBool is_windows_network_with_authority = + (toUnix == URI_FALSE) + && file_two_or_more_slashes + && ! file_three_or_more_slashes; + + URI_CHAR * const unescape_target = is_windows_network_with_authority + ? (filename + 2) + : filename; + + if (is_windows_network_with_authority) { + filename[0] = '\\'; + filename[1] = '\\'; + } + + memcpy(unescape_target, uriString + charsToSkip, charsToCopy * sizeof(URI_CHAR)); + URI_FUNC(UnescapeInPlaceEx)(filename, URI_FALSE, URI_BR_DONT_TOUCH); + } + + /* Convert forward slashes to backslashes */ + if (!toUnix) { + URI_CHAR * walker = filename; + while (walker[0] != _UT('\0')) { + if (walker[0] == _UT('/')) { + walker[0] = _UT('\\'); + } + walker++; + } + } + + return URI_SUCCESS; +} + + + +int URI_FUNC(UnixFilenameToUriString)(const URI_CHAR * filename, URI_CHAR * uriString) { + return URI_FUNC(FilenameToUriString)(filename, uriString, URI_TRUE); +} + + + +int URI_FUNC(WindowsFilenameToUriString)(const URI_CHAR * filename, URI_CHAR * uriString) { + return URI_FUNC(FilenameToUriString)(filename, uriString, URI_FALSE); +} + + + +int URI_FUNC(UriStringToUnixFilename)(const URI_CHAR * uriString, URI_CHAR * filename) { + return URI_FUNC(UriStringToFilename)(uriString, filename, URI_TRUE); +} + + + +int URI_FUNC(UriStringToWindowsFilename)(const URI_CHAR * uriString, URI_CHAR * filename) { + return URI_FUNC(UriStringToFilename)(uriString, filename, URI_FALSE); +} + + + +#endif diff --git a/ext/uri/uriparser/src/UriIp4.c b/ext/uri/uriparser/src/UriIp4.c new file mode 100644 index 0000000000000..a794265269da5 --- /dev/null +++ b/ext/uri/uriparser/src/UriIp4.c @@ -0,0 +1,329 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file UriIp4.c + * Holds the IPv4 parser implementation. + * NOTE: This source file includes itself twice. + */ + +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriIp4.c" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriIp4.c" +# undef URI_PASS_UNICODE +# endif +#else +# ifdef URI_PASS_ANSI +# include +# else +# include +# endif + + + +#ifndef URI_DOXYGEN +# include +# include "UriIp4Base.h" +# include +#endif + + + +/* Prototypes */ +static const URI_CHAR * URI_FUNC(ParseDecOctet)(UriIp4Parser * parser, + const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseDecOctetOne)(UriIp4Parser * parser, + const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseDecOctetTwo)(UriIp4Parser * parser, + const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseDecOctetThree)(UriIp4Parser * parser, + const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseDecOctetFour)(UriIp4Parser * parser, + const URI_CHAR * first, const URI_CHAR * afterLast); + + + +/* + * [ipFourAddress]->[decOctet]<.>[decOctet]<.>[decOctet]<.>[decOctet] + */ +int URI_FUNC(ParseIpFourAddress)(unsigned char * octetOutput, + const URI_CHAR * first, const URI_CHAR * afterLast) { + const URI_CHAR * after; + UriIp4Parser parser; + + /* Essential checks */ + if ((octetOutput == NULL) || (first == NULL) + || (afterLast <= first)) { + return URI_ERROR_SYNTAX; + } + + /* Reset parser */ + parser.stackCount = 0; + + /* Octet #1 */ + after = URI_FUNC(ParseDecOctet)(&parser, first, afterLast); + if ((after == NULL) || (after >= afterLast) || (*after != _UT('.'))) { + return URI_ERROR_SYNTAX; + } + uriStackToOctet(&parser, octetOutput); + + /* Octet #2 */ + after = URI_FUNC(ParseDecOctet)(&parser, after + 1, afterLast); + if ((after == NULL) || (after >= afterLast) || (*after != _UT('.'))) { + return URI_ERROR_SYNTAX; + } + uriStackToOctet(&parser, octetOutput + 1); + + /* Octet #3 */ + after = URI_FUNC(ParseDecOctet)(&parser, after + 1, afterLast); + if ((after == NULL) || (after >= afterLast) || (*after != _UT('.'))) { + return URI_ERROR_SYNTAX; + } + uriStackToOctet(&parser, octetOutput + 2); + + /* Octet #4 */ + after = URI_FUNC(ParseDecOctet)(&parser, after + 1, afterLast); + if (after != afterLast) { + return URI_ERROR_SYNTAX; + } + uriStackToOctet(&parser, octetOutput + 3); + + return URI_SUCCESS; +} + + + +/* + * [decOctet]-><0> + * [decOctet]-><1>[decOctetOne] + * [decOctet]-><2>[decOctetTwo] + * [decOctet]-><3>[decOctetThree] + * [decOctet]-><4>[decOctetThree] + * [decOctet]-><5>[decOctetThree] + * [decOctet]-><6>[decOctetThree] + * [decOctet]-><7>[decOctetThree] + * [decOctet]-><8>[decOctetThree] + * [decOctet]-><9>[decOctetThree] + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseDecOctet)(UriIp4Parser * parser, + const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return NULL; + } + + switch (*first) { + case _UT('0'): + uriPushToStack(parser, 0); + return first + 1; + + case _UT('1'): + uriPushToStack(parser, 1); + return (const URI_CHAR *)URI_FUNC(ParseDecOctetOne)(parser, first + 1, afterLast); + + case _UT('2'): + uriPushToStack(parser, 2); + return (const URI_CHAR *)URI_FUNC(ParseDecOctetTwo)(parser, first + 1, afterLast); + + case _UT('3'): + case _UT('4'): + case _UT('5'): + case _UT('6'): + case _UT('7'): + case _UT('8'): + case _UT('9'): + uriPushToStack(parser, (unsigned char)(9 + *first - _UT('9'))); + return (const URI_CHAR *)URI_FUNC(ParseDecOctetThree)(parser, first + 1, afterLast); + + default: + return NULL; + } +} + + + +/* + * [decOctetOne]-> + * [decOctetOne]->[DIGIT][decOctetThree] + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseDecOctetOne)(UriIp4Parser * parser, + const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('0'): + case _UT('1'): + case _UT('2'): + case _UT('3'): + case _UT('4'): + case _UT('5'): + case _UT('6'): + case _UT('7'): + case _UT('8'): + case _UT('9'): + uriPushToStack(parser, (unsigned char)(9 + *first - _UT('9'))); + return (const URI_CHAR *)URI_FUNC(ParseDecOctetThree)(parser, first + 1, afterLast); + + default: + return first; + } +} + + + +/* + * [decOctetTwo]-> + * [decOctetTwo]-><0>[decOctetThree] + * [decOctetTwo]-><1>[decOctetThree] + * [decOctetTwo]-><2>[decOctetThree] + * [decOctetTwo]-><3>[decOctetThree] + * [decOctetTwo]-><4>[decOctetThree] + * [decOctetTwo]-><5>[decOctetFour] + * [decOctetTwo]-><6> + * [decOctetTwo]-><7> + * [decOctetTwo]-><8> + * [decOctetTwo]-><9> +*/ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseDecOctetTwo)(UriIp4Parser * parser, + const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('0'): + case _UT('1'): + case _UT('2'): + case _UT('3'): + case _UT('4'): + uriPushToStack(parser, (unsigned char)(9 + *first - _UT('9'))); + return (const URI_CHAR *)URI_FUNC(ParseDecOctetThree)(parser, first + 1, afterLast); + + case _UT('5'): + uriPushToStack(parser, 5); + return (const URI_CHAR *)URI_FUNC(ParseDecOctetFour)(parser, first + 1, afterLast); + + case _UT('6'): + case _UT('7'): + case _UT('8'): + case _UT('9'): + uriPushToStack(parser, (unsigned char)(9 + *first - _UT('9'))); + return first + 1; + + default: + return first; + } +} + + + +/* + * [decOctetThree]-> + * [decOctetThree]->[DIGIT] + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseDecOctetThree)(UriIp4Parser * parser, + const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('0'): + case _UT('1'): + case _UT('2'): + case _UT('3'): + case _UT('4'): + case _UT('5'): + case _UT('6'): + case _UT('7'): + case _UT('8'): + case _UT('9'): + uriPushToStack(parser, (unsigned char)(9 + *first - _UT('9'))); + return first + 1; + + default: + return first; + } +} + + + +/* + * [decOctetFour]-> + * [decOctetFour]-><0> + * [decOctetFour]-><1> + * [decOctetFour]-><2> + * [decOctetFour]-><3> + * [decOctetFour]-><4> + * [decOctetFour]-><5> + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseDecOctetFour)(UriIp4Parser * parser, + const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('0'): + case _UT('1'): + case _UT('2'): + case _UT('3'): + case _UT('4'): + case _UT('5'): + uriPushToStack(parser, (unsigned char)(9 + *first - _UT('9'))); + return first + 1; + + default: + return first; + } +} + + + +#endif diff --git a/ext/uri/uriparser/src/UriIp4Base.c b/ext/uri/uriparser/src/UriIp4Base.c new file mode 100644 index 0000000000000..ded7c32c9940e --- /dev/null +++ b/ext/uri/uriparser/src/UriIp4Base.c @@ -0,0 +1,96 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file UriIp4Base.c + * Holds code independent of the encoding pass. + */ + +#ifndef URI_DOXYGEN +# include "UriIp4Base.h" +#endif + + + +void uriStackToOctet(UriIp4Parser * parser, unsigned char * octet) { + switch (parser->stackCount) { + case 1: + *octet = parser->stackOne; + break; + + case 2: + *octet = parser->stackOne * 10 + + parser->stackTwo; + break; + + case 3: + *octet = parser->stackOne * 100 + + parser->stackTwo * 10 + + parser->stackThree; + break; + + default: + ; + } + parser->stackCount = 0; +} + + + +void uriPushToStack(UriIp4Parser * parser, unsigned char digit) { + switch (parser->stackCount) { + case 0: + parser->stackOne = digit; + parser->stackCount = 1; + break; + + case 1: + parser->stackTwo = digit; + parser->stackCount = 2; + break; + + case 2: + parser->stackThree = digit; + parser->stackCount = 3; + break; + + default: + ; + } +} diff --git a/ext/uri/uriparser/src/UriIp4Base.h b/ext/uri/uriparser/src/UriIp4Base.h new file mode 100644 index 0000000000000..4867850224033 --- /dev/null +++ b/ext/uri/uriparser/src/UriIp4Base.h @@ -0,0 +1,59 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef URI_IP4_BASE_H +#define URI_IP4_BASE_H 1 + + + +typedef struct UriIp4ParserStruct { + unsigned char stackCount; + unsigned char stackOne; + unsigned char stackTwo; + unsigned char stackThree; +} UriIp4Parser; + + + +void uriPushToStack(UriIp4Parser * parser, unsigned char digit); +void uriStackToOctet(UriIp4Parser * parser, unsigned char * octet); + + + +#endif /* URI_IP4_BASE_H */ diff --git a/ext/uri/uriparser/src/UriMemory.c b/ext/uri/uriparser/src/UriMemory.c new file mode 100644 index 0000000000000..916d7cea8308a --- /dev/null +++ b/ext/uri/uriparser/src/UriMemory.c @@ -0,0 +1,471 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2018, Weijia Song + * Copyright (C) 2018, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file UriMemory.c + * Holds memory manager implementation. + */ + +#include "UriConfig.h" /* for HAVE_REALLOCARRAY */ + +#ifdef HAVE_REALLOCARRAY +# ifndef _GNU_SOURCE +# define _GNU_SOURCE 1 +# endif +# ifdef __NetBSD__ +# define _OPENBSD_SOURCE 1 +# endif +#endif + +#include +#include + + + +#ifndef URI_DOXYGEN +# include "UriMemory.h" +#endif + + + +#define URI_CHECK_ALLOC_OVERFLOW(total_size, nmemb, size) \ + do { \ + /* check for unsigned overflow */ \ + if ((nmemb != 0) && (total_size / nmemb != size)) { \ + errno = ENOMEM; \ + return NULL; \ + } \ + } while (0) + + + +static void * uriDefaultMalloc(UriMemoryManager * URI_UNUSED(memory), + size_t size) { + return malloc(size); +} + + + +static void * uriDefaultCalloc(UriMemoryManager * URI_UNUSED(memory), + size_t nmemb, size_t size) { + return calloc(nmemb, size); +} + + + +static void * uriDefaultRealloc(UriMemoryManager * URI_UNUSED(memory), + void * ptr, size_t size) { + return realloc(ptr, size); +} + + + +static void * uriDefaultReallocarray(UriMemoryManager * URI_UNUSED(memory), + void * ptr, size_t nmemb, size_t size) { +#ifdef HAVE_REALLOCARRAY + return reallocarray(ptr, nmemb, size); +#else + const size_t total_size = nmemb * size; + + URI_CHECK_ALLOC_OVERFLOW(total_size, nmemb, size); /* may return */ + + return realloc(ptr, total_size); +#endif +} + + + +static void uriDefaultFree(UriMemoryManager * URI_UNUSED(memory), + void * ptr) { + free(ptr); +} + + + +UriBool uriMemoryManagerIsComplete(const UriMemoryManager * memory) { + return (memory + && memory->malloc + && memory->calloc + && memory->realloc + && memory->reallocarray + && memory->free) ? URI_TRUE : URI_FALSE; +} + + + +void * uriEmulateCalloc(UriMemoryManager * memory, size_t nmemb, size_t size) { + void * buffer; + const size_t total_size = nmemb * size; + + if (memory == NULL) { + errno = EINVAL; + return NULL; + } + + URI_CHECK_ALLOC_OVERFLOW(total_size, nmemb, size); /* may return */ + + buffer = memory->malloc(memory, total_size); + if (buffer == NULL) { + /* errno set by malloc */ + return NULL; + } + memset(buffer, 0, total_size); + return buffer; +} + + + +void * uriEmulateReallocarray(UriMemoryManager * memory, + void * ptr, size_t nmemb, size_t size) { + const size_t total_size = nmemb * size; + + if (memory == NULL) { + errno = EINVAL; + return NULL; + } + + URI_CHECK_ALLOC_OVERFLOW(total_size, nmemb, size); /* may return */ + + return memory->realloc(memory, ptr, total_size); +} + + + +static void * uriDecorateMalloc(UriMemoryManager * memory, + size_t size) { + UriMemoryManager * backend; + const size_t extraBytes = sizeof(size_t); + void * buffer; + + if (memory == NULL) { + errno = EINVAL; + return NULL; + } + + /* check for unsigned overflow */ + if (size > ((size_t)-1) - extraBytes) { + errno = ENOMEM; + return NULL; + } + + backend = (UriMemoryManager *)memory->userData; + if (backend == NULL) { + errno = EINVAL; + return NULL; + } + + buffer = backend->malloc(backend, extraBytes + size); + if (buffer == NULL) { + return NULL; + } + + *(size_t *)buffer = size; + + return (char *)buffer + extraBytes; +} + + + +static void * uriDecorateRealloc(UriMemoryManager * memory, + void * ptr, size_t size) { + void * newBuffer; + size_t prevSize; + + if (memory == NULL) { + errno = EINVAL; + return NULL; + } + + /* man realloc: "If ptr is NULL, then the call is equivalent to + * malloc(size), for *all* values of size" */ + if (ptr == NULL) { + return memory->malloc(memory, size); + } + + /* man realloc: "If size is equal to zero, and ptr is *not* NULL, + * then the call is equivalent to free(ptr)." */ + if (size == 0) { + memory->free(memory, ptr); + return NULL; + } + + prevSize = *((size_t *)((char *)ptr - sizeof(size_t))); + + /* Anything to do? */ + if (size <= prevSize) { + return ptr; + } + + newBuffer = memory->malloc(memory, size); + if (newBuffer == NULL) { + /* errno set by malloc */ + return NULL; + } + + memcpy(newBuffer, ptr, prevSize); + + memory->free(memory, ptr); + + return newBuffer; +} + + + +static void uriDecorateFree(UriMemoryManager * memory, void * ptr) { + UriMemoryManager * backend; + + if ((ptr == NULL) || (memory == NULL)) { + return; + } + + backend = (UriMemoryManager *)memory->userData; + if (backend == NULL) { + return; + } + + backend->free(backend, (char *)ptr - sizeof(size_t)); +} + + + +int uriCompleteMemoryManager(UriMemoryManager * memory, + UriMemoryManager * backend) { + if ((memory == NULL) || (backend == NULL)) { + return URI_ERROR_NULL; + } + + if ((backend->malloc == NULL) || (backend->free == NULL)) { + return URI_ERROR_MEMORY_MANAGER_INCOMPLETE; + } + + memory->calloc = uriEmulateCalloc; + memory->reallocarray = uriEmulateReallocarray; + + memory->malloc = uriDecorateMalloc; + memory->realloc = uriDecorateRealloc; + memory->free = uriDecorateFree; + + memory->userData = backend; + + return URI_SUCCESS; +} + + + +int uriTestMemoryManager(UriMemoryManager * memory) { + const size_t mallocSize = 7; + const size_t callocNmemb = 3; + const size_t callocSize = 5; + const size_t callocTotalSize = callocNmemb * callocSize; + const size_t reallocSize = 11; + const size_t reallocarrayNmemb = 5; + const size_t reallocarraySize = 7; + const size_t reallocarrayTotal = reallocarrayNmemb * reallocarraySize; + size_t index; + char * buffer; + + if (memory == NULL) { + return URI_ERROR_NULL; + } + + if (uriMemoryManagerIsComplete(memory) != URI_TRUE) { + return URI_ERROR_MEMORY_MANAGER_INCOMPLETE; + } + + /* malloc + free*/ + buffer = memory->malloc(memory, mallocSize); + if (buffer == NULL) { + return URI_ERROR_MEMORY_MANAGER_FAULTY; + } + buffer[mallocSize - 1] = '\xF1'; + memory->free(memory, buffer); + buffer = NULL; + + /* calloc + free */ + buffer = memory->calloc(memory, callocNmemb, callocSize); + if (buffer == NULL) { + return URI_ERROR_MEMORY_MANAGER_FAULTY; + } + for (index = 0; index < callocTotalSize; index++) { /* all zeros? */ + if (buffer[index] != '\0') { + return URI_ERROR_MEMORY_MANAGER_FAULTY; + } + } + buffer[callocTotalSize - 1] = '\xF2'; + memory->free(memory, buffer); + buffer = NULL; + + /* malloc + realloc + free */ + buffer = memory->malloc(memory, mallocSize); + if (buffer == NULL) { + return URI_ERROR_MEMORY_MANAGER_FAULTY; + } + for (index = 0; index < mallocSize; index++) { + buffer[index] = '\xF3'; + } + buffer = memory->realloc(memory, buffer, reallocSize); + if (buffer == NULL) { + return URI_ERROR_MEMORY_MANAGER_FAULTY; + } + for (index = 0; index < mallocSize; index++) { /* previous content? */ + if (buffer[index] != '\xF3') { + return URI_ERROR_MEMORY_MANAGER_FAULTY; + } + } + buffer[reallocSize - 1] = '\xF4'; + memory->free(memory, buffer); + buffer = NULL; + + /* malloc + realloc ptr!=NULL size==0 (equals free) */ + buffer = memory->malloc(memory, mallocSize); + if (buffer == NULL) { + return URI_ERROR_MEMORY_MANAGER_FAULTY; + } + buffer[mallocSize - 1] = '\xF5'; + memory->realloc(memory, buffer, 0); + buffer = NULL; + + /* realloc ptr==NULL size!=0 (equals malloc) + free */ + buffer = memory->realloc(memory, NULL, mallocSize); + if (buffer == NULL) { + return URI_ERROR_MEMORY_MANAGER_FAULTY; + } + buffer[mallocSize - 1] = '\xF6'; + memory->free(memory, buffer); + buffer = NULL; + + /* realloc ptr==NULL size==0 (equals malloc) + free */ + buffer = memory->realloc(memory, NULL, 0); + if (buffer != NULL) { + memory->free(memory, buffer); + buffer = NULL; + } + + /* malloc + reallocarray + free */ + buffer = memory->malloc(memory, mallocSize); + if (buffer == NULL) { + return URI_ERROR_MEMORY_MANAGER_FAULTY; + } + for (index = 0; index < mallocSize; index++) { + buffer[index] = '\xF7'; + } + buffer = memory->reallocarray(memory, buffer, reallocarrayNmemb, + reallocarraySize); + if (buffer == NULL) { + return URI_ERROR_MEMORY_MANAGER_FAULTY; + } + for (index = 0; index < mallocSize; index++) { /* previous content? */ + if (buffer[index] != '\xF7') { + return URI_ERROR_MEMORY_MANAGER_FAULTY; + } + } + buffer[reallocarrayTotal - 1] = '\xF8'; + memory->free(memory, buffer); + buffer = NULL; + + /* malloc + reallocarray ptr!=NULL nmemb==0 size!=0 (equals free) */ + buffer = memory->malloc(memory, mallocSize); + if (buffer == NULL) { + return URI_ERROR_MEMORY_MANAGER_FAULTY; + } + buffer[mallocSize - 1] = '\xF9'; + memory->reallocarray(memory, buffer, 0, reallocarraySize); + buffer = NULL; + + /* malloc + reallocarray ptr!=NULL nmemb!=0 size==0 (equals free) */ + buffer = memory->malloc(memory, mallocSize); + if (buffer == NULL) { + return URI_ERROR_MEMORY_MANAGER_FAULTY; + } + buffer[mallocSize - 1] = '\xFA'; + memory->reallocarray(memory, buffer, reallocarrayNmemb, 0); + buffer = NULL; + + /* malloc + reallocarray ptr!=NULL nmemb==0 size==0 (equals free) */ + buffer = memory->malloc(memory, mallocSize); + if (buffer == NULL) { + return URI_ERROR_MEMORY_MANAGER_FAULTY; + } + buffer[mallocSize - 1] = '\xFB'; + memory->reallocarray(memory, buffer, 0, 0); + buffer = NULL; + + /* reallocarray ptr==NULL nmemb!=0 size!=0 (equals malloc) + free */ + buffer = memory->reallocarray(memory, NULL, callocNmemb, callocSize); + if (buffer == NULL) { + return URI_ERROR_MEMORY_MANAGER_FAULTY; + } + buffer[callocTotalSize - 1] = '\xFC'; + memory->free(memory, buffer); + buffer = NULL; + + /* reallocarray ptr==NULL nmemb==0 size!=0 (equals malloc) + free */ + buffer = memory->reallocarray(memory, NULL, 0, callocSize); + if (buffer != NULL) { + memory->free(memory, buffer); + buffer = NULL; + } + + /* reallocarray ptr==NULL nmemb!=0 size==0 (equals malloc) + free */ + buffer = memory->reallocarray(memory, NULL, callocNmemb, 0); + if (buffer != NULL) { + memory->free(memory, buffer); + buffer = NULL; + } + + /* reallocarray ptr==NULL nmemb==0 size==0 (equals malloc) + free */ + buffer = memory->reallocarray(memory, NULL, 0, 0); + if (buffer != NULL) { + memory->free(memory, buffer); + buffer = NULL; + } + + return URI_SUCCESS; +} + + + +/*extern*/ UriMemoryManager defaultMemoryManager = { + uriDefaultMalloc, + uriDefaultCalloc, + uriDefaultRealloc, + uriDefaultReallocarray, + uriDefaultFree, + NULL /* userData */ +}; diff --git a/ext/uri/uriparser/src/UriMemory.h b/ext/uri/uriparser/src/UriMemory.h new file mode 100644 index 0000000000000..a930f93ac3c35 --- /dev/null +++ b/ext/uri/uriparser/src/UriMemory.h @@ -0,0 +1,78 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2018, Weijia Song + * Copyright (C) 2018, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef URI_MEMORY_H +#define URI_MEMORY_H 1 + + + +#ifndef URI_DOXYGEN +# include +#endif + + + +#define URI_CHECK_MEMORY_MANAGER(memory) \ + do { \ + if (memory == NULL) { \ + memory = &defaultMemoryManager; \ + } else if (uriMemoryManagerIsComplete(memory) != URI_TRUE) { \ + return URI_ERROR_MEMORY_MANAGER_INCOMPLETE; \ + } \ + } while (0) + + + +#ifdef __cplusplus +# define URIPARSER_EXTERN extern "C" +#else +# define URIPARSER_EXTERN extern +#endif + +URIPARSER_EXTERN UriMemoryManager defaultMemoryManager; + +#undef URIPARSER_EXTERN + + + +UriBool uriMemoryManagerIsComplete(const UriMemoryManager * memory); + + + +#endif /* URI_MEMORY_H */ diff --git a/ext/uri/uriparser/src/UriNormalize.c b/ext/uri/uriparser/src/UriNormalize.c new file mode 100644 index 0000000000000..36f53ba3653e5 --- /dev/null +++ b/ext/uri/uriparser/src/UriNormalize.c @@ -0,0 +1,827 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file UriNormalize.c + * Holds the RFC 3986 %URI normalization implementation. + * NOTE: This source file includes itself twice. + */ + +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriNormalize.c" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriNormalize.c" +# undef URI_PASS_UNICODE +# endif +#else +# ifdef URI_PASS_ANSI +# include +# else +# include +# include +# endif + + + +#ifndef URI_DOXYGEN +# include +# include "UriNormalizeBase.h" +# include "UriCommon.h" +# include "UriMemory.h" +#endif + + + +#include + + + +static int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri, unsigned int inMask, + unsigned int * outMask, UriMemoryManager * memory); + +static UriBool URI_FUNC(MakeRangeOwner)(unsigned int * doneMask, + unsigned int maskTest, URI_TYPE(TextRange) * range, + UriMemoryManager * memory); +static UriBool URI_FUNC(MakeOwnerEngine)(URI_TYPE(Uri) * uri, + unsigned int * doneMask, UriMemoryManager * memory); + +static void URI_FUNC(FixPercentEncodingInplace)(const URI_CHAR * first, + const URI_CHAR ** afterLast); +static UriBool URI_FUNC(FixPercentEncodingMalloc)(const URI_CHAR ** first, + const URI_CHAR ** afterLast, UriMemoryManager * memory); +static void URI_FUNC(FixPercentEncodingEngine)( + const URI_CHAR * inFirst, const URI_CHAR * inAfterLast, + const URI_CHAR * outFirst, const URI_CHAR ** outAfterLast); + +static UriBool URI_FUNC(ContainsUppercaseLetters)(const URI_CHAR * first, + const URI_CHAR * afterLast); +static UriBool URI_FUNC(ContainsUglyPercentEncoding)(const URI_CHAR * first, + const URI_CHAR * afterLast); + +static void URI_FUNC(LowercaseInplace)(const URI_CHAR * first, + const URI_CHAR * afterLast); +static void URI_FUNC(LowercaseInplaceAfterPercentEncoding)(const URI_CHAR * first, + const URI_CHAR * afterLast); +static UriBool URI_FUNC(LowercaseMalloc)(const URI_CHAR ** first, + const URI_CHAR ** afterLast, UriMemoryManager * memory); + +static void URI_FUNC(PreventLeakage)(URI_TYPE(Uri) * uri, + unsigned int revertMask, UriMemoryManager * memory); + + + +static URI_INLINE void URI_FUNC(PreventLeakage)(URI_TYPE(Uri) * uri, + unsigned int revertMask, UriMemoryManager * memory) { + if (revertMask & URI_NORMALIZE_SCHEME) { + /* NOTE: A scheme cannot be the empty string + * so no need to compare .first with .afterLast, here. */ + memory->free(memory, (URI_CHAR *)uri->scheme.first); + uri->scheme.first = NULL; + uri->scheme.afterLast = NULL; + } + + if (revertMask & URI_NORMALIZE_USER_INFO) { + if (uri->userInfo.first != uri->userInfo.afterLast) { + memory->free(memory, (URI_CHAR *)uri->userInfo.first); + } + uri->userInfo.first = NULL; + uri->userInfo.afterLast = NULL; + } + + if (revertMask & URI_NORMALIZE_HOST) { + if (uri->hostData.ipFuture.first != NULL) { + /* IPvFuture */ + /* NOTE: An IPvFuture address cannot be the empty string + * so no need to compare .first with .afterLast, here. */ + memory->free(memory, (URI_CHAR *)uri->hostData.ipFuture.first); + uri->hostData.ipFuture.first = NULL; + uri->hostData.ipFuture.afterLast = NULL; + uri->hostText.first = NULL; + uri->hostText.afterLast = NULL; + } else if (uri->hostText.first != NULL) { + /* Regname */ + if (uri->hostText.first != uri->hostText.afterLast) { + memory->free(memory, (URI_CHAR *)uri->hostText.first); + } + uri->hostText.first = NULL; + uri->hostText.afterLast = NULL; + } + } + + /* NOTE: Port cannot happen! */ + + if (revertMask & URI_NORMALIZE_PATH) { + URI_TYPE(PathSegment) * walker = uri->pathHead; + while (walker != NULL) { + URI_TYPE(PathSegment) * const next = walker->next; + if (walker->text.afterLast > walker->text.first) { + memory->free(memory, (URI_CHAR *)walker->text.first); + } + memory->free(memory, walker); + walker = next; + } + uri->pathHead = NULL; + uri->pathTail = NULL; + } + + if (revertMask & URI_NORMALIZE_QUERY) { + if (uri->query.first != uri->query.afterLast) { + memory->free(memory, (URI_CHAR *)uri->query.first); + } + uri->query.first = NULL; + uri->query.afterLast = NULL; + } + + if (revertMask & URI_NORMALIZE_FRAGMENT) { + if (uri->fragment.first != uri->fragment.afterLast) { + memory->free(memory, (URI_CHAR *)uri->fragment.first); + } + uri->fragment.first = NULL; + uri->fragment.afterLast = NULL; + } +} + + + +static URI_INLINE UriBool URI_FUNC(ContainsUppercaseLetters)(const URI_CHAR * first, + const URI_CHAR * afterLast) { + if ((first != NULL) && (afterLast != NULL) && (afterLast > first)) { + const URI_CHAR * i = first; + for (; i < afterLast; i++) { + /* 6.2.2.1 Case Normalization: uppercase letters in scheme or host */ + if ((*i >= _UT('A')) && (*i <= _UT('Z'))) { + return URI_TRUE; + } + } + } + return URI_FALSE; +} + + + +static URI_INLINE UriBool URI_FUNC(ContainsUglyPercentEncoding)(const URI_CHAR * first, + const URI_CHAR * afterLast) { + if ((first != NULL) && (afterLast != NULL) && (afterLast > first)) { + const URI_CHAR * i = first; + for (; i + 2 < afterLast; i++) { + if (i[0] == _UT('%')) { + /* 6.2.2.1 Case Normalization: * + * lowercase percent-encodings */ + if (((i[1] >= _UT('a')) && (i[1] <= _UT('f'))) + || ((i[2] >= _UT('a')) && (i[2] <= _UT('f')))) { + return URI_TRUE; + } else { + /* 6.2.2.2 Percent-Encoding Normalization: * + * percent-encoded unreserved characters */ + const unsigned char left = URI_FUNC(HexdigToInt)(i[1]); + const unsigned char right = URI_FUNC(HexdigToInt)(i[2]); + const int code = 16 * left + right; + if (uriIsUnreserved(code)) { + return URI_TRUE; + } + } + } + } + } + return URI_FALSE; +} + + + +static URI_INLINE void URI_FUNC(LowercaseInplace)(const URI_CHAR * first, + const URI_CHAR * afterLast) { + if ((first != NULL) && (afterLast != NULL) && (afterLast > first)) { + URI_CHAR * i = (URI_CHAR *)first; + const int lowerUpperDiff = (_UT('a') - _UT('A')); + for (; i < afterLast; i++) { + if ((*i >= _UT('A')) && (*i <=_UT('Z'))) { + *i = (URI_CHAR)(*i + lowerUpperDiff); + } + } + } +} + + + +static URI_INLINE void URI_FUNC(LowercaseInplaceAfterPercentEncoding)(const URI_CHAR * first, + const URI_CHAR * afterLast) { + if ((first != NULL) && (afterLast != NULL) && (afterLast > first)) { + URI_CHAR * i = (URI_CHAR *)first; + const int lowerUpperDiff = (_UT('a') - _UT('A')); + for (; i < afterLast; i++) { + if ((*i >= _UT('A')) && (*i <=_UT('Z'))) { + *i = (URI_CHAR)(*i + lowerUpperDiff); + } else if (*i == _UT('%')) { + i += 2; + } + } + } +} + + + +static URI_INLINE UriBool URI_FUNC(LowercaseMalloc)(const URI_CHAR ** first, + const URI_CHAR ** afterLast, UriMemoryManager * memory) { + int lenInChars; + const int lowerUpperDiff = (_UT('a') - _UT('A')); + URI_CHAR * buffer; + int i = 0; + + if ((first == NULL) || (afterLast == NULL) || (*first == NULL) + || (*afterLast == NULL)) { + return URI_FALSE; + } + + lenInChars = (int)(*afterLast - *first); + if (lenInChars == 0) { + return URI_TRUE; + } else if (lenInChars < 0) { + return URI_FALSE; + } + + buffer = memory->malloc(memory, lenInChars * sizeof(URI_CHAR)); + if (buffer == NULL) { + return URI_FALSE; + } + + for (; i < lenInChars; i++) { + if (((*first)[i] >= _UT('A')) && ((*first)[i] <=_UT('Z'))) { + buffer[i] = (URI_CHAR)((*first)[i] + lowerUpperDiff); + } else { + buffer[i] = (*first)[i]; + } + } + + *first = buffer; + *afterLast = buffer + lenInChars; + return URI_TRUE; +} + + + +/* NOTE: Implementation must stay inplace-compatible */ +static URI_INLINE void URI_FUNC(FixPercentEncodingEngine)( + const URI_CHAR * inFirst, const URI_CHAR * inAfterLast, + const URI_CHAR * outFirst, const URI_CHAR ** outAfterLast) { + URI_CHAR * write = (URI_CHAR *)outFirst; + const int lenInChars = (int)(inAfterLast - inFirst); + int i = 0; + + /* All but last two */ + for (; i + 2 < lenInChars; i++) { + if (inFirst[i] != _UT('%')) { + write[0] = inFirst[i]; + write++; + } else { + /* 6.2.2.2 Percent-Encoding Normalization: * + * percent-encoded unreserved characters */ + const URI_CHAR one = inFirst[i + 1]; + const URI_CHAR two = inFirst[i + 2]; + const unsigned char left = URI_FUNC(HexdigToInt)(one); + const unsigned char right = URI_FUNC(HexdigToInt)(two); + const int code = 16 * left + right; + if (uriIsUnreserved(code)) { + write[0] = (URI_CHAR)(code); + write++; + } else { + /* 6.2.2.1 Case Normalization: * + * lowercase percent-encodings */ + write[0] = _UT('%'); + write[1] = URI_FUNC(HexToLetter)(left); + write[2] = URI_FUNC(HexToLetter)(right); + write += 3; + } + + i += 2; /* For the two chars of the percent group we just ate */ + } + } + + /* Last two */ + for (; i < lenInChars; i++) { + write[0] = inFirst[i]; + write++; + } + + *outAfterLast = write; +} + + + +static URI_INLINE void URI_FUNC(FixPercentEncodingInplace)(const URI_CHAR * first, + const URI_CHAR ** afterLast) { + /* Death checks */ + if ((first == NULL) || (afterLast == NULL) || (*afterLast == NULL)) { + return; + } + + /* Fix inplace */ + URI_FUNC(FixPercentEncodingEngine)(first, *afterLast, first, afterLast); +} + + + +static URI_INLINE UriBool URI_FUNC(FixPercentEncodingMalloc)(const URI_CHAR ** first, + const URI_CHAR ** afterLast, UriMemoryManager * memory) { + int lenInChars; + URI_CHAR * buffer; + + /* Death checks */ + if ((first == NULL) || (afterLast == NULL) + || (*first == NULL) || (*afterLast == NULL)) { + return URI_FALSE; + } + + /* Old text length */ + lenInChars = (int)(*afterLast - *first); + if (lenInChars == 0) { + return URI_TRUE; + } else if (lenInChars < 0) { + return URI_FALSE; + } + + /* New buffer */ + buffer = memory->malloc(memory, lenInChars * sizeof(URI_CHAR)); + if (buffer == NULL) { + return URI_FALSE; + } + + /* Fix on copy */ + URI_FUNC(FixPercentEncodingEngine)(*first, *afterLast, buffer, afterLast); + *first = buffer; + return URI_TRUE; +} + + + +static URI_INLINE UriBool URI_FUNC(MakeRangeOwner)(unsigned int * doneMask, + unsigned int maskTest, URI_TYPE(TextRange) * range, + UriMemoryManager * memory) { + if (((*doneMask & maskTest) == 0) + && (range->first != NULL) + && (range->afterLast != NULL) + && (range->afterLast > range->first)) { + const int lenInChars = (int)(range->afterLast - range->first); + const int lenInBytes = lenInChars * sizeof(URI_CHAR); + URI_CHAR * dup = memory->malloc(memory, lenInBytes); + if (dup == NULL) { + return URI_FALSE; /* Raises malloc error */ + } + memcpy(dup, range->first, lenInBytes); + range->first = dup; + range->afterLast = dup + lenInChars; + *doneMask |= maskTest; + } + return URI_TRUE; +} + + + +static URI_INLINE UriBool URI_FUNC(MakeOwnerEngine)(URI_TYPE(Uri) * uri, + unsigned int * doneMask, UriMemoryManager * memory) { + URI_TYPE(PathSegment) * walker = uri->pathHead; + if (!URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_SCHEME, + &(uri->scheme), memory) + || !URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_USER_INFO, + &(uri->userInfo), memory) + || !URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_QUERY, + &(uri->query), memory) + || !URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_FRAGMENT, + &(uri->fragment), memory)) { + return URI_FALSE; /* Raises malloc error */ + } + + /* Host */ + if ((*doneMask & URI_NORMALIZE_HOST) == 0) { + if (uri->hostData.ipFuture.first != NULL) { + /* IPvFuture */ + if (!URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_HOST, + &(uri->hostData.ipFuture), memory)) { + return URI_FALSE; /* Raises malloc error */ + } + uri->hostText.first = uri->hostData.ipFuture.first; + uri->hostText.afterLast = uri->hostData.ipFuture.afterLast; + } else if (uri->hostText.first != NULL) { + /* Regname */ + if (!URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_HOST, + &(uri->hostText), memory)) { + return URI_FALSE; /* Raises malloc error */ + } + } + } + + /* Path */ + if ((*doneMask & URI_NORMALIZE_PATH) == 0) { + while (walker != NULL) { + if (!URI_FUNC(MakeRangeOwner)(doneMask, 0, &(walker->text), memory)) { + /* Free allocations done so far and kill path */ + + /* Kill path to one before walker (if any) */ + URI_TYPE(PathSegment) * ranger = uri->pathHead; + while (ranger != walker) { + URI_TYPE(PathSegment) * const next = ranger->next; + if ((ranger->text.first != NULL) + && (ranger->text.afterLast != NULL) + && (ranger->text.afterLast > ranger->text.first)) { + memory->free(memory, (URI_CHAR *)ranger->text.first); + } + memory->free(memory, ranger); + ranger = next; + } + + /* Kill path from walker */ + while (walker != NULL) { + URI_TYPE(PathSegment) * const next = walker->next; + memory->free(memory, walker); + walker = next; + } + + uri->pathHead = NULL; + uri->pathTail = NULL; + return URI_FALSE; /* Raises malloc error */ + } + walker = walker->next; + } + *doneMask |= URI_NORMALIZE_PATH; + } + + /* Port text, must come last so we don't have to undo that one if it fails. * + * Otherwise we would need and extra enum flag for it although the port * + * cannot go unnormalized... */ + if (!URI_FUNC(MakeRangeOwner)(doneMask, 0, &(uri->portText), memory)) { + return URI_FALSE; /* Raises malloc error */ + } + + return URI_TRUE; +} + + + +unsigned int URI_FUNC(NormalizeSyntaxMaskRequired)(const URI_TYPE(Uri) * uri) { + unsigned int outMask = URI_NORMALIZED; /* for NULL uri */ + URI_FUNC(NormalizeSyntaxMaskRequiredEx)(uri, &outMask); + return outMask; +} + + + +int URI_FUNC(NormalizeSyntaxMaskRequiredEx)(const URI_TYPE(Uri) * uri, + unsigned int * outMask) { + UriMemoryManager * const memory = NULL; /* no use of memory manager */ + +#if defined(__GNUC__) && ((__GNUC__ > 4) \ + || ((__GNUC__ == 4) && defined(__GNUC_MINOR__) && (__GNUC_MINOR__ >= 2))) + /* Slower code that fixes a warning, not sure if this is a smart idea */ + URI_TYPE(Uri) writeableClone; +#endif + + if ((uri == NULL) || (outMask == NULL)) { + return URI_ERROR_NULL; + } + +#if defined(__GNUC__) && ((__GNUC__ > 4) \ + || ((__GNUC__ == 4) && defined(__GNUC_MINOR__) && (__GNUC_MINOR__ >= 2))) + /* Slower code that fixes a warning, not sure if this is a smart idea */ + memcpy(&writeableClone, uri, 1 * sizeof(URI_TYPE(Uri))); + URI_FUNC(NormalizeSyntaxEngine)(&writeableClone, 0, outMask, memory); +#else + URI_FUNC(NormalizeSyntaxEngine)((URI_TYPE(Uri) *)uri, 0, outMask, memory); +#endif + return URI_SUCCESS; +} + + + +int URI_FUNC(NormalizeSyntaxEx)(URI_TYPE(Uri) * uri, unsigned int mask) { + return URI_FUNC(NormalizeSyntaxExMm)(uri, mask, NULL); +} + + + +int URI_FUNC(NormalizeSyntaxExMm)(URI_TYPE(Uri) * uri, unsigned int mask, + UriMemoryManager * memory) { + URI_CHECK_MEMORY_MANAGER(memory); /* may return */ + return URI_FUNC(NormalizeSyntaxEngine)(uri, mask, NULL, memory); +} + + + +int URI_FUNC(NormalizeSyntax)(URI_TYPE(Uri) * uri) { + return URI_FUNC(NormalizeSyntaxEx)(uri, (unsigned int)-1); +} + + + +static URI_INLINE int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri, + unsigned int inMask, unsigned int * outMask, + UriMemoryManager * memory) { + unsigned int doneMask = URI_NORMALIZED; + + /* Not just doing inspection? -> memory manager required! */ + if (outMask == NULL) { + assert(memory != NULL); + } + + if (uri == NULL) { + if (outMask != NULL) { + *outMask = URI_NORMALIZED; + return URI_SUCCESS; + } else { + return URI_ERROR_NULL; + } + } + + if (outMask != NULL) { + /* Reset mask */ + *outMask = URI_NORMALIZED; + } else if (inMask == URI_NORMALIZED) { + /* Nothing to do */ + return URI_SUCCESS; + } + + /* Scheme, host */ + if (outMask != NULL) { + const UriBool normalizeScheme = URI_FUNC(ContainsUppercaseLetters)( + uri->scheme.first, uri->scheme.afterLast); + const UriBool normalizeHostCase = URI_FUNC(ContainsUppercaseLetters)( + uri->hostText.first, uri->hostText.afterLast); + if (normalizeScheme) { + *outMask |= URI_NORMALIZE_SCHEME; + } + + if (normalizeHostCase) { + *outMask |= URI_NORMALIZE_HOST; + } else { + const UriBool normalizeHostPrecent = URI_FUNC(ContainsUglyPercentEncoding)( + uri->hostText.first, uri->hostText.afterLast); + if (normalizeHostPrecent) { + *outMask |= URI_NORMALIZE_HOST; + } + } + } else { + /* Scheme */ + if ((inMask & URI_NORMALIZE_SCHEME) && (uri->scheme.first != NULL)) { + if (uri->owner) { + URI_FUNC(LowercaseInplace)(uri->scheme.first, uri->scheme.afterLast); + } else { + if (!URI_FUNC(LowercaseMalloc)(&(uri->scheme.first), &(uri->scheme.afterLast), memory)) { + URI_FUNC(PreventLeakage)(uri, doneMask, memory); + return URI_ERROR_MALLOC; + } + doneMask |= URI_NORMALIZE_SCHEME; + } + } + + /* Host */ + if (inMask & URI_NORMALIZE_HOST) { + if (uri->hostData.ipFuture.first != NULL) { + /* IPvFuture */ + if (uri->owner) { + URI_FUNC(LowercaseInplace)(uri->hostData.ipFuture.first, + uri->hostData.ipFuture.afterLast); + } else { + if (!URI_FUNC(LowercaseMalloc)(&(uri->hostData.ipFuture.first), + &(uri->hostData.ipFuture.afterLast), memory)) { + URI_FUNC(PreventLeakage)(uri, doneMask, memory); + return URI_ERROR_MALLOC; + } + doneMask |= URI_NORMALIZE_HOST; + } + uri->hostText.first = uri->hostData.ipFuture.first; + uri->hostText.afterLast = uri->hostData.ipFuture.afterLast; + } else if ((uri->hostText.first != NULL) + && (uri->hostData.ip4 == NULL)) { + /* Regname and IPv6 */ + if (uri->owner) { + URI_FUNC(FixPercentEncodingInplace)(uri->hostText.first, + &(uri->hostText.afterLast)); + } else { + if (!URI_FUNC(FixPercentEncodingMalloc)( + &(uri->hostText.first), + &(uri->hostText.afterLast), + memory)) { + URI_FUNC(PreventLeakage)(uri, doneMask, memory); + return URI_ERROR_MALLOC; + } + doneMask |= URI_NORMALIZE_HOST; + } + + URI_FUNC(LowercaseInplaceAfterPercentEncoding)(uri->hostText.first, + uri->hostText.afterLast); + } + } + } + + /* User info */ + if (outMask != NULL) { + const UriBool normalizeUserInfo = URI_FUNC(ContainsUglyPercentEncoding)( + uri->userInfo.first, uri->userInfo.afterLast); + if (normalizeUserInfo) { + *outMask |= URI_NORMALIZE_USER_INFO; + } + } else { + if ((inMask & URI_NORMALIZE_USER_INFO) && (uri->userInfo.first != NULL)) { + if (uri->owner) { + URI_FUNC(FixPercentEncodingInplace)(uri->userInfo.first, &(uri->userInfo.afterLast)); + } else { + if (!URI_FUNC(FixPercentEncodingMalloc)(&(uri->userInfo.first), + &(uri->userInfo.afterLast), memory)) { + URI_FUNC(PreventLeakage)(uri, doneMask, memory); + return URI_ERROR_MALLOC; + } + doneMask |= URI_NORMALIZE_USER_INFO; + } + } + } + + /* Path */ + if (outMask != NULL) { + const URI_TYPE(PathSegment) * walker = uri->pathHead; + while (walker != NULL) { + const URI_CHAR * const first = walker->text.first; + const URI_CHAR * const afterLast = walker->text.afterLast; + if ((first != NULL) + && (afterLast != NULL) + && (afterLast > first) + && ( + (((afterLast - first) == 1) + && (first[0] == _UT('.'))) + || + (((afterLast - first) == 2) + && (first[0] == _UT('.')) + && (first[1] == _UT('.'))) + || + URI_FUNC(ContainsUglyPercentEncoding)(first, afterLast) + )) { + *outMask |= URI_NORMALIZE_PATH; + break; + } + walker = walker->next; + } + } else if (inMask & URI_NORMALIZE_PATH) { + URI_TYPE(PathSegment) * walker; + const UriBool relative = ((uri->scheme.first == NULL) + && !uri->absolutePath) ? URI_TRUE : URI_FALSE; + + /* Fix percent-encoding for each segment */ + walker = uri->pathHead; + if (uri->owner) { + while (walker != NULL) { + URI_FUNC(FixPercentEncodingInplace)(walker->text.first, &(walker->text.afterLast)); + walker = walker->next; + } + } else { + while (walker != NULL) { + if (!URI_FUNC(FixPercentEncodingMalloc)(&(walker->text.first), + &(walker->text.afterLast), memory)) { + URI_FUNC(PreventLeakage)(uri, doneMask, memory); + return URI_ERROR_MALLOC; + } + walker = walker->next; + } + doneMask |= URI_NORMALIZE_PATH; + } + + /* 6.2.2.3 Path Segment Normalization */ + if (!URI_FUNC(RemoveDotSegmentsEx)(uri, relative, + (uri->owner == URI_TRUE) + || ((doneMask & URI_NORMALIZE_PATH) != 0), + memory)) { + URI_FUNC(PreventLeakage)(uri, doneMask, memory); + return URI_ERROR_MALLOC; + } + URI_FUNC(FixEmptyTrailSegment)(uri, memory); + } + + /* Query, fragment */ + if (outMask != NULL) { + const UriBool normalizeQuery = URI_FUNC(ContainsUglyPercentEncoding)( + uri->query.first, uri->query.afterLast); + const UriBool normalizeFragment = URI_FUNC(ContainsUglyPercentEncoding)( + uri->fragment.first, uri->fragment.afterLast); + if (normalizeQuery) { + *outMask |= URI_NORMALIZE_QUERY; + } + + if (normalizeFragment) { + *outMask |= URI_NORMALIZE_FRAGMENT; + } + } else { + /* Query */ + if ((inMask & URI_NORMALIZE_QUERY) && (uri->query.first != NULL)) { + if (uri->owner) { + URI_FUNC(FixPercentEncodingInplace)(uri->query.first, &(uri->query.afterLast)); + } else { + if (!URI_FUNC(FixPercentEncodingMalloc)(&(uri->query.first), + &(uri->query.afterLast), memory)) { + URI_FUNC(PreventLeakage)(uri, doneMask, memory); + return URI_ERROR_MALLOC; + } + doneMask |= URI_NORMALIZE_QUERY; + } + } + + /* Fragment */ + if ((inMask & URI_NORMALIZE_FRAGMENT) && (uri->fragment.first != NULL)) { + if (uri->owner) { + URI_FUNC(FixPercentEncodingInplace)(uri->fragment.first, &(uri->fragment.afterLast)); + } else { + if (!URI_FUNC(FixPercentEncodingMalloc)(&(uri->fragment.first), + &(uri->fragment.afterLast), memory)) { + URI_FUNC(PreventLeakage)(uri, doneMask, memory); + return URI_ERROR_MALLOC; + } + doneMask |= URI_NORMALIZE_FRAGMENT; + } + } + } + + /* Dup all not duped yet */ + if ((outMask == NULL) && !uri->owner) { + if (!URI_FUNC(MakeOwnerEngine)(uri, &doneMask, memory)) { + URI_FUNC(PreventLeakage)(uri, doneMask, memory); + return URI_ERROR_MALLOC; + } + uri->owner = URI_TRUE; + } + + return URI_SUCCESS; +} + + + +int URI_FUNC(MakeOwnerMm)(URI_TYPE(Uri) * uri, UriMemoryManager * memory) { + unsigned int doneMask = URI_NORMALIZED; + + URI_CHECK_MEMORY_MANAGER(memory); /* may return */ + + if (uri == NULL) { + return URI_ERROR_NULL; + } + + if (uri->owner == URI_TRUE) { + return URI_SUCCESS; + } + + if (! URI_FUNC(MakeOwnerEngine)(uri, &doneMask, memory)) { + URI_FUNC(PreventLeakage)(uri, doneMask, memory); + return URI_ERROR_MALLOC; + } + + uri->owner = URI_TRUE; + + return URI_SUCCESS; +} + + + +int URI_FUNC(MakeOwner)(URI_TYPE(Uri) * uri) { + return URI_FUNC(MakeOwnerMm)(uri, NULL); +} + + + +#endif diff --git a/ext/uri/uriparser/src/UriNormalizeBase.c b/ext/uri/uriparser/src/UriNormalizeBase.c new file mode 100644 index 0000000000000..a74fcd3f8bfbf --- /dev/null +++ b/ext/uri/uriparser/src/UriNormalizeBase.c @@ -0,0 +1,119 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef URI_DOXYGEN +# include "UriNormalizeBase.h" +#endif + + + +UriBool uriIsUnreserved(int code) { + switch (code) { + case L'a': /* ALPHA */ + case L'A': + case L'b': + case L'B': + case L'c': + case L'C': + case L'd': + case L'D': + case L'e': + case L'E': + case L'f': + case L'F': + case L'g': + case L'G': + case L'h': + case L'H': + case L'i': + case L'I': + case L'j': + case L'J': + case L'k': + case L'K': + case L'l': + case L'L': + case L'm': + case L'M': + case L'n': + case L'N': + case L'o': + case L'O': + case L'p': + case L'P': + case L'q': + case L'Q': + case L'r': + case L'R': + case L's': + case L'S': + case L't': + case L'T': + case L'u': + case L'U': + case L'v': + case L'V': + case L'w': + case L'W': + case L'x': + case L'X': + case L'y': + case L'Y': + case L'z': + case L'Z': + case L'0': /* DIGIT */ + case L'1': + case L'2': + case L'3': + case L'4': + case L'5': + case L'6': + case L'7': + case L'8': + case L'9': + case L'-': /* "-" / "." / "_" / "~" */ + case L'.': + case L'_': + case L'~': + return URI_TRUE; + + default: + return URI_FALSE; + } +} diff --git a/ext/uri/uriparser/src/UriNormalizeBase.h b/ext/uri/uriparser/src/UriNormalizeBase.h new file mode 100644 index 0000000000000..8651c8616649f --- /dev/null +++ b/ext/uri/uriparser/src/UriNormalizeBase.h @@ -0,0 +1,53 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef URI_NORMALIZE_BASE_H +#define URI_NORMALIZE_BASE_H 1 + + + +#include + + + +UriBool uriIsUnreserved(int code); + + + +#endif /* URI_NORMALIZE_BASE_H */ diff --git a/ext/uri/uriparser/src/UriParse.c b/ext/uri/uriparser/src/UriParse.c new file mode 100644 index 0000000000000..b91e511a58492 --- /dev/null +++ b/ext/uri/uriparser/src/UriParse.c @@ -0,0 +1,2409 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file UriParse.c + * Holds the RFC 3986 %URI parsing implementation. + * NOTE: This source file includes itself twice. + */ + +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriParse.c" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriParse.c" +# undef URI_PASS_UNICODE +# endif +#else +# ifdef URI_PASS_ANSI +# include +# else +# include +# include +# endif + + + +#ifndef URI_DOXYGEN +# include +# include +# include "UriCommon.h" +# include "UriMemory.h" +# include "UriParseBase.h" +#endif + + + +#define URI_SET_DIGIT \ + _UT('0'): \ + case _UT('1'): \ + case _UT('2'): \ + case _UT('3'): \ + case _UT('4'): \ + case _UT('5'): \ + case _UT('6'): \ + case _UT('7'): \ + case _UT('8'): \ + case _UT('9') + +#define URI_SET_HEX_LETTER_UPPER \ + _UT('A'): \ + case _UT('B'): \ + case _UT('C'): \ + case _UT('D'): \ + case _UT('E'): \ + case _UT('F') + +#define URI_SET_HEX_LETTER_LOWER \ + _UT('a'): \ + case _UT('b'): \ + case _UT('c'): \ + case _UT('d'): \ + case _UT('e'): \ + case _UT('f') + +#define URI_SET_HEXDIG \ + URI_SET_DIGIT: \ + case URI_SET_HEX_LETTER_UPPER: \ + case URI_SET_HEX_LETTER_LOWER + +#define URI_SET_ALPHA \ + URI_SET_HEX_LETTER_UPPER: \ + case URI_SET_HEX_LETTER_LOWER: \ + case _UT('g'): \ + case _UT('G'): \ + case _UT('h'): \ + case _UT('H'): \ + case _UT('i'): \ + case _UT('I'): \ + case _UT('j'): \ + case _UT('J'): \ + case _UT('k'): \ + case _UT('K'): \ + case _UT('l'): \ + case _UT('L'): \ + case _UT('m'): \ + case _UT('M'): \ + case _UT('n'): \ + case _UT('N'): \ + case _UT('o'): \ + case _UT('O'): \ + case _UT('p'): \ + case _UT('P'): \ + case _UT('q'): \ + case _UT('Q'): \ + case _UT('r'): \ + case _UT('R'): \ + case _UT('s'): \ + case _UT('S'): \ + case _UT('t'): \ + case _UT('T'): \ + case _UT('u'): \ + case _UT('U'): \ + case _UT('v'): \ + case _UT('V'): \ + case _UT('w'): \ + case _UT('W'): \ + case _UT('x'): \ + case _UT('X'): \ + case _UT('y'): \ + case _UT('Y'): \ + case _UT('z'): \ + case _UT('Z') + + + +static const URI_CHAR * URI_FUNC(ParseAuthority)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParseAuthorityTwo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseHexZero)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseHierPart)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParseIpFutLoop)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParseIpFutStopGo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParseIpLit2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParseIPv6address2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParseMustBeSegmentNzNc)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParseOwnHost)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParseOwnHost2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParseOwnHostUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParseOwnHostUserInfoNz)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParseOwnPortUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParseOwnUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParsePartHelperTwo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParsePathAbsEmpty)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParsePathAbsNoLeadSlash)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParsePathRootless)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParsePchar)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParsePctEncoded)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParsePctSubUnres)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParsePort)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseQueryFrag)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParseSegment)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParseSegmentNz)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParseSegmentNzNcOrScheme2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParseUriReference)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParseUriTail)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParseUriTailTwo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); +static const URI_CHAR * URI_FUNC(ParseZeroMoreSlashSegs)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); + +static UriBool URI_FUNC(OnExitOwnHost2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, UriMemoryManager * memory); +static UriBool URI_FUNC(OnExitOwnHostUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, UriMemoryManager * memory); +static UriBool URI_FUNC(OnExitOwnPortUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, UriMemoryManager * memory); +static UriBool URI_FUNC(OnExitSegmentNzNcOrScheme2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, UriMemoryManager * memory); +static void URI_FUNC(OnExitPartHelperTwo)(URI_TYPE(ParserState) * state); + +static void URI_FUNC(ResetParserStateExceptUri)(URI_TYPE(ParserState) * state); + +UriBool URI_FUNC(PushPathSegment)(URI_TYPE(ParserState) * state, + const URI_CHAR * first, const URI_CHAR * afterLast, + UriMemoryManager * memory); + +static void URI_FUNC(StopSyntax)(URI_TYPE(ParserState) * state, const URI_CHAR * errorPos, UriMemoryManager * memory); +static void URI_FUNC(StopMalloc)(URI_TYPE(ParserState) * state, UriMemoryManager * memory); + +static int URI_FUNC(ParseUriExMm)(URI_TYPE(ParserState) * state, + const URI_CHAR * first, const URI_CHAR * afterLast, + UriMemoryManager * memory); + + + +static URI_INLINE void URI_FUNC(StopSyntax)(URI_TYPE(ParserState) * state, + const URI_CHAR * errorPos, UriMemoryManager * memory) { + URI_FUNC(FreeUriMembersMm)(state->uri, memory); + state->errorPos = errorPos; + state->errorCode = URI_ERROR_SYNTAX; +} + + + +static URI_INLINE void URI_FUNC(StopMalloc)(URI_TYPE(ParserState) * state, UriMemoryManager * memory) { + URI_FUNC(FreeUriMembersMm)(state->uri, memory); + state->errorPos = NULL; + state->errorCode = URI_ERROR_MALLOC; +} + + + +/* + * [authority]-><[>[ipLit2][authorityTwo] + * [authority]->[ownHostUserInfoNz] + * [authority]-> + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseAuthority)( + URI_TYPE(ParserState) * state, const URI_CHAR * first, + const URI_CHAR * afterLast, UriMemoryManager * memory) { + if (first >= afterLast) { + /* "" regname host */ + state->uri->hostText.first = URI_FUNC(SafeToPointTo); + state->uri->hostText.afterLast = URI_FUNC(SafeToPointTo); + return afterLast; + } + + switch (*first) { + case _UT('['): + { + const URI_CHAR * const afterIpLit2 + = URI_FUNC(ParseIpLit2)(state, first + 1, afterLast, memory); + if (afterIpLit2 == NULL) { + return NULL; + } + state->uri->hostText.first = first + 1; /* HOST BEGIN */ + return URI_FUNC(ParseAuthorityTwo)(state, afterIpLit2, afterLast); + } + + case _UT('!'): + case _UT('$'): + case _UT('%'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('-'): + case _UT('*'): + case _UT(','): + case _UT('.'): + case _UT(':'): + case _UT(';'): + case _UT('@'): + case _UT('\''): + case _UT('_'): + case _UT('~'): + case _UT('+'): + case _UT('='): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + state->uri->userInfo.first = first; /* USERINFO BEGIN */ + return URI_FUNC(ParseOwnHostUserInfoNz)(state, first, afterLast, memory); + + default: + /* "" regname host */ + state->uri->hostText.first = URI_FUNC(SafeToPointTo); + state->uri->hostText.afterLast = URI_FUNC(SafeToPointTo); + return first; + } +} + + + +/* + * [authorityTwo]-><:>[port] + * [authorityTwo]-> + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseAuthorityTwo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT(':'): + { + const URI_CHAR * const afterPort = URI_FUNC(ParsePort)(state, first + 1, afterLast); + if (afterPort == NULL) { + return NULL; + } + state->uri->portText.first = first + 1; /* PORT BEGIN */ + state->uri->portText.afterLast = afterPort; /* PORT END */ + return afterPort; + } + + default: + return first; + } +} + + + +/* + * [hexZero]->[HEXDIG][hexZero] + * [hexZero]-> + */ +static const URI_CHAR * URI_FUNC(ParseHexZero)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case URI_SET_HEXDIG: + return URI_FUNC(ParseHexZero)(state, first + 1, afterLast); + + default: + return first; + } +} + + + +/* + * [hierPart]->[pathRootless] + * [hierPart]->[partHelperTwo] + * [hierPart]-> + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseHierPart)( + URI_TYPE(ParserState) * state, const URI_CHAR * first, + const URI_CHAR * afterLast, UriMemoryManager * memory) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('!'): + case _UT('$'): + case _UT('%'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('-'): + case _UT('*'): + case _UT(','): + case _UT('.'): + case _UT(':'): + case _UT(';'): + case _UT('@'): + case _UT('\''): + case _UT('_'): + case _UT('~'): + case _UT('+'): + case _UT('='): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + return URI_FUNC(ParsePathRootless)(state, first, afterLast, memory); + + case _UT('/'): + return URI_FUNC(ParsePartHelperTwo)(state, first + 1, afterLast, memory); + + default: + return first; + } +} + + + +/* + * [ipFutLoop]->[subDelims][ipFutStopGo] + * [ipFutLoop]->[unreserved][ipFutStopGo] + * [ipFutLoop]-><:>[ipFutStopGo] + */ +static const URI_CHAR * URI_FUNC(ParseIpFutLoop)(URI_TYPE(ParserState) * state, + const URI_CHAR * first, const URI_CHAR * afterLast, + UriMemoryManager * memory) { + if (first >= afterLast) { + URI_FUNC(StopSyntax)(state, afterLast, memory); + return NULL; + } + + switch (*first) { + case _UT('!'): + case _UT('$'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('-'): + case _UT('*'): + case _UT(','): + case _UT('.'): + case _UT(':'): + case _UT(';'): + case _UT('\''): + case _UT('_'): + case _UT('~'): + case _UT('+'): + case _UT('='): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + return URI_FUNC(ParseIpFutStopGo)(state, first + 1, afterLast, memory); + + default: + URI_FUNC(StopSyntax)(state, first, memory); + return NULL; + } +} + + + +/* + * [ipFutStopGo]->[ipFutLoop] + * [ipFutStopGo]-> + */ +static const URI_CHAR * URI_FUNC(ParseIpFutStopGo)( + URI_TYPE(ParserState) * state, + const URI_CHAR * first, const URI_CHAR * afterLast, + UriMemoryManager * memory) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('!'): + case _UT('$'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('-'): + case _UT('*'): + case _UT(','): + case _UT('.'): + case _UT(':'): + case _UT(';'): + case _UT('\''): + case _UT('_'): + case _UT('~'): + case _UT('+'): + case _UT('='): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + return URI_FUNC(ParseIpFutLoop)(state, first, afterLast, memory); + + default: + return first; + } +} + + + +/* + * [ipFuture]->[HEXDIG][hexZero]<.>[ipFutLoop] + */ +static const URI_CHAR * URI_FUNC(ParseIpFuture)(URI_TYPE(ParserState) * state, + const URI_CHAR * first, const URI_CHAR * afterLast, + UriMemoryManager * memory) { + if (first >= afterLast) { + URI_FUNC(StopSyntax)(state, afterLast, memory); + return NULL; + } + + /* + First character has already been + checked before entering this rule. + + switch (*first) { + case _UT('v'): + case _UT('V'): + */ + if (first + 1 >= afterLast) { + URI_FUNC(StopSyntax)(state, afterLast, memory); + return NULL; + } + + switch (first[1]) { + case URI_SET_HEXDIG: + { + const URI_CHAR * afterIpFutLoop; + const URI_CHAR * const afterHexZero + = URI_FUNC(ParseHexZero)(state, first + 2, afterLast); + if (afterHexZero == NULL) { + return NULL; + } + if (afterHexZero >= afterLast) { + URI_FUNC(StopSyntax)(state, afterLast, memory); + return NULL; + } + if (*afterHexZero != _UT('.')) { + URI_FUNC(StopSyntax)(state, afterHexZero, memory); + return NULL; + } + state->uri->hostText.first = first; /* HOST BEGIN */ + state->uri->hostData.ipFuture.first = first; /* IPFUTURE BEGIN */ + afterIpFutLoop = URI_FUNC(ParseIpFutLoop)(state, afterHexZero + 1, afterLast, memory); + if (afterIpFutLoop == NULL) { + return NULL; + } + state->uri->hostText.afterLast = afterIpFutLoop; /* HOST END */ + state->uri->hostData.ipFuture.afterLast = afterIpFutLoop; /* IPFUTURE END */ + return afterIpFutLoop; + } + + default: + URI_FUNC(StopSyntax)(state, first + 1, memory); + return NULL; + } + + /* + default: + URI_FUNC(StopSyntax)(state, first, memory); + return NULL; + } + */ +} + + + +/* + * [ipLit2]->[ipFuture]<]> + * [ipLit2]->[IPv6address2] + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseIpLit2)( + URI_TYPE(ParserState) * state, const URI_CHAR * first, + const URI_CHAR * afterLast, UriMemoryManager * memory) { + if (first >= afterLast) { + URI_FUNC(StopSyntax)(state, afterLast, memory); + return NULL; + } + + switch (*first) { + /* The leading "v" of IPvFuture is case-insensitive. */ + case _UT('v'): + case _UT('V'): + { + const URI_CHAR * const afterIpFuture + = URI_FUNC(ParseIpFuture)(state, first, afterLast, memory); + if (afterIpFuture == NULL) { + return NULL; + } + if (afterIpFuture >= afterLast) { + URI_FUNC(StopSyntax)(state, afterLast, memory); + return NULL; + } + if (*afterIpFuture != _UT(']')) { + URI_FUNC(StopSyntax)(state, afterIpFuture, memory); + return NULL; + } + return afterIpFuture + 1; + } + + case _UT(':'): + case _UT(']'): + case URI_SET_HEXDIG: + state->uri->hostData.ip6 = memory->malloc(memory, 1 * sizeof(UriIp6)); /* Freed when stopping on parse error */ + if (state->uri->hostData.ip6 == NULL) { + URI_FUNC(StopMalloc)(state, memory); + return NULL; + } + return URI_FUNC(ParseIPv6address2)(state, first, afterLast, memory); + + default: + URI_FUNC(StopSyntax)(state, first, memory); + return NULL; + } +} + + + +/* + * [IPv6address2]->..<]> + */ +static const URI_CHAR * URI_FUNC(ParseIPv6address2)( + URI_TYPE(ParserState) * state, + const URI_CHAR * first, const URI_CHAR * afterLast, + UriMemoryManager * memory) { + int zipperEver = 0; + int quadsDone = 0; + int digitCount = 0; + unsigned char digitHistory[4]; + int ip4OctetsDone = 0; + + unsigned char quadsAfterZipper[14]; + int quadsAfterZipperCount = 0; + + + for (;;) { + if (first >= afterLast) { + URI_FUNC(StopSyntax)(state, afterLast, memory); + return NULL; + } + + /* Inside IPv4 part? */ + if (ip4OctetsDone > 0) { + /* Eat rest of IPv4 address */ + for (;;) { + switch (*first) { + case URI_SET_DIGIT: + if (digitCount == 4) { + URI_FUNC(StopSyntax)(state, first, memory); + return NULL; + } + digitHistory[digitCount++] = (unsigned char)(9 + *first - _UT('9')); + break; + + case _UT('.'): + if ((ip4OctetsDone == 4) /* NOTE! */ + || (digitCount == 0) + || (digitCount == 4)) { + /* Invalid digit or octet count */ + URI_FUNC(StopSyntax)(state, first, memory); + return NULL; + } else if ((digitCount > 1) + && (digitHistory[0] == 0)) { + /* Leading zero */ + URI_FUNC(StopSyntax)(state, first - digitCount, memory); + return NULL; + } else if ((digitCount == 3) + && (100 * digitHistory[0] + + 10 * digitHistory[1] + + digitHistory[2] > 255)) { + /* Octet value too large */ + if (digitHistory[0] > 2) { + URI_FUNC(StopSyntax)(state, first - 3, memory); + } else if (digitHistory[1] > 5) { + URI_FUNC(StopSyntax)(state, first - 2, memory); + } else { + URI_FUNC(StopSyntax)(state, first - 1, memory); + } + return NULL; + } + + /* Copy IPv4 octet */ + state->uri->hostData.ip6->data[16 - 4 + ip4OctetsDone] = uriGetOctetValue(digitHistory, digitCount); + digitCount = 0; + ip4OctetsDone++; + break; + + case _UT(']'): + if ((ip4OctetsDone != 3) /* NOTE! */ + || (digitCount == 0) + || (digitCount == 4)) { + /* Invalid digit or octet count */ + URI_FUNC(StopSyntax)(state, first, memory); + return NULL; + } else if ((digitCount > 1) + && (digitHistory[0] == 0)) { + /* Leading zero */ + URI_FUNC(StopSyntax)(state, first - digitCount, memory); + return NULL; + } else if ((digitCount == 3) + && (100 * digitHistory[0] + + 10 * digitHistory[1] + + digitHistory[2] > 255)) { + /* Octet value too large */ + if (digitHistory[0] > 2) { + URI_FUNC(StopSyntax)(state, first - 3, memory); + } else if (digitHistory[1] > 5) { + URI_FUNC(StopSyntax)(state, first - 2, memory); + } else { + URI_FUNC(StopSyntax)(state, first - 1, memory); + } + return NULL; + } + + state->uri->hostText.afterLast = first; /* HOST END */ + + /* Copy missing quads right before IPv4 */ + memcpy(state->uri->hostData.ip6->data + 16 - 4 - 2 * quadsAfterZipperCount, + quadsAfterZipper, 2 * quadsAfterZipperCount); + + /* Copy last IPv4 octet */ + state->uri->hostData.ip6->data[16 - 4 + 3] = uriGetOctetValue(digitHistory, digitCount); + + return first + 1; + + default: + URI_FUNC(StopSyntax)(state, first, memory); + return NULL; + } + first++; + + if (first >= afterLast) { + URI_FUNC(StopSyntax)(state, afterLast, memory); + return NULL; + } + } + } else { + /* Eat while no dot in sight */ + int letterAmong = 0; + int walking = 1; + do { + switch (*first) { + case URI_SET_HEX_LETTER_LOWER: + letterAmong = 1; + if (digitCount == 4) { + URI_FUNC(StopSyntax)(state, first, memory); + return NULL; + } + digitHistory[digitCount] = (unsigned char)(15 + *first - _UT('f')); + digitCount++; + break; + + case URI_SET_HEX_LETTER_UPPER: + letterAmong = 1; + if (digitCount == 4) { + URI_FUNC(StopSyntax)(state, first, memory); + return NULL; + } + digitHistory[digitCount] = (unsigned char)(15 + *first - _UT('F')); + digitCount++; + break; + + case URI_SET_DIGIT: + if (digitCount == 4) { + URI_FUNC(StopSyntax)(state, first, memory); + return NULL; + } + digitHistory[digitCount] = (unsigned char)(9 + *first - _UT('9')); + digitCount++; + break; + + case _UT(':'): + { + int setZipper = 0; + + if (digitCount > 0) { + if (zipperEver) { + uriWriteQuadToDoubleByte(digitHistory, digitCount, quadsAfterZipper + 2 * quadsAfterZipperCount); + quadsAfterZipperCount++; + } else { + uriWriteQuadToDoubleByte(digitHistory, digitCount, state->uri->hostData.ip6->data + 2 * quadsDone); + } + quadsDone++; + digitCount = 0; + } + letterAmong = 0; + + /* Too many quads? */ + if (quadsDone >= 8 - zipperEver) { + URI_FUNC(StopSyntax)(state, first, memory); + return NULL; + } + + /* "::"? */ + if (first + 1 >= afterLast) { + URI_FUNC(StopSyntax)(state, afterLast, memory); + return NULL; + } + if (first[1] == _UT(':')) { + const int resetOffset = 2 * (quadsDone + (digitCount > 0)); + + first++; + if (zipperEver) { + URI_FUNC(StopSyntax)(state, first, memory); + return NULL; /* "::.+::" */ + } + + /* Zero everything after zipper */ + memset(state->uri->hostData.ip6->data + resetOffset, 0, 16 - resetOffset); + setZipper = 1; + + /* ":::+"? */ + if (first + 1 >= afterLast) { + URI_FUNC(StopSyntax)(state, afterLast, memory); + return NULL; /* No ']' yet */ + } + if (first[1] == _UT(':')) { + URI_FUNC(StopSyntax)(state, first + 1, memory); + return NULL; /* ":::+ "*/ + } + } else if (quadsDone == 0 || first[1] == _UT(']')) { + /* Single leading or trailing ":" */ + URI_FUNC(StopSyntax)(state, first, memory); + return NULL; + } + + if (setZipper) { + zipperEver = 1; + } + } + break; + + case _UT('.'): + if ((quadsDone + zipperEver > 6) /* NOTE */ + || (!zipperEver && (quadsDone < 6)) + || letterAmong + || (digitCount == 0) + || (digitCount == 4)) { + /* Invalid octet before */ + URI_FUNC(StopSyntax)(state, first, memory); + return NULL; + } else if ((digitCount > 1) + && (digitHistory[0] == 0)) { + /* Leading zero */ + URI_FUNC(StopSyntax)(state, first - digitCount, memory); + return NULL; + } else if ((digitCount == 3) + && (100 * digitHistory[0] + + 10 * digitHistory[1] + + digitHistory[2] > 255)) { + /* Octet value too large */ + if (digitHistory[0] > 2) { + URI_FUNC(StopSyntax)(state, first - 3, memory); + } else if (digitHistory[1] > 5) { + URI_FUNC(StopSyntax)(state, first - 2, memory); + } else { + URI_FUNC(StopSyntax)(state, first - 1, memory); + } + return NULL; + } + + /* Copy first IPv4 octet */ + state->uri->hostData.ip6->data[16 - 4] = uriGetOctetValue(digitHistory, digitCount); + digitCount = 0; + + /* Switch over to IPv4 loop */ + ip4OctetsDone = 1; + walking = 0; + break; + + case _UT(']'): + /* Too little quads? */ + if (!zipperEver && !((quadsDone == 7) && (digitCount > 0))) { + URI_FUNC(StopSyntax)(state, first, memory); + return NULL; + } + + if (digitCount > 0) { + if (zipperEver) { + /* Too many quads? */ + if (quadsDone >= 7) { + URI_FUNC(StopSyntax)(state, first, memory); + return NULL; + } + uriWriteQuadToDoubleByte(digitHistory, digitCount, quadsAfterZipper + 2 * quadsAfterZipperCount); + quadsAfterZipperCount++; + } else { + uriWriteQuadToDoubleByte(digitHistory, digitCount, state->uri->hostData.ip6->data + 2 * quadsDone); + } + /* + quadsDone++; + digitCount = 0; + */ + } + + /* Copy missing quads to the end */ + memcpy(state->uri->hostData.ip6->data + 16 - 2 * quadsAfterZipperCount, + quadsAfterZipper, 2 * quadsAfterZipperCount); + + state->uri->hostText.afterLast = first; /* HOST END */ + return first + 1; /* Fine */ + + default: + URI_FUNC(StopSyntax)(state, first, memory); + return NULL; + } + first++; + + if (first >= afterLast) { + URI_FUNC(StopSyntax)(state, afterLast, memory); + return NULL; /* No ']' yet */ + } + } while (walking); + } + } +} + + + +/* + * [mustBeSegmentNzNc]->[pctEncoded][mustBeSegmentNzNc] + * [mustBeSegmentNzNc]->[subDelims][mustBeSegmentNzNc] + * [mustBeSegmentNzNc]->[unreserved][mustBeSegmentNzNc] + * [mustBeSegmentNzNc]->[uriTail] // can take + * [mustBeSegmentNzNc]->[segment][zeroMoreSlashSegs][uriTail] + * [mustBeSegmentNzNc]-><@>[mustBeSegmentNzNc] + */ +static const URI_CHAR * URI_FUNC(ParseMustBeSegmentNzNc)( + URI_TYPE(ParserState) * state, const URI_CHAR * first, + const URI_CHAR * afterLast, UriMemoryManager * memory) { + if (first >= afterLast) { + if (!URI_FUNC(PushPathSegment)(state, state->uri->scheme.first, first, memory)) { /* SEGMENT BOTH */ + URI_FUNC(StopMalloc)(state, memory); + return NULL; + } + state->uri->scheme.first = NULL; /* Not a scheme, reset */ + return afterLast; + } + + switch (*first) { + case _UT('%'): + { + const URI_CHAR * const afterPctEncoded + = URI_FUNC(ParsePctEncoded)(state, first, afterLast, memory); + if (afterPctEncoded == NULL) { + return NULL; + } + return URI_FUNC(ParseMustBeSegmentNzNc)(state, afterPctEncoded, afterLast, memory); + } + + case _UT('@'): + case _UT('!'): + case _UT('$'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('*'): + case _UT(','): + case _UT(';'): + case _UT('\''): + case _UT('+'): + case _UT('='): + case _UT('-'): + case _UT('.'): + case _UT('_'): + case _UT('~'): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + return URI_FUNC(ParseMustBeSegmentNzNc)(state, first + 1, afterLast, memory); + + case _UT('/'): + { + const URI_CHAR * afterZeroMoreSlashSegs; + const URI_CHAR * afterSegment; + if (!URI_FUNC(PushPathSegment)(state, state->uri->scheme.first, first, memory)) { /* SEGMENT BOTH */ + URI_FUNC(StopMalloc)(state, memory); + return NULL; + } + state->uri->scheme.first = NULL; /* Not a scheme, reset */ + afterSegment = URI_FUNC(ParseSegment)(state, first + 1, afterLast, memory); + if (afterSegment == NULL) { + return NULL; + } + if (!URI_FUNC(PushPathSegment)(state, first + 1, afterSegment, memory)) { /* SEGMENT BOTH */ + URI_FUNC(StopMalloc)(state, memory); + return NULL; + } + afterZeroMoreSlashSegs + = URI_FUNC(ParseZeroMoreSlashSegs)(state, afterSegment, afterLast, memory); + if (afterZeroMoreSlashSegs == NULL) { + return NULL; + } + return URI_FUNC(ParseUriTail)(state, afterZeroMoreSlashSegs, afterLast, memory); + } + + default: + if (!URI_FUNC(PushPathSegment)(state, state->uri->scheme.first, first, memory)) { /* SEGMENT BOTH */ + URI_FUNC(StopMalloc)(state, memory); + return NULL; + } + state->uri->scheme.first = NULL; /* Not a scheme, reset */ + return URI_FUNC(ParseUriTail)(state, first, afterLast, memory); + } +} + + + +/* + * [ownHost]-><[>[ipLit2][authorityTwo] + * [ownHost]->[ownHost2] // can take + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseOwnHost)( + URI_TYPE(ParserState) * state, const URI_CHAR * first, + const URI_CHAR * afterLast, UriMemoryManager * memory) { + if (first >= afterLast) { + state->uri->hostText.afterLast = afterLast; /* HOST END */ + return afterLast; + } + + switch (*first) { + case _UT('['): + { + const URI_CHAR * const afterIpLit2 + = URI_FUNC(ParseIpLit2)(state, first + 1, afterLast, memory); + if (afterIpLit2 == NULL) { + return NULL; + } + state->uri->hostText.first = first + 1; /* HOST BEGIN */ + return URI_FUNC(ParseAuthorityTwo)(state, afterIpLit2, afterLast); + } + + default: + return URI_FUNC(ParseOwnHost2)(state, first, afterLast, memory); + } +} + + + +static URI_INLINE UriBool URI_FUNC(OnExitOwnHost2)( + URI_TYPE(ParserState) * state, const URI_CHAR * first, + UriMemoryManager * memory) { + state->uri->hostText.afterLast = first; /* HOST END */ + + /* Valid IPv4 or just a regname? */ + state->uri->hostData.ip4 = memory->malloc(memory, 1 * sizeof(UriIp4)); /* Freed when stopping on parse error */ + if (state->uri->hostData.ip4 == NULL) { + return URI_FALSE; /* Raises malloc error */ + } + if (URI_FUNC(ParseIpFourAddress)(state->uri->hostData.ip4->data, + state->uri->hostText.first, state->uri->hostText.afterLast)) { + /* Not IPv4 */ + memory->free(memory, state->uri->hostData.ip4); + state->uri->hostData.ip4 = NULL; + } + return URI_TRUE; /* Success */ +} + + + +/* + * [ownHost2]->[authorityTwo] // can take + * [ownHost2]->[pctSubUnres][ownHost2] + */ +static const URI_CHAR * URI_FUNC(ParseOwnHost2)( + URI_TYPE(ParserState) * state, const URI_CHAR * first, + const URI_CHAR * afterLast, UriMemoryManager * memory) { + if (first >= afterLast) { + if (!URI_FUNC(OnExitOwnHost2)(state, first, memory)) { + URI_FUNC(StopMalloc)(state, memory); + return NULL; + } + return afterLast; + } + + switch (*first) { + case _UT('!'): + case _UT('$'): + case _UT('%'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('-'): + case _UT('*'): + case _UT(','): + case _UT('.'): + case _UT(';'): + case _UT('\''): + case _UT('_'): + case _UT('~'): + case _UT('+'): + case _UT('='): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + { + const URI_CHAR * const afterPctSubUnres + = URI_FUNC(ParsePctSubUnres)(state, first, afterLast, memory); + if (afterPctSubUnres == NULL) { + return NULL; + } + return URI_FUNC(ParseOwnHost2)(state, afterPctSubUnres, afterLast, memory); + } + + default: + if (!URI_FUNC(OnExitOwnHost2)(state, first, memory)) { + URI_FUNC(StopMalloc)(state, memory); + return NULL; + } + return URI_FUNC(ParseAuthorityTwo)(state, first, afterLast); + } +} + + + +static URI_INLINE UriBool URI_FUNC(OnExitOwnHostUserInfo)( + URI_TYPE(ParserState) * state, const URI_CHAR * first, + UriMemoryManager * memory) { + state->uri->hostText.first = state->uri->userInfo.first; /* Host instead of userInfo, update */ + state->uri->userInfo.first = NULL; /* Not a userInfo, reset */ + state->uri->hostText.afterLast = first; /* HOST END */ + + /* Valid IPv4 or just a regname? */ + state->uri->hostData.ip4 = memory->malloc(memory, 1 * sizeof(UriIp4)); /* Freed when stopping on parse error */ + if (state->uri->hostData.ip4 == NULL) { + return URI_FALSE; /* Raises malloc error */ + } + if (URI_FUNC(ParseIpFourAddress)(state->uri->hostData.ip4->data, + state->uri->hostText.first, state->uri->hostText.afterLast)) { + /* Not IPv4 */ + memory->free(memory, state->uri->hostData.ip4); + state->uri->hostData.ip4 = NULL; + } + return URI_TRUE; /* Success */ +} + + + +/* + * [ownHostUserInfo]->[ownHostUserInfoNz] + * [ownHostUserInfo]-> + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseOwnHostUserInfo)( + URI_TYPE(ParserState) * state, const URI_CHAR * first, + const URI_CHAR * afterLast, UriMemoryManager * memory) { + if (first >= afterLast) { + if (!URI_FUNC(OnExitOwnHostUserInfo)(state, first, memory)) { + URI_FUNC(StopMalloc)(state, memory); + return NULL; + } + return afterLast; + } + + switch (*first) { + case _UT('!'): + case _UT('$'): + case _UT('%'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('-'): + case _UT('*'): + case _UT(','): + case _UT('.'): + case _UT(':'): + case _UT(';'): + case _UT('@'): + case _UT('\''): + case _UT('_'): + case _UT('~'): + case _UT('+'): + case _UT('='): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + return URI_FUNC(ParseOwnHostUserInfoNz)(state, first, afterLast, memory); + + default: + if (!URI_FUNC(OnExitOwnHostUserInfo)(state, first, memory)) { + URI_FUNC(StopMalloc)(state, memory); + return NULL; + } + return first; + } +} + + + +/* + * [ownHostUserInfoNz]->[pctSubUnres][ownHostUserInfo] + * [ownHostUserInfoNz]-><:>[ownPortUserInfo] + * [ownHostUserInfoNz]-><@>[ownHost] + */ +static const URI_CHAR * URI_FUNC(ParseOwnHostUserInfoNz)( + URI_TYPE(ParserState) * state, const URI_CHAR * first, + const URI_CHAR * afterLast, UriMemoryManager * memory) { + if (first >= afterLast) { + URI_FUNC(StopSyntax)(state, afterLast, memory); + return NULL; + } + + switch (*first) { + case _UT('!'): + case _UT('$'): + case _UT('%'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('-'): + case _UT('*'): + case _UT(','): + case _UT('.'): + case _UT(';'): + case _UT('\''): + case _UT('_'): + case _UT('~'): + case _UT('+'): + case _UT('='): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + { + const URI_CHAR * const afterPctSubUnres + = URI_FUNC(ParsePctSubUnres)(state, first, afterLast, memory); + if (afterPctSubUnres == NULL) { + return NULL; + } + return URI_FUNC(ParseOwnHostUserInfo)(state, afterPctSubUnres, afterLast, memory); + } + + case _UT(':'): + state->uri->hostText.afterLast = first; /* HOST END */ + state->uri->portText.first = first + 1; /* PORT BEGIN */ + return URI_FUNC(ParseOwnPortUserInfo)(state, first + 1, afterLast, memory); + + case _UT('@'): + state->uri->userInfo.afterLast = first; /* USERINFO END */ + state->uri->hostText.first = first + 1; /* HOST BEGIN */ + return URI_FUNC(ParseOwnHost)(state, first + 1, afterLast, memory); + + default: + URI_FUNC(StopSyntax)(state, first, memory); + return NULL; + } +} + + + +static URI_INLINE UriBool URI_FUNC(OnExitOwnPortUserInfo)( + URI_TYPE(ParserState) * state, const URI_CHAR * first, + UriMemoryManager * memory) { + state->uri->hostText.first = state->uri->userInfo.first; /* Host instead of userInfo, update */ + state->uri->userInfo.first = NULL; /* Not a userInfo, reset */ + state->uri->portText.afterLast = first; /* PORT END */ + + /* Valid IPv4 or just a regname? */ + state->uri->hostData.ip4 = memory->malloc(memory, 1 * sizeof(UriIp4)); /* Freed when stopping on parse error */ + if (state->uri->hostData.ip4 == NULL) { + return URI_FALSE; /* Raises malloc error */ + } + if (URI_FUNC(ParseIpFourAddress)(state->uri->hostData.ip4->data, + state->uri->hostText.first, state->uri->hostText.afterLast)) { + /* Not IPv4 */ + memory->free(memory, state->uri->hostData.ip4); + state->uri->hostData.ip4 = NULL; + } + return URI_TRUE; /* Success */ +} + + + +/* + * [ownPortUserInfo]->[ALPHA][ownUserInfo] + * [ownPortUserInfo]->[DIGIT][ownPortUserInfo] + * [ownPortUserInfo]-><.>[ownUserInfo] + * [ownPortUserInfo]-><_>[ownUserInfo] + * [ownPortUserInfo]-><~>[ownUserInfo] + * [ownPortUserInfo]-><->[ownUserInfo] + * [ownPortUserInfo]->[subDelims][ownUserInfo] + * [ownPortUserInfo]->[pctEncoded][ownUserInfo] + * [ownPortUserInfo]-><:>[ownUserInfo] + * [ownPortUserInfo]-><@>[ownHost] + * [ownPortUserInfo]-> + */ +static const URI_CHAR * URI_FUNC(ParseOwnPortUserInfo)( + URI_TYPE(ParserState) * state, const URI_CHAR * first, + const URI_CHAR * afterLast, UriMemoryManager * memory) { + if (first >= afterLast) { + if (!URI_FUNC(OnExitOwnPortUserInfo)(state, first, memory)) { + URI_FUNC(StopMalloc)(state, memory); + return NULL; + } + return afterLast; + } + + switch (*first) { + /* begin sub-delims */ + case _UT('!'): + case _UT('$'): + case _UT('&'): + case _UT('\''): + case _UT('('): + case _UT(')'): + case _UT('*'): + case _UT('+'): + case _UT(','): + case _UT(';'): + case _UT('='): + /* end sub-delims */ + /* begin unreserved (except alpha and digit) */ + case _UT('-'): + case _UT('.'): + case _UT('_'): + case _UT('~'): + /* end unreserved (except alpha and digit) */ + case _UT(':'): + case URI_SET_ALPHA: + state->uri->hostText.afterLast = NULL; /* Not a host, reset */ + state->uri->portText.first = NULL; /* Not a port, reset */ + return URI_FUNC(ParseOwnUserInfo)(state, first + 1, afterLast, memory); + + case URI_SET_DIGIT: + return URI_FUNC(ParseOwnPortUserInfo)(state, first + 1, afterLast, memory); + + case _UT('%'): + state->uri->portText.first = NULL; /* Not a port, reset */ + { + const URI_CHAR * const afterPct + = URI_FUNC(ParsePctEncoded)(state, first, afterLast, memory); + if (afterPct == NULL) { + return NULL; + } + return URI_FUNC(ParseOwnUserInfo)(state, afterPct, afterLast, memory); + } + + case _UT('@'): + state->uri->hostText.afterLast = NULL; /* Not a host, reset */ + state->uri->portText.first = NULL; /* Not a port, reset */ + state->uri->userInfo.afterLast = first; /* USERINFO END */ + state->uri->hostText.first = first + 1; /* HOST BEGIN */ + return URI_FUNC(ParseOwnHost)(state, first + 1, afterLast, memory); + + default: + if (!URI_FUNC(OnExitOwnPortUserInfo)(state, first, memory)) { + URI_FUNC(StopMalloc)(state, memory); + return NULL; + } + return first; + } +} + + + +/* + * [ownUserInfo]->[pctSubUnres][ownUserInfo] + * [ownUserInfo]-><:>[ownUserInfo] + * [ownUserInfo]-><@>[ownHost] + */ +static const URI_CHAR * URI_FUNC(ParseOwnUserInfo)( + URI_TYPE(ParserState) * state, const URI_CHAR * first, + const URI_CHAR * afterLast, UriMemoryManager * memory) { + if (first >= afterLast) { + URI_FUNC(StopSyntax)(state, afterLast, memory); + return NULL; + } + + switch (*first) { + case _UT('!'): + case _UT('$'): + case _UT('%'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('-'): + case _UT('*'): + case _UT(','): + case _UT('.'): + case _UT(';'): + case _UT('\''): + case _UT('_'): + case _UT('~'): + case _UT('+'): + case _UT('='): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + { + const URI_CHAR * const afterPctSubUnres + = URI_FUNC(ParsePctSubUnres)(state, first, afterLast, memory); + if (afterPctSubUnres == NULL) { + return NULL; + } + return URI_FUNC(ParseOwnUserInfo)(state, afterPctSubUnres, afterLast, memory); + } + + case _UT(':'): + return URI_FUNC(ParseOwnUserInfo)(state, first + 1, afterLast, memory); + + case _UT('@'): + /* SURE */ + state->uri->userInfo.afterLast = first; /* USERINFO END */ + state->uri->hostText.first = first + 1; /* HOST BEGIN */ + return URI_FUNC(ParseOwnHost)(state, first + 1, afterLast, memory); + + default: + URI_FUNC(StopSyntax)(state, first, memory); + return NULL; + } +} + + + +static URI_INLINE void URI_FUNC(OnExitPartHelperTwo)(URI_TYPE(ParserState) * state) { + state->uri->absolutePath = URI_TRUE; +} + + + +/* + * [partHelperTwo]->[pathAbsNoLeadSlash] // can take + * [partHelperTwo]->[authority][pathAbsEmpty] + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParsePartHelperTwo)( + URI_TYPE(ParserState) * state, const URI_CHAR * first, + const URI_CHAR * afterLast, UriMemoryManager * memory) { + if (first >= afterLast) { + URI_FUNC(OnExitPartHelperTwo)(state); + return afterLast; + } + + switch (*first) { + case _UT('/'): + { + const URI_CHAR * const afterAuthority + = URI_FUNC(ParseAuthority)(state, first + 1, afterLast, memory); + const URI_CHAR * afterPathAbsEmpty; + if (afterAuthority == NULL) { + return NULL; + } + afterPathAbsEmpty = URI_FUNC(ParsePathAbsEmpty)(state, afterAuthority, afterLast, memory); + + URI_FUNC(FixEmptyTrailSegment)(state->uri, memory); + + return afterPathAbsEmpty; + } + + default: + URI_FUNC(OnExitPartHelperTwo)(state); + return URI_FUNC(ParsePathAbsNoLeadSlash)(state, first, afterLast, memory); + } +} + + + +/* + * [pathAbsEmpty]->[segment][pathAbsEmpty] + * [pathAbsEmpty]-> + */ +static const URI_CHAR * URI_FUNC(ParsePathAbsEmpty)( + URI_TYPE(ParserState) * state, const URI_CHAR * first, + const URI_CHAR * afterLast, UriMemoryManager * memory) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('/'): + { + const URI_CHAR * const afterSegment + = URI_FUNC(ParseSegment)(state, first + 1, afterLast, memory); + if (afterSegment == NULL) { + return NULL; + } + if (!URI_FUNC(PushPathSegment)(state, first + 1, afterSegment, memory)) { /* SEGMENT BOTH */ + URI_FUNC(StopMalloc)(state, memory); + return NULL; + } + return URI_FUNC(ParsePathAbsEmpty)(state, afterSegment, afterLast, memory); + } + + default: + return first; + } +} + + + +/* + * [pathAbsNoLeadSlash]->[segmentNz][zeroMoreSlashSegs] + * [pathAbsNoLeadSlash]-> + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParsePathAbsNoLeadSlash)( + URI_TYPE(ParserState) * state, const URI_CHAR * first, + const URI_CHAR * afterLast, UriMemoryManager * memory) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('!'): + case _UT('$'): + case _UT('%'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('-'): + case _UT('*'): + case _UT(','): + case _UT('.'): + case _UT(':'): + case _UT(';'): + case _UT('@'): + case _UT('\''): + case _UT('_'): + case _UT('~'): + case _UT('+'): + case _UT('='): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + { + const URI_CHAR * const afterSegmentNz + = URI_FUNC(ParseSegmentNz)(state, first, afterLast, memory); + if (afterSegmentNz == NULL) { + return NULL; + } + if (!URI_FUNC(PushPathSegment)(state, first, afterSegmentNz, memory)) { /* SEGMENT BOTH */ + URI_FUNC(StopMalloc)(state, memory); + return NULL; + } + return URI_FUNC(ParseZeroMoreSlashSegs)(state, afterSegmentNz, afterLast, memory); + } + + default: + return first; + } +} + + + +/* + * [pathRootless]->[segmentNz][zeroMoreSlashSegs] + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParsePathRootless)( + URI_TYPE(ParserState) * state, const URI_CHAR * first, + const URI_CHAR * afterLast, UriMemoryManager * memory) { + const URI_CHAR * const afterSegmentNz + = URI_FUNC(ParseSegmentNz)(state, first, afterLast, memory); + if (afterSegmentNz == NULL) { + return NULL; + } else { + if (!URI_FUNC(PushPathSegment)(state, first, afterSegmentNz, memory)) { /* SEGMENT BOTH */ + URI_FUNC(StopMalloc)(state, memory); + return NULL; + } + } + return URI_FUNC(ParseZeroMoreSlashSegs)(state, afterSegmentNz, afterLast, memory); +} + + + +/* + * [pchar]->[pctEncoded] + * [pchar]->[subDelims] + * [pchar]->[unreserved] + * [pchar]-><:> + * [pchar]-><@> + */ +static const URI_CHAR * URI_FUNC(ParsePchar)(URI_TYPE(ParserState) * state, + const URI_CHAR * first, const URI_CHAR * afterLast, + UriMemoryManager * memory) { + if (first >= afterLast) { + URI_FUNC(StopSyntax)(state, afterLast, memory); + return NULL; + } + + switch (*first) { + case _UT('%'): + return URI_FUNC(ParsePctEncoded)(state, first, afterLast, memory); + + case _UT(':'): + case _UT('@'): + case _UT('!'): + case _UT('$'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('*'): + case _UT(','): + case _UT(';'): + case _UT('\''): + case _UT('+'): + case _UT('='): + case _UT('-'): + case _UT('.'): + case _UT('_'): + case _UT('~'): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + return first + 1; + + default: + URI_FUNC(StopSyntax)(state, first, memory); + return NULL; + } +} + + + +/* + * [pctEncoded]-><%>[HEXDIG][HEXDIG] + */ +static const URI_CHAR * URI_FUNC(ParsePctEncoded)( + URI_TYPE(ParserState) * state, + const URI_CHAR * first, const URI_CHAR * afterLast, + UriMemoryManager * memory) { + if (first >= afterLast) { + URI_FUNC(StopSyntax)(state, afterLast, memory); + return NULL; + } + + /* + First character has already been + checked before entering this rule. + + switch (*first) { + case _UT('%'): + */ + if (first + 1 >= afterLast) { + URI_FUNC(StopSyntax)(state, afterLast, memory); + return NULL; + } + + switch (first[1]) { + case URI_SET_HEXDIG: + if (first + 2 >= afterLast) { + URI_FUNC(StopSyntax)(state, afterLast, memory); + return NULL; + } + + switch (first[2]) { + case URI_SET_HEXDIG: + return first + 3; + + default: + URI_FUNC(StopSyntax)(state, first + 2, memory); + return NULL; + } + + default: + URI_FUNC(StopSyntax)(state, first + 1, memory); + return NULL; + } + + /* + default: + URI_FUNC(StopSyntax)(state, first, memory); + return NULL; + } + */ +} + + + +/* + * [pctSubUnres]->[pctEncoded] + * [pctSubUnres]->[subDelims] + * [pctSubUnres]->[unreserved] + */ +static const URI_CHAR * URI_FUNC(ParsePctSubUnres)( + URI_TYPE(ParserState) * state, + const URI_CHAR * first, const URI_CHAR * afterLast, + UriMemoryManager * memory) { + if (first >= afterLast) { + URI_FUNC(StopSyntax)(state, afterLast, memory); + return NULL; + } + + switch (*first) { + case _UT('%'): + return URI_FUNC(ParsePctEncoded)(state, first, afterLast, memory); + + case _UT('!'): + case _UT('$'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('*'): + case _UT(','): + case _UT(';'): + case _UT('\''): + case _UT('+'): + case _UT('='): + case _UT('-'): + case _UT('.'): + case _UT('_'): + case _UT('~'): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + return first + 1; + + default: + URI_FUNC(StopSyntax)(state, first, memory); + return NULL; + } +} + + + +/* + * [port]->[DIGIT][port] + * [port]-> + */ +static const URI_CHAR * URI_FUNC(ParsePort)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case URI_SET_DIGIT: + return URI_FUNC(ParsePort)(state, first + 1, afterLast); + + default: + return first; + } +} + + + +/* + * [queryFrag]->[pchar][queryFrag] + * [queryFrag]->[queryFrag] + * [queryFrag]->[queryFrag] + * [queryFrag]-> + */ +static const URI_CHAR * URI_FUNC(ParseQueryFrag)(URI_TYPE(ParserState) * state, + const URI_CHAR * first, const URI_CHAR * afterLast, + UriMemoryManager * memory) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('!'): + case _UT('$'): + case _UT('%'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('-'): + case _UT('*'): + case _UT(','): + case _UT('.'): + case _UT(':'): + case _UT(';'): + case _UT('@'): + case _UT('\''): + case _UT('_'): + case _UT('~'): + case _UT('+'): + case _UT('='): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + { + const URI_CHAR * const afterPchar + = URI_FUNC(ParsePchar)(state, first, afterLast, memory); + if (afterPchar == NULL) { + return NULL; + } + return URI_FUNC(ParseQueryFrag)(state, afterPchar, afterLast, memory); + } + + case _UT('/'): + case _UT('?'): + return URI_FUNC(ParseQueryFrag)(state, first + 1, afterLast, memory); + + default: + return first; + } +} + + + +/* + * [segment]->[pchar][segment] + * [segment]-> + */ +static const URI_CHAR * URI_FUNC(ParseSegment)(URI_TYPE(ParserState) * state, + const URI_CHAR * first, const URI_CHAR * afterLast, + UriMemoryManager * memory) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('!'): + case _UT('$'): + case _UT('%'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('-'): + case _UT('*'): + case _UT(','): + case _UT('.'): + case _UT(':'): + case _UT(';'): + case _UT('@'): + case _UT('\''): + case _UT('_'): + case _UT('~'): + case _UT('+'): + case _UT('='): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + { + const URI_CHAR * const afterPchar + = URI_FUNC(ParsePchar)(state, first, afterLast, memory); + if (afterPchar == NULL) { + return NULL; + } + return URI_FUNC(ParseSegment)(state, afterPchar, afterLast, memory); + } + + default: + return first; + } +} + + + +/* + * [segmentNz]->[pchar][segment] + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseSegmentNz)( + URI_TYPE(ParserState) * state, + const URI_CHAR * first, const URI_CHAR * afterLast, + UriMemoryManager * memory) { + const URI_CHAR * const afterPchar + = URI_FUNC(ParsePchar)(state, first, afterLast, memory); + if (afterPchar == NULL) { + return NULL; + } + return URI_FUNC(ParseSegment)(state, afterPchar, afterLast, memory); +} + + + +static URI_INLINE UriBool URI_FUNC(OnExitSegmentNzNcOrScheme2)( + URI_TYPE(ParserState) * state, const URI_CHAR * first, + UriMemoryManager * memory) { + if (!URI_FUNC(PushPathSegment)(state, state->uri->scheme.first, first, memory)) { /* SEGMENT BOTH */ + return URI_FALSE; /* Raises malloc error*/ + } + state->uri->scheme.first = NULL; /* Not a scheme, reset */ + return URI_TRUE; /* Success */ +} + + + +/* + * [segmentNzNcOrScheme2]->[ALPHA][segmentNzNcOrScheme2] + * [segmentNzNcOrScheme2]->[DIGIT][segmentNzNcOrScheme2] + * [segmentNzNcOrScheme2]->[pctEncoded][mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]->[uriTail] // can take + * [segmentNzNcOrScheme2]->[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><$>[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><&>[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><(>[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><)>[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><*>[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><,>[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><.>[segmentNzNcOrScheme2] + * [segmentNzNcOrScheme2]->[segment][zeroMoreSlashSegs][uriTail] + * [segmentNzNcOrScheme2]-><:>[hierPart][uriTail] + * [segmentNzNcOrScheme2]-><;>[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><@>[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><_>[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><~>[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><+>[segmentNzNcOrScheme2] + * [segmentNzNcOrScheme2]-><=>[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><'>[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><->[segmentNzNcOrScheme2] + */ +static const URI_CHAR * URI_FUNC(ParseSegmentNzNcOrScheme2)( + URI_TYPE(ParserState) * state, const URI_CHAR * first, + const URI_CHAR * afterLast, UriMemoryManager * memory) { + if (first >= afterLast) { + if (!URI_FUNC(OnExitSegmentNzNcOrScheme2)(state, first, memory)) { + URI_FUNC(StopMalloc)(state, memory); + return NULL; + } + return afterLast; + } + + switch (*first) { + case _UT('.'): + case _UT('+'): + case _UT('-'): + case URI_SET_ALPHA: + case URI_SET_DIGIT: + return URI_FUNC(ParseSegmentNzNcOrScheme2)(state, first + 1, afterLast, memory); + + case _UT('%'): + { + const URI_CHAR * const afterPctEncoded + = URI_FUNC(ParsePctEncoded)(state, first, afterLast, memory); + if (afterPctEncoded == NULL) { + return NULL; + } + return URI_FUNC(ParseMustBeSegmentNzNc)(state, afterPctEncoded, afterLast, memory); + } + + case _UT('!'): + case _UT('$'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('*'): + case _UT(','): + case _UT(';'): + case _UT('@'): + case _UT('_'): + case _UT('~'): + case _UT('='): + case _UT('\''): + return URI_FUNC(ParseMustBeSegmentNzNc)(state, first + 1, afterLast, memory); + + case _UT('/'): + { + const URI_CHAR * afterZeroMoreSlashSegs; + const URI_CHAR * const afterSegment + = URI_FUNC(ParseSegment)(state, first + 1, afterLast, memory); + if (afterSegment == NULL) { + return NULL; + } + if (!URI_FUNC(PushPathSegment)(state, state->uri->scheme.first, first, memory)) { /* SEGMENT BOTH */ + URI_FUNC(StopMalloc)(state, memory); + return NULL; + } + state->uri->scheme.first = NULL; /* Not a scheme, reset */ + if (!URI_FUNC(PushPathSegment)(state, first + 1, afterSegment, memory)) { /* SEGMENT BOTH */ + URI_FUNC(StopMalloc)(state, memory); + return NULL; + } + afterZeroMoreSlashSegs + = URI_FUNC(ParseZeroMoreSlashSegs)(state, afterSegment, afterLast, memory); + if (afterZeroMoreSlashSegs == NULL) { + return NULL; + } + return URI_FUNC(ParseUriTail)(state, afterZeroMoreSlashSegs, afterLast, memory); + } + + case _UT(':'): + { + const URI_CHAR * const afterHierPart + = URI_FUNC(ParseHierPart)(state, first + 1, afterLast, memory); + state->uri->scheme.afterLast = first; /* SCHEME END */ + if (afterHierPart == NULL) { + return NULL; + } + return URI_FUNC(ParseUriTail)(state, afterHierPart, afterLast, memory); + } + + default: + if (!URI_FUNC(OnExitSegmentNzNcOrScheme2)(state, first, memory)) { + URI_FUNC(StopMalloc)(state, memory); + return NULL; + } + return URI_FUNC(ParseUriTail)(state, first, afterLast, memory); + } +} + + + +/* + * [uriReference]->[ALPHA][segmentNzNcOrScheme2] + * [uriReference]->[DIGIT][mustBeSegmentNzNc] + * [uriReference]->[pctEncoded][mustBeSegmentNzNc] + * [uriReference]->[subDelims][mustBeSegmentNzNc] + * [uriReference]->[uriTail] // can take + * [uriReference]-><.>[mustBeSegmentNzNc] + * [uriReference]->[partHelperTwo][uriTail] + * [uriReference]-><@>[mustBeSegmentNzNc] + * [uriReference]-><_>[mustBeSegmentNzNc] + * [uriReference]-><~>[mustBeSegmentNzNc] + * [uriReference]-><->[mustBeSegmentNzNc] + */ +static const URI_CHAR * URI_FUNC(ParseUriReference)( + URI_TYPE(ParserState) * state, const URI_CHAR * first, + const URI_CHAR * afterLast, UriMemoryManager * memory) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case URI_SET_ALPHA: + state->uri->scheme.first = first; /* SCHEME BEGIN */ + return URI_FUNC(ParseSegmentNzNcOrScheme2)(state, first + 1, afterLast, memory); + + case URI_SET_DIGIT: + case _UT('!'): + case _UT('$'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('*'): + case _UT(','): + case _UT(';'): + case _UT('\''): + case _UT('+'): + case _UT('='): + case _UT('.'): + case _UT('_'): + case _UT('~'): + case _UT('-'): + case _UT('@'): + state->uri->scheme.first = first; /* SEGMENT BEGIN, ABUSE SCHEME POINTER */ + return URI_FUNC(ParseMustBeSegmentNzNc)(state, first + 1, afterLast, memory); + + case _UT('%'): + { + const URI_CHAR * const afterPctEncoded + = URI_FUNC(ParsePctEncoded)(state, first, afterLast, memory); + if (afterPctEncoded == NULL) { + return NULL; + } + state->uri->scheme.first = first; /* SEGMENT BEGIN, ABUSE SCHEME POINTER */ + return URI_FUNC(ParseMustBeSegmentNzNc)(state, afterPctEncoded, afterLast, memory); + } + + case _UT('/'): + { + const URI_CHAR * const afterPartHelperTwo + = URI_FUNC(ParsePartHelperTwo)(state, first + 1, afterLast, memory); + if (afterPartHelperTwo == NULL) { + return NULL; + } + return URI_FUNC(ParseUriTail)(state, afterPartHelperTwo, afterLast, memory); + } + + default: + return URI_FUNC(ParseUriTail)(state, first, afterLast, memory); + } +} + + + +/* + * [uriTail]-><#>[queryFrag] + * [uriTail]->[queryFrag][uriTailTwo] + * [uriTail]-> + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseUriTail)( + URI_TYPE(ParserState) * state, + const URI_CHAR * first, const URI_CHAR * afterLast, + UriMemoryManager * memory) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('#'): + { + const URI_CHAR * const afterQueryFrag = URI_FUNC(ParseQueryFrag)(state, first + 1, afterLast, memory); + if (afterQueryFrag == NULL) { + return NULL; + } + state->uri->fragment.first = first + 1; /* FRAGMENT BEGIN */ + state->uri->fragment.afterLast = afterQueryFrag; /* FRAGMENT END */ + return afterQueryFrag; + } + + case _UT('?'): + { + const URI_CHAR * const afterQueryFrag + = URI_FUNC(ParseQueryFrag)(state, first + 1, afterLast, memory); + if (afterQueryFrag == NULL) { + return NULL; + } + state->uri->query.first = first + 1; /* QUERY BEGIN */ + state->uri->query.afterLast = afterQueryFrag; /* QUERY END */ + return URI_FUNC(ParseUriTailTwo)(state, afterQueryFrag, afterLast, memory); + } + + default: + return first; + } +} + + + +/* + * [uriTailTwo]-><#>[queryFrag] + * [uriTailTwo]-> + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseUriTailTwo)( + URI_TYPE(ParserState) * state, + const URI_CHAR * first, const URI_CHAR * afterLast, + UriMemoryManager * memory) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('#'): + { + const URI_CHAR * const afterQueryFrag = URI_FUNC(ParseQueryFrag)(state, first + 1, afterLast, memory); + if (afterQueryFrag == NULL) { + return NULL; + } + state->uri->fragment.first = first + 1; /* FRAGMENT BEGIN */ + state->uri->fragment.afterLast = afterQueryFrag; /* FRAGMENT END */ + return afterQueryFrag; + } + + default: + return first; + } +} + + + +/* + * [zeroMoreSlashSegs]->[segment][zeroMoreSlashSegs] + * [zeroMoreSlashSegs]-> + */ +static const URI_CHAR * URI_FUNC(ParseZeroMoreSlashSegs)( + URI_TYPE(ParserState) * state, const URI_CHAR * first, + const URI_CHAR * afterLast, UriMemoryManager * memory) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('/'): + { + const URI_CHAR * const afterSegment + = URI_FUNC(ParseSegment)(state, first + 1, afterLast, memory); + if (afterSegment == NULL) { + return NULL; + } + if (!URI_FUNC(PushPathSegment)(state, first + 1, afterSegment, memory)) { /* SEGMENT BOTH */ + URI_FUNC(StopMalloc)(state, memory); + return NULL; + } + return URI_FUNC(ParseZeroMoreSlashSegs)(state, afterSegment, afterLast, memory); + } + + default: + return first; + } +} + + + +static URI_INLINE void URI_FUNC(ResetParserStateExceptUri)(URI_TYPE(ParserState) * state) { + URI_TYPE(Uri) * const uriBackup = state->uri; + memset(state, 0, sizeof(URI_TYPE(ParserState))); + state->uri = uriBackup; +} + + + +UriBool URI_FUNC(PushPathSegment)( + URI_TYPE(ParserState) * state, const URI_CHAR * first, + const URI_CHAR * afterLast, UriMemoryManager * memory) { + URI_TYPE(PathSegment) * segment = memory->calloc(memory, 1, sizeof(URI_TYPE(PathSegment))); + if (segment == NULL) { + return URI_FALSE; /* Raises malloc error */ + } + if (first == afterLast) { + segment->text.first = URI_FUNC(SafeToPointTo); + segment->text.afterLast = URI_FUNC(SafeToPointTo); + } else { + segment->text.first = first; + segment->text.afterLast = afterLast; + } + + /* First segment ever? */ + if (state->uri->pathHead == NULL) { + /* First segment ever, set head and tail */ + state->uri->pathHead = segment; + state->uri->pathTail = segment; + } else { + /* Append, update tail */ + state->uri->pathTail->next = segment; + state->uri->pathTail = segment; + } + + return URI_TRUE; /* Success */ +} + + + +int URI_FUNC(ParseUriEx)(URI_TYPE(ParserState) * state, + const URI_CHAR * first, const URI_CHAR * afterLast) { + return URI_FUNC(ParseUriExMm)(state, first, afterLast, NULL); +} + + + +static int URI_FUNC(ParseUriExMm)(URI_TYPE(ParserState) * state, + const URI_CHAR * first, const URI_CHAR * afterLast, + UriMemoryManager * memory) { + const URI_CHAR * afterUriReference; + URI_TYPE(Uri) * uri; + + /* Check params */ + if ((state == NULL) || (first == NULL) || (afterLast == NULL)) { + return URI_ERROR_NULL; + } + URI_CHECK_MEMORY_MANAGER(memory); /* may return */ + + uri = state->uri; + + /* Init parser */ + URI_FUNC(ResetParserStateExceptUri)(state); + URI_FUNC(ResetUri)(uri); + + /* Parse */ + afterUriReference = URI_FUNC(ParseUriReference)(state, first, afterLast, memory); + if (afterUriReference == NULL) { + /* Waterproof errorPos <= afterLast */ + if (state->errorPos && (state->errorPos > afterLast)) { + state->errorPos = afterLast; + } + return state->errorCode; + } + if (afterUriReference != afterLast) { + if (afterUriReference < afterLast) { + URI_FUNC(StopSyntax)(state, afterUriReference, memory); + } else { + URI_FUNC(StopSyntax)(state, afterLast, memory); + } + return state->errorCode; + } + return URI_SUCCESS; +} + + + +int URI_FUNC(ParseUri)(URI_TYPE(ParserState) * state, const URI_CHAR * text) { + if ((state == NULL) || (text == NULL)) { + return URI_ERROR_NULL; + } + return URI_FUNC(ParseUriEx)(state, text, text + URI_STRLEN(text)); +} + + + +int URI_FUNC(ParseSingleUri)(URI_TYPE(Uri) * uri, const URI_CHAR * text, + const URI_CHAR ** errorPos) { + return URI_FUNC(ParseSingleUriEx)(uri, text, NULL, errorPos); +} + + + +int URI_FUNC(ParseSingleUriEx)(URI_TYPE(Uri) * uri, + const URI_CHAR * first, const URI_CHAR * afterLast, + const URI_CHAR ** errorPos) { + if ((afterLast == NULL) && (first != NULL)) { + afterLast = first + URI_STRLEN(first); + } + return URI_FUNC(ParseSingleUriExMm)(uri, first, afterLast, errorPos, NULL); +} + + + +int URI_FUNC(ParseSingleUriExMm)(URI_TYPE(Uri) * uri, + const URI_CHAR * first, const URI_CHAR * afterLast, + const URI_CHAR ** errorPos, UriMemoryManager * memory) { + URI_TYPE(ParserState) state; + int res; + + /* Check params */ + if ((uri == NULL) || (first == NULL) || (afterLast == NULL)) { + return URI_ERROR_NULL; + } + URI_CHECK_MEMORY_MANAGER(memory); /* may return */ + + state.uri = uri; + + res = URI_FUNC(ParseUriExMm)(&state, first, afterLast, memory); + + if (res != URI_SUCCESS) { + if (errorPos != NULL) { + *errorPos = state.errorPos; + } + URI_FUNC(FreeUriMembersMm)(uri, memory); + } + + return res; +} + + + +void URI_FUNC(FreeUriMembers)(URI_TYPE(Uri) * uri) { + URI_FUNC(FreeUriMembersMm)(uri, NULL); +} + + + +int URI_FUNC(FreeUriMembersMm)(URI_TYPE(Uri) * uri, UriMemoryManager * memory) { + if (uri == NULL) { + return URI_ERROR_NULL; + } + + URI_CHECK_MEMORY_MANAGER(memory); /* may return */ + + if (uri->owner) { + /* Scheme */ + if (uri->scheme.first != NULL) { + if (uri->scheme.first != uri->scheme.afterLast) { + memory->free(memory, (URI_CHAR *)uri->scheme.first); + } + uri->scheme.first = NULL; + uri->scheme.afterLast = NULL; + } + + /* User info */ + if (uri->userInfo.first != NULL) { + if (uri->userInfo.first != uri->userInfo.afterLast) { + memory->free(memory, (URI_CHAR *)uri->userInfo.first); + } + uri->userInfo.first = NULL; + uri->userInfo.afterLast = NULL; + } + + /* Host data - IPvFuture (may affect host text) */ + if (uri->hostData.ipFuture.first != NULL) { + /* NOTE: .hostData.ipFuture may hold the very same range pointers + * as .hostText; then we need to prevent freeing memory twice. */ + if (uri->hostText.first == uri->hostData.ipFuture.first) { + uri->hostText.first = NULL; + uri->hostText.afterLast = NULL; + } + + if (uri->hostData.ipFuture.first != uri->hostData.ipFuture.afterLast) { + memory->free(memory, (URI_CHAR *)uri->hostData.ipFuture.first); + } + uri->hostData.ipFuture.first = NULL; + uri->hostData.ipFuture.afterLast = NULL; + } + + /* Host text (after IPvFuture, see above) */ + if (uri->hostText.first != NULL) { + if (uri->hostText.first != uri->hostText.afterLast) { + memory->free(memory, (URI_CHAR *)uri->hostText.first); + } + uri->hostText.first = NULL; + uri->hostText.afterLast = NULL; + } + } + + /* Host data - IPv4 */ + if (uri->hostData.ip4 != NULL) { + memory->free(memory, uri->hostData.ip4); + uri->hostData.ip4 = NULL; + } + + /* Host data - IPv6 */ + if (uri->hostData.ip6 != NULL) { + memory->free(memory, uri->hostData.ip6); + uri->hostData.ip6 = NULL; + } + + /* Port text */ + if (uri->owner && (uri->portText.first != NULL)) { + if (uri->portText.first != uri->portText.afterLast) { + memory->free(memory, (URI_CHAR *)uri->portText.first); + } + uri->portText.first = NULL; + uri->portText.afterLast = NULL; + } + + /* Path */ + if (uri->pathHead != NULL) { + URI_TYPE(PathSegment) * segWalk = uri->pathHead; + while (segWalk != NULL) { + URI_TYPE(PathSegment) * const next = segWalk->next; + if (uri->owner && (segWalk->text.first != NULL) + && (segWalk->text.first < segWalk->text.afterLast)) { + memory->free(memory, (URI_CHAR *)segWalk->text.first); + } + memory->free(memory, segWalk); + segWalk = next; + } + uri->pathHead = NULL; + uri->pathTail = NULL; + } + + if (uri->owner) { + /* Query */ + if (uri->query.first != NULL) { + if (uri->query.first != uri->query.afterLast) { + memory->free(memory, (URI_CHAR *)uri->query.first); + } + uri->query.first = NULL; + uri->query.afterLast = NULL; + } + + /* Fragment */ + if (uri->fragment.first != NULL) { + if (uri->fragment.first != uri->fragment.afterLast) { + memory->free(memory, (URI_CHAR *)uri->fragment.first); + } + uri->fragment.first = NULL; + uri->fragment.afterLast = NULL; + } + } + + return URI_SUCCESS; +} + + + +UriBool URI_FUNC(_TESTING_ONLY_ParseIpSix)(const URI_CHAR * text) { + UriMemoryManager * const memory = &defaultMemoryManager; + URI_TYPE(Uri) uri; + URI_TYPE(ParserState) parser; + const URI_CHAR * const afterIpSix = text + URI_STRLEN(text); + const URI_CHAR * res; + + URI_FUNC(ResetUri)(&uri); + parser.uri = &uri; + URI_FUNC(ResetParserStateExceptUri)(&parser); + parser.uri->hostData.ip6 = memory->malloc(memory, 1 * sizeof(UriIp6)); + res = URI_FUNC(ParseIPv6address2)(&parser, text, afterIpSix, memory); + URI_FUNC(FreeUriMembersMm)(&uri, memory); + return res == afterIpSix ? URI_TRUE : URI_FALSE; +} + + + +UriBool URI_FUNC(_TESTING_ONLY_ParseIpFour)(const URI_CHAR * text) { + unsigned char octets[4]; + int res = URI_FUNC(ParseIpFourAddress)(octets, text, text + URI_STRLEN(text)); + return (res == URI_SUCCESS) ? URI_TRUE : URI_FALSE; +} + + + +#undef URI_SET_DIGIT +#undef URI_SET_HEX_LETTER_UPPER +#undef URI_SET_HEX_LETTER_LOWER +#undef URI_SET_HEXDIG +#undef URI_SET_ALPHA + + + +#endif diff --git a/ext/uri/uriparser/src/UriParseBase.c b/ext/uri/uriparser/src/UriParseBase.c new file mode 100644 index 0000000000000..3a4aa08f6b760 --- /dev/null +++ b/ext/uri/uriparser/src/UriParseBase.c @@ -0,0 +1,90 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef URI_DOXYGEN +# include "UriParseBase.h" +#endif + + + +void uriWriteQuadToDoubleByte(const unsigned char * hexDigits, int digitCount, unsigned char * output) { + switch (digitCount) { + case 1: + /* 0x___? -> \x00 \x0? */ + output[0] = 0; + output[1] = hexDigits[0]; + break; + + case 2: + /* 0x__?? -> \0xx \x?? */ + output[0] = 0; + output[1] = 16 * hexDigits[0] + hexDigits[1]; + break; + + case 3: + /* 0x_??? -> \0x? \x?? */ + output[0] = hexDigits[0]; + output[1] = 16 * hexDigits[1] + hexDigits[2]; + break; + + case 4: + /* 0x???? -> \0?? \x?? */ + output[0] = 16 * hexDigits[0] + hexDigits[1]; + output[1] = 16 * hexDigits[2] + hexDigits[3]; + break; + + } +} + + + +unsigned char uriGetOctetValue(const unsigned char * digits, int digitCount) { + switch (digitCount) { + case 1: + return digits[0]; + + case 2: + return 10 * digits[0] + digits[1]; + + case 3: + default: + return 100 * digits[0] + 10 * digits[1] + digits[2]; + + } +} diff --git a/ext/uri/uriparser/src/UriParseBase.h b/ext/uri/uriparser/src/UriParseBase.h new file mode 100644 index 0000000000000..6d7379de1c0a9 --- /dev/null +++ b/ext/uri/uriparser/src/UriParseBase.h @@ -0,0 +1,55 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef URI_PARSE_BASE_H +#define URI_PARSE_BASE_H 1 + + + +#include + + + +void uriWriteQuadToDoubleByte(const unsigned char * hexDigits, int digitCount, + unsigned char * output); +unsigned char uriGetOctetValue(const unsigned char * digits, int digitCount); + + + +#endif /* URI_PARSE_BASE_H */ diff --git a/ext/uri/uriparser/src/UriQuery.c b/ext/uri/uriparser/src/UriQuery.c new file mode 100644 index 0000000000000..bbc15488773c6 --- /dev/null +++ b/ext/uri/uriparser/src/UriQuery.c @@ -0,0 +1,505 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriQuery.c" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriQuery.c" +# undef URI_PASS_UNICODE +# endif +#else +# ifdef URI_PASS_ANSI +# include +# else +# include +# include +# endif + + + +#ifndef URI_DOXYGEN +# include +# include "UriCommon.h" +# include "UriMemory.h" +#endif + + + +#include +#include /* size_t */ + + + +static int URI_FUNC(ComposeQueryEngine)(URI_CHAR * dest, + const URI_TYPE(QueryList) * queryList, + int maxChars, int * charsWritten, int * charsRequired, + UriBool spaceToPlus, UriBool normalizeBreaks); + +static UriBool URI_FUNC(AppendQueryItem)(URI_TYPE(QueryList) ** prevNext, + int * itemCount, const URI_CHAR * keyFirst, const URI_CHAR * keyAfter, + const URI_CHAR * valueFirst, const URI_CHAR * valueAfter, + UriBool plusToSpace, UriBreakConversion breakConversion, + UriMemoryManager * memory); + + + +int URI_FUNC(ComposeQueryCharsRequired)(const URI_TYPE(QueryList) * queryList, + int * charsRequired) { + const UriBool spaceToPlus = URI_TRUE; + const UriBool normalizeBreaks = URI_TRUE; + + return URI_FUNC(ComposeQueryCharsRequiredEx)(queryList, charsRequired, + spaceToPlus, normalizeBreaks); +} + + + +int URI_FUNC(ComposeQueryCharsRequiredEx)(const URI_TYPE(QueryList) * queryList, + int * charsRequired, UriBool spaceToPlus, UriBool normalizeBreaks) { + if ((queryList == NULL) || (charsRequired == NULL)) { + return URI_ERROR_NULL; + } + + return URI_FUNC(ComposeQueryEngine)(NULL, queryList, 0, NULL, + charsRequired, spaceToPlus, normalizeBreaks); +} + + + +int URI_FUNC(ComposeQuery)(URI_CHAR * dest, + const URI_TYPE(QueryList) * queryList, int maxChars, int * charsWritten) { + const UriBool spaceToPlus = URI_TRUE; + const UriBool normalizeBreaks = URI_TRUE; + + return URI_FUNC(ComposeQueryEx)(dest, queryList, maxChars, charsWritten, + spaceToPlus, normalizeBreaks); +} + + + +int URI_FUNC(ComposeQueryEx)(URI_CHAR * dest, + const URI_TYPE(QueryList) * queryList, int maxChars, int * charsWritten, + UriBool spaceToPlus, UriBool normalizeBreaks) { + if ((dest == NULL) || (queryList == NULL)) { + return URI_ERROR_NULL; + } + + if (maxChars < 1) { + return URI_ERROR_OUTPUT_TOO_LARGE; + } + + return URI_FUNC(ComposeQueryEngine)(dest, queryList, maxChars, + charsWritten, NULL, spaceToPlus, normalizeBreaks); +} + + + +int URI_FUNC(ComposeQueryMalloc)(URI_CHAR ** dest, + const URI_TYPE(QueryList) * queryList) { + const UriBool spaceToPlus = URI_TRUE; + const UriBool normalizeBreaks = URI_TRUE; + + return URI_FUNC(ComposeQueryMallocEx)(dest, queryList, + spaceToPlus, normalizeBreaks); +} + + + +int URI_FUNC(ComposeQueryMallocEx)(URI_CHAR ** dest, + const URI_TYPE(QueryList) * queryList, + UriBool spaceToPlus, UriBool normalizeBreaks) { + return URI_FUNC(ComposeQueryMallocExMm)(dest, queryList, spaceToPlus, + normalizeBreaks, NULL); +} + + + +int URI_FUNC(ComposeQueryMallocExMm)(URI_CHAR ** dest, + const URI_TYPE(QueryList) * queryList, + UriBool spaceToPlus, UriBool normalizeBreaks, + UriMemoryManager * memory) { + int charsRequired; + int res; + URI_CHAR * queryString; + + if (dest == NULL) { + return URI_ERROR_NULL; + } + + URI_CHECK_MEMORY_MANAGER(memory); /* may return */ + + /* Calculate space */ + res = URI_FUNC(ComposeQueryCharsRequiredEx)(queryList, &charsRequired, + spaceToPlus, normalizeBreaks); + if (res != URI_SUCCESS) { + return res; + } + if (charsRequired == INT_MAX) { + return URI_ERROR_MALLOC; + } + charsRequired++; + + /* Allocate space */ + queryString = memory->calloc(memory, charsRequired, sizeof(URI_CHAR)); + if (queryString == NULL) { + return URI_ERROR_MALLOC; + } + + /* Put query in */ + res = URI_FUNC(ComposeQueryEx)(queryString, queryList, charsRequired, + NULL, spaceToPlus, normalizeBreaks); + if (res != URI_SUCCESS) { + memory->free(memory, queryString); + return res; + } + + *dest = queryString; + return URI_SUCCESS; +} + + + +int URI_FUNC(ComposeQueryEngine)(URI_CHAR * dest, + const URI_TYPE(QueryList) * queryList, + int maxChars, int * charsWritten, int * charsRequired, + UriBool spaceToPlus, UriBool normalizeBreaks) { + UriBool firstItem = URI_TRUE; + int ampersandLen = 0; /* increased to 1 from second item on */ + URI_CHAR * write = dest; + + /* Subtract terminator */ + if (dest == NULL) { + *charsRequired = 0; + } else { + maxChars--; + } + + while (queryList != NULL) { + const URI_CHAR * const key = queryList->key; + const URI_CHAR * const value = queryList->value; + const int worstCase = (normalizeBreaks == URI_TRUE ? 6 : 3); + const size_t keyLen = (key == NULL) ? 0 : URI_STRLEN(key); + int keyRequiredChars; + const size_t valueLen = (value == NULL) ? 0 : URI_STRLEN(value); + int valueRequiredChars; + + if ((keyLen >= (size_t)INT_MAX / worstCase) || (valueLen >= (size_t)INT_MAX / worstCase)) { + return URI_ERROR_OUTPUT_TOO_LARGE; + } + keyRequiredChars = worstCase * (int)keyLen; + valueRequiredChars = worstCase * (int)valueLen; + + if (dest == NULL) { + (*charsRequired) += ampersandLen + keyRequiredChars + ((value == NULL) + ? 0 + : 1 + valueRequiredChars); + + if (firstItem == URI_TRUE) { + ampersandLen = 1; + firstItem = URI_FALSE; + } + } else { + if ((write - dest) + ampersandLen + keyRequiredChars > maxChars) { + return URI_ERROR_OUTPUT_TOO_LARGE; + } + + /* Copy key */ + if (firstItem == URI_TRUE) { + ampersandLen = 1; + firstItem = URI_FALSE; + } else { + write[0] = _UT('&'); + write++; + } + write = URI_FUNC(EscapeEx)(key, key + keyLen, + write, spaceToPlus, normalizeBreaks); + + if (value != NULL) { + if ((write - dest) + 1 + valueRequiredChars > maxChars) { + return URI_ERROR_OUTPUT_TOO_LARGE; + } + + /* Copy value */ + write[0] = _UT('='); + write++; + write = URI_FUNC(EscapeEx)(value, value + valueLen, + write, spaceToPlus, normalizeBreaks); + } + } + + queryList = queryList->next; + } + + if (dest != NULL) { + write[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = (int)(write - dest) + 1; /* .. for terminator */ + } + } + + return URI_SUCCESS; +} + + + +UriBool URI_FUNC(AppendQueryItem)(URI_TYPE(QueryList) ** prevNext, + int * itemCount, const URI_CHAR * keyFirst, const URI_CHAR * keyAfter, + const URI_CHAR * valueFirst, const URI_CHAR * valueAfter, + UriBool plusToSpace, UriBreakConversion breakConversion, + UriMemoryManager * memory) { + const int keyLen = (int)(keyAfter - keyFirst); + const int valueLen = (int)(valueAfter - valueFirst); + URI_CHAR * key; + URI_CHAR * value; + + if ((prevNext == NULL) || (itemCount == NULL) + || (keyFirst == NULL) || (keyAfter == NULL) + || (keyFirst > keyAfter) || (valueFirst > valueAfter) + || ((keyFirst == keyAfter) + && (valueFirst == NULL) && (valueAfter == NULL))) { + return URI_TRUE; + } + + /* Append new empty item */ + *prevNext = memory->malloc(memory, 1 * sizeof(URI_TYPE(QueryList))); + if (*prevNext == NULL) { + return URI_FALSE; /* Raises malloc error */ + } + (*prevNext)->next = NULL; + + + /* Fill key */ + key = memory->malloc(memory, (keyLen + 1) * sizeof(URI_CHAR)); + if (key == NULL) { + memory->free(memory, *prevNext); + *prevNext = NULL; + return URI_FALSE; /* Raises malloc error */ + } + + key[keyLen] = _UT('\0'); + if (keyLen > 0) { + /* Copy 1:1 */ + memcpy(key, keyFirst, keyLen * sizeof(URI_CHAR)); + + /* Unescape */ + URI_FUNC(UnescapeInPlaceEx)(key, plusToSpace, breakConversion); + } + (*prevNext)->key = key; + + + /* Fill value */ + if (valueFirst != NULL) { + value = memory->malloc(memory, (valueLen + 1) * sizeof(URI_CHAR)); + if (value == NULL) { + memory->free(memory, key); + memory->free(memory, *prevNext); + *prevNext = NULL; + return URI_FALSE; /* Raises malloc error */ + } + + value[valueLen] = _UT('\0'); + if (valueLen > 0) { + /* Copy 1:1 */ + memcpy(value, valueFirst, valueLen * sizeof(URI_CHAR)); + + /* Unescape */ + URI_FUNC(UnescapeInPlaceEx)(value, plusToSpace, breakConversion); + } + (*prevNext)->value = value; + } else { + value = NULL; + } + (*prevNext)->value = value; + + (*itemCount)++; + return URI_TRUE; +} + + + +void URI_FUNC(FreeQueryList)(URI_TYPE(QueryList) * queryList) { + URI_FUNC(FreeQueryListMm)(queryList, NULL); +} + + + +int URI_FUNC(FreeQueryListMm)(URI_TYPE(QueryList) * queryList, + UriMemoryManager * memory) { + URI_CHECK_MEMORY_MANAGER(memory); /* may return */ + while (queryList != NULL) { + URI_TYPE(QueryList) * nextBackup = queryList->next; + memory->free(memory, (URI_CHAR *)queryList->key); /* const cast */ + memory->free(memory, (URI_CHAR *)queryList->value); /* const cast */ + memory->free(memory, queryList); + queryList = nextBackup; + } + return URI_SUCCESS; +} + + + +int URI_FUNC(DissectQueryMalloc)(URI_TYPE(QueryList) ** dest, int * itemCount, + const URI_CHAR * first, const URI_CHAR * afterLast) { + const UriBool plusToSpace = URI_TRUE; + const UriBreakConversion breakConversion = URI_BR_DONT_TOUCH; + + return URI_FUNC(DissectQueryMallocEx)(dest, itemCount, first, afterLast, + plusToSpace, breakConversion); +} + + + +int URI_FUNC(DissectQueryMallocEx)(URI_TYPE(QueryList) ** dest, int * itemCount, + const URI_CHAR * first, const URI_CHAR * afterLast, + UriBool plusToSpace, UriBreakConversion breakConversion) { + return URI_FUNC(DissectQueryMallocExMm)(dest, itemCount, first, afterLast, + plusToSpace, breakConversion, NULL); +} + + + +int URI_FUNC(DissectQueryMallocExMm)(URI_TYPE(QueryList) ** dest, int * itemCount, + const URI_CHAR * first, const URI_CHAR * afterLast, + UriBool plusToSpace, UriBreakConversion breakConversion, + UriMemoryManager * memory) { + const URI_CHAR * walk = first; + const URI_CHAR * keyFirst = first; + const URI_CHAR * keyAfter = NULL; + const URI_CHAR * valueFirst = NULL; + const URI_CHAR * valueAfter = NULL; + URI_TYPE(QueryList) ** prevNext = dest; + int nullCounter; + int * itemsAppended = (itemCount == NULL) ? &nullCounter : itemCount; + + if ((dest == NULL) || (first == NULL) || (afterLast == NULL)) { + return URI_ERROR_NULL; + } + + if (first > afterLast) { + return URI_ERROR_RANGE_INVALID; + } + + URI_CHECK_MEMORY_MANAGER(memory); /* may return */ + + *dest = NULL; + *itemsAppended = 0; + + /* Parse query string */ + for (; walk < afterLast; walk++) { + switch (*walk) { + case _UT('&'): + if (valueFirst != NULL) { + valueAfter = walk; + } else { + keyAfter = walk; + } + + if (URI_FUNC(AppendQueryItem)(prevNext, itemsAppended, + keyFirst, keyAfter, valueFirst, valueAfter, + plusToSpace, breakConversion, memory) + == URI_FALSE) { + /* Free list we built */ + *itemsAppended = 0; + URI_FUNC(FreeQueryListMm)(*dest, memory); + return URI_ERROR_MALLOC; + } + + /* Make future items children of the current */ + if ((prevNext != NULL) && (*prevNext != NULL)) { + prevNext = &((*prevNext)->next); + } + + if (walk + 1 < afterLast) { + keyFirst = walk + 1; + } else { + keyFirst = NULL; + } + keyAfter = NULL; + valueFirst = NULL; + valueAfter = NULL; + break; + + case _UT('='): + /* NOTE: WE treat the first '=' as a separator, */ + /* all following go into the value part */ + if (keyAfter == NULL) { + keyAfter = walk; + if (walk + 1 <= afterLast) { + valueFirst = walk + 1; + valueAfter = walk + 1; + } + } + break; + + default: + break; + } + } + + if (valueFirst != NULL) { + /* Must be key/value pair */ + valueAfter = walk; + } else { + /* Must be key only */ + keyAfter = walk; + } + + if (URI_FUNC(AppendQueryItem)(prevNext, itemsAppended, keyFirst, keyAfter, + valueFirst, valueAfter, plusToSpace, breakConversion, memory) + == URI_FALSE) { + /* Free list we built */ + *itemsAppended = 0; + URI_FUNC(FreeQueryListMm)(*dest, memory); + return URI_ERROR_MALLOC; + } + + return URI_SUCCESS; +} + + + +#endif diff --git a/ext/uri/uriparser/src/UriRecompose.c b/ext/uri/uriparser/src/UriRecompose.c new file mode 100644 index 0000000000000..5027eca6cfa33 --- /dev/null +++ b/ext/uri/uriparser/src/UriRecompose.c @@ -0,0 +1,577 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriRecompose.c" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriRecompose.c" +# undef URI_PASS_UNICODE +# endif +#else +# ifdef URI_PASS_ANSI +# include +# else +# include +# include +# endif + + + +#ifndef URI_DOXYGEN +# include +# include "UriCommon.h" +#endif + + + +static int URI_FUNC(ToStringEngine)(URI_CHAR * dest, const URI_TYPE(Uri) * uri, + int maxChars, int * charsWritten, int * charsRequired); + + + +int URI_FUNC(ToStringCharsRequired)(const URI_TYPE(Uri) * uri, + int * charsRequired) { + const int MAX_CHARS = ((unsigned int)-1) >> 1; + return URI_FUNC(ToStringEngine)(NULL, uri, MAX_CHARS, NULL, charsRequired); +} + + + +int URI_FUNC(ToString)(URI_CHAR * dest, const URI_TYPE(Uri) * uri, + int maxChars, int * charsWritten) { + return URI_FUNC(ToStringEngine)(dest, uri, maxChars, charsWritten, NULL); +} + + + +static URI_INLINE int URI_FUNC(ToStringEngine)(URI_CHAR * dest, + const URI_TYPE(Uri) * uri, int maxChars, int * charsWritten, + int * charsRequired) { + int written = 0; + if ((uri == NULL) || ((dest == NULL) && (charsRequired == NULL))) { + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_NULL; + } + + if (maxChars < 1) { + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + maxChars--; /* So we don't have to subtract 1 for '\0' all the time */ + + /* [01/19] result = "" */ + if (dest != NULL) { + dest[0] = _UT('\0'); + } else { + (*charsRequired) = 0; + } + /* [02/19] if defined(scheme) then */ + if (uri->scheme.first != NULL) { + /* [03/19] append scheme to result; */ + const int charsToWrite + = (int)(uri->scheme.afterLast - uri->scheme.first); + if (dest != NULL) { + if (written + charsToWrite <= maxChars) { + memcpy(dest + written, uri->scheme.first, + charsToWrite * sizeof(URI_CHAR)); + written += charsToWrite; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += charsToWrite; + } + /* [04/19] append ":" to result; */ + if (dest != NULL) { + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT(":"), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += 1; + } + /* [05/19] endif; */ + } + /* [06/19] if defined(authority) then */ + if (URI_FUNC(IsHostSet)(uri)) { + /* [07/19] append "//" to result; */ + if (dest != NULL) { + if (written + 2 <= maxChars) { + memcpy(dest + written, _UT("//"), + 2 * sizeof(URI_CHAR)); + written += 2; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += 2; + } + /* [08/19] append authority to result; */ + /* UserInfo */ + if (uri->userInfo.first != NULL) { + const int charsToWrite = (int)(uri->userInfo.afterLast - uri->userInfo.first); + if (dest != NULL) { + if (written + charsToWrite <= maxChars) { + memcpy(dest + written, uri->userInfo.first, + charsToWrite * sizeof(URI_CHAR)); + written += charsToWrite; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT("@"), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += charsToWrite + 1; + } + } + + /* Host */ + if (uri->hostData.ip4 != NULL) { + /* IPv4 */ + int i = 0; + for (; i < 4; i++) { + const unsigned char value = uri->hostData.ip4->data[i]; + const int charsToWrite = (value > 99) ? 3 : ((value > 9) ? 2 : 1); + if (dest != NULL) { + if (written + charsToWrite <= maxChars) { + URI_CHAR text[4]; + if (value > 99) { + text[0] = _UT('0') + (value / 100); + text[1] = _UT('0') + ((value % 100) / 10); + text[2] = _UT('0') + (value % 10); + } else if (value > 9) { + text[0] = _UT('0') + (value / 10); + text[1] = _UT('0') + (value % 10); + } else { + text[0] = _UT('0') + value; + } + text[charsToWrite] = _UT('\0'); + memcpy(dest + written, text, charsToWrite * sizeof(URI_CHAR)); + written += charsToWrite; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + if (i < 3) { + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT("."), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } + } else { + (*charsRequired) += charsToWrite + ((i == 3) ? 0 : 1); + } + } + } else if (uri->hostData.ip6 != NULL) { + /* IPv6 */ + int i = 0; + if (dest != NULL) { + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT("["), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += 1; + } + + for (; i < 16; i++) { + const unsigned char value = uri->hostData.ip6->data[i]; + if (dest != NULL) { + if (written + 2 <= maxChars) { + URI_CHAR text[3]; + text[0] = URI_FUNC(HexToLetterEx)(value / 16, URI_FALSE); + text[1] = URI_FUNC(HexToLetterEx)(value % 16, URI_FALSE); + text[2] = _UT('\0'); + memcpy(dest + written, text, 2 * sizeof(URI_CHAR)); + written += 2; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += 2; + } + if (((i & 1) == 1) && (i < 15)) { + if (dest != NULL) { + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT(":"), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += 1; + } + } + } + + if (dest != NULL) { + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT("]"), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += 1; + } + } else if (uri->hostData.ipFuture.first != NULL) { + /* IPvFuture */ + const int charsToWrite = (int)(uri->hostData.ipFuture.afterLast + - uri->hostData.ipFuture.first); + if (dest != NULL) { + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT("["), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + + if (written + charsToWrite <= maxChars) { + memcpy(dest + written, uri->hostData.ipFuture.first, charsToWrite * sizeof(URI_CHAR)); + written += charsToWrite; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT("]"), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += 1 + charsToWrite + 1; + } + } else if (uri->hostText.first != NULL) { + /* Regname */ + const int charsToWrite = (int)(uri->hostText.afterLast - uri->hostText.first); + if (dest != NULL) { + if (written + charsToWrite <= maxChars) { + memcpy(dest + written, uri->hostText.first, + charsToWrite * sizeof(URI_CHAR)); + written += charsToWrite; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += charsToWrite; + } + } + + /* Port */ + if (uri->portText.first != NULL) { + const int charsToWrite = (int)(uri->portText.afterLast - uri->portText.first); + if (dest != NULL) { + /* Leading ':' */ + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT(":"), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + + /* Port number */ + if (written + charsToWrite <= maxChars) { + memcpy(dest + written, uri->portText.first, + charsToWrite * sizeof(URI_CHAR)); + written += charsToWrite; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += 1 + charsToWrite; + } + } + /* [09/19] endif; */ + } + /* [10/19] append path to result; */ + /* Slash needed here? */ + if (uri->absolutePath || ((uri->pathHead != NULL) + && URI_FUNC(IsHostSet)(uri))) { + if (dest != NULL) { + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT("/"), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += 1; + } + } + + if (uri->pathHead != NULL) { + URI_TYPE(PathSegment) * walker = uri->pathHead; + do { + const int charsToWrite = (int)(walker->text.afterLast - walker->text.first); + if (dest != NULL) { + if (written + charsToWrite <= maxChars) { + memcpy(dest + written, walker->text.first, + charsToWrite * sizeof(URI_CHAR)); + written += charsToWrite; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += charsToWrite; + } + + /* Not last segment -> append slash */ + if (walker->next != NULL) { + if (dest != NULL) { + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT("/"), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += 1; + } + } + + walker = walker->next; + } while (walker != NULL); + } + /* [11/19] if defined(query) then */ + if (uri->query.first != NULL) { + /* [12/19] append "?" to result; */ + if (dest != NULL) { + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT("?"), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += 1; + } + /* [13/19] append query to result; */ + { + const int charsToWrite + = (int)(uri->query.afterLast - uri->query.first); + if (dest != NULL) { + if (written + charsToWrite <= maxChars) { + memcpy(dest + written, uri->query.first, + charsToWrite * sizeof(URI_CHAR)); + written += charsToWrite; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += charsToWrite; + } + } + /* [14/19] endif; */ + } + /* [15/19] if defined(fragment) then */ + if (uri->fragment.first != NULL) { + /* [16/19] append "#" to result; */ + if (dest != NULL) { + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT("#"), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += 1; + } + /* [17/19] append fragment to result; */ + { + const int charsToWrite + = (int)(uri->fragment.afterLast - uri->fragment.first); + if (dest != NULL) { + if (written + charsToWrite <= maxChars) { + memcpy(dest + written, uri->fragment.first, + charsToWrite * sizeof(URI_CHAR)); + written += charsToWrite; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += charsToWrite; + } + } + /* [18/19] endif; */ + } + /* [19/19] return result; */ + if (dest != NULL) { + dest[written++] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = written; + } + } + return URI_SUCCESS; +} + + + +#endif diff --git a/ext/uri/uriparser/src/UriResolve.c b/ext/uri/uriparser/src/UriResolve.c new file mode 100644 index 0000000000000..80031a894d437 --- /dev/null +++ b/ext/uri/uriparser/src/UriResolve.c @@ -0,0 +1,329 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriResolve.c" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriResolve.c" +# undef URI_PASS_UNICODE +# endif +#else +# ifdef URI_PASS_ANSI +# include +# else +# include +# include +# endif + + + +#ifndef URI_DOXYGEN +# include +# include "UriCommon.h" +# include "UriMemory.h" +#endif + + + +/* Appends a relative URI to an absolute. The last path segment of + * the absolute URI is replaced. */ +static URI_INLINE UriBool URI_FUNC(MergePath)(URI_TYPE(Uri) * absWork, + const URI_TYPE(Uri) * relAppend, UriMemoryManager * memory) { + URI_TYPE(PathSegment) * sourceWalker; + URI_TYPE(PathSegment) * destPrev; + if (relAppend->pathHead == NULL) { + return URI_TRUE; + } + + /* Replace last segment ("" if trailing slash) with first of append chain */ + if (absWork->pathHead == NULL) { + URI_TYPE(PathSegment) * const dup = memory->malloc(memory, sizeof(URI_TYPE(PathSegment))); + if (dup == NULL) { + return URI_FALSE; /* Raises malloc error */ + } + dup->next = NULL; + absWork->pathHead = dup; + absWork->pathTail = dup; + } + absWork->pathTail->text.first = relAppend->pathHead->text.first; + absWork->pathTail->text.afterLast = relAppend->pathHead->text.afterLast; + + /* Append all the others */ + sourceWalker = relAppend->pathHead->next; + if (sourceWalker == NULL) { + return URI_TRUE; + } + destPrev = absWork->pathTail; + + for (;;) { + URI_TYPE(PathSegment) * const dup = memory->malloc(memory, sizeof(URI_TYPE(PathSegment))); + if (dup == NULL) { + destPrev->next = NULL; + absWork->pathTail = destPrev; + return URI_FALSE; /* Raises malloc error */ + } + dup->text = sourceWalker->text; + destPrev->next = dup; + + if (sourceWalker->next == NULL) { + absWork->pathTail = dup; + absWork->pathTail->next = NULL; + break; + } + destPrev = dup; + sourceWalker = sourceWalker->next; + } + + return URI_TRUE; +} + + +static int URI_FUNC(ResolveAbsolutePathFlag)(URI_TYPE(Uri) * absWork, + UriMemoryManager * memory) { + if (absWork == NULL) { + return URI_ERROR_NULL; + } + + if (URI_FUNC(IsHostSet)(absWork) && absWork->absolutePath) { + /* Empty segment needed, instead? */ + if (absWork->pathHead == NULL) { + URI_TYPE(PathSegment) * const segment = memory->malloc(memory, sizeof(URI_TYPE(PathSegment))); + if (segment == NULL) { + return URI_ERROR_MALLOC; + } + segment->text.first = URI_FUNC(SafeToPointTo); + segment->text.afterLast = URI_FUNC(SafeToPointTo); + segment->next = NULL; + + absWork->pathHead = segment; + absWork->pathTail = segment; + } + + absWork->absolutePath = URI_FALSE; + } + + return URI_SUCCESS; +} + + +static int URI_FUNC(AddBaseUriImpl)(URI_TYPE(Uri) * absDest, + const URI_TYPE(Uri) * relSource, + const URI_TYPE(Uri) * absBase, + UriResolutionOptions options, UriMemoryManager * memory) { + UriBool relSourceHasScheme; + + if (absDest == NULL) { + return URI_ERROR_NULL; + } + URI_FUNC(ResetUri)(absDest); + + if ((relSource == NULL) || (absBase == NULL)) { + return URI_ERROR_NULL; + } + + /* absBase absolute? */ + if (absBase->scheme.first == NULL) { + return URI_ERROR_ADDBASE_REL_BASE; + } + + /* [00/32] -- A non-strict parser may ignore a scheme in the reference */ + /* [00/32] -- if it is identical to the base URI's scheme. */ + /* [00/32] if ((not strict) and (R.scheme == Base.scheme)) then */ + relSourceHasScheme = (relSource->scheme.first != NULL) ? URI_TRUE : URI_FALSE; + if ((options & URI_RESOLVE_IDENTICAL_SCHEME_COMPAT) + && (absBase->scheme.first != NULL) + && (relSource->scheme.first != NULL) + && (0 == URI_FUNC(CompareRange)(&(absBase->scheme), &(relSource->scheme)))) { + /* [00/32] undefine(R.scheme); */ + relSourceHasScheme = URI_FALSE; + /* [00/32] endif; */ + } + + /* [01/32] if defined(R.scheme) then */ + if (relSourceHasScheme) { + /* [02/32] T.scheme = R.scheme; */ + absDest->scheme = relSource->scheme; + /* [03/32] T.authority = R.authority; */ + if (!URI_FUNC(CopyAuthority)(absDest, relSource, memory)) { + return URI_ERROR_MALLOC; + } + /* [04/32] T.path = remove_dot_segments(R.path); */ + if (!URI_FUNC(CopyPath)(absDest, relSource, memory)) { + return URI_ERROR_MALLOC; + } + if (!URI_FUNC(RemoveDotSegmentsAbsolute)(absDest, memory)) { + return URI_ERROR_MALLOC; + } + /* [05/32] T.query = R.query; */ + absDest->query = relSource->query; + /* [06/32] else */ + } else { + /* [07/32] if defined(R.authority) then */ + if (URI_FUNC(IsHostSet)(relSource)) { + /* [08/32] T.authority = R.authority; */ + if (!URI_FUNC(CopyAuthority)(absDest, relSource, memory)) { + return URI_ERROR_MALLOC; + } + /* [09/32] T.path = remove_dot_segments(R.path); */ + if (!URI_FUNC(CopyPath)(absDest, relSource, memory)) { + return URI_ERROR_MALLOC; + } + if (!URI_FUNC(RemoveDotSegmentsAbsolute)(absDest, memory)) { + return URI_ERROR_MALLOC; + } + /* [10/32] T.query = R.query; */ + absDest->query = relSource->query; + /* [11/32] else */ + } else { + /* [28/32] T.authority = Base.authority; */ + if (!URI_FUNC(CopyAuthority)(absDest, absBase, memory)) { + return URI_ERROR_MALLOC; + } + /* [12/32] if (R.path == "") then */ + if (relSource->pathHead == NULL && !relSource->absolutePath) { + /* [13/32] T.path = Base.path; */ + if (!URI_FUNC(CopyPath)(absDest, absBase, memory)) { + return URI_ERROR_MALLOC; + } + /* [14/32] if defined(R.query) then */ + if (relSource->query.first != NULL) { + /* [15/32] T.query = R.query; */ + absDest->query = relSource->query; + /* [16/32] else */ + } else { + /* [17/32] T.query = Base.query; */ + absDest->query = absBase->query; + /* [18/32] endif; */ + } + /* [19/32] else */ + } else { + /* [20/32] if (R.path starts-with "/") then */ + if (relSource->absolutePath) { + int res; + /* [21/32] T.path = remove_dot_segments(R.path); */ + if (!URI_FUNC(CopyPath)(absDest, relSource, memory)) { + return URI_ERROR_MALLOC; + } + res = URI_FUNC(ResolveAbsolutePathFlag)(absDest, memory); + if (res != URI_SUCCESS) { + return res; + } + if (!URI_FUNC(RemoveDotSegmentsAbsolute)(absDest, memory)) { + return URI_ERROR_MALLOC; + } + /* [22/32] else */ + } else { + /* [23/32] T.path = merge(Base.path, R.path); */ + if (!URI_FUNC(CopyPath)(absDest, absBase, memory)) { + return URI_ERROR_MALLOC; + } + if (!URI_FUNC(MergePath)(absDest, relSource, memory)) { + return URI_ERROR_MALLOC; + } + /* [24/32] T.path = remove_dot_segments(T.path); */ + if (!URI_FUNC(RemoveDotSegmentsAbsolute)(absDest, memory)) { + return URI_ERROR_MALLOC; + } + + if (!URI_FUNC(FixAmbiguity)(absDest, memory)) { + return URI_ERROR_MALLOC; + } + /* [25/32] endif; */ + } + /* [26/32] T.query = R.query; */ + absDest->query = relSource->query; + /* [27/32] endif; */ + } + URI_FUNC(FixEmptyTrailSegment)(absDest, memory); + /* [29/32] endif; */ + } + /* [30/32] T.scheme = Base.scheme; */ + absDest->scheme = absBase->scheme; + /* [31/32] endif; */ + } + /* [32/32] T.fragment = R.fragment; */ + absDest->fragment = relSource->fragment; + + return URI_SUCCESS; + +} + + + +int URI_FUNC(AddBaseUri)(URI_TYPE(Uri) * absDest, + const URI_TYPE(Uri) * relSource, const URI_TYPE(Uri) * absBase) { + const UriResolutionOptions options = URI_RESOLVE_STRICTLY; + return URI_FUNC(AddBaseUriEx)(absDest, relSource, absBase, options); +} + + + +int URI_FUNC(AddBaseUriEx)(URI_TYPE(Uri) * absDest, + const URI_TYPE(Uri) * relSource, const URI_TYPE(Uri) * absBase, + UriResolutionOptions options) { + return URI_FUNC(AddBaseUriExMm)(absDest, relSource, absBase, options, NULL); +} + + + +int URI_FUNC(AddBaseUriExMm)(URI_TYPE(Uri) * absDest, + const URI_TYPE(Uri) * relSource, const URI_TYPE(Uri) * absBase, + UriResolutionOptions options, UriMemoryManager * memory) { + int res; + + URI_CHECK_MEMORY_MANAGER(memory); /* may return */ + + res = URI_FUNC(AddBaseUriImpl)(absDest, relSource, absBase, options, memory); + if ((res != URI_SUCCESS) && (absDest != NULL)) { + URI_FUNC(FreeUriMembersMm)(absDest, memory); + } + return res; +} + + + +#endif diff --git a/ext/uri/uriparser/src/UriShorten.c b/ext/uri/uriparser/src/UriShorten.c new file mode 100644 index 0000000000000..d2f893592d9ce --- /dev/null +++ b/ext/uri/uriparser/src/UriShorten.c @@ -0,0 +1,324 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriShorten.c" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriShorten.c" +# undef URI_PASS_UNICODE +# endif +#else +# ifdef URI_PASS_ANSI +# include +# else +# include +# include +# endif + + + +#ifndef URI_DOXYGEN +# include +# include "UriCommon.h" +# include "UriMemory.h" +#endif + + + +static URI_INLINE UriBool URI_FUNC(AppendSegment)(URI_TYPE(Uri) * uri, + const URI_CHAR * first, const URI_CHAR * afterLast, + UriMemoryManager * memory) { + /* Create segment */ + URI_TYPE(PathSegment) * segment = memory->malloc(memory, 1 * sizeof(URI_TYPE(PathSegment))); + if (segment == NULL) { + return URI_FALSE; /* Raises malloc error */ + } + segment->next = NULL; + segment->text.first = first; + segment->text.afterLast = afterLast; + + /* Put into chain */ + if (uri->pathTail == NULL) { + uri->pathHead = segment; + } else { + uri->pathTail->next = segment; + } + uri->pathTail = segment; + + return URI_TRUE; +} + + + +static URI_INLINE UriBool URI_FUNC(EqualsAuthority)(const URI_TYPE(Uri) * first, + const URI_TYPE(Uri) * second) { + /* IPv4 */ + if (first->hostData.ip4 != NULL) { + return ((second->hostData.ip4 != NULL) + && !memcmp(first->hostData.ip4->data, + second->hostData.ip4->data, 4)) ? URI_TRUE : URI_FALSE; + } + + /* IPv6 */ + if (first->hostData.ip6 != NULL) { + return ((second->hostData.ip6 != NULL) + && !memcmp(first->hostData.ip6->data, + second->hostData.ip6->data, 16)) ? URI_TRUE : URI_FALSE; + } + + /* IPvFuture */ + if (first->hostData.ipFuture.first != NULL) { + return ((second->hostData.ipFuture.first != NULL) + && !URI_FUNC(CompareRange)(&first->hostData.ipFuture, + &second->hostData.ipFuture)) ? URI_TRUE : URI_FALSE; + } + + return !URI_FUNC(CompareRange)(&first->hostText, &second->hostText) + ? URI_TRUE : URI_FALSE; +} + + + +static int URI_FUNC(RemoveBaseUriImpl)(URI_TYPE(Uri) * dest, + const URI_TYPE(Uri) * absSource, + const URI_TYPE(Uri) * absBase, + UriBool domainRootMode, UriMemoryManager * memory) { + if (dest == NULL) { + return URI_ERROR_NULL; + } + URI_FUNC(ResetUri)(dest); + + if ((absSource == NULL) || (absBase == NULL)) { + return URI_ERROR_NULL; + } + + /* absBase absolute? */ + if (absBase->scheme.first == NULL) { + return URI_ERROR_REMOVEBASE_REL_BASE; + } + + /* absSource absolute? */ + if (absSource->scheme.first == NULL) { + return URI_ERROR_REMOVEBASE_REL_SOURCE; + } + + /* [01/50] if (A.scheme != Base.scheme) then */ + if (URI_FUNC(CompareRange)(&absSource->scheme, &absBase->scheme)) { + /* [02/50] T.scheme = A.scheme; */ + dest->scheme = absSource->scheme; + /* [03/50] T.authority = A.authority; */ + if (!URI_FUNC(CopyAuthority)(dest, absSource, memory)) { + return URI_ERROR_MALLOC; + } + /* [04/50] T.path = A.path; */ + if (!URI_FUNC(CopyPath)(dest, absSource, memory)) { + return URI_ERROR_MALLOC; + } + /* [05/50] else */ + } else { + /* [06/50] undef(T.scheme); */ + /* NOOP */ + /* [07/50] if (A.authority != Base.authority) then */ + if (!URI_FUNC(EqualsAuthority)(absSource, absBase)) { + /* [08/50] T.authority = A.authority; */ + if (!URI_FUNC(CopyAuthority)(dest, absSource, memory)) { + return URI_ERROR_MALLOC; + } + /* [09/50] T.path = A.path; */ + if (!URI_FUNC(CopyPath)(dest, absSource, memory)) { + return URI_ERROR_MALLOC; + } + /* [10/50] else */ + } else { + /* [11/50] if domainRootMode then */ + if (domainRootMode == URI_TRUE) { + /* [12/50] undef(T.authority); */ + /* NOOP */ + /* [13/50] if (first(A.path) == "") then */ + /* GROUPED */ + /* [14/50] T.path = "/." + A.path; */ + /* GROUPED */ + /* [15/50] else */ + /* GROUPED */ + /* [16/50] T.path = A.path; */ + /* GROUPED */ + /* [17/50] endif; */ + if (!URI_FUNC(CopyPath)(dest, absSource, memory)) { + return URI_ERROR_MALLOC; + } + dest->absolutePath = URI_TRUE; + + if (!URI_FUNC(FixAmbiguity)(dest, memory)) { + return URI_ERROR_MALLOC; + } + /* [18/50] else */ + } else { + const URI_TYPE(PathSegment) * sourceSeg = absSource->pathHead; + const URI_TYPE(PathSegment) * baseSeg = absBase->pathHead; + /* [19/50] bool pathNaked = true; */ + UriBool pathNaked = URI_TRUE; + /* [20/50] undef(last(Base.path)); */ + /* NOOP */ + /* [21/50] T.path = ""; */ + dest->absolutePath = URI_FALSE; + /* [22/50] while (first(A.path) == first(Base.path)) do */ + while ((sourceSeg != NULL) && (baseSeg != NULL) + && !URI_FUNC(CompareRange)(&sourceSeg->text, &baseSeg->text) + && !((sourceSeg->text.first == sourceSeg->text.afterLast) + && ((sourceSeg->next == NULL) != (baseSeg->next == NULL)))) { + /* [23/50] A.path++; */ + sourceSeg = sourceSeg->next; + /* [24/50] Base.path++; */ + baseSeg = baseSeg->next; + /* [25/50] endwhile; */ + } + /* [26/50] while defined(first(Base.path)) do */ + while ((baseSeg != NULL) && (baseSeg->next != NULL)) { + /* [27/50] Base.path++; */ + baseSeg = baseSeg->next; + /* [28/50] T.path += "../"; */ + if (!URI_FUNC(AppendSegment)(dest, URI_FUNC(ConstParent), + URI_FUNC(ConstParent) + 2, memory)) { + return URI_ERROR_MALLOC; + } + /* [29/50] pathNaked = false; */ + pathNaked = URI_FALSE; + /* [30/50] endwhile; */ + } + /* [31/50] while defined(first(A.path)) do */ + while (sourceSeg != NULL) { + /* [32/50] if pathNaked then */ + if (pathNaked == URI_TRUE) { + /* [33/50] if (first(A.path) contains ":") then */ + UriBool containsColon = URI_FALSE; + const URI_CHAR * ch = sourceSeg->text.first; + for (; ch < sourceSeg->text.afterLast; ch++) { + if (*ch == _UT(':')) { + containsColon = URI_TRUE; + break; + } + } + + if (containsColon) { + /* [34/50] T.path += "./"; */ + if (!URI_FUNC(AppendSegment)(dest, URI_FUNC(ConstPwd), + URI_FUNC(ConstPwd) + 1, memory)) { + return URI_ERROR_MALLOC; + } + /* [35/50] elseif (first(A.path) == "") then */ + } else if (sourceSeg->text.first == sourceSeg->text.afterLast) { + /* [36/50] T.path += "/."; */ + if (!URI_FUNC(AppendSegment)(dest, URI_FUNC(ConstPwd), + URI_FUNC(ConstPwd) + 1, memory)) { + return URI_ERROR_MALLOC; + } + /* [37/50] endif; */ + } + /* [38/50] endif; */ + } + /* [39/50] T.path += first(A.path); */ + if (!URI_FUNC(AppendSegment)(dest, sourceSeg->text.first, + sourceSeg->text.afterLast, memory)) { + return URI_ERROR_MALLOC; + } + /* [40/50] pathNaked = false; */ + pathNaked = URI_FALSE; + /* [41/50] A.path++; */ + sourceSeg = sourceSeg->next; + /* [42/50] if defined(first(A.path)) then */ + /* NOOP */ + /* [43/50] T.path += + "/"; */ + /* NOOP */ + /* [44/50] endif; */ + /* NOOP */ + /* [45/50] endwhile; */ + } + /* [46/50] endif; */ + } + /* [47/50] endif; */ + } + /* [48/50] endif; */ + } + /* [49/50] T.query = A.query; */ + dest->query = absSource->query; + /* [50/50] T.fragment = A.fragment; */ + dest->fragment = absSource->fragment; + + return URI_SUCCESS; +} + + + +int URI_FUNC(RemoveBaseUri)(URI_TYPE(Uri) * dest, + const URI_TYPE(Uri) * absSource, + const URI_TYPE(Uri) * absBase, + UriBool domainRootMode) { + return URI_FUNC(RemoveBaseUriMm)(dest, absSource, absBase, + domainRootMode, NULL); +} + + + +int URI_FUNC(RemoveBaseUriMm)(URI_TYPE(Uri) * dest, + const URI_TYPE(Uri) * absSource, + const URI_TYPE(Uri) * absBase, + UriBool domainRootMode, UriMemoryManager * memory) { + int res; + + URI_CHECK_MEMORY_MANAGER(memory); /* may return */ + + res = URI_FUNC(RemoveBaseUriImpl)(dest, absSource, + absBase, domainRootMode, memory); + if ((res != URI_SUCCESS) && (dest != NULL)) { + URI_FUNC(FreeUriMembersMm)(dest, memory); + } + return res; +} + + + +#endif diff --git a/scripts/dev/tidy.php b/scripts/dev/tidy.php index 08f8b278279a1..fb2c5c14b463e 100755 --- a/scripts/dev/tidy.php +++ b/scripts/dev/tidy.php @@ -24,6 +24,7 @@ // Bundled libraries / files. 'ext/date/lib/', 'ext/lexbor/lexbor/', + 'ext/uri/uriparser/', 'ext/fileinfo/data_file.c', 'ext/fileinfo/libmagic/', 'ext/gd/libgd/', From 61c42abdedb46fa80394d55344470b338714b927 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1t=C3=A9=20Kocsis?= Date: Sun, 25 May 2025 23:13:23 +0200 Subject: [PATCH 02/10] Review fixes --- ext/uri/php_uri.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ext/uri/php_uri.c b/ext/uri/php_uri.c index 5bf1cfbf2adb3..0abb0c7ab24ad 100644 --- a/ext/uri/php_uri.c +++ b/ext/uri/php_uri.c @@ -15,7 +15,7 @@ */ #ifdef HAVE_CONFIG_H -# include "config.h" +# include #endif #include "php.h" @@ -23,6 +23,7 @@ #include "Zend/zend_exceptions.h" #include "Zend/zend_attributes.h" #include "main/php_ini.h" +#include "ext/standard/info.h" #include "php_uri.h" #include "php_uri_arginfo.h" @@ -52,13 +53,11 @@ static PHP_MINFO_FUNCTION(uri) { php_info_print_table_start(); php_info_print_table_row(2, "uri support", "active"); - php_info_print_table_row(2, "uriparser version", URIPARSER_VERSION); php_info_print_table_end(); } static PHP_MSHUTDOWN_FUNCTION(uri) { - UNREGISTER_INI_ENTRIES(); return SUCCESS; } From ac9af23581b0b560b45a4cb0259cef1469cbdc5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1t=C3=A9=20Kocsis?= Date: Sun, 25 May 2025 23:26:06 +0200 Subject: [PATCH 03/10] Additional review fixes + uriparser sync with latest master --- ext/uri/php_uri.c | 11 +---- ext/uri/php_uri.h | 3 -- ext/uri/uriparser/include/uriparser/Uri.h | 20 ++++----- ext/uri/uriparser/src/UriCommon.c | 4 +- ext/uri/uriparser/src/UriCommon.h | 3 ++ ext/uri/uriparser/src/UriConfig.h | 51 ----------------------- ext/uri/uriparser/src/UriNormalize.c | 11 +++-- ext/uri/uriparser/src/UriParse.c | 4 +- 8 files changed, 23 insertions(+), 84 deletions(-) delete mode 100644 ext/uri/uriparser/src/UriConfig.h diff --git a/ext/uri/php_uri.c b/ext/uri/php_uri.c index 0abb0c7ab24ad..d29cff7eece07 100644 --- a/ext/uri/php_uri.c +++ b/ext/uri/php_uri.c @@ -53,6 +53,7 @@ static PHP_MINFO_FUNCTION(uri) { php_info_print_table_start(); php_info_print_table_row(2, "uri support", "active"); + php_info_print_table_row(2, "uriparser library version", URIPARSER_VERSION); php_info_print_table_end(); } @@ -64,9 +65,6 @@ static PHP_MSHUTDOWN_FUNCTION(uri) PHP_RINIT_FUNCTION(uri) { -#if defined(COMPILE_DL_URI) && defined(ZTS) - ZEND_TSRMLS_CACHE_UPDATE(); -#endif return SUCCESS; } @@ -89,10 +87,3 @@ zend_module_entry uri_module_entry = { PHP_VERSION, /* Version */ STANDARD_MODULE_PROPERTIES }; - -#ifdef COMPILE_DL_URI -#ifdef ZTS -ZEND_TSRMLS_CACHE_DEFINE() -#endif -ZEND_GET_MODULE(uri) -#endif diff --git a/ext/uri/php_uri.h b/ext/uri/php_uri.h index d988495eac04d..79dfced4a721a 100644 --- a/ext/uri/php_uri.h +++ b/ext/uri/php_uri.h @@ -20,8 +20,5 @@ extern zend_module_entry uri_module_entry; #define phpext_uri_ptr &uri_module_entry -#if defined(ZTS) && defined(COMPILE_DL_URI) -ZEND_TSRMLS_CACHE_EXTERN() -#endif #endif diff --git a/ext/uri/uriparser/include/uriparser/Uri.h b/ext/uri/uriparser/include/uriparser/Uri.h index d6a85f3d9ac57..44bc5acc54b5f 100644 --- a/ext/uri/uriparser/include/uriparser/Uri.h +++ b/ext/uri/uriparser/include/uriparser/Uri.h @@ -85,7 +85,7 @@ extern "C" { # include "UriBase.h" #endif -extern const URI_CHAR * const URI_FUNC(SafeToPointTo); + /** * Specifies a range of characters within a string. @@ -200,11 +200,7 @@ typedef struct URI_TYPE(QueryListStruct) { struct URI_TYPE(QueryListStruct) * next; /**< Pointer to the next key/value pair in the list, can be NULL if last already */ } URI_TYPE(QueryList); /**< @copydoc UriQueryListStructA */ -URI_PUBLIC UriBool URI_FUNC(IsHostSet)(const URI_TYPE(Uri) * uri); -URI_PUBLIC UriBool URI_FUNC(PushPathSegment)(URI_TYPE(ParserState) * state, - const URI_CHAR * first, const URI_CHAR * afterLast, - UriMemoryManager * memory); /** * Parses a RFC 3986 %URI. @@ -354,7 +350,7 @@ URI_PUBLIC int URI_FUNC(FreeUriMembersMm)(URI_TYPE(Uri) * uri, /** - * Percent-encodes all unreserved characters from the input string and + * Percent-encodes all but unreserved characters from the input string and * writes the encoded version to the output string. * * NOTE: Be sure to allocate 3 times the space of the input buffer for @@ -388,7 +384,7 @@ URI_PUBLIC URI_CHAR * URI_FUNC(EscapeEx)(const URI_CHAR * inFirst, /** - * Percent-encodes all unreserved characters from the input string and + * Percent-encodes all but unreserved characters from the input string and * writes the encoded version to the output string. * * NOTE: Be sure to allocate 3 times the space of the input buffer for @@ -465,7 +461,7 @@ URI_PUBLIC const URI_CHAR * URI_FUNC(UnescapeInPlace)(URI_CHAR * inout); /** * Performs reference resolution as described in - * section 5.2.2 of RFC 3986. + * section 5.2.2 of RFC 3986. * Uses default libc-based memory manager. * NOTE: On success you have to call uriFreeUriMembersA on \p absoluteDest manually later. * @@ -488,7 +484,7 @@ URI_PUBLIC int URI_FUNC(AddBaseUri)(URI_TYPE(Uri) * absoluteDest, /** * Performs reference resolution as described in - * section 5.2.2 of RFC 3986. + * section 5.2.2 of RFC 3986. * Uses default libc-based memory manager. * NOTE: On success you have to call uriFreeUriMembersA on \p absoluteDest manually later. * @@ -512,7 +508,7 @@ URI_PUBLIC int URI_FUNC(AddBaseUriEx)(URI_TYPE(Uri) * absoluteDest, /** * Performs reference resolution as described in - * section 5.2.2 of RFC 3986. + * section 5.2.2 of RFC 3986. * NOTE: On success you have to call uriFreeUriMembersMmA on \p absoluteDest manually later. * * @param absoluteDest OUT: Result %URI @@ -628,10 +624,10 @@ URI_PUBLIC int URI_FUNC(ToStringCharsRequired)(const URI_TYPE(Uri) * uri, /** * Converts a %URI structure back to text as described in - * section 5.3 of RFC 3986. + * section 5.3 of RFC 3986. * * NOTE: Scheme-based normalization - * (section 6.2.3 of RFC 3986) + * (section 6.2.3 of RFC 3986) * is not applied and is considered a responsibility of the application using uriparser. * * @param dest OUT: Output destination diff --git a/ext/uri/uriparser/src/UriCommon.c b/ext/uri/uriparser/src/UriCommon.c index 7ba92f2495176..88e2767d71cb2 100644 --- a/ext/uri/uriparser/src/UriCommon.c +++ b/ext/uri/uriparser/src/UriCommon.c @@ -431,8 +431,8 @@ unsigned char URI_FUNC(HexdigToInt)(URI_CHAR hexdig) { URI_CHAR URI_FUNC(HexToLetter)(unsigned int value) { - /* Uppercase recommended in section 2.1. of RFC 3986 * - * http://tools.ietf.org/html/rfc3986#section-2.1 */ + /* Uppercase recommended in section 2.1. of RFC 3986 * + * https://datatracker.ietf.org/doc/html/rfc3986#section-2.1 */ return URI_FUNC(HexToLetterEx)(value, URI_TRUE); } diff --git a/ext/uri/uriparser/src/UriCommon.h b/ext/uri/uriparser/src/UriCommon.h index 7937a3951ce5b..42311ddc98b2d 100644 --- a/ext/uri/uriparser/src/UriCommon.h +++ b/ext/uri/uriparser/src/UriCommon.h @@ -70,6 +70,7 @@ /* Used to point to from empty path segments. * X.first and X.afterLast must be the same non-NULL value then. */ +extern const URI_CHAR * const URI_FUNC(SafeToPointTo); extern const URI_CHAR * const URI_FUNC(ConstPwd); extern const URI_CHAR * const URI_FUNC(ConstParent); @@ -90,6 +91,8 @@ unsigned char URI_FUNC(HexdigToInt)(URI_CHAR hexdig); URI_CHAR URI_FUNC(HexToLetter)(unsigned int value); URI_CHAR URI_FUNC(HexToLetterEx)(unsigned int value, UriBool uppercase); +UriBool URI_FUNC(IsHostSet)(const URI_TYPE(Uri) * uri); + UriBool URI_FUNC(CopyPath)(URI_TYPE(Uri) * dest, const URI_TYPE(Uri) * source, UriMemoryManager * memory); UriBool URI_FUNC(CopyAuthority)(URI_TYPE(Uri) * dest, diff --git a/ext/uri/uriparser/src/UriConfig.h b/ext/uri/uriparser/src/UriConfig.h deleted file mode 100644 index e799cec33d697..0000000000000 --- a/ext/uri/uriparser/src/UriConfig.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * uriparser - RFC 3986 URI parsing library - * - * Copyright (C) 2018, Sebastian Pipping - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of - * its contributors may be used to endorse or promote products - * derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#if !defined(URI_CONFIG_H) -# define URI_CONFIG_H 1 - - - -#define PACKAGE_VERSION "0.9.8" - -//#define HAVE_WPRINTF -//#define HAVE_REALLOCARRAY - - - -#endif /* !defined(URI_CONFIG_H) */ diff --git a/ext/uri/uriparser/src/UriNormalize.c b/ext/uri/uriparser/src/UriNormalize.c index 36f53ba3653e5..0cf353f111119 100644 --- a/ext/uri/uriparser/src/UriNormalize.c +++ b/ext/uri/uriparser/src/UriNormalize.c @@ -104,7 +104,7 @@ static UriBool URI_FUNC(ContainsUglyPercentEncoding)(const URI_CHAR * first, static void URI_FUNC(LowercaseInplace)(const URI_CHAR * first, const URI_CHAR * afterLast); -static void URI_FUNC(LowercaseInplaceAfterPercentEncoding)(const URI_CHAR * first, +static void URI_FUNC(LowercaseInplaceExceptPercentEncoding)(const URI_CHAR * first, const URI_CHAR * afterLast); static UriBool URI_FUNC(LowercaseMalloc)(const URI_CHAR ** first, const URI_CHAR ** afterLast, UriMemoryManager * memory); @@ -247,7 +247,7 @@ static URI_INLINE void URI_FUNC(LowercaseInplace)(const URI_CHAR * first, -static URI_INLINE void URI_FUNC(LowercaseInplaceAfterPercentEncoding)(const URI_CHAR * first, +static URI_INLINE void URI_FUNC(LowercaseInplaceExceptPercentEncoding)(const URI_CHAR * first, const URI_CHAR * afterLast) { if ((first != NULL) && (afterLast != NULL) && (afterLast > first)) { URI_CHAR * i = (URI_CHAR *)first; @@ -256,6 +256,9 @@ static URI_INLINE void URI_FUNC(LowercaseInplaceAfterPercentEncoding)(const URI_ if ((*i >= _UT('A')) && (*i <=_UT('Z'))) { *i = (URI_CHAR)(*i + lowerUpperDiff); } else if (*i == _UT('%')) { + if (i + 3 >= afterLast) { + return; + } i += 2; } } @@ -634,7 +637,7 @@ static URI_INLINE int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri, uri->hostText.afterLast = uri->hostData.ipFuture.afterLast; } else if ((uri->hostText.first != NULL) && (uri->hostData.ip4 == NULL)) { - /* Regname and IPv6 */ + /* Regname or IPv6 */ if (uri->owner) { URI_FUNC(FixPercentEncodingInplace)(uri->hostText.first, &(uri->hostText.afterLast)); @@ -649,7 +652,7 @@ static URI_INLINE int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri, doneMask |= URI_NORMALIZE_HOST; } - URI_FUNC(LowercaseInplaceAfterPercentEncoding)(uri->hostText.first, + URI_FUNC(LowercaseInplaceExceptPercentEncoding)(uri->hostText.first, uri->hostText.afterLast); } } diff --git a/ext/uri/uriparser/src/UriParse.c b/ext/uri/uriparser/src/UriParse.c index b91e511a58492..a672c8eb1f2fc 100644 --- a/ext/uri/uriparser/src/UriParse.c +++ b/ext/uri/uriparser/src/UriParse.c @@ -196,7 +196,7 @@ static void URI_FUNC(OnExitPartHelperTwo)(URI_TYPE(ParserState) * state); static void URI_FUNC(ResetParserStateExceptUri)(URI_TYPE(ParserState) * state); -UriBool URI_FUNC(PushPathSegment)(URI_TYPE(ParserState) * state, +static UriBool URI_FUNC(PushPathSegment)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory); @@ -2119,7 +2119,7 @@ static URI_INLINE void URI_FUNC(ResetParserStateExceptUri)(URI_TYPE(ParserState) -UriBool URI_FUNC(PushPathSegment)( +static URI_INLINE UriBool URI_FUNC(PushPathSegment)( URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory) { URI_TYPE(PathSegment) * segment = memory->calloc(memory, 1, sizeof(URI_TYPE(PathSegment))); From f3f6d97af0dd1ddd05bc94883acdb396f4baf393 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1t=C3=A9=20Kocsis?= Date: Sun, 25 May 2025 23:33:17 +0200 Subject: [PATCH 04/10] Regenerate credits + remove unused files + fix build --- ext/standard/credits_ext.h | 1 + ext/uri/CREDITS | 2 +- ext/uri/config.m4 | 5 +---- ext/uri/config.w32 | 4 ++-- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/ext/standard/credits_ext.h b/ext/standard/credits_ext.h index 49b263ce017b1..509135a27e3e9 100644 --- a/ext/standard/credits_ext.h +++ b/ext/standard/credits_ext.h @@ -69,6 +69,7 @@ CREDIT_LINE("System V Semaphores", "Tom May"); CREDIT_LINE("System V Shared Memory", "Christian Cartus"); CREDIT_LINE("tidy", "John Coggeshall, Ilia Alshanetsky"); CREDIT_LINE("tokenizer", "Andrei Zmievski, Johannes Schlueter"); +CREDIT_LINE("uri", "Máté Kocsis, Tim Düsterhus, Ignace Nyamagana Butera, Arnaud Le Blanc, Dennis Snell, Niels Dossche, Nicolas Grekas"); CREDIT_LINE("XML", "Stig Bakken, Thies C. Arntzen, Sterling Hughes"); CREDIT_LINE("XMLReader", "Rob Richards"); CREDIT_LINE("XMLWriter", "Rob Richards, Pierre-Alain Joye"); diff --git a/ext/uri/CREDITS b/ext/uri/CREDITS index 07147e007507f..fc6ce83fec88f 100644 --- a/ext/uri/CREDITS +++ b/ext/uri/CREDITS @@ -1,2 +1,2 @@ uri -Máté Kocsis, Tim Düsterhus, Ignace Nyamagana Butera, Arnaud Le Blanc, Niels Dossche, Nicolas Grekas +Máté Kocsis, Tim Düsterhus, Ignace Nyamagana Butera, Arnaud Le Blanc, Dennis Snell, Niels Dossche, Nicolas Grekas diff --git a/ext/uri/config.m4 b/ext/uri/config.m4 index 794a31d21d208..f020c7c45e6c8 100644 --- a/ext/uri/config.m4 +++ b/ext/uri/config.m4 @@ -2,10 +2,7 @@ dnl Configure options dnl PHP_INSTALL_HEADERS([ext/uri], m4_normalize([ - php_lexbor.h php_uri.h - php_uri_common.h - php_uriparser.h ])) AC_DEFINE([URI_ENABLE_ANSI], [1], [Define to 1 for enabling ANSI support of uriparser.]) @@ -18,6 +15,6 @@ $URIPARSER_DIR/src/UriMemory.c $URIPARSER_DIR/src/UriNormalize.c $URIPARSER_DIR/ $URIPARSER_DIR/src/UriParse.c $URIPARSER_DIR/src/UriParseBase.c $URIPARSER_DIR/src/UriQuery.c \ $URIPARSER_DIR/src/UriRecompose.c $URIPARSER_DIR/src/UriResolve.c $URIPARSER_DIR/src/UriShorten.c" -PHP_NEW_EXTENSION(uri, [php_uri.c php_uri_common.c php_lexbor.c php_uriparser.c $URIPARSER_SOURCES], [no],,[-I$ext_builddir/$URIPARSER_DIR/include -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1]) +PHP_NEW_EXTENSION(uri, [php_uri.c $URIPARSER_SOURCES], [no],,[-I$ext_builddir/$URIPARSER_DIR/include -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1]) PHP_ADD_EXTENSION_DEP(uri, lexbor) PHP_ADD_BUILD_DIR($ext_builddir/$URIPARSER_DIR/src $ext_builddir/$URIPARSER_DIR/include) diff --git a/ext/uri/config.w32 b/ext/uri/config.w32 index 3d5a0fc44102f..6c3a70c50f5f1 100644 --- a/ext/uri/config.w32 +++ b/ext/uri/config.w32 @@ -1,8 +1,8 @@ -EXTENSION("uri", "php_lexbor.c php_uri.c php_uri_common.c php_uriparser.c", false /* never shared */, "/I ext/lexbor /DZEND_ENABLE_STATIC_TSRMLS_CACHE=1"); +EXTENSION("uri", "php_uri.c", false /* never shared */, "/I ext/lexbor /DZEND_ENABLE_STATIC_TSRMLS_CACHE=1"); AC_DEFINE("URI_ENABLE_ANSI", 1, "Define to 1 for enabling ANSI support of uriparser.") AC_DEFINE("URI_NO_UNICODE", 1, "Define to 1 for disabling unicode support of uriparser.") ADD_EXTENSION_DEP('uri', 'lexbor'); ADD_SOURCES("ext/uri/uriparser/src", "UriCommon.c UriCompare.c UriEscape.c UriFile.c UriIp4.c UriIp4Base.c UriMemory.c UriNormalize.c UriNormalizeBase.c UriParse.c UriQuery.c UriRecompose.c UriShorten.c", "lexbor"); -PHP_INSTALL_HEADERS("ext/uri", "php_lexbor.h php_uri.h php_uri_common.h php_uriparser.h uriparser/src uriparser/include"); +PHP_INSTALL_HEADERS("ext/uri", "php_uri.h uriparser/src uriparser/include"); From 908120d7e6adae1c8a4438a8977aa0c8f5f792d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1t=C3=A9=20Kocsis?= Date: Sun, 25 May 2025 23:55:29 +0200 Subject: [PATCH 05/10] Add back UriConfig.h --- ext/uri/php_uri.c | 2 +- ext/uri/uriparser/src/UriConfig.h | 53 +++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 ext/uri/uriparser/src/UriConfig.h diff --git a/ext/uri/php_uri.c b/ext/uri/php_uri.c index d29cff7eece07..87bbe368ed983 100644 --- a/ext/uri/php_uri.c +++ b/ext/uri/php_uri.c @@ -27,12 +27,12 @@ #include "php_uri.h" #include "php_uri_arginfo.h" +#include "uriparser/src/UriConfig.h" zend_class_entry *uri_exception_ce; zend_class_entry *invalid_uri_exception_ce; zend_class_entry *whatwg_invalid_url_exception_ce; -#define URIPARSER_VERSION "0.9.8" static const zend_module_dep uri_deps[] = { ZEND_MOD_REQUIRED("lexbor") diff --git a/ext/uri/uriparser/src/UriConfig.h b/ext/uri/uriparser/src/UriConfig.h new file mode 100644 index 0000000000000..3139da04f72c1 --- /dev/null +++ b/ext/uri/uriparser/src/UriConfig.h @@ -0,0 +1,53 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2018, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#if !defined(URI_CONFIG_H) +# define URI_CONFIG_H 1 + + + +#define PACKAGE_VERSION "0.9.8" + +/* +#define HAVE_WPRINTF* +#define HAVE_REALLOCARRAY +*/ + + + +#endif /* !defined(URI_CONFIG_H) */ From 7a819d89acb75170dc1e333fad345867c9b752e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1t=C3=A9=20Kocsis?= Date: Mon, 26 May 2025 00:02:12 +0200 Subject: [PATCH 06/10] Add URIPARSER_VERSION --- ext/uri/php_uri.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ext/uri/php_uri.c b/ext/uri/php_uri.c index 87bbe368ed983..bd5c622ed3765 100644 --- a/ext/uri/php_uri.c +++ b/ext/uri/php_uri.c @@ -33,6 +33,7 @@ zend_class_entry *uri_exception_ce; zend_class_entry *invalid_uri_exception_ce; zend_class_entry *whatwg_invalid_url_exception_ce; +#define URIPARSER_VERSION PACKAGE_VERSION static const zend_module_dep uri_deps[] = { ZEND_MOD_REQUIRED("lexbor") From ce047d72ef850bed4c406d557cfa80a4825d8661 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1t=C3=A9=20Kocsis?= Date: Mon, 26 May 2025 00:15:28 +0200 Subject: [PATCH 07/10] Add uriparser includes --- ext/uri/config.w32 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/uri/config.w32 b/ext/uri/config.w32 index 6c3a70c50f5f1..21b623afd9359 100644 --- a/ext/uri/config.w32 +++ b/ext/uri/config.w32 @@ -1,4 +1,4 @@ -EXTENSION("uri", "php_uri.c", false /* never shared */, "/I ext/lexbor /DZEND_ENABLE_STATIC_TSRMLS_CACHE=1"); +EXTENSION("uri", "php_uri.c", false /* never shared */, "/I ext/lexbor /I ext/uri/uriparser/include /DZEND_ENABLE_STATIC_TSRMLS_CACHE=1"); AC_DEFINE("URI_ENABLE_ANSI", 1, "Define to 1 for enabling ANSI support of uriparser.") AC_DEFINE("URI_NO_UNICODE", 1, "Define to 1 for disabling unicode support of uriparser.") From 35106c6d85ab16107473df7db92e4fd1cbf2b634 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1t=C3=A9=20Kocsis?= Date: Mon, 26 May 2025 08:54:01 +0200 Subject: [PATCH 08/10] Try to fix Windows build --- codecov.yml | 3 +-- ext/uri/config.w32 | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/codecov.yml b/codecov.yml index a489bffa656ab..ce64fae04ec33 100644 --- a/codecov.yml +++ b/codecov.yml @@ -9,5 +9,4 @@ ignore: - "ext/lexbor/lexbor/ports" - "ext/lexbor/lexbor/tag" - "ext/pcre/pcre2lib" - - "ext/uri/uriparser/include" - - "ext/uri/uriparser/src" + - "ext/uri/uriparser" diff --git a/ext/uri/config.w32 b/ext/uri/config.w32 index 21b623afd9359..d2629feb8ea8c 100644 --- a/ext/uri/config.w32 +++ b/ext/uri/config.w32 @@ -2,7 +2,8 @@ EXTENSION("uri", "php_uri.c", false /* never shared */, "/I ext/lexbor /I ext/ur AC_DEFINE("URI_ENABLE_ANSI", 1, "Define to 1 for enabling ANSI support of uriparser.") AC_DEFINE("URI_NO_UNICODE", 1, "Define to 1 for disabling unicode support of uriparser.") +ADD_FLAG("CFLAGS_URI", "/D URI_LIBRARY_BUILD"); ADD_EXTENSION_DEP('uri', 'lexbor'); -ADD_SOURCES("ext/uri/uriparser/src", "UriCommon.c UriCompare.c UriEscape.c UriFile.c UriIp4.c UriIp4Base.c UriMemory.c UriNormalize.c UriNormalizeBase.c UriParse.c UriQuery.c UriRecompose.c UriShorten.c", "lexbor"); +ADD_SOURCES("ext/uri/uriparser/src", "UriCommon.c UriCompare.c UriEscape.c UriFile.c UriIp4.c UriIp4Base.c UriMemory.c UriNormalize.c UriNormalizeBase.c UriParse.c UriQuery.c UriRecompose.c UriShorten.c", "uri"); PHP_INSTALL_HEADERS("ext/uri", "php_uri.h uriparser/src uriparser/include"); From 19a9f4b7bc194b7769deafb08be49288f79f4f45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1t=C3=A9=20Kocsis?= Date: Mon, 26 May 2025 09:36:04 +0200 Subject: [PATCH 09/10] One last Windows fix --- ext/uri/config.w32 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/uri/config.w32 b/ext/uri/config.w32 index d2629feb8ea8c..7dabaac07e6d9 100644 --- a/ext/uri/config.w32 +++ b/ext/uri/config.w32 @@ -5,5 +5,5 @@ AC_DEFINE("URI_NO_UNICODE", 1, "Define to 1 for disabling unicode support of uri ADD_FLAG("CFLAGS_URI", "/D URI_LIBRARY_BUILD"); ADD_EXTENSION_DEP('uri', 'lexbor'); -ADD_SOURCES("ext/uri/uriparser/src", "UriCommon.c UriCompare.c UriEscape.c UriFile.c UriIp4.c UriIp4Base.c UriMemory.c UriNormalize.c UriNormalizeBase.c UriParse.c UriQuery.c UriRecompose.c UriShorten.c", "uri"); +ADD_SOURCES("ext/uri/uriparser/src", "UriCommon.c UriCompare.c UriEscape.c UriFile.c UriIp4.c UriIp4Base.c UriMemory.c UriNormalize.c UriNormalizeBase.c UriParse.c UriParseBase.c UriQuery.c UriRecompose.c UriShorten.c", "uri"); PHP_INSTALL_HEADERS("ext/uri", "php_uri.h uriparser/src uriparser/include"); From 24a802252c5ef275f0d1d69107daeb908c9a6ac0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1t=C3=A9=20Kocsis?= Date: Mon, 26 May 2025 22:30:08 +0200 Subject: [PATCH 10/10] Fix URI compile flag --- ext/uri/config.m4 | 2 +- ext/uri/config.w32 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/uri/config.m4 b/ext/uri/config.m4 index f020c7c45e6c8..6cb0e6a27bcb4 100644 --- a/ext/uri/config.m4 +++ b/ext/uri/config.m4 @@ -15,6 +15,6 @@ $URIPARSER_DIR/src/UriMemory.c $URIPARSER_DIR/src/UriNormalize.c $URIPARSER_DIR/ $URIPARSER_DIR/src/UriParse.c $URIPARSER_DIR/src/UriParseBase.c $URIPARSER_DIR/src/UriQuery.c \ $URIPARSER_DIR/src/UriRecompose.c $URIPARSER_DIR/src/UriResolve.c $URIPARSER_DIR/src/UriShorten.c" -PHP_NEW_EXTENSION(uri, [php_uri.c $URIPARSER_SOURCES], [no],,[-I$ext_builddir/$URIPARSER_DIR/include -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1]) +PHP_NEW_EXTENSION(uri, [php_uri.c $URIPARSER_SOURCES], [no],,[-I$ext_builddir/$URIPARSER_DIR/include -DURI_STATIC_BUILD -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1]) PHP_ADD_EXTENSION_DEP(uri, lexbor) PHP_ADD_BUILD_DIR($ext_builddir/$URIPARSER_DIR/src $ext_builddir/$URIPARSER_DIR/include) diff --git a/ext/uri/config.w32 b/ext/uri/config.w32 index 7dabaac07e6d9..962f7bee0660e 100644 --- a/ext/uri/config.w32 +++ b/ext/uri/config.w32 @@ -2,7 +2,7 @@ EXTENSION("uri", "php_uri.c", false /* never shared */, "/I ext/lexbor /I ext/ur AC_DEFINE("URI_ENABLE_ANSI", 1, "Define to 1 for enabling ANSI support of uriparser.") AC_DEFINE("URI_NO_UNICODE", 1, "Define to 1 for disabling unicode support of uriparser.") -ADD_FLAG("CFLAGS_URI", "/D URI_LIBRARY_BUILD"); +ADD_FLAG("CFLAGS_URI", "/D URI_STATIC_BUILD"); ADD_EXTENSION_DEP('uri', 'lexbor'); ADD_SOURCES("ext/uri/uriparser/src", "UriCommon.c UriCompare.c UriEscape.c UriFile.c UriIp4.c UriIp4Base.c UriMemory.c UriNormalize.c UriNormalizeBase.c UriParse.c UriParseBase.c UriQuery.c UriRecompose.c UriShorten.c", "uri");