Skip to content

Commit 6e27188

Browse files
committed
More improvements
1 parent 1904e5a commit 6e27188

File tree

5 files changed

+204
-42
lines changed

5 files changed

+204
-42
lines changed

ext/uri/php_uriparser.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ static UriUriA *uriparser_copy_uri(UriUriA *uriparser_uri)
4242

4343
static void uriparser_normalize_uri(UriUriA *uriparser_uri)
4444
{
45-
ZEND_ASSERT(uriNormalizeSyntaxExA(uriparser_uri, (unsigned int)-1) == URI_SUCCESS);
45+
int result = uriNormalizeSyntaxExA(uriparser_uri, (unsigned int)-1);
46+
ZEND_ASSERT(result == URI_SUCCESS);
4647
}
4748

4849
static UriUriA *uriparser_read_uri(uriparser_uris_t *uriparser_uris, uri_component_read_mode_t read_mode)
@@ -375,12 +376,14 @@ static zend_string *uriparser_uri_to_string(void *uri, uri_recomposition_mode_t
375376
}
376377

377378
int charsRequired = 0;
378-
ZEND_ASSERT(uriToStringCharsRequiredA(uriparser_uri, &charsRequired) == URI_SUCCESS);
379+
int result = uriToStringCharsRequiredA(uriparser_uri, &charsRequired);
380+
ZEND_ASSERT(result == URI_SUCCESS);
379381

380382
charsRequired++;
381383

382384
zend_string *uri_string = zend_string_alloc(charsRequired - 1, false);
383-
ZEND_ASSERT(uriToStringA(ZSTR_VAL(uri_string), uriparser_uri, charsRequired, NULL) == URI_SUCCESS);
385+
result = uriToStringA(ZSTR_VAL(uri_string), uriparser_uri, charsRequired, NULL);
386+
ZEND_ASSERT(result == URI_SUCCESS);
384387

385388
if (exclude_fragment) {
386389
const char *pos = zend_memrchr(ZSTR_VAL(uri_string), '#', ZSTR_LEN(uri_string));

ext/uri/uriparser/src/UriCopy.c

Lines changed: 32 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272
# include <uriparser/Uri.h>
7373
# include "UriCommon.h"
7474
# include "UriMemory.h"
75+
# include "UriNormalize.h"
7576
# include "UriCopy.h"
7677
#endif
7778

@@ -102,14 +103,14 @@ static void URI_FUNC(PreventLeakageAfterCopy)(URI_TYPE(Uri) * uri,
102103

103104
int URI_FUNC(CopyUriMm)(URI_TYPE(Uri) * destUri,
104105
const URI_TYPE(Uri) * sourceUri, UriMemoryManager * memory) {
105-
if (sourceUri == NULL) {
106+
unsigned int doneMask = URI_NORMALIZED;
107+
108+
if (sourceUri == NULL || destUri == NULL) {
106109
return URI_ERROR_NULL;
107110
}
108111

109112
URI_CHECK_MEMORY_MANAGER(memory); /* may return */
110113

111-
unsigned int doneMask = URI_NORMALIZED;
112-
113114
if (URI_FUNC(CopyRangeAsNeeded)(&destUri->scheme, &sourceUri->scheme, URI_FALSE, memory) == URI_FALSE) {
114115
return URI_ERROR_MALLOC;
115116
}
@@ -141,8 +142,6 @@ int URI_FUNC(CopyUriMm)(URI_TYPE(Uri) * destUri,
141142
*(destUri->hostData.ip4) = *(sourceUri->hostData.ip4);
142143
}
143144

144-
doneMask |= URI_NORMALIZE_HOST;
145-
146145
if (sourceUri->hostData.ip6 == NULL) {
147146
destUri->hostData.ip6 = NULL;
148147
} else {
@@ -166,47 +165,44 @@ int URI_FUNC(CopyUriMm)(URI_TYPE(Uri) * destUri,
166165

167166
doneMask |= URI_NORMALIZE_PORT;
168167

169-
if (sourceUri->pathHead != NULL && sourceUri->pathTail != NULL) {
170-
URI_TYPE(PathSegment) *walker;
171-
URI_TYPE(PathSegment) *walkerNew;
172-
173-
destUri->pathHead = memory->malloc(memory, sizeof(URI_TYPE(PathSegment)));
174-
if (destUri->pathHead == NULL) {
175-
URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory);
176-
return URI_ERROR_MALLOC;
177-
}
178-
179-
if (URI_FUNC(CopyRangeAsNeeded)(&destUri->pathHead->text, &sourceUri->pathHead->text, URI_TRUE, memory) == URI_FALSE) {
180-
URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory);
181-
memory->free(memory, destUri->pathHead);
182-
return URI_ERROR_MALLOC;
183-
}
184-
destUri->pathHead->reserved = NULL;
168+
destUri->pathHead = NULL;
169+
destUri->pathTail = NULL;
185170

186-
doneMask |= URI_NORMALIZE_PATH;
171+
if (sourceUri->pathHead != NULL) {
172+
URI_TYPE(PathSegment) * sourceWalker = sourceUri->pathHead;
173+
URI_TYPE(PathSegment) * destPrev = NULL;
187174

188-
walker = sourceUri->pathHead->next;
189-
walkerNew = destUri->pathHead;
190-
while (walker != NULL && (walker->text.first != walker->text.afterLast || walker->text.first == URI_FUNC(SafeToPointTo))) {
191-
walkerNew->next = memory->malloc(memory, sizeof(URI_TYPE(PathSegment)));
192-
if (walkerNew->next == NULL) {
175+
while (sourceWalker != NULL) {
176+
URI_TYPE(PathSegment) * destWalker = memory->malloc(memory, sizeof(URI_TYPE(PathSegment)));
177+
if (destWalker == NULL) {
193178
URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory);
194179
return URI_ERROR_MALLOC;
195180
}
196181

197-
walkerNew = walkerNew->next;
198-
if (URI_FUNC(CopyRangeAsNeeded)(&walkerNew->text, &walker->text, URI_TRUE, memory) == URI_FALSE) {
182+
destWalker->text.first = NULL;
183+
destWalker->text.afterLast = NULL;
184+
destWalker->next = NULL;
185+
destWalker->reserved = NULL;
186+
187+
if (destUri->pathHead == NULL) {
188+
destUri->pathHead = destWalker;
189+
doneMask |= URI_NORMALIZE_PATH;
190+
}
191+
192+
if (URI_FUNC(CopyRangeAsNeeded)(&destWalker->text, &sourceWalker->text, URI_TRUE, memory) == URI_FALSE) {
199193
URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory);
200194
return URI_ERROR_MALLOC;
201195
}
202-
walkerNew->reserved = NULL;
203-
walker = walker->next;
196+
197+
if (destPrev != NULL) {
198+
destPrev->next = destWalker;
199+
}
200+
201+
destPrev = destWalker;
202+
sourceWalker = sourceWalker->next;
203+
204+
destUri->pathTail = destPrev;
204205
}
205-
walkerNew->next = NULL;
206-
destUri->pathTail = walkerNew;
207-
} else {
208-
destUri->pathHead = NULL;
209-
destUri->pathTail = NULL;
210206
}
211207

212208
if (URI_FUNC(CopyRangeAsNeeded)(&destUri->query, &sourceUri->query, URI_FALSE, memory) == URI_FALSE) {

ext/uri/uriparser/src/UriCopy.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,6 @@
6969

7070

7171

72-
void URI_FUNC(PreventLeakage)(URI_TYPE(Uri) * uri,
73-
unsigned int revertMask, UriMemoryManager * memory);
7472
int URI_FUNC(CopyUriMm)(URI_TYPE(Uri) * destUri,
7573
const URI_TYPE(Uri) * sourceUri, UriMemoryManager * memory);
7674
int URI_FUNC(CopyUri)(URI_TYPE(Uri) * destUri,

ext/uri/uriparser/src/UriNormalize.c

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@
7171
# include <uriparser/Uri.h>
7272
# include "UriNormalizeBase.h"
7373
# include "UriCommon.h"
74-
# include "UriCopy.h"
7574
# include "UriMemory.h"
7675
#endif
7776

@@ -547,6 +546,75 @@ int URI_FUNC(NormalizeSyntax)(URI_TYPE(Uri) * uri) {
547546
}
548547

549548

549+
/*
 * Locates the significant part of the non-empty character range
 * [first, afterLast) by skipping leading '0' characters.
 *
 * Returns a pointer to the first character that is not '0'.  If the range
 * consists of zeros only, the returned pointer refers to the rightmost zero,
 * so the remainder is never empty.
 *
 * Both pointers must be non-NULL and the range must not be empty; these
 * preconditions are checked with assert().
 */
static const URI_CHAR * URI_FUNC(PastLeadingZeros)(const URI_CHAR * first, const URI_CHAR * afterLast) {
	const URI_CHAR * cursor = first;

	assert(first != NULL);
	assert(afterLast != NULL);
	assert(first != afterLast);

	/* Walk forward until something other than a zero shows up */
	for (; cursor < afterLast; cursor++) {
		if (cursor[0] != _UT('0')) {
			break;
		}
	}

	if (cursor != afterLast) {
		/* Stopped on a significant character */
		return cursor;
	}

	/* Nothing but zeros: the range is non-empty, so stepping back one
	 * position lands on the rightmost zero, which is kept */
	assert(cursor > first);
	cursor--;
	assert(cursor[0] == _UT('0'));

	return cursor;
}
573+
574+
575+
576+
/*
 * Removes leading zeros from the writable range [first, *afterLast) by
 * shifting the remaining characters to the front of the buffer.
 *
 * When characters are removed, a terminating NUL is written right behind the
 * shifted remainder and *afterLast is updated to the new end of the range.
 * An empty range, or one without leading zeros to drop, is left untouched.
 * A range made of zeros only is reduced to a single "0".
 */
static void URI_FUNC(DropLeadingZerosInplace)(URI_CHAR * first, const URI_CHAR ** afterLast) {
	const URI_CHAR * significant;
	size_t keptLen;

	assert(first != NULL);
	assert(afterLast != NULL);
	assert(*afterLast != NULL);

	/* Nothing to do for the empty string */
	if (first == *afterLast) {
		return;
	}

	significant = URI_FUNC(PastLeadingZeros)(first, *afterLast);

	/* No leading zeros that could be dropped */
	if (significant <= first) {
		return;
	}

	/* Source and destination overlap, hence memmove rather than memcpy */
	keptLen = *afterLast - significant;
	memmove(first, significant, keptLen * sizeof(URI_CHAR));
	first[keptLen] = _UT('\0');
	*afterLast = first + keptLen;
}
596+
597+
598+
599+
static void URI_FUNC(AdvancePastLeadingZeros)(
600+
const URI_CHAR ** first, const URI_CHAR * afterLast) {
601+
assert(first != NULL);
602+
assert(*first != NULL);
603+
assert(afterLast != NULL);
604+
605+
if (*first == afterLast) {
606+
return;
607+
}
608+
609+
{
610+
const URI_CHAR * const remainderFirst = URI_FUNC(PastLeadingZeros)(*first, afterLast);
611+
612+
/* Cut off leading zeros */
613+
*first = remainderFirst;
614+
}
615+
}
616+
617+
550618

551619
static URI_INLINE int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri,
552620
unsigned int inMask, unsigned int * outMask,
@@ -648,6 +716,27 @@ static URI_INLINE int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri,
648716
}
649717
}
650718

719+
/* Port */
720+
if (outMask != NULL) {
721+
/* Is there a port even? */
722+
if (uri->portText.first != NULL) {
723+
/* Determine whether the port is already normalized, i.e. either "", "0" or no leading zeros */
724+
const size_t portLen = uri->portText.afterLast - uri->portText.first;
725+
if ((portLen > 1) && (uri->portText.first[0] == _UT('0'))) {
726+
*outMask |= URI_NORMALIZE_PORT;
727+
}
728+
}
729+
} else {
730+
/* Normalize the port, i.e. drop leading zeros (except for string "0") */
731+
if ((inMask & URI_NORMALIZE_PORT) && (uri->portText.first != NULL)) {
732+
if (uri->owner) {
733+
URI_FUNC(DropLeadingZerosInplace)((URI_CHAR *)uri->portText.first, &(uri->portText.afterLast));
734+
} else {
735+
URI_FUNC(AdvancePastLeadingZeros)(&(uri->portText.first), uri->portText.afterLast);
736+
}
737+
}
738+
}
739+
651740
/* User info */
652741
if (outMask != NULL) {
653742
const UriBool normalizeUserInfo = URI_FUNC(ContainsUglyPercentEncoding)(

ext/uri/uriparser/src/UriNormalize.h

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
/*
2+
* uriparser - RFC 3986 URI parsing library
3+
*
4+
* Copyright (C) 2018, Weijia Song <songweijia@gmail.com>
5+
* Copyright (C) 2018, Sebastian Pipping <sebastian@pipping.org>
6+
* Copyright (C) 2025, Máté Kocsis <kocsismate@php.net>
7+
* All rights reserved.
8+
*
9+
* Redistribution and use in source and binary forms, with or without
10+
* modification, are permitted provided that the following conditions
11+
* are met:
12+
*
13+
* 1. Redistributions of source code must retain the above
14+
* copyright notice, this list of conditions and the following
15+
* disclaimer.
16+
*
17+
* 2. Redistributions in binary form must reproduce the above
18+
* copyright notice, this list of conditions and the following
19+
* disclaimer in the documentation and/or other materials
20+
* provided with the distribution.
21+
*
22+
* 3. Neither the name of the copyright holder nor the names of
23+
* its contributors may be used to endorse or promote products
24+
* derived from this software without specific prior written
25+
* permission.
26+
*
27+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28+
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29+
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30+
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
31+
* THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
32+
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
33+
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
34+
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35+
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
36+
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
38+
* OF THE POSSIBILITY OF SUCH DAMAGE.
39+
*/
40+
41+
#if (defined(URI_PASS_ANSI) && !defined(URI_COPY_H_ANSI)) \
42+
|| (defined(URI_PASS_UNICODE) && !defined(URI_COPY_H_UNICODE)) \
43+
|| (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
44+
/* What encodings are enabled? */
45+
#include <uriparser/UriDefsConfig.h>
46+
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
47+
/* Include SELF twice */
48+
# ifdef URI_ENABLE_ANSI
49+
# define URI_PASS_ANSI 1
50+
# include "UriNormalize.h"
51+
# undef URI_PASS_ANSI
52+
# endif
53+
# ifdef URI_ENABLE_UNICODE
54+
# define URI_PASS_UNICODE 1
55+
# include "UriNormalize.h"
56+
# undef URI_PASS_UNICODE
57+
# endif
58+
/* Only one pass for each encoding */
59+
#elif (defined(URI_PASS_ANSI) && !defined(URI_NORMALIZE_H_ANSI) \
60+
&& defined(URI_ENABLE_ANSI)) || (defined(URI_PASS_UNICODE) \
61+
&& !defined(URI_NORMALIZE_H_UNICODE) && defined(URI_ENABLE_UNICODE))
62+
# ifdef URI_PASS_ANSI
63+
# define URI_NORMALIZE_H_ANSI 1
64+
# include <uriparser/UriDefsAnsi.h>
65+
# else
66+
# define URI_NORMALIZE_H_UNICODE 1
67+
# include <uriparser/UriDefsUnicode.h>
68+
# endif
69+
70+
71+
72+
void URI_FUNC(PreventLeakage)(URI_TYPE(Uri) * uri,
73+
unsigned int revertMask, UriMemoryManager * memory);
74+
75+
#endif
76+
#endif

0 commit comments

Comments
 (0)