diff --git a/UPGRADING b/UPGRADING
index 93cca9c2c9510..476660917ae99 100644
--- a/UPGRADING
+++ b/UPGRADING
@@ -141,6 +141,11 @@ PHP 8.4 UPGRADE NOTES
. The IntlDateFormatter class exposes now the new PATTERN constant
reflecting udat api's UDAT_PATTERN.
+- XML:
+ . Added XML_OPTION_PARSE_HUGE to allow large inputs in xml_parse and
+ xml_parse_into_struct.
+ RFC: https://wiki.php.net/rfc/xml_option_parse_huge.
+
========================================
11. Changes to INI File Handling
========================================
diff --git a/ext/xml/php_xml.h b/ext/xml/php_xml.h
index e01a0264df9e8..8be95fa7aa8f9 100644
--- a/ext/xml/php_xml.h
+++ b/ext/xml/php_xml.h
@@ -43,7 +43,8 @@ enum php_xml_option {
PHP_XML_OPTION_CASE_FOLDING = 1,
PHP_XML_OPTION_TARGET_ENCODING,
PHP_XML_OPTION_SKIP_TAGSTART,
- PHP_XML_OPTION_SKIP_WHITE
+ PHP_XML_OPTION_SKIP_WHITE,
+ PHP_XML_OPTION_PARSE_HUGE,
};
#ifdef LIBXML_EXPAT_COMPAT
diff --git a/ext/xml/tests/XML_OPTION_PARSE_HUGE.phpt b/ext/xml/tests/XML_OPTION_PARSE_HUGE.phpt
new file mode 100644
index 0000000000000..b429a075b1e02
--- /dev/null
+++ b/ext/xml/tests/XML_OPTION_PARSE_HUGE.phpt
@@ -0,0 +1,91 @@
+--TEST--
+Request #68325 (XML_OPTION_PARSE_HUGE cannot be set for xml_parser_create)
+--EXTENSIONS--
+xml
+--SKIPIF--
+
+--FILE--
+ 20) {
+ echo substr($str, 0, 20) . "...\n";
+ } else {
+ echo $str . "\n";
+ }
+}
+
+function createParser(bool $huge) {
+ // Build a parser, printing XML_OPTION_PARSE_HUGE before and after applying $huge.
+ $xml = xml_parser_create();
+ echo "old option value: "; var_dump(xml_parser_get_option($xml, XML_OPTION_PARSE_HUGE));
+ xml_parser_set_option($xml, XML_OPTION_PARSE_HUGE, $huge);
+ echo "new option value: "; var_dump(xml_parser_get_option($xml, XML_OPTION_PARSE_HUGE));
+ // Opening tags are logged through logName(); closing tags are deliberately ignored.
+ $onOpen = function($parser, $name) { echo "open: "; logName($name); };
+ $onClose = function($parser, $name) {};
+ xml_set_element_handler($xml, $onOpen, $onClose);
+ return $xml;
+}
+
+// Construct XML that is too large to parse without XML_OPTION_PARSE_HUGE
+$long_text = str_repeat("A", 1000 * 1000 * 5 /* 5 MB */);
+$long_xml_head = "<$long_text/><$long_text/>foo";
+$long_xml_tail = ""; // NOTE(review): EXPECT lists CONTAINER and SECOND elements that these literals lack - markup may have been lost; confirm
+
+echo "--- Parse using xml_parse (failure) ---\n";
+$parser = createParser(false); // option explicitly set to false
+$ret = xml_parse($parser, $long_xml_head, true); // fed as a single, final chunk
+echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n";
+
+echo "--- Parse using xml_parse (success) ---\n";
+$parser = createParser(true); // option enabled before any data is fed
+$ret = xml_parse($parser, $long_xml_head, false); // non-final chunk
+echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n";
+$ret = xml_parse($parser, $long_xml_tail, true); // final chunk
+echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n";
+
+echo "--- Parse using xml_parse_into_struct (failure) ---\n";
+$parser = createParser(false); // option explicitly set to false
+$ret = xml_parse_into_struct($parser, $long_xml_head, $values, $index);
+echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n";
+
+echo "--- Parse using xml_parse_into_struct (success) ---\n";
+$parser = createParser(true); // option enabled before any data is fed
+$ret = xml_parse_into_struct($parser, $long_xml_head . $long_xml_tail, $values, $index);
+var_dump(count($values), count($index)); // Only the counts are printed: the raw arrays would contain the long string as a key
+echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n";
+
+?>
+--EXPECT--
+--- Parse using xml_parse (failure) ---
+old option value: bool(false)
+new option value: bool(false)
+open: CONTAINER
+ret = 0 (XML_ERR_NAME_REQUIRED)
+--- Parse using xml_parse (success) ---
+old option value: bool(false)
+new option value: bool(true)
+open: CONTAINER
+open: AAAAAAAAAAAAAAAAAAAA...
+open: AAAAAAAAAAAAAAAAAAAA...
+open: SECOND
+ret = 1 (No error)
+ret = 1 (No error)
+--- Parse using xml_parse_into_struct (failure) ---
+old option value: bool(false)
+new option value: bool(false)
+open: CONTAINER
+ret = 0 (XML_ERR_NAME_REQUIRED)
+--- Parse using xml_parse_into_struct (success) ---
+old option value: bool(false)
+new option value: bool(true)
+open: CONTAINER
+open: AAAAAAAAAAAAAAAAAAAA...
+open: AAAAAAAAAAAAAAAAAAAA...
+open: SECOND
+int(5)
+int(3)
+ret = 1 (No error)
diff --git a/ext/xml/tests/XML_OPTION_PARSE_HUGE_during_parsing.phpt b/ext/xml/tests/XML_OPTION_PARSE_HUGE_during_parsing.phpt
new file mode 100644
index 0000000000000..6b19052b78aac
--- /dev/null
+++ b/ext/xml/tests/XML_OPTION_PARSE_HUGE_during_parsing.phpt
@@ -0,0 +1,27 @@
+--TEST--
+Request #68325 (XML_OPTION_PARSE_HUGE cannot be set for xml_parser_create - setting during parsing)
+--EXTENSIONS--
+xml
+--SKIPIF--
+
+--FILE--
+", true);
+
+?>
+--EXPECTF--
+Fatal error: Uncaught Error: Cannot change option XML_OPTION_PARSE_HUGE while parsing in %s:%d
+Stack trace:
+#0 %s(%d): xml_parser_set_option(Object(XMLParser), 5, true)
+#1 [internal function]: {closure}(Object(XMLParser), 'FOO', Array)
+#2 %s(%d): xml_parse(Object(XMLParser), '', true)
+#3 {main}
+ thrown in %s on line %d
diff --git a/ext/xml/xml.c b/ext/xml/xml.c
index 410e57d8813a3..636aa4392aac8 100644
--- a/ext/xml/xml.c
+++ b/ext/xml/xml.c
@@ -91,6 +91,7 @@ typedef struct {
int lastwasopen;
int skipwhite;
int isparsing;
+ bool parsehuge;
XML_Char *baseURI;
@@ -264,6 +265,28 @@ PHP_MINFO_FUNCTION(xml)
/* {{{ extension-internal functions */
+static int xml_parse_helper(xml_parser *parser, const char *data, size_t data_len, bool is_final)
+{
+ /* Shared XML_Parse() wrapper: marks the parser busy for the call and returns XML_Parse()'s result. */
+ ZEND_ASSERT(!parser->isparsing); /* callers are expected to have rejected recursive use already */
+ /* libxml2 specific options, re-applied on every call from parser->parsehuge */
+#ifdef LIBXML_EXPAT_COMPAT
+ /* See xmlInitSAXParserCtxt() and xmlCtxtUseOptions() */
+ if (parser->parsehuge) {
+ parser->parser->parser->options |= XML_PARSE_HUGE;
+ xmlDictSetLimit(parser->parser->parser->dict, 0);
+ } else {
+ parser->parser->parser->options &= ~XML_PARSE_HUGE;
+ xmlDictSetLimit(parser->parser->parser->dict, XML_MAX_DICTIONARY_LIMIT);
+ }
+#endif
+
+ parser->isparsing = 1;
+ int ret = XML_Parse(parser->parser, (const XML_Char *) data, data_len, is_final);
+ parser->isparsing = 0;
+ return ret;
+}
+
static void _xml_xmlchar_zval(const XML_Char *s, int len, const XML_Char *encoding, zval *ret)
{
if (s == NULL) {
@@ -1024,6 +1047,7 @@ static void php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAMETERS, int ns_supp
parser->target_encoding = encoding;
parser->case_folding = 1;
parser->isparsing = 0;
+ parser->parsehuge = false; /* It's the default for BC & DoS protection */
XML_SetUserData(parser->parser, parser);
ZVAL_COPY_VALUE(&parser->index, return_value);
@@ -1283,7 +1307,6 @@ PHP_FUNCTION(xml_parse)
zval *pind;
char *data;
size_t data_len;
- int ret;
bool isFinal = 0;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "Os|b", &pind, xml_parser_ce, &data, &data_len, &isFinal) == FAILURE) {
@@ -1295,10 +1318,7 @@ PHP_FUNCTION(xml_parse)
zend_throw_error(NULL, "Parser must not be called recursively");
RETURN_THROWS();
}
- parser->isparsing = 1;
- ret = XML_Parse(parser->parser, (XML_Char*)data, data_len, isFinal);
- parser->isparsing = 0;
- RETVAL_LONG(ret);
+ RETURN_LONG(xml_parse_helper(parser, data, data_len, isFinal));
}
/* }}} */
@@ -1310,7 +1330,6 @@ PHP_FUNCTION(xml_parse_into_struct)
zval *pind, *xdata, *info = NULL;
char *data;
size_t data_len;
- int ret;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "Osz|z", &pind, xml_parser_ce, &data, &data_len, &xdata, &info) == FAILURE) {
RETURN_THROWS();
@@ -1348,11 +1367,7 @@ PHP_FUNCTION(xml_parse_into_struct)
XML_SetElementHandler(parser->parser, _xml_startElementHandler, _xml_endElementHandler);
XML_SetCharacterDataHandler(parser->parser, _xml_characterDataHandler);
- parser->isparsing = 1;
- ret = XML_Parse(parser->parser, (XML_Char*)data, data_len, 1);
- parser->isparsing = 0;
-
- RETVAL_LONG(ret);
+ RETURN_LONG(xml_parse_helper(parser, data, data_len, true));
}
/* }}} */
@@ -1481,6 +1496,15 @@ PHP_FUNCTION(xml_parser_set_option)
case PHP_XML_OPTION_SKIP_WHITE:
parser->skipwhite = zend_is_true(value);
break;
+ /* Boolean option */
+ case PHP_XML_OPTION_PARSE_HUGE:
+ /* Prevent wreaking havoc on the parser internals during parsing */
+ if (UNEXPECTED(parser->isparsing)) {
+ zend_throw_error(NULL, "Cannot change option XML_OPTION_PARSE_HUGE while parsing");
+ RETURN_THROWS();
+ }
+ parser->parsehuge = zend_is_true(value);
+ break;
/* Integer option */
case PHP_XML_OPTION_SKIP_TAGSTART:
/* The tag start offset is stored in an int */
@@ -1542,6 +1566,9 @@ PHP_FUNCTION(xml_parser_get_option)
case PHP_XML_OPTION_SKIP_WHITE:
RETURN_BOOL(parser->skipwhite);
break;
+ case PHP_XML_OPTION_PARSE_HUGE:
+ RETURN_BOOL(parser->parsehuge);
+ break;
case PHP_XML_OPTION_TARGET_ENCODING:
RETURN_STRING((char *)parser->target_encoding);
break;
diff --git a/ext/xml/xml.stub.php b/ext/xml/xml.stub.php
index 32917c56ee085..8b2bb9fd91ae7 100644
--- a/ext/xml/xml.stub.php
+++ b/ext/xml/xml.stub.php
@@ -133,6 +133,11 @@
* @cvalue PHP_XML_OPTION_SKIP_WHITE
*/
const XML_OPTION_SKIP_WHITE = UNKNOWN;
+/**
+ * @var int
+ * @cvalue PHP_XML_OPTION_PARSE_HUGE
+ */
+const XML_OPTION_PARSE_HUGE = UNKNOWN;
/**
* @var string
diff --git a/ext/xml/xml_arginfo.h b/ext/xml/xml_arginfo.h
index d14523fd761ad..f75d10030a49f 100644
--- a/ext/xml/xml_arginfo.h
+++ b/ext/xml/xml_arginfo.h
@@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
- * Stub hash: eb168a134e8acf6f19f0cc2c9ddeae95da61045d */
+ * Stub hash: 69734dd8094fd69c878383d488900886d1162998 */
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_xml_parser_create, 0, 0, XMLParser, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 1, "null")
@@ -166,6 +166,7 @@ static void register_xml_symbols(int module_number)
REGISTER_LONG_CONSTANT("XML_OPTION_TARGET_ENCODING", PHP_XML_OPTION_TARGET_ENCODING, CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("XML_OPTION_SKIP_TAGSTART", PHP_XML_OPTION_SKIP_TAGSTART, CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("XML_OPTION_SKIP_WHITE", PHP_XML_OPTION_SKIP_WHITE, CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("XML_OPTION_PARSE_HUGE", PHP_XML_OPTION_PARSE_HUGE, CONST_PERSISTENT);
REGISTER_STRING_CONSTANT("XML_SAX_IMPL", PHP_XML_SAX_IMPL, CONST_PERSISTENT);
}