diff --git a/UPGRADING b/UPGRADING index 93cca9c2c9510..476660917ae99 100644 --- a/UPGRADING +++ b/UPGRADING @@ -141,6 +141,11 @@ PHP 8.4 UPGRADE NOTES . The IntlDateFormatter class exposes now the new PATTERN constant reflecting udat api's UDAT_PATTERN. +- XML: + . Added XML_OPTION_PARSE_HUGE to allow large inputs in xml_parse and + xml_parse_into_struct. + RFC: https://wiki.php.net/rfc/xml_option_parse_huge. + ======================================== 11. Changes to INI File Handling ======================================== diff --git a/ext/xml/php_xml.h b/ext/xml/php_xml.h index e01a0264df9e8..8be95fa7aa8f9 100644 --- a/ext/xml/php_xml.h +++ b/ext/xml/php_xml.h @@ -43,7 +43,8 @@ enum php_xml_option { PHP_XML_OPTION_CASE_FOLDING = 1, PHP_XML_OPTION_TARGET_ENCODING, PHP_XML_OPTION_SKIP_TAGSTART, - PHP_XML_OPTION_SKIP_WHITE + PHP_XML_OPTION_SKIP_WHITE, + PHP_XML_OPTION_PARSE_HUGE, }; #ifdef LIBXML_EXPAT_COMPAT diff --git a/ext/xml/tests/XML_OPTION_PARSE_HUGE.phpt b/ext/xml/tests/XML_OPTION_PARSE_HUGE.phpt new file mode 100644 index 0000000000000..b429a075b1e02 --- /dev/null +++ b/ext/xml/tests/XML_OPTION_PARSE_HUGE.phpt @@ -0,0 +1,91 @@ +--TEST-- +Request #68325 (XML_OPTION_PARSE_HUGE cannot be set for xml_parser_create) +--EXTENSIONS-- +xml +--SKIPIF-- + +--FILE-- + 20) { + echo substr($str, 0, 20) . "...\n"; + } else { + echo $str . 
"\n"; + } +} + +function createParser(bool $huge) { + $parser = xml_parser_create(); + echo "old option value: "; var_dump(xml_parser_get_option($parser, XML_OPTION_PARSE_HUGE)); + xml_parser_set_option($parser, XML_OPTION_PARSE_HUGE, $huge); + echo "new option value: "; var_dump(xml_parser_get_option($parser, XML_OPTION_PARSE_HUGE)); + xml_set_element_handler($parser, function($parser, $data) { + echo "open: "; + logName($data); + }, function($parser, $data) { + }); + return $parser; +} + +// Construct XML that is too large to parse without XML_OPTION_PARSE_HUGE +$long_text = str_repeat("A", 1000 * 1000 * 5 /* 5 MB */); +$long_xml_head = "<$long_text/><$long_text/>foo"; +$long_xml_tail = ""; + +echo "--- Parse using xml_parse (failure) ---\n"; +$parser = createParser(false); +$ret = xml_parse($parser, $long_xml_head, true); +echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n"; + +echo "--- Parse using xml_parse (success) ---\n"; +$parser = createParser(true); +$ret = xml_parse($parser, $long_xml_head, false); +echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n"; +$ret = xml_parse($parser, $long_xml_tail, true); +echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n"; + +echo "--- Parse using xml_parse_into_struct (failure) ---\n"; +$parser = createParser(false); +$ret = xml_parse_into_struct($parser, $long_xml_head, $values, $index); +echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n"; + +echo "--- Parse using xml_parse_into_struct (success) ---\n"; +$parser = createParser(true); +$ret = xml_parse_into_struct($parser, $long_xml_head . 
$long_xml_tail, $values, $index); +var_dump(count($values), count($index)); // Not printing out the raw array because the long string will be contained in them as key +echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n"; + +?> +--EXPECT-- +--- Parse using xml_parse (failure) --- +old option value: bool(false) +new option value: bool(false) +open: CONTAINER +ret = 0 (XML_ERR_NAME_REQUIRED) +--- Parse using xml_parse (success) --- +old option value: bool(false) +new option value: bool(true) +open: CONTAINER +open: AAAAAAAAAAAAAAAAAAAA... +open: AAAAAAAAAAAAAAAAAAAA... +open: SECOND +ret = 1 (No error) +ret = 1 (No error) +--- Parse using xml_parse_into_struct (failure) --- +old option value: bool(false) +new option value: bool(false) +open: CONTAINER +ret = 0 (XML_ERR_NAME_REQUIRED) +--- Parse using xml_parse_into_struct (success) --- +old option value: bool(false) +new option value: bool(true) +open: CONTAINER +open: AAAAAAAAAAAAAAAAAAAA... +open: AAAAAAAAAAAAAAAAAAAA... 
+open: SECOND +int(5) +int(3) +ret = 1 (No error) diff --git a/ext/xml/tests/XML_OPTION_PARSE_HUGE_during_parsing.phpt b/ext/xml/tests/XML_OPTION_PARSE_HUGE_during_parsing.phpt new file mode 100644 index 0000000000000..6b19052b78aac --- /dev/null +++ b/ext/xml/tests/XML_OPTION_PARSE_HUGE_during_parsing.phpt @@ -0,0 +1,27 @@ +--TEST-- +Request #68325 (XML_OPTION_PARSE_HUGE cannot be set for xml_parser_create - setting during parsing) +--EXTENSIONS-- +xml +--SKIPIF-- + +--FILE-- +", true); + +?> +--EXPECTF-- +Fatal error: Uncaught Error: Cannot change option XML_OPTION_PARSE_HUGE while parsing in %s:%d +Stack trace: +#0 %s(%d): xml_parser_set_option(Object(XMLParser), 5, true) +#1 [internal function]: {closure}(Object(XMLParser), 'FOO', Array) +#2 %s(%d): xml_parse(Object(XMLParser), '', true) +#3 {main} + thrown in %s on line %d diff --git a/ext/xml/xml.c b/ext/xml/xml.c index 410e57d8813a3..636aa4392aac8 100644 --- a/ext/xml/xml.c +++ b/ext/xml/xml.c @@ -91,6 +91,7 @@ typedef struct { int lastwasopen; int skipwhite; int isparsing; + bool parsehuge; XML_Char *baseURI; @@ -264,6 +265,28 @@ PHP_MINFO_FUNCTION(xml) /* {{{ extension-internal functions */ +static int xml_parse_helper(xml_parser *parser, const char *data, size_t data_len, bool is_final) +{ + ZEND_ASSERT(!parser->isparsing); + + /* libxml2 specific options */ +#if LIBXML_EXPAT_COMPAT + /* See xmlInitSAXParserCtxt() and xmlCtxtUseOptions() */ + if (parser->parsehuge) { + parser->parser->parser->options |= XML_PARSE_HUGE; + xmlDictSetLimit(parser->parser->parser->dict, 0); + } else { + parser->parser->parser->options &= ~XML_PARSE_HUGE; + xmlDictSetLimit(parser->parser->parser->dict, XML_MAX_DICTIONARY_LIMIT); + } +#endif + + parser->isparsing = 1; + int ret = XML_Parse(parser->parser, (const XML_Char *) data, data_len, is_final); + parser->isparsing = 0; + return ret; +} + static void _xml_xmlchar_zval(const XML_Char *s, int len, const XML_Char *encoding, zval *ret) { if (s == NULL) { @@ -1024,6 +1047,7 @@ 
static void php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAMETERS, int ns_supp parser->target_encoding = encoding; parser->case_folding = 1; parser->isparsing = 0; + parser->parsehuge = false; /* It's the default for BC & DoS protection */ XML_SetUserData(parser->parser, parser); ZVAL_COPY_VALUE(&parser->index, return_value); @@ -1283,7 +1307,6 @@ PHP_FUNCTION(xml_parse) zval *pind; char *data; size_t data_len; - int ret; bool isFinal = 0; if (zend_parse_parameters(ZEND_NUM_ARGS(), "Os|b", &pind, xml_parser_ce, &data, &data_len, &isFinal) == FAILURE) { @@ -1295,10 +1318,7 @@ PHP_FUNCTION(xml_parse) zend_throw_error(NULL, "Parser must not be called recursively"); RETURN_THROWS(); } - parser->isparsing = 1; - ret = XML_Parse(parser->parser, (XML_Char*)data, data_len, isFinal); - parser->isparsing = 0; - RETVAL_LONG(ret); + RETURN_LONG(xml_parse_helper(parser, data, data_len, isFinal)); } /* }}} */ @@ -1310,7 +1330,6 @@ PHP_FUNCTION(xml_parse_into_struct) zval *pind, *xdata, *info = NULL; char *data; size_t data_len; - int ret; if (zend_parse_parameters(ZEND_NUM_ARGS(), "Osz|z", &pind, xml_parser_ce, &data, &data_len, &xdata, &info) == FAILURE) { RETURN_THROWS(); @@ -1348,11 +1367,7 @@ PHP_FUNCTION(xml_parse_into_struct) XML_SetElementHandler(parser->parser, _xml_startElementHandler, _xml_endElementHandler); XML_SetCharacterDataHandler(parser->parser, _xml_characterDataHandler); - parser->isparsing = 1; - ret = XML_Parse(parser->parser, (XML_Char*)data, data_len, 1); - parser->isparsing = 0; - - RETVAL_LONG(ret); + RETURN_LONG(xml_parse_helper(parser, data, data_len, true)); } /* }}} */ @@ -1481,6 +1496,15 @@ PHP_FUNCTION(xml_parser_set_option) case PHP_XML_OPTION_SKIP_WHITE: parser->skipwhite = zend_is_true(value); break; + /* Boolean option */ + case PHP_XML_OPTION_PARSE_HUGE: + /* Prevent wreaking havoc on the parser internals during parsing */ + if (UNEXPECTED(parser->isparsing)) { + zend_throw_error(NULL, "Cannot change option XML_OPTION_PARSE_HUGE while 
parsing"); + RETURN_THROWS(); + } + parser->parsehuge = zend_is_true(value); + break; /* Integer option */ case PHP_XML_OPTION_SKIP_TAGSTART: /* The tag start offset is stored in an int */ @@ -1542,6 +1566,9 @@ PHP_FUNCTION(xml_parser_get_option) case PHP_XML_OPTION_SKIP_WHITE: RETURN_BOOL(parser->skipwhite); break; + case PHP_XML_OPTION_PARSE_HUGE: + RETURN_BOOL(parser->parsehuge); + break; case PHP_XML_OPTION_TARGET_ENCODING: RETURN_STRING((char *)parser->target_encoding); break; diff --git a/ext/xml/xml.stub.php b/ext/xml/xml.stub.php index 32917c56ee085..8b2bb9fd91ae7 100644 --- a/ext/xml/xml.stub.php +++ b/ext/xml/xml.stub.php @@ -133,6 +133,11 @@ * @cvalue PHP_XML_OPTION_SKIP_WHITE */ const XML_OPTION_SKIP_WHITE = UNKNOWN; +/** + * @var int + * @cvalue PHP_XML_OPTION_PARSE_HUGE + */ +const XML_OPTION_PARSE_HUGE = UNKNOWN; /** * @var string diff --git a/ext/xml/xml_arginfo.h b/ext/xml/xml_arginfo.h index d14523fd761ad..f75d10030a49f 100644 --- a/ext/xml/xml_arginfo.h +++ b/ext/xml/xml_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: eb168a134e8acf6f19f0cc2c9ddeae95da61045d */ + * Stub hash: 69734dd8094fd69c878383d488900886d1162998 */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_xml_parser_create, 0, 0, XMLParser, 0) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 1, "null") @@ -166,6 +166,7 @@ static void register_xml_symbols(int module_number) REGISTER_LONG_CONSTANT("XML_OPTION_TARGET_ENCODING", PHP_XML_OPTION_TARGET_ENCODING, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("XML_OPTION_SKIP_TAGSTART", PHP_XML_OPTION_SKIP_TAGSTART, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("XML_OPTION_SKIP_WHITE", PHP_XML_OPTION_SKIP_WHITE, CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("XML_OPTION_PARSE_HUGE", PHP_XML_OPTION_PARSE_HUGE, CONST_PERSISTENT); REGISTER_STRING_CONSTANT("XML_SAX_IMPL", PHP_XML_SAX_IMPL, CONST_PERSISTENT); }