From 92174b3c9c2d9df7d39d41362eb0bebddf05a6f8 Mon Sep 17 00:00:00 2001 From: Gregor Morrill Date: Mon, 6 Aug 2018 18:46:26 -0700 Subject: [PATCH 1/4] Add tests and fix for implied photo parsing --- Mf2/Parser.php | 4 ++-- tests/Mf2/ParseImpliedTest.php | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/Mf2/Parser.php b/Mf2/Parser.php index 1ba275f..34fc3dd 100644 --- a/Mf2/Parser.php +++ b/Mf2/Parser.php @@ -1166,8 +1166,8 @@ public function parseImpliedPhoto(\DOMElement $e) { $xpaths = array( './img', './object', - './*[not(contains(concat(" ", @class), " h-"))]/img[count(preceding-sibling::img)+count(following-sibling::img)=0]', - './*[not(contains(concat(" ", @class), " h-"))]/object[count(preceding-sibling::object)+count(following-sibling::object)=0]', + './*[count(preceding-sibling::*)+count(following-sibling::*)=0][not(contains(concat(" ", @class), " h-"))]/img[count(preceding-sibling::img)+count(following-sibling::img)=0]', + './*[count(preceding-sibling::*)+count(following-sibling::*)=0][not(contains(concat(" ", @class), " h-"))]/object[count(preceding-sibling::object)+count(following-sibling::object)=0]', ); foreach ($xpaths as $path) { diff --git a/tests/Mf2/ParseImpliedTest.php b/tests/Mf2/ParseImpliedTest.php index 48a821c..7e8a36a 100644 --- a/tests/Mf2/ParseImpliedTest.php +++ b/tests/Mf2/ParseImpliedTest.php @@ -293,6 +293,26 @@ public function testIgnoredPhotoObjectInNestedH() { $this->assertArrayNotHasKey('photo', $result['items'][0]['properties']); } + /** + * @see https://github.com/indieweb/php-mf2/issues/190 + */ + public function testIgnoredMultiChildrenWithNestedPhotoImg() { + $input = '
Max Mustermann
'; + $result = Mf2\parse($input); + + $this->assertArrayNotHasKey('photo', $result['items'][0]['properties']); + } + + /** + * @see https://github.com/indieweb/php-mf2/issues/190 + */ + public function testIgnoredMultiChildrenWithNestedPhotoObject() { + $input = '
Max Mustermann
'; + $result = Mf2\parse($input); + + $this->assertArrayNotHasKey('photo', $result['items'][0]['properties']); + } + /** * Imply properties only on explicit h-x class name root microformat element (no backcompat roots) * @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties From d000afcc63b8eb7125cfa17c1d552febb64c69f1 Mon Sep 17 00:00:00 2001 From: Gregor Morrill Date: Tue, 7 Aug 2018 10:44:30 -0700 Subject: [PATCH 2/4] Update implied photo xpaths Simplified xpath count() using traversal. Added selectors from spec as comments. Moved child h-* check into xpaths. Added check for xpath query() returning false. --- Mf2/Parser.php | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/Mf2/Parser.php b/Mf2/Parser.php index 34fc3dd..f12b9ac 100644 --- a/Mf2/Parser.php +++ b/Mf2/Parser.php @@ -1155,38 +1155,37 @@ public function parseH(\DOMElement $e, $is_backcompat = false, $has_nested_mf = */ public function parseImpliedPhoto(\DOMElement $e) { + // img.h-x[src] if ($e->tagName == 'img') { return $e->getAttribute('src'); } + // object.h-x[data] if ($e->tagName == 'object' && $e->hasAttribute('data')) { return $e->getAttribute('data'); } $xpaths = array( - './img', - './object', - './*[count(preceding-sibling::*)+count(following-sibling::*)=0][not(contains(concat(" ", @class), " h-"))]/img[count(preceding-sibling::img)+count(following-sibling::img)=0]', - './*[count(preceding-sibling::*)+count(following-sibling::*)=0][not(contains(concat(" ", @class), " h-"))]/object[count(preceding-sibling::object)+count(following-sibling::object)=0]', + // .h-x>img[src]:only-of-type:not[.h-*] + './img[not(contains(concat(" ", @class), " h-")) and count(../img) = 1]', + // .h-x>object[data]:only-of-type:not[.h-*] + './object[not(contains(concat(" ", @class), " h-")) and count(../object) = 1]', + // .h-x>:only-child:not[.h-*]>img[src]:only-of-type:not[.h-*] + './*[not(contains(concat(" ", @class), " h-")) and count(../*) = 1 and count(img) = 1]/img[not(contains(concat(" ", @class), " h-"))]', + // .h-x>:only-child:not[.h-*]>object[data]:only-of-type:not[.h-*] + './*[not(contains(concat(" ", @class), " h-")) and count(../*) = 1 and count(object) = 1]/object[not(contains(concat(" ", @class), " h-"))]', ); foreach ($xpaths as $path) { $els = $this->xpath->query($path, $e); - if ($els->length == 1) { + if ($els !== false && $els->length === 1) { $el = $els->item(0); - $hClasses = mfNamesFromElement($el, 'h-'); - - // no nested h- - if (empty($hClasses)) { - - if ($el->tagName == 'img') { - return $el->getAttribute('src'); - } else if ($el->tagName == 'object' && $el->hasAttribute('data')) { - return $el->getAttribute('data'); - } - - } // no nested h- + if ($el->tagName == 'img') { + return $el->getAttribute('src'); + } else if ($el->tagName == 'object' && $el->hasAttribute('data')) { + return $el->getAttribute('data'); + } } } From 4d0361168a7f0897da399e76d19abc7446927b62 Mon Sep 17 00:00:00 2001 From: Gregor Morrill Date: Tue, 7 Aug 2018 11:01:40 -0700 Subject: [PATCH 3/4] Add properties to xpaths and resolve URLs within parseImpliedPhoto() --- Mf2/Parser.php | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Mf2/Parser.php b/Mf2/Parser.php index f12b9ac..b8d6e16 100644 --- a/Mf2/Parser.php +++ b/Mf2/Parser.php @@ -1078,7 +1078,7 @@ public function parseH(\DOMElement $e, $is_backcompat = false, $has_nested_mf = $photo = $this->parseImpliedPhoto($e); if ($photo !== false) { - $return['photo'][] = $this->resolveUrl($photo); + $return['photo'][] = $photo; } } @@ -1167,13 +1167,13 @@ public function parseImpliedPhoto(\DOMElement $e) { $xpaths = array( // .h-x>img[src]:only-of-type:not[.h-*] - './img[not(contains(concat(" ", @class), " h-")) and count(../img) = 1]', + './img[not(contains(concat(" ", @class), " h-")) and count(../img) = 1 and @src]', // .h-x>object[data]:only-of-type:not[.h-*] - './object[not(contains(concat(" ", @class), " h-")) and count(../object) = 1]', + './object[not(contains(concat(" ", @class), " h-")) and count(../object) = 1 and @data]', // .h-x>:only-child:not[.h-*]>img[src]:only-of-type:not[.h-*] - './*[not(contains(concat(" ", @class), " h-")) and count(../*) = 1 and count(img) = 1]/img[not(contains(concat(" ", @class), " h-"))]', + './*[not(contains(concat(" ", @class), " h-")) and count(../*) = 1 and count(img) = 1]/img[not(contains(concat(" ", @class), " h-")) and @src]', // .h-x>:only-child:not[.h-*]>object[data]:only-of-type:not[.h-*] - './*[not(contains(concat(" ", @class), " h-")) and count(../*) = 1 and count(object) = 1]/object[not(contains(concat(" ", @class), " h-"))]', + './*[not(contains(concat(" ", @class), " h-")) and count(../*) = 1 and count(object) = 1]/object[not(contains(concat(" ", @class), " h-")) and @data]', ); foreach ($xpaths as $path) { @@ -1182,9 +1182,9 @@ public function parseImpliedPhoto(\DOMElement $e) { if ($els !== false && $els->length === 1) { $el = $els->item(0); if ($el->tagName == 'img') { - return $el->getAttribute('src'); - } else if ($el->tagName == 'object' && $el->hasAttribute('data')) { - return $el->getAttribute('data'); + return $this->resolveUrl($el->getAttribute('src')); + } else if ($el->tagName == 'object') { + return $this->resolveUrl($el->getAttribute('data')); } } } From 04aadf23cdfdc41778819ec9727aac9fe8aac818 Mon Sep 17 00:00:00 2001 From: Gregor Morrill Date: Tue, 7 Aug 2018 11:44:59 -0700 Subject: [PATCH 4/4] Always resolveUrl before returning implied photo --- Mf2/Parser.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Mf2/Parser.php b/Mf2/Parser.php index b8d6e16..c9ac910 100644 --- a/Mf2/Parser.php +++ b/Mf2/Parser.php @@ -1157,12 +1157,12 @@ public function parseImpliedPhoto(\DOMElement $e) { // img.h-x[src] if ($e->tagName == 'img') { - return $e->getAttribute('src'); + return $this->resolveUrl($e->getAttribute('src')); } // object.h-x[data] if ($e->tagName == 'object' && $e->hasAttribute('data')) { - return $e->getAttribute('data'); + return $this->resolveUrl($e->getAttribute('data')); } $xpaths = array(