From 33f1619d0fe5a3b67e098dda45c3230d8a245ae2 Mon Sep 17 00:00:00 2001 From: Kay Strobach Date: Wed, 29 Mar 2017 10:08:11 +0200 Subject: [PATCH 1/3] [BUGFIX] allow to load unclean html without exception fixes #754 --- src/PhpWord/Shared/Html.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/PhpWord/Shared/Html.php b/src/PhpWord/Shared/Html.php index d03d0adf..c551fd5b 100644 --- a/src/PhpWord/Shared/Html.php +++ b/src/PhpWord/Shared/Html.php @@ -58,9 +58,8 @@ class Html // Load DOM $dom = new \DOMDocument(); $dom->preserveWhiteSpace = true; - $dom->loadXML($html); + $dom->loadHTML($html, LIBXML_NOWARNING); $node = $dom->getElementsByTagName('body'); - self::parseNode($node->item(0), $element); } From 5bc9250cccf45492a2003303c66ae762689e2e7a Mon Sep 17 00:00:00 2001 From: troosan Date: Wed, 2 Jan 2019 09:28:51 +0100 Subject: [PATCH 2/3] Fix unit tests --- src/PhpWord/Shared/Html.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/PhpWord/Shared/Html.php b/src/PhpWord/Shared/Html.php index a92d9047..ff13c35d 100644 --- a/src/PhpWord/Shared/Html.php +++ b/src/PhpWord/Shared/Html.php @@ -70,6 +70,9 @@ class Html $html = '' . $html . ''; } + //need to remove whitespaces between tags, as loadHTML seems to take those into account + $html = preg_replace('/(\>)\s*(\<)/m', '$1$2', $html); + // Load DOM libxml_disable_entity_loader(true); $dom = new \DOMDocument(); @@ -77,6 +80,7 @@ class Html $dom->loadHTML($html, LIBXML_NOWARNING); self::$xpath = new \DOMXPath($dom); $node = $dom->getElementsByTagName('body'); + self::parseNode($node->item(0), $element); } From 06a92710f6990a17e48e73c673480b7988fb2191 Mon Sep 17 00:00:00 2001 From: troosan Date: Wed, 2 Jan 2019 14:06:32 +0100 Subject: [PATCH 3/3] remove whitespaces --- src/PhpWord/Shared/Html.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PhpWord/Shared/Html.php b/src/PhpWord/Shared/Html.php index ff13c35d..df2a4990 100644 --- a/src/PhpWord/Shared/Html.php +++ b/src/PhpWord/Shared/Html.php @@ -77,7 +77,7 @@ class Html libxml_disable_entity_loader(true); $dom = new \DOMDocument(); $dom->preserveWhiteSpace = $preserveWhiteSpace; - $dom->loadHTML($html, LIBXML_NOWARNING); + $dom->loadHTML($html, LIBXML_NOWARNING); self::$xpath = new \DOMXPath($dom); $node = $dom->getElementsByTagName('body');