Merge pull request #1032 from kaystrobach/patch-1
[BUGFIX] Allow loading unclean HTML without throwing an exception
This commit is contained in:
commit
8d92409bfe
@@ -71,11 +71,14 @@ class Html
|
|||||||
$html = '<body>' . $html . '</body>';
|
$html = '<body>' . $html . '</body>';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Need to remove whitespace between tags, as loadHTML seems to take it into account
|
||||||
|
$html = preg_replace('/(\>)\s*(\<)/m', '$1$2', $html);
|
||||||
|
|
||||||
// Load DOM
|
// Load DOM
|
||||||
libxml_disable_entity_loader(true);
|
libxml_disable_entity_loader(true);
|
||||||
$dom = new \DOMDocument();
|
$dom = new \DOMDocument();
|
||||||
$dom->preserveWhiteSpace = $preserveWhiteSpace;
|
$dom->preserveWhiteSpace = $preserveWhiteSpace;
|
||||||
$dom->loadXML($html);
|
$dom->loadHTML($html, LIBXML_NOWARNING);
|
||||||
self::$xpath = new \DOMXPath($dom);
|
self::$xpath = new \DOMXPath($dom);
|
||||||
$node = $dom->getElementsByTagName('body');
|
$node = $dom->getElementsByTagName('body');
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user