HTML Reader : Add basic support for CSS Style Tag

This commit is contained in:
Progi1984 2022-11-16 21:55:28 +01:00
parent 8521612b39
commit ab3a2c0418
4 changed files with 229 additions and 38 deletions

View File

@ -0,0 +1,80 @@
<?php
/**
* This file is part of PHPWord - A pure PHP library for reading and writing
* word processing documents.
*
* PHPWord is free software distributed under the terms of the GNU Lesser
* General Public License version 3 as published by the Free Software Foundation.
*
* For the full copyright and license information, please read the LICENSE
* file that was distributed with this source code. For the full list of
* contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
*
* @see https://github.com/PHPOffice/PHPWord
*
* @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3
*/
declare(strict_types=1);
namespace PhpOffice\PhpWord\Shared;
/**
 * Minimal CSS reader: turns the text of a stylesheet into a map of
 * selector => [property => value].
 *
 * Only flat "selector { property: value; ... }" rules are recognised; the
 * whole selector text is used verbatim (after sanitizing) as the lookup key.
 */
class Css
{
    /**
     * Raw stylesheet text as passed to the constructor.
     *
     * @var string
     */
    private $cssContent;

    /**
     * Parsed declarations, indexed by selector and then by property name.
     *
     * @var array<string, array<string, string>>
     */
    private $styles = [];

    /**
     * @param string $cssContent Stylesheet text to parse; nothing is parsed until process() is called
     */
    public function __construct(string $cssContent)
    {
        $this->cssContent = $cssContent;
    }

    /**
     * Parse the stylesheet text and add its declarations to the internal style map.
     *
     * Declarations that have no "property: value" shape (no colon, or an empty
     * property name) are ignored instead of aborting the whole parse.
     */
    public function process(): void
    {
        // Line breaks are dropped so that a rule spread over several lines matches the pattern below.
        $cssContent = str_replace(["\r", "\n"], '', $this->cssContent);
        preg_match_all('/(.+?)\s?\{\s?(.+?)\s?\}/', $cssContent, $cssExtracted);

        // Expect exactly: full matches, selectors, rule bodies.
        if (count($cssExtracted) !== 3) {
            return;
        }
        // Expect one rule body per selector.
        if (count($cssExtracted[1]) !== count($cssExtracted[2])) {
            return;
        }

        foreach ($cssExtracted[1] as $index => $rawSelector) {
            $selector = $this->sanitize($rawSelector);

            foreach (explode(';', $cssExtracted[2][$index]) as $declaration) {
                // Split on the first colon only, so values such as "url(http://...)" stay intact.
                $parts = explode(':', $declaration, 2);
                if (count($parts) !== 2) {
                    // Blank fragment (e.g. after a trailing ";") or a declaration without a value part.
                    continue;
                }

                $property = $this->sanitize($parts[0]);
                if ($property === '') {
                    continue;
                }

                $this->styles[$selector][$property] = $this->sanitize($parts[1]);
            }
        }
    }

    /**
     * Return every parsed rule.
     *
     * @return array<string, array<string, string>> Map of selector => [property => value]
     */
    public function getStyles(): array
    {
        return $this->styles;
    }

    /**
     * Return the declarations stored for one selector.
     *
     * @param string $selector Selector text, e.g. ".pStyle"; it is sanitized the same way as parsed selectors
     *
     * @return array<string, string> Map of property => value, or an empty array when the selector is unknown
     */
    public function getStyle(string $selector): array
    {
        $selector = $this->sanitize($selector);

        return $this->styles[$selector] ?? [];
    }

    /**
     * Normalise a selector, property name or value: trim surrounding
     * whitespace, then escape it with addslashes().
     */
    private function sanitize(string $value): string
    {
        return addslashes(trim($value));
    }
}

View File

@ -43,6 +43,11 @@ class Html
protected static $options;
/**
* Stylesheet parsed from a <style> element of the HTML being read.
*
* It is only assigned when such an element is encountered, so readers of
* this property check it for emptiness before looking up id/class styles.
*
* @var Css
*/
protected static $css;
/**
* Add HTML parts.
*
@ -149,6 +154,16 @@ class Html
}
}
$attributeIdentifier = $attributes->getNamedItem('id');
if ($attributeIdentifier && self::$css) {
$styles = self::parseStyleDeclarations(self::$css->getStyle('#' . $attributeIdentifier->value), $styles);
}
$attributeClass = $attributes->getNamedItem('class');
if ($attributeClass && self::$css) {
$styles = self::parseStyleDeclarations(self::$css->getStyle('.' . $attributeClass->value), $styles);
}
$attributeStyle = $attributes->getNamedItem('style');
if ($attributeStyle) {
$styles = self::parseStyle($attributeStyle, $styles);
@ -168,6 +183,13 @@ class Html
*/
protected static function parseNode($node, $element, $styles = [], $data = []): void
{
if ($node->nodeName == 'style') {
self::$css = new Css($node->textContent);
self::$css->process();
return;
}
// Populate styles array
$styleTypes = ['font', 'paragraph', 'list', 'table', 'row', 'cell'];
foreach ($styleTypes as $styleType) {
@ -635,13 +657,21 @@ class Html
{
$properties = explode(';', trim($attribute->value, " \t\n\r\0\x0B;"));
$selectors = [];
foreach ($properties as $property) {
[$cKey, $cValue] = array_pad(explode(':', $property, 2), 2, null);
$cValue = trim($cValue ?? '');
$cKey = strtolower(trim($cKey));
switch ($cKey) {
$selectors[strtolower(trim($cKey))] = trim($cValue ?? '');
}
return self::parseStyleDeclarations($selectors, $styles);
}
protected static function parseStyleDeclarations(array $selectors, array $styles)
{
foreach ($selectors as $property => $value) {
switch ($property) {
case 'text-decoration':
switch ($cValue) {
switch ($value) {
case 'underline':
$styles['underline'] = 'single';
@ -654,44 +684,44 @@ class Html
break;
case 'text-align':
$styles['alignment'] = self::mapAlign($cValue);
$styles['alignment'] = self::mapAlign($value);
break;
case 'display':
$styles['hidden'] = $cValue === 'none' || $cValue === 'hidden';
$styles['hidden'] = $value === 'none' || $value === 'hidden';
break;
case 'direction':
$styles['rtl'] = $cValue === 'rtl';
$styles['rtl'] = $value === 'rtl';
break;
case 'font-size':
$styles['size'] = Converter::cssToPoint($cValue);
$styles['size'] = Converter::cssToPoint($value);
break;
case 'font-family':
$cValue = array_map('trim', explode(',', $cValue));
$styles['name'] = ucwords($cValue[0]);
$value = array_map('trim', explode(',', $value));
$styles['name'] = ucwords($value[0]);
break;
case 'color':
$styles['color'] = trim($cValue, '#');
$styles['color'] = trim($value, '#');
break;
case 'background-color':
$styles['bgColor'] = trim($cValue, '#');
$styles['bgColor'] = trim($value, '#');
break;
case 'line-height':
$matches = [];
if ($cValue === 'normal') {
if ($value === 'normal') {
$spacingLineRule = \PhpOffice\PhpWord\SimpleType\LineSpacingRule::AUTO;
$spacing = 0;
} elseif (preg_match('/([0-9]+\.?[0-9]*[a-z]+)/', $cValue, $matches)) {
} elseif (preg_match('/([0-9]+\.?[0-9]*[a-z]+)/', $value, $matches)) {
//matches number with a unit, e.g. 12px, 15pt, 20mm, ...
$spacingLineRule = \PhpOffice\PhpWord\SimpleType\LineSpacingRule::EXACT;
$spacing = Converter::cssToTwip($matches[1]);
} elseif (preg_match('/([0-9]+)%/', $cValue, $matches)) {
} elseif (preg_match('/([0-9]+)%/', $value, $matches)) {
//matches percentages
$spacingLineRule = \PhpOffice\PhpWord\SimpleType\LineSpacingRule::AUTO;
//we are subtracting 1 line height because the Spacing writer is adding one line
@ -700,23 +730,23 @@ class Html
//any other, which is a multiplier. E.g. 1.2
$spacingLineRule = \PhpOffice\PhpWord\SimpleType\LineSpacingRule::AUTO;
//we are subtracting 1 line height because the Spacing writer is adding one line
$spacing = ($cValue * Paragraph::LINE_HEIGHT) - Paragraph::LINE_HEIGHT;
$spacing = ($value * Paragraph::LINE_HEIGHT) - Paragraph::LINE_HEIGHT;
}
$styles['spacingLineRule'] = $spacingLineRule;
$styles['line-spacing'] = $spacing;
break;
case 'letter-spacing':
$styles['letter-spacing'] = Converter::cssToTwip($cValue);
$styles['letter-spacing'] = Converter::cssToTwip($value);
break;
case 'text-indent':
$styles['indentation']['firstLine'] = Converter::cssToTwip($cValue);
$styles['indentation']['firstLine'] = Converter::cssToTwip($value);
break;
case 'font-weight':
$tValue = false;
if (preg_match('#bold#', $cValue)) {
if (preg_match('#bold#', $value)) {
$tValue = true; // also match bolder
}
$styles['bold'] = $tValue;
@ -724,48 +754,48 @@ class Html
break;
case 'font-style':
$tValue = false;
if (preg_match('#(?:italic|oblique)#', $cValue)) {
if (preg_match('#(?:italic|oblique)#', $value)) {
$tValue = true;
}
$styles['italic'] = $tValue;
break;
case 'margin':
$cValue = Converter::cssToTwip($cValue);
$styles['spaceBefore'] = $cValue;
$styles['spaceAfter'] = $cValue;
$value = Converter::cssToTwip($value);
$styles['spaceBefore'] = $value;
$styles['spaceAfter'] = $value;
break;
case 'margin-top':
// BC change: up to ver. 0.17.0 incorrectly converted to points - Converter::cssToPoint($cValue)
$styles['spaceBefore'] = Converter::cssToTwip($cValue);
// BC change: up to ver. 0.17.0 incorrectly converted to points - Converter::cssToPoint($value)
$styles['spaceBefore'] = Converter::cssToTwip($value);
break;
case 'margin-bottom':
// BC change: up to ver. 0.17.0 incorrectly converted to points - Converter::cssToPoint($cValue)
$styles['spaceAfter'] = Converter::cssToTwip($cValue);
// BC change: up to ver. 0.17.0 incorrectly converted to points - Converter::cssToPoint($value)
$styles['spaceAfter'] = Converter::cssToTwip($value);
break;
case 'border-color':
self::mapBorderColor($styles, $cValue);
self::mapBorderColor($styles, $value);
break;
case 'border-width':
$styles['borderSize'] = Converter::cssToPoint($cValue);
$styles['borderSize'] = Converter::cssToPoint($value);
break;
case 'border-style':
$styles['borderStyle'] = self::mapBorderStyle($cValue);
$styles['borderStyle'] = self::mapBorderStyle($value);
break;
case 'width':
if (preg_match('/([0-9]+[a-z]+)/', $cValue, $matches)) {
if (preg_match('/([0-9]+[a-z]+)/', $value, $matches)) {
$styles['width'] = Converter::cssToTwip($matches[1]);
$styles['unit'] = \PhpOffice\PhpWord\SimpleType\TblWidth::TWIP;
} elseif (preg_match('/([0-9]+)%/', $cValue, $matches)) {
} elseif (preg_match('/([0-9]+)%/', $value, $matches)) {
$styles['width'] = $matches[1] * 50;
$styles['unit'] = \PhpOffice\PhpWord\SimpleType\TblWidth::PERCENT;
} elseif (preg_match('/([0-9]+)/', $cValue, $matches)) {
} elseif (preg_match('/([0-9]+)/', $value, $matches)) {
$styles['width'] = $matches[1];
$styles['unit'] = \PhpOffice\PhpWord\SimpleType\TblWidth::AUTO;
}
@ -778,9 +808,9 @@ class Html
case 'border-left':
// must have exact order [width color style], e.g. "1px #0011CC solid" or "2pt green solid"
// Word does not accept shortened hex colors e.g. #CCC, only full e.g. #CCCCCC
if (preg_match('/([0-9]+[^0-9]*)\s+(\#[a-fA-F0-9]+|[a-zA-Z]+)\s+([a-z]+)/', $cValue, $matches)) {
if (false !== strpos($cKey, '-')) {
$tmp = explode('-', $cKey);
if (preg_match('/([0-9]+[^0-9]*)\s+(\#[a-fA-F0-9]+|[a-zA-Z]+)\s+([a-z]+)/', $value, $matches)) {
if (false !== strpos($property, '-')) {
$tmp = explode('-', $property);
$which = $tmp[1];
$which = ucfirst($which); // e.g. bottom -> Bottom
} else {
@ -803,13 +833,13 @@ class Html
break;
case 'vertical-align':
// https://developer.mozilla.org/en-US/docs/Web/CSS/vertical-align
if (preg_match('#(?:top|bottom|middle|sub|baseline)#i', $cValue, $matches)) {
if (preg_match('#(?:top|bottom|middle|sub|baseline)#i', $value, $matches)) {
$styles['valign'] = self::mapAlignVertical($matches[0]);
}
break;
case 'page-break-after':
if ($cValue == 'always') {
if ($value == 'always') {
$styles['isPageBreak'] = true;
}

View File

@ -0,0 +1,54 @@
<?php
/**
* This file is part of PHPWord - A pure PHP library for reading and writing
* word processing documents.
*
* PHPWord is free software distributed under the terms of the GNU Lesser
* General Public License version 3 as published by the Free Software Foundation.
*
* For the full copyright and license information, please read the LICENSE
* file that was distributed with this source code. For the full list of
* contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
*
* @see https://github.com/PHPOffice/PHPWord
*
* @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3
*/
namespace PhpOffice\PhpWordTests\Shared;
use PhpOffice\PhpWord\Shared\Css;
use PHPUnit\Framework\TestCase;
/**
* Test class for PhpOffice\PhpWord\Shared\Css.
*/
class CssTest extends TestCase
{
    /**
     * Processing an empty stylesheet leaves the style map empty.
     */
    public function testEmptyCss(): void
    {
        $parser = new Css('');
        $parser->process();

        $parsed = $parser->getStyles();

        self::assertEquals([], $parsed);
    }

    /**
     * A single class rule is exposed both through the full style map and
     * through a lookup by its selector.
     */
    public function testBasicCss(): void
    {
        $parser = new Css('.pStyle {
font-size:15px;
}');
        $parser->process();

        $expectedDeclarations = ['font-size' => '15px'];

        self::assertEquals(['.pStyle' => $expectedDeclarations], $parser->getStyles());
        self::assertEquals($expectedDeclarations, $parser->getStyle('.pStyle'));
    }
}

View File

@ -107,6 +107,33 @@ class HtmlTest extends AbstractWebServerEmbeddedTest
self::assertEquals('text with entities <my text>', $doc->getElement('/w:document/w:body/w:p[1]/w:r/w:t')->nodeValue);
}
/**
 * A class rule declared in a <style> element is applied to the paragraph
 * using that class: the run is expected to carry a w:sz of 22.5 for the
 * 15px font size.
 */
public function testParseStyle(): void
{
    $markup = '<style type="text/css">
.pStyle {
font-size:15px;
}
.tableStyle {
width:100%;
background-color:red;
}
</style>
<p class="pStyle">Calculator</p>';

    $document = new PhpWord();
    Html::addHtml($document->addSection(), $markup);
    $docx = TestHelperDOCX::getDocument($document, 'Word2007');

    $textPath = '/w:document/w:body/w:p[2]/w:r/w:t';
    $sizePath = '/w:document/w:body/w:p[2]/w:r/w:rPr/w:sz';

    // The paragraph, its run and its text node must all be present.
    foreach (['/w:document/w:body/w:p[2]', '/w:document/w:body/w:p[2]/w:r', $textPath] as $path) {
        self::assertTrue($docx->elementExists($path));
    }
    self::assertEquals('Calculator', $docx->getElement($textPath)->nodeValue);

    // The run properties must contain the font size taken from the stylesheet.
    foreach (['/w:document/w:body/w:p[2]/w:r/w:rPr', $sizePath] as $path) {
        self::assertTrue($docx->elementExists($path));
    }
    self::assertEquals('22.5', $docx->getElementAttribute($sizePath, 'w:val'));
}
/**
* Test underline.
*/