#158: Convert UTF8 text to Unicode before writing RTF (support UTF8 in RTF)

This commit is contained in:
Ivan Lanin 2014-05-12 22:55:06 +07:00
parent 55e715b5b1
commit e589961e68
19 changed files with 144 additions and 65 deletions

View File

@ -4,7 +4,7 @@ This is the changelog between releases of PHPWord. Releases are listed in revers
## 0.11.0 - Not yet released ## 0.11.0 - Not yet released
This release changed PHPWord license from LGPL 2.1 to LGPL 3. This release marked the change of PHPWord license from LGPL 2.1 to LGPL 3; new relative and absolute positioning for image; new `TextBox` and `ListItemRun` element; refactorings of writer classes into parts, elements, and styles; and ability to add elements to PHPWord object via HTML.
### Features ### Features
@ -15,6 +15,7 @@ This release changed PHPWord license from LGPL 2.1 to LGPL 3.
- HTML: Ability to add elements to PHPWord object via html - @basjan GH-231 - HTML: Ability to add elements to PHPWord object via html - @basjan GH-231
- ListItemRun: New element that can add a list item with inline formatting like a textrun - @basjan GH-235 - ListItemRun: New element that can add a list item with inline formatting like a textrun - @basjan GH-235
- Table: Ability to add table inside a cell (nested table) - @ivanlanin GH-149 - Table: Ability to add table inside a cell (nested table) - @ivanlanin GH-149
- RTF: UTF8 support for RTF: Internal UTF8 text is converted to Unicode before writing - @ivanlanin GH-158
### Bugfixes ### Bugfixes

2
composer.lock generated
View File

@ -3,7 +3,7 @@
"This file locks the dependencies of your project to a known state", "This file locks the dependencies of your project to a known state",
"Read more about it at http://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file" "Read more about it at http://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file"
], ],
"hash": "6daefa91649add98af3850b0a3f13415", "hash": "77631436badcf4f49d673498ab6f1916",
"packages": [ "packages": [
], ],

View File

@ -85,6 +85,54 @@ class String
return $value; return $value;
} }
/**
 * Returns RTF unicode entities from UTF8 text
 *
 * ASCII bytes (< 128) are copied through unchanged. Every multi-byte UTF8
 * sequence (2, 3 or 4 bytes) is decoded to its code point and written as
 * `\uc0{\uN}`. Code points above U+FFFF do not fit in a single 16-bit unit,
 * so they are written as a UTF-16 surrogate pair (two consecutive entities).
 * The byte order mark (U+FEFF, 65279) is dropped.
 *
 * The input is not validated: malformed UTF8 is decoded byte-by-byte with
 * the same arithmetic and may yield meaningless code points.
 *
 * NOTE(review): the RTF specification describes the \uN parameter as a
 * signed 16-bit number; values above 32767 are written unsigned here, which
 * keeps the output format this method has always produced — confirm that
 * the targeted RTF readers accept it.
 *
 * @param string $text UTF8 text
 * @return string Unicode text
 * @since 0.11.0
 * @link http://www.randomchaos.com/documents/?source=php_and_unicode
 */
public static function toUnicode($text)
{
    $unicode = array();
    $values = array(); // Bytes collected so far for the current multi-byte sequence
    $lookingFor = 1;   // Total number of bytes expected for the current sequence
    $length = strlen($text);

    // Gets unicode for each character
    for ($i = 0; $i < $length; $i++) {
        $thisValue = ord($text[$i]);
        if ($thisValue < 128) {
            $unicode[] = $thisValue;
            continue;
        }

        // The lead byte decides the sequence length: 110xxxxx = 2, 1110xxxx = 3, 11110xxx = 4
        if (count($values) == 0) {
            if ($thisValue < 224) {
                $lookingFor = 2;
            } elseif ($thisValue < 240) {
                $lookingFor = 3;
            } else {
                $lookingFor = 4;
            }
        }
        $values[] = $thisValue;

        if (count($values) == $lookingFor) {
            // Strip the marker bits of each byte (modulo) and shift the payload into place
            if ($lookingFor == 4) {
                $number = (($values[0] % 8) * 262144) + (($values[1] % 64) * 4096)
                    + (($values[2] % 64) * 64) + ($values[3] % 64);
            } elseif ($lookingFor == 3) {
                $number = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
            } else {
                $number = (($values[0] % 32) * 64) + ($values[1] % 64);
            }
            $unicode[] = $number;
            $values = array();
            $lookingFor = 1;
        }
    }

    // Converts text with utf8 characters into rtf utf8 entities preserving ascii
    $entities = '';
    foreach ($unicode as $value) {
        if ($value == 65279) {
            // Skip the byte order mark
            continue;
        }
        if ($value > 65535) {
            // Outside the BMP: split into high (0xD800..) and low (0xDC00..) surrogates
            $offset = $value - 65536;
            $entities .= '\uc0{\u' . (55296 + ($offset >> 10)) . '}';
            $entities .= '\uc0{\u' . (56320 + ($offset & 1023)) . '}';
        } elseif ($value > 127) {
            $entities .= '\uc0{\u' . $value . '}';
        } else {
            $entities .= chr($value);
        }
    }

    return $entities;
}
/** /**
* Return name without underscore for < 0.10.0 variable name compatibility * Return name without underscore for < 0.10.0 variable name compatibility
* *

View File

@ -25,8 +25,20 @@ use PhpOffice\PhpWord\Shared\String;
*/ */
class Paragraph extends AbstractStyle class Paragraph extends AbstractStyle
{ {
/**
* @const int One line height equals 240 twip
*/
const LINE_HEIGHT = 240; const LINE_HEIGHT = 240;
/**
* @const string Alignment http://www.schemacentral.com/sc/ooxml/t-w_ST_Jc.html
*/
const ALIGN_LEFT = 'left'; // Align left
const ALIGN_RIGHT = 'right'; // Align right
const ALIGN_CENTER = 'center'; // Align center
const ALIGN_BOTH = 'both'; // Align both
const ALIGN_JUSTIFY = 'justify'; // Alias for align both
/** /**
* Aliases * Aliases
* *
@ -147,10 +159,11 @@ class Paragraph extends AbstractStyle
*/ */
public function setAlign($value = null) public function setAlign($value = null)
{ {
if (strtolower($value) == 'justify') { if (strtolower($value) == self::ALIGN_JUSTIFY) {
$value = 'both'; $value = self::ALIGN_BOTH;
} }
$this->align = $value; $enum = array(self::ALIGN_LEFT, self::ALIGN_RIGHT, self::ALIGN_CENTER, self::ALIGN_BOTH, self::ALIGN_JUSTIFY);
$this->align = $this->setEnumVal($value, $enum, $this->align);
return $this; return $this;
} }

View File

@ -44,9 +44,7 @@ class Container extends \PhpOffice\PhpWord\Writer\HTML\Element\Container
$writerClass = str_replace('\\Element', '\\Writer\\RTF\\Element', get_class($element)); $writerClass = str_replace('\\Element', '\\Writer\\RTF\\Element', get_class($element));
if (class_exists($writerClass)) { if (class_exists($writerClass)) {
$writer = new $writerClass($this->parentWriter, $element, $withoutP); $writer = new $writerClass($this->parentWriter, $element, $withoutP);
$content .= '{';
$content .= $writer->write(); $content .= $writer->write();
$content .= '}' . PHP_EOL;
} }
} }

View File

@ -18,6 +18,7 @@
namespace PhpOffice\PhpWord\Writer\RTF\Element; namespace PhpOffice\PhpWord\Writer\RTF\Element;
use PhpOffice\PhpWord\Element\Text as TextElement; use PhpOffice\PhpWord\Element\Text as TextElement;
use PhpOffice\PhpWord\Shared\String;
use PhpOffice\PhpWord\Style; use PhpOffice\PhpWord\Style;
use PhpOffice\PhpWord\Style\Font as FontStyle; use PhpOffice\PhpWord\Style\Font as FontStyle;
use PhpOffice\PhpWord\Writer\RTF\Style\Font as FontStyleWriter; use PhpOffice\PhpWord\Writer\RTF\Style\Font as FontStyleWriter;
@ -46,12 +47,17 @@ class Text extends AbstractElement
$content = ''; $content = '';
$content .= $this->writeParagraphStyle($this->element); $content .= $this->writeParagraphStyle($this->element);
$content .= $this->writeFontStyleBegin($fontStyle); $content .= '{';
if ($parentWriter->getLastParagraphStyle() != '' || $fontStyle) { $content .= $this->writeFontStyle($fontStyle);
if ($fontStyle || $parentWriter->getLastParagraphStyle() != '') {
$content .= ' '; $content .= ' ';
} }
$content .= $this->element->getText(); $content .= String::toUnicode($this->element->getText());
$content .= $this->writeFontStyleEnd($fontStyle); $content .= '}';
// Commented out while testing an enclosing RTF group {} as a replacement for explicit closing control words
// @since 0.11.0
// $content .= $this->writeFontStyleClosing($fontStyle);
if (!$this->withoutP) { if (!$this->withoutP) {
$content .= '\par' . PHP_EOL; $content .= '\par' . PHP_EOL;
@ -80,9 +86,10 @@ class Text extends AbstractElement
// Write style when applicable // Write style when applicable
if ($paragraphStyle && !$this->withoutP) { if ($paragraphStyle && !$this->withoutP) {
if ($parentWriter->getLastParagraphStyle() != $element->getParagraphStyle()) { if ($parentWriter->getLastParagraphStyle() != $element->getParagraphStyle()) {
$parentWriter->setLastParagraphStyle($element->getParagraphStyle());
$styleWriter = new ParagraphStyleWriter($paragraphStyle); $styleWriter = new ParagraphStyleWriter($paragraphStyle);
$content = $styleWriter->write(); $content = $styleWriter->write();
$parentWriter->setLastParagraphStyle($element->getParagraphStyle());
} else { } else {
$parentWriter->setLastParagraphStyle(); $parentWriter->setLastParagraphStyle();
} }
@ -99,7 +106,7 @@ class Text extends AbstractElement
* @param mixed $style * @param mixed $style
* @return string * @return string
*/ */
private function writeFontStyleBegin($style) private function writeFontStyle($style)
{ {
if (!$style instanceof FontStyle) { if (!$style instanceof FontStyle) {
return ''; return '';
@ -135,14 +142,14 @@ class Text extends AbstractElement
* @param \PhpOffice\PhpWord\Style\Font $style * @param \PhpOffice\PhpWord\Style\Font $style
* @return string * @return string
*/ */
private function writeFontStyleEnd($style) private function writeFontStyleClosing($style)
{ {
if (!$style instanceof FontStyle) { if (!$style instanceof FontStyle) {
return ''; return '';
} }
$styleWriter = new FontStyleWriter($style); $styleWriter = new FontStyleWriter($style);
$content = $styleWriter->writeEnd(); $content = $styleWriter->writeClosing();
return $content; return $content;
} }

View File

@ -35,10 +35,10 @@ class TextRun extends AbstractElement
{ {
$content = ''; $content = '';
$content .= '\pard\nowidctlpar' . PHP_EOL; $content .= '{\pard\nowidctlpar';
$writer = new Container($this->parentWriter, $this->element); $writer = new Container($this->parentWriter, $this->element);
$content .= $writer->write(); $content .= $writer->write();
$content .= '\par' . PHP_EOL; $content .= '\par}' . PHP_EOL;
return $content; return $content;
} }

View File

@ -37,8 +37,8 @@ class Title extends AbstractElement
$content = ''; $content = '';
$content .= '\pard\nowidctlpar' . PHP_EOL; $content .= '\pard\nowidctlpar';
$content .= $this->element->getText(); $content .= String::toUnicode($this->element->getText());
$content .= '\par' . PHP_EOL; $content .= '\par' . PHP_EOL;
return $content; return $content;

View File

@ -18,6 +18,7 @@
namespace PhpOffice\PhpWord\Writer\RTF\Style; namespace PhpOffice\PhpWord\Writer\RTF\Style;
use PhpOffice\PhpWord\PhpWord; use PhpOffice\PhpWord\PhpWord;
use PhpOffice\PhpWord\Style\Font as FontStyle;
/** /**
* RTF font style writer * RTF font style writer
@ -51,12 +52,17 @@ class Font extends AbstractStyle
$content = ''; $content = '';
$content .= '\cf' . $this->colorIndex; $content .= '\cf' . $this->colorIndex;
$content .= '\f' . $this->nameIndex; $content .= '\f' . $this->nameIndex;
$content .= $this->getValueIf($style->isBold(), '\b');
$content .= $this->getValueIf($style->isItalic(), '\i');
$size = $style->getSize(); $size = $style->getSize();
$content .= $this->getValueIf(is_numeric($size), '\fs' . ($size * 2)); $content .= $this->getValueIf(is_numeric($size), '\fs' . ($size * 2));
$content .= $this->getValueIf($style->isBold(), '\b');
$content .= $this->getValueIf($style->isItalic(), '\i');
$content .= $this->getValueIf($style->getUnderline() != FontStyle::UNDERLINE_NONE, '\ul');
$content .= $this->getValueIf($style->isStrikethrough(), '\strike');
$content .= $this->getValueIf($style->isSuperScript(), '\super');
$content .= $this->getValueIf($style->isSubScript(), '\sub');
return $content; return $content;
} }
@ -65,7 +71,7 @@ class Font extends AbstractStyle
* *
* @return string * @return string
*/ */
public function writeEnd() public function writeClosing()
{ {
$style = $this->getStyle(); $style = $this->getStyle();
if (!$style instanceof \PhpOffice\PhpWord\Style\Font) { if (!$style instanceof \PhpOffice\PhpWord\Style\Font) {
@ -75,12 +81,17 @@ class Font extends AbstractStyle
$content = ''; $content = '';
$content .= '\cf0'; $content .= '\cf0';
$content .= '\f0'; $content .= '\f0';
$content .= $this->getValueIf($style->isBold(), '\b0');
$content .= $this->getValueIf($style->isItalic(), '\i0');
$size = $style->getSize(); $size = $style->getSize();
$content .= $this->getValueIf(is_numeric($size), '\fs' . (PhpWord::DEFAULT_FONT_SIZE * 2)); $content .= $this->getValueIf(is_numeric($size), '\fs' . (PhpWord::DEFAULT_FONT_SIZE * 2));
$content .= $this->getValueIf($style->isBold(), '\b0');
$content .= $this->getValueIf($style->isItalic(), '\i0');
$content .= $this->getValueIf($style->getUnderline() != FontStyle::UNDERLINE_NONE, '\ul0');
$content .= $this->getValueIf($style->isStrikethrough(), '\strike0');
$content .= $this->getValueIf($style->isSuperScript(), '\super0');
$content .= $this->getValueIf($style->isSubScript(), '\sub0');
return $content; return $content;
} }

View File

@ -17,6 +17,8 @@
namespace PhpOffice\PhpWord\Writer\RTF\Style; namespace PhpOffice\PhpWord\Writer\RTF\Style;
use PhpOffice\PhpWord\Style\Paragraph as ParagraphStyle;
/** /**
* RTF paragraph style writer * RTF paragraph style writer
* *
@ -36,15 +38,23 @@ class Paragraph extends AbstractStyle
return; return;
} }
$content = '\pard\nowidctlpar'; $alignments = array(
ParagraphStyle::ALIGN_LEFT => '\ql',
ParagraphStyle::ALIGN_RIGHT => '\qr',
ParagraphStyle::ALIGN_CENTER => '\qc',
ParagraphStyle::ALIGN_BOTH => '\qj',
);
// Alignment
$align = $style->getAlign(); $align = $style->getAlign();
$content .= $this->getValueIf(!is_null($align) && $align == 'center', '\qc');
// Spacing
$spaceAfter = $style->getSpaceAfter(); $spaceAfter = $style->getSpaceAfter();
$content .= $this->getValueIf(!is_null($spaceAfter), '\sa' . $spaceAfter); $spaceBefore = $style->getSpaceBefore();
$content = '\pard\nowidctlpar';
if (isset($alignments[$align])) {
$content .= $alignments[$align];
}
$content .= $this->getValueIf($spaceBefore !== null, '\sb' . $spaceBefore);
$content .= $this->getValueIf($spaceAfter !== null, '\sa' . $spaceAfter);
return $content; return $content;
} }

View File

@ -19,6 +19,7 @@ namespace PhpOffice\PhpWord\Writer\Word2007\Element;
use PhpOffice\PhpWord\Element\AbstractElement as Element; use PhpOffice\PhpWord\Element\AbstractElement as Element;
use PhpOffice\PhpWord\Exception\Exception; use PhpOffice\PhpWord\Exception\Exception;
use PhpOffice\PhpWord\Shared\String;
use PhpOffice\PhpWord\Shared\XMLWriter; use PhpOffice\PhpWord\Shared\XMLWriter;
/** /**
@ -77,7 +78,7 @@ abstract class AbstractElement
} }
/** /**
* Get Element * Get element
* *
* @return \PhpOffice\PhpWord\Element\AbstractElement * @return \PhpOffice\PhpWord\Element\AbstractElement
*/ */
@ -89,4 +90,15 @@ abstract class AbstractElement
throw new Exception('No element assigned.'); throw new Exception('No element assigned.');
} }
} }
/**
 * Prepare text for writing into the document XML
 *
 * The text is first escaped with htmlspecialchars() and the escaped result
 * is then passed through String::controlCharacterPHP2OOXML().
 *
 * @param string $text Raw text
 * @return string Converted text
 */
protected function getText($text)
{
    $escaped = htmlspecialchars($text);

    return String::controlCharacterPHP2OOXML($escaped);
}
} }

View File

@ -17,8 +17,6 @@
namespace PhpOffice\PhpWord\Writer\Word2007\Element; namespace PhpOffice\PhpWord\Writer\Word2007\Element;
use PhpOffice\PhpWord\Shared\String;
/** /**
* CheckBox element writer * CheckBox element writer
* *
@ -37,11 +35,6 @@ class CheckBox extends Text
return; return;
} }
$name = htmlspecialchars($element->getName());
$name = String::controlCharacterPHP2OOXML($name);
$text = htmlspecialchars($element->getText());
$text = String::controlCharacterPHP2OOXML($text);
$this->writeOpeningWP(); $this->writeOpeningWP();
$xmlWriter->startElement('w:r'); $xmlWriter->startElement('w:r');
@ -49,7 +42,7 @@ class CheckBox extends Text
$xmlWriter->writeAttribute('w:fldCharType', 'begin'); $xmlWriter->writeAttribute('w:fldCharType', 'begin');
$xmlWriter->startElement('w:ffData'); $xmlWriter->startElement('w:ffData');
$xmlWriter->startElement('w:name'); $xmlWriter->startElement('w:name');
$xmlWriter->writeAttribute('w:val', $name); $xmlWriter->writeAttribute('w:val', $this->getText($element->getName()));
$xmlWriter->endElement(); //w:name $xmlWriter->endElement(); //w:name
$xmlWriter->writeAttribute('w:enabled', ''); $xmlWriter->writeAttribute('w:enabled', '');
$xmlWriter->startElement('w:calcOnExit'); $xmlWriter->startElement('w:calcOnExit');
@ -88,10 +81,10 @@ class CheckBox extends Text
$xmlWriter->startElement('w:t'); $xmlWriter->startElement('w:t');
$xmlWriter->writeAttribute('xml:space', 'preserve'); $xmlWriter->writeAttribute('xml:space', 'preserve');
$xmlWriter->writeRaw($text); $xmlWriter->writeRaw($this->getText($element->getText()));
$xmlWriter->endElement(); // w:t $xmlWriter->endElement(); // w:t
$xmlWriter->endElement(); // w:r $xmlWriter->endElement(); // w:r
$this->writeEndingWP(); $this->writeClosingWP();
} }
} }

View File

@ -55,6 +55,6 @@ class Footnote extends Text
$xmlWriter->endElement(); // w:$referenceType $xmlWriter->endElement(); // w:$referenceType
$xmlWriter->endElement(); // w:r $xmlWriter->endElement(); // w:r
$this->writeEndingWP(); $this->writeClosingWP();
} }
} }

View File

@ -53,6 +53,6 @@ class Link extends Text
$xmlWriter->endElement(); // w:r $xmlWriter->endElement(); // w:r
$xmlWriter->endElement(); // w:hyperlink $xmlWriter->endElement(); // w:hyperlink
$this->writeEndingWP(); $this->writeClosingWP();
} }
} }

View File

@ -17,8 +17,6 @@
namespace PhpOffice\PhpWord\Writer\Word2007\Element; namespace PhpOffice\PhpWord\Writer\Word2007\Element;
use PhpOffice\PhpWord\Shared\String;
/** /**
* PreserveText element writer * PreserveText element writer
* *
@ -76,21 +74,18 @@ class PreserveText extends Text
$xmlWriter->endElement(); $xmlWriter->endElement();
$xmlWriter->endElement(); $xmlWriter->endElement();
} else { } else {
$text = htmlspecialchars($text);
$text = String::controlCharacterPHP2OOXML($text);
$xmlWriter->startElement('w:r'); $xmlWriter->startElement('w:r');
$this->writeFontStyle(); $this->writeFontStyle();
$xmlWriter->startElement('w:t'); $xmlWriter->startElement('w:t');
$xmlWriter->writeAttribute('xml:space', 'preserve'); $xmlWriter->writeAttribute('xml:space', 'preserve');
$xmlWriter->writeRaw($text); $xmlWriter->writeRaw($this->getText($text));
$xmlWriter->endElement(); $xmlWriter->endElement();
$xmlWriter->endElement(); $xmlWriter->endElement();
} }
} }
$this->writeEndingWP(); $this->writeClosingWP();
} }
} }

View File

@ -17,7 +17,6 @@
namespace PhpOffice\PhpWord\Writer\Word2007\Element; namespace PhpOffice\PhpWord\Writer\Word2007\Element;
use PhpOffice\PhpWord\Shared\String;
use PhpOffice\PhpWord\Writer\Word2007\Style\Font as FontStyleWriter; use PhpOffice\PhpWord\Writer\Word2007\Style\Font as FontStyleWriter;
use PhpOffice\PhpWord\Writer\Word2007\Style\Paragraph as ParagraphStyleWriter; use PhpOffice\PhpWord\Writer\Word2007\Style\Paragraph as ParagraphStyleWriter;
@ -39,9 +38,6 @@ class Text extends AbstractElement
return; return;
} }
$text = htmlspecialchars($element->getText());
$text = String::controlCharacterPHP2OOXML($text);
$this->writeOpeningWP(); $this->writeOpeningWP();
$xmlWriter->startElement('w:r'); $xmlWriter->startElement('w:r');
@ -50,11 +46,11 @@ class Text extends AbstractElement
$xmlWriter->startElement('w:t'); $xmlWriter->startElement('w:t');
$xmlWriter->writeAttribute('xml:space', 'preserve'); $xmlWriter->writeAttribute('xml:space', 'preserve');
$xmlWriter->writeRaw($text); $xmlWriter->writeRaw($this->getText($element->getText()));
$xmlWriter->endElement(); $xmlWriter->endElement();
$xmlWriter->endElement(); // w:r $xmlWriter->endElement(); // w:r
$this->writeEndingWP(); $this->writeClosingWP();
} }
/** /**
@ -77,7 +73,7 @@ class Text extends AbstractElement
/** /**
* Write ending * Write ending
*/ */
protected function writeEndingWP() protected function writeClosingWP()
{ {
$xmlWriter = $this->getXmlWriter(); $xmlWriter = $this->getXmlWriter();

View File

@ -42,7 +42,7 @@ class TextBreak extends Text
$xmlWriter->startElement('w:pPr'); $xmlWriter->startElement('w:pPr');
$this->writeFontStyle(); $this->writeFontStyle();
$xmlWriter->endElement(); // w:pPr $xmlWriter->endElement(); // w:pPr
$this->writeEndingWP(); $this->writeClosingWP();
} else { } else {
$xmlWriter->writeElement('w:p'); $xmlWriter->writeElement('w:p');
} }

View File

@ -37,6 +37,6 @@ class TextRun extends Text
$containerWriter = new Container($xmlWriter, $element); $containerWriter = new Container($xmlWriter, $element);
$containerWriter->write(); $containerWriter->write();
$this->writeEndingWP(); $this->writeClosingWP();
} }
} }

View File

@ -17,8 +17,6 @@
namespace PhpOffice\PhpWord\Writer\Word2007\Element; namespace PhpOffice\PhpWord\Writer\Word2007\Element;
use PhpOffice\PhpWord\Shared\String;
/** /**
* TextRun element writer * TextRun element writer
* *
@ -41,9 +39,6 @@ class Title extends AbstractElement
$anchor = '_Toc' . ($rId + 252634154); $anchor = '_Toc' . ($rId + 252634154);
$style = $element->getStyle(); $style = $element->getStyle();
$text = htmlspecialchars($element->getText());
$text = String::controlCharacterPHP2OOXML($text);
$xmlWriter->startElement('w:p'); $xmlWriter->startElement('w:p');
if (!empty($style)) { if (!empty($style)) {
@ -67,7 +62,7 @@ class Title extends AbstractElement
$xmlWriter->startElement('w:r'); $xmlWriter->startElement('w:r');
$xmlWriter->startElement('w:t'); $xmlWriter->startElement('w:t');
$xmlWriter->writeRaw($text); $xmlWriter->writeRaw($this->getText($element->getText()));
$xmlWriter->endElement(); $xmlWriter->endElement();
$xmlWriter->endElement(); $xmlWriter->endElement();