PHP检索DOCX上的页面脚注
我正在尝试将 DOCX 转换为 HTML。在谷歌上搜索后,我找到了下面这个简单的库来做转换,但是它无法检测到页面上的脚注(footnote),所以我正在尝试把这个功能添加到这个库中。
如何检测一段带样式的文本或普通文本是否是脚注?脚注有特殊的样式吗?
<?php
/**
 * Minimal DOCX reader that converts the main document part to plain text or HTML.
 *
 * A DOCX file is a zip archive. This class reads two entries from it:
 *  - word/styles.xml   (optional) - run formatting attached to named styles;
 *  - word/document.xml (required) - the document body.
 *
 * Only paragraphs (w:p) directly under w:body and the runs (w:r) directly
 * inside them are converted. No other part of the package is read.
 *
 * Usage:
 *   $reader = new Docx_reader();
 *   $reader->setFile('/path/to/file.docx');
 *   $html = $reader->to_html();          // false on failure, see get_errors()
 */
class Docx_reader {

    /** @var string|false Raw XML of word/document.xml, or false when nothing is loaded. */
    private $fileData = false;

    /** @var string[] Human-readable error messages collected so far. */
    private $errors = array();

    /** @var array Map of styleId => array('tags' => string[], 'attrs' => string[]). */
    private $styles = array();

    public function __construct() {
    }

    /**
     * Opens the archive, caches the style table and returns the body XML.
     *
     * @param string $file Path to the .docx file.
     * @return string|false Contents of word/document.xml, or false on failure
     *                      (a message is appended to the error list).
     */
    private function load($file) {
        if (!file_exists($file)) {
            $this->errors[] = 'File does not exist.';
            return false;
        }

        $zip = new ZipArchive();
        if ($zip->open($file) !== true) {
            $this->errors[] = 'File could not be opened as a zip archive.';
            return false;
        }

        // Always start from an empty table so styles of a previously loaded
        // file never leak into this one.
        $this->styles = array();
        $stylesXml = $this->readEntry($zip, 'word/styles.xml');
        if ($stylesXml !== false) {
            $this->styles = $this->parseStyles($stylesXml);
        }

        $data = $this->readEntry($zip, 'word/document.xml');
        $zip->close();

        if ($data === false) {
            $this->errors[] = 'word/document.xml was not found in the archive.';
            return false;
        }
        return $data;
    }

    /**
     * Reads one archive entry by name.
     *
     * @param ZipArchive $zip  An already opened archive.
     * @param string     $name Entry name inside the archive.
     * @return string|false Entry contents, or false when it is missing or unreadable.
     */
    private function readEntry($zip, $name) {
        $index = $zip->locateName($name);
        if ($index === false) {
            return false;
        }
        return $zip->getFromIndex($index);
    }

    /**
     * Parses an XML string without emitting libxml warnings.
     *
     * @param mixed $xmlString XML source.
     * @return SimpleXMLElement|false Parsed document, or false when it is not usable XML.
     */
    private function parseXml($xmlString) {
        if (!is_string($xmlString) || $xmlString === '') {
            return false;
        }
        $previous = libxml_use_internal_errors(true);
        $xml = simplexml_load_string($xmlString);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
        return $xml;
    }

    /**
     * Builds the style table from the contents of word/styles.xml.
     *
     * @param string $stylesXml Raw XML of word/styles.xml.
     * @return array Map of styleId => array('tags' => string[], 'attrs' => string[]);
     *               empty when the XML cannot be parsed or has no "w" namespace.
     */
    private function parseStyles($stylesXml) {
        $styles = array();

        $xml = $this->parseXml($stylesXml);
        if ($xml === false) {
            return $styles;
        }
        $namespaces = $xml->getNamespaces(true);
        if (!isset($namespaces['w'])) {
            return $styles;
        }

        $children = $xml->children($namespaces['w']);
        foreach ($children->style as $s) {
            $attr = $s->attributes('w', true);
            if (isset($attr['styleId'])) {
                $styles[(string) $attr['styleId']] = $this->runFormatting($s->rPr);
            }
        }
        return $styles;
    }

    /**
     * Translates a run-properties element (w:rPr) into HTML tags and CSS declarations.
     *
     * Recognised children: b -> <strong>, i -> <em>, color -> "color:#RRGGBB",
     * sz -> "font-size:Npx". Everything else is ignored.
     *
     * @param mixed $rPr The w:rPr element (anything else yields empty formatting).
     * @return array array('tags' => string[], 'attrs' => string[])
     */
    private function runFormatting($rPr) {
        $tags = array();
        $attrs = array();

        if ($rPr instanceof SimpleXMLElement) {
            foreach (get_object_vars($rPr) as $tag => $node) {
                // get_object_vars() yields plain strings/arrays for text-only
                // children and for the '@attributes' pseudo-key; only element
                // objects can carry the w:val attribute we need.
                if (!($node instanceof SimpleXMLElement)) {
                    continue;
                }
                $att = $node->attributes('w', true);
                $val = isset($att['val']) ? (string) $att['val'] : null;

                switch ($tag) {
                    case 'b':
                        if ($this->isToggleOn($val)) {
                            $tags[] = 'strong';
                        }
                        break;
                    case 'i':
                        if ($this->isToggleOn($val)) {
                            $tags[] = 'em';
                        }
                        break;
                    case 'color':
                        // Only literal hex colours are emitted: values such as
                        // "auto" are not valid CSS after '#', and validating
                        // keeps document data out of the style attribute.
                        if ($val !== null && preg_match('/^[0-9A-Fa-f]{6}$/', $val)) {
                            $attrs[] = 'color:#' . $val;
                        }
                        break;
                    case 'sz':
                        // NOTE(review): w:sz is specified in half-points; the raw
                        // value is kept as px to preserve the existing output.
                        if ($val !== null && preg_match('/^\d+$/', $val)) {
                            $attrs[] = 'font-size:' . $val . 'px';
                        }
                        break;
                }
            }
        }

        return array('tags' => $tags, 'attrs' => $attrs);
    }

    /**
     * Tells whether a toggle property (w:b, w:i) is switched on.
     *
     * @param string|null $val Value of w:val, or null when the attribute is absent.
     * @return bool False only for an explicit "0", "false" or "off".
     */
    private function isToggleOn($val) {
        if ($val === null) {
            return true;
        }
        return !in_array(strtolower($val), array('0', 'false', 'off'), true);
    }

    /**
     * Loads a DOCX file; failures are reported through get_errors().
     *
     * @param string $path Path to the .docx file.
     */
    public function setFile($path) {
        $this->fileData = $this->load($path);
    }

    /**
     * Returns the document body with all markup stripped.
     *
     * The raw XML is passed through strip_tags(), so XML entities stay encoded
     * and no separator is inserted between paragraphs.
     *
     * @return string|false Plain text, or false when no document is loaded.
     */
    public function to_plain_text() {
        if ($this->fileData) {
            return strip_tags($this->fileData);
        } else {
            return false;
        }
    }

    /**
     * Converts the loaded document to a standalone HTML page.
     *
     * @return string|false HTML document, or false when no document is loaded
     *                      or its XML cannot be parsed (see get_errors()).
     */
    public function to_html() {
        if (!$this->fileData) {
            return false;
        }

        $xml = $this->parseXml($this->fileData);
        if ($xml === false) {
            $this->errors[] = 'word/document.xml is not valid XML.';
            return false;
        }
        $namespaces = $xml->getNamespaces(true);
        if (!isset($namespaces['w'])) {
            $this->errors[] = 'word/document.xml does not declare the "w" namespace.';
            return false;
        }
        $children = $xml->children($namespaces['w']);

        $html = '<!doctype html><html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /><title></title><style>span.block { display: block; }</style></head><body>';
        if (isset($children->body)) {
            foreach ($children->body->p as $p) {
                $html .= $this->paragraphToHtml($p);
            }
        }

        return $this->stripInvalidUtf8($html) . '</body></html>';
    }

    /**
     * Renders one paragraph (w:p) as a block-level span containing its runs.
     *
     * @param SimpleXMLElement $p The w:p element.
     * @return string HTML fragment.
     */
    private function paragraphToHtml($p) {
        $blockStyle = '';
        $startTags = array();
        $startAttrs = array();

        // Formatting inherited from the paragraph's named style, if known.
        if (isset($p->pPr->pStyle)) {
            $objectAttrs = $p->pPr->pStyle->attributes('w', true);
            $objectStyle = (string) $objectAttrs['val'];
            if (isset($this->styles[$objectStyle])) {
                $startTags = $this->styles[$objectStyle]['tags'];
                $startAttrs = $this->styles[$objectStyle]['attrs'];
            }
        }

        if (isset($p->pPr->spacing)) {
            $att = $p->pPr->spacing->attributes('w', true);
            // Explicit numeric casts: non-numeric values degrade to 0 instead
            // of raising an arithmetic error on the SimpleXMLElement.
            if (isset($att['before'])) {
                $blockStyle .= 'padding-top:' . ((float) (string) $att['before'] / 10) . 'px;';
            }
            if (isset($att['after'])) {
                $blockStyle .= 'padding-bottom:' . ((float) (string) $att['after'] / 10) . 'px;';
            }
        }

        $html = '<span class="block" style="' . $blockStyle . '">';
        $li = isset($p->pPr->numPr);
        if ($li) {
            $html .= '<li>';
        }

        foreach ($p->r as $part) {
            $tags = $startTags;
            $attrs = $startAttrs;

            // The original condition here was an assignment (=), which made it
            // always true; an actual check for numPr is what was intended.
            if (isset($part->pPr->numPr)) {
                $tags[] = 'li';
            }

            $format = $this->runFormatting($part->rPr);
            $tags = array_merge($tags, $format['tags']);
            $attrs = array_merge($attrs, $format['attrs']);

            $openTags = '';
            $closeTags = '';
            foreach ($tags as $tag) {
                $openTags .= '<' . $tag . '>';
                $closeTags .= '</' . $tag . '>';
            }

            // Document text is untrusted: escape it so characters such as
            // '<' and '&' cannot break or inject markup.
            $text = htmlspecialchars((string) $part->t, ENT_QUOTES, 'UTF-8');
            $html .= '<span style="' . implode(';', $attrs) . '">' . $openTags . $text . $closeTags . '</span>';
        }

        if ($li) {
            $html .= '</li>';
        }
        return $html . '</span>';
    }

    /**
     * Drops every byte that is not part of a UTF-8 shaped sequence.
     *
     * @param string $html Input markup.
     * @return string Cleaned markup; the input unchanged if the regex engine fails.
     */
    private function stripInvalidUtf8($html) {
        $regex = '/
            (
                (?: [\x00-\x7F]                 # single-byte sequences   0xxxxxxx
                |   [\xC0-\xDF][\x80-\xBF]      # double-byte sequences   110xxxxx 10xxxxxx
                |   [\xE0-\xEF][\x80-\xBF]{2}   # triple-byte sequences   1110xxxx 10xxxxxx * 2
                |   [\xF0-\xF7][\x80-\xBF]{3}   # quadruple-byte sequence 11110xxx 10xxxxxx * 3
                ){1,100}                        # ...one or more times
            )
            | .                                 # anything else
            /x';

        // Well-formed runs are captured in group 1 and kept; any other byte
        // matches the bare "." branch and is replaced by an empty group.
        $clean = preg_replace($regex, '$1', $html);
        return $clean === null ? $html : $clean;
    }

    /**
     * @return string[] Error messages collected while loading or converting.
     */
    public function get_errors() {
        return $this->errors;
    }

    /**
     * Unused placeholder kept from the original class.
     */
    private function getStyles() {
    }
}
getFromIndex($styleIndex);
$xml=simplexml\u load\u字符串($stylesXml);
$namespaces=$xml->getNamespaces(true);
$children=$xml->children($namespaces['w']);
foreach($children->style as$s){
$attr=$s->attributes('w',true);
如果(isset($attr['styleId'])){
$tags=array();
$attrs=array();
foreach(获取对象变量($s->rPr)作为$tag=>$style){
$att=$style->attributes('w',true);
交换机($tag){
案例“b”:
$tags[]='strong';
打破
案例“一”:
$tags[]='em';
打破
案例“颜色”:
//回音(字符串)$att['val'];
$attrs[]=“颜色:#”。$att['val';
打破
案例“sz”:
$attrs[]=“字体大小:”.$att['val']。'px';
打破
}
}
$styles[(字符串)$attr['styleId']]=array('tags'=>$tags,'attr'=>$attr);
}
}
$this->styles=$styles;
}
if(($index=$zip->locateName('word/document.xml'))!==false){
//如果找到,将其读取到字符串
$data=$zip->getFromIndex($index);
//关闭存档文件
$zip->close();
返回$data;
}
$zip->close();
}
}
}否则{
$this->errors[]='文件不存在';
}
}
公共函数setFile($path){
$this->fileData=$this->load($path);
}
公共函数到纯文本(){
如果($this->fileData){
返回条带标签($this->fileData);
}否则{
返回false;
}
}
公共函数到_html(){
如果($this->fileData){
$xml=simplexml\u load\u字符串($this->fileData);
$namespaces=$xml->getNamespaces(true);
$children=$xml->children($namespaces['w']);
$html='span.block{display:block;}';
foreach($children->body->p作为$p){
$style='';
$startTags=array();
$startAttrs=array();
如果($p->pPr->pStyle){
$objectAttrs=$p->pPr->pStyle->attributes('w',true);
$objectStyle=(字符串)$objectAttrs['val'];
if(设置($this->styles[$objectStyle])){
$startTags=$this->style[$objectStyle]['tags'];
$startAttrs=$this->style[$objectStyle]['attrs'];
}
}
如果($p->pPr->间距){
$att=$p->pPr->间距->属性('w',true);
如果(isset($att['before'])){
$style.='padding-top:'。($att['before']/10)。'px;';
}
如果(isset($att['after'])){
$style.='padding-bottom:'。($att['after']/10)。'px;';
}
}
$html.='';
$li=假;
如果($p->pPr->numPr){
$li=真;
$html.='';
}
foreach($p->r作为$part){
//echo$part->t;
$tags=$startTags;
$attrs=$startAttrs;
foreach(获取对象变量($part->pPr)为$k=>$v){
如果($k='numPr'){
$tags[]='li';
}
}
foreach(获取对象变量($part->rPr)作为$tag=>$style){
//打印($style->attributes());
$att=$style->attributes('w',true);
交换机($tag){
案例“b”:
$tags[]='strong';
打破
案例“一”:
$tags[]='em';
打破
案例“颜色”:
//回音(字符串)$att['val'];
$attrs[]=“颜色:#”。$att['val';
打破
案例“sz”:
$attrs[]=“字体大小:”.$att['val']。'px';
打破
}
}
$openTags='';
$closeTags='';
foreach($tags作为$tag){
$openTags.='';
$closeTags.='';
}
$html.=''.$openTags.$part->t.$closeTags';
}
若有($li){
$html.=' ';
}