PHP检索DOCX上的页面脚注

PHP检索DOCX上的页面脚注,php,Php,我正在尝试将DOCX转换为html,在谷歌上搜索后,我可以找到这个简单的库作为这个转换器,但是在页面上无法检测到footnote,我正在尝试将它添加到这个库中 如何检测样式文本或简单文本是/是脚注?有特殊风格的吗 <?php class Docx_reader { private $fileData = false; private $errors = array(); private $styles = array(); public function _

我正在尝试将 DOCX 转换为 HTML。在谷歌上搜索后,我找到了下面这个简单的库来做转换,但它无法检测页面上的
脚注(footnote)
,所以我正在尝试为这个库添加该功能。

如何检测一段带样式的文本或普通文本是否为脚注?脚注是否有特殊的样式?

<?php
/**
 * Minimal DOCX reader: pulls word/document.xml (and, when present,
 * word/styles.xml) out of a .docx archive and renders the body paragraphs
 * either as plain text or as a simple HTML page.
 *
 * Only bold, italic, colour and font-size run properties, paragraph
 * spacing and list membership (numPr) are translated; everything else in
 * the document is ignored.
 */
class Docx_reader {
    /** @var string|false Raw word/document.xml, or false until a file has loaded. */
    private $fileData = false;
    /** @var string[] Human-readable messages collected while loading/rendering. */
    private $errors = array();
    /** @var array styleId => array('tags' => string[], 'attrs' => string[]) */
    private $styles = array();

    public function __construct() {

    }

    /**
     * Opens the archive, caches the named styles and returns the raw
     * word/document.xml contents.
     *
     * @param string $file Path to the .docx file.
     * @return string|false The document XML, or false on failure (a message
     *                      is appended to the error list in that case).
     */
    private function load($file) {
        if (!file_exists($file)) {
            $this->errors[] = 'File does not exist.';
            return false;
        }

        $zip = new ZipArchive();
        if ($zip->open($file) !== true) {
            $this->errors[] = 'File could not be opened as a DOCX (zip) archive.';
            return false;
        }

        // Styles are optional; a missing or unparsable styles.xml yields none.
        $this->styles = $this->readStyles($zip);

        $data = $zip->getFromName('word/document.xml');
        $zip->close();

        if ($data === false) {
            $this->errors[] = 'word/document.xml was not found in the archive.';
            return false;
        }
        return $data;
    }

    /**
     * Reads word/styles.xml and maps every style id to the HTML tags and CSS
     * declarations derived from that style's run properties.
     *
     * @param ZipArchive $zip An already opened archive.
     * @return array styleId => array('tags' => string[], 'attrs' => string[])
     */
    private function readStyles($zip) {
        $styles = array();

        $stylesXml = $zip->getFromName('word/styles.xml');
        if ($stylesXml === false) {
            return $styles;
        }
        $xml = simplexml_load_string($stylesXml);
        if ($xml === false) {
            return $styles;
        }
        $namespaces = $xml->getNamespaces(true);
        if (!isset($namespaces['w'])) {
            return $styles;
        }

        foreach ($xml->children($namespaces['w'])->style as $s) {
            $attr = $s->attributes('w', true);
            if (isset($attr['styleId'])) {
                list($tags, $attrs) = $this->parseRunProperties($s);
                $styles[(string) $attr['styleId']] = array('tags' => $tags, 'attrs' => $attrs);
            }
        }
        return $styles;
    }

    /**
     * Translates the <w:rPr> child of $element (a style or a run) into HTML
     * tag names and CSS declarations.
     *
     * @param SimpleXMLElement $element Element that may contain a w:rPr child.
     * @return array Two-element list: array(string[] $tags, string[] $attrs).
     */
    private function parseRunProperties($element) {
        $tags = array();
        $attrs = array();

        if (!isset($element->rPr)) {
            return array($tags, $attrs);
        }

        foreach ($element->rPr->children('w', true) as $prop) {
            $att = $prop->attributes('w', true);
            switch ($prop->getName()) {
                case "b":
                    $tags[] = 'strong';
                    break;
                case "i":
                    $tags[] = 'em';
                    break;
                case "color":
                    $attrs[] = 'color:#' . (string) $att['val'];
                    break;
                case "sz":
                    // NOTE(review): w:sz is specified in half-points by
                    // WordprocessingML; emitting it as px is kept from the
                    // original output - confirm whether val/2 pt is wanted.
                    $attrs[] = 'font-size:' . (string) $att['val'] . 'px';
                    break;
            }
        }
        return array($tags, $attrs);
    }

    /**
     * Loads the given .docx file; failures are reported through get_errors().
     *
     * @param string $path Path to the .docx file.
     */
    public function setFile($path) {
        $this->fileData = $this->load($path);
    }

    /**
     * @return string|false Document XML with all markup stripped, or false
     *                      when no file has been loaded successfully.
     */
    public function to_plain_text() {
        if ($this->fileData) {
            return strip_tags($this->fileData);
        } else {
            return false;
        }
    }

    /**
     * Renders the loaded document as a standalone HTML page.
     *
     * Each paragraph becomes a block-level <span>; each run becomes an inline
     * <span> carrying the paragraph style's formatting plus its own.
     *
     * @return string|null The HTML page, or null when no file is loaded or
     *                     the document XML cannot be parsed.
     */
    public function to_html() {
        if (!$this->fileData) {
            return null;
        }

        $xml = simplexml_load_string($this->fileData);
        if ($xml === false) {
            $this->errors[] = 'word/document.xml is not well-formed XML.';
            return null;
        }
        $namespaces = $xml->getNamespaces(true);
        if (!isset($namespaces['w'])) {
            $this->errors[] = 'word/document.xml does not declare the "w" namespace.';
            return null;
        }
        $children = $xml->children($namespaces['w']);

        $html = '<!doctype html><html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /><title></title><style>span.block { display: block; }</style></head><body>';
        foreach ($children->body->p as $p) {
            $style = '';

            // Formatting inherited from the paragraph's named style, if any.
            $startTags = array();
            $startAttrs = array();
            if (isset($p->pPr->pStyle)) {
                $objectAttrs = $p->pPr->pStyle->attributes('w', true);
                $objectStyle = (string) $objectAttrs['val'];
                if (isset($this->styles[$objectStyle])) {
                    $startTags = $this->styles[$objectStyle]['tags'];
                    $startAttrs = $this->styles[$objectStyle]['attrs'];
                }
            }

            if (isset($p->pPr->spacing)) {
                $att = $p->pPr->spacing->attributes('w', true);
                if (isset($att['before'])) {
                    $style .= 'padding-top:' . (((float) (string) $att['before']) / 10) . 'px;';
                }
                if (isset($att['after'])) {
                    $style .= 'padding-bottom:' . (((float) (string) $att['after']) / 10) . 'px;';
                }
            }
            $html .= '<span class="block" style="' . $style . '">';

            // Paragraphs carrying numbering properties are list items.
            $li = isset($p->pPr->numPr);
            if ($li) {
                $html .= '<li>';
            }

            foreach ($p->r as $part) {
                $tags = $startTags;
                $attrs = $startAttrs;
                if (isset($part->pPr->numPr)) {
                    $tags[] = 'li';
                }
                list($runTags, $runAttrs) = $this->parseRunProperties($part);
                $tags = array_merge($tags, $runTags);
                $attrs = array_merge($attrs, $runAttrs);

                $openTags = '';
                $closeTags = '';
                foreach ($tags as $tag) {
                    $openTags .= '<' . $tag . '>';
                    // Prepend so tags close in reverse order and nest properly.
                    $closeTags = '</' . $tag . '>' . $closeTags;
                }

                // A run may hold several <w:t> pieces; join them all.
                $text = '';
                foreach ($part->t as $t) {
                    $text .= (string) $t;
                }

                // Both the CSS values and the text come from the document,
                // so they are escaped before being placed into the markup.
                $html .= '<span style="' . htmlspecialchars(implode(';', $attrs), ENT_QUOTES, 'UTF-8') . '">'
                    . $openTags . htmlspecialchars($text, ENT_QUOTES, 'UTF-8') . $closeTags . '</span>';
            }
            if ($li) {
                $html .= '</li>';
            }
            $html .= "</span>";
        }

        //Trying to weed out non-utf8 stuff from the file:
        $regex = <<<'END'
/
  (
    (?: [\x00-\x7F]                 # single-byte sequences   0xxxxxxx
    |   [\xC0-\xDF][\x80-\xBF]      # double-byte sequences   110xxxxx 10xxxxxx
    |   [\xE0-\xEF][\x80-\xBF]{2}   # triple-byte sequences   1110xxxx 10xxxxxx * 2
    |   [\xF0-\xF7][\x80-\xBF]{3}   # quadruple-byte sequence 11110xxx 10xxxxxx * 3 
    ){1,100}                        # ...one or more times
  )
| .                                 # anything else
/x
END;
        // preg_replace returns the cleaned string (it does not modify its
        // argument) and null on a PCRE error; keep the unfiltered markup then.
        $clean = preg_replace($regex, '$1', $html);
        if ($clean !== null) {
            $html = $clean;
        }
        return $html . '</body></html>';
    }

    /**
     * @return string[] Messages collected so far.
     */
    public function get_errors() {
        return $this->errors;
    }

    private function getStyles() {

    }
}
getFromIndex($styleIndex);
$xml=simplexml\u load\u字符串($stylesXml);
$namespaces=$xml->getNamespaces(true);
$children=$xml->children($namespaces['w']);
foreach($children->style as$s){
$attr=$s->attributes('w',true);
如果(isset($attr['styleId'])){
$tags=array();
$attrs=array();
foreach(获取对象变量($s->rPr)作为$tag=>$style){
$att=$style->attributes('w',true);
交换机($tag){
案例“b”:
$tags[]='strong';
打破
案例“一”:
$tags[]='em';
打破
案例“颜色”:
//回音(字符串)$att['val'];
$attrs[]=“颜色:#”。$att['val';
打破
案例“sz”:
$attrs[]=“字体大小:”.$att['val']。'px';
打破
}
}
$styles[(字符串)$attr['styleId']]=array('tags'=>$tags,'attr'=>$attr);
}
}
$this->styles=$styles;
}
if(($index=$zip->locateName('word/document.xml'))!==false){
//如果找到,将其读取到字符串
$data=$zip->getFromIndex($index);
//关闭存档文件
$zip->close();
返回$data;
}
$zip->close();
}
}
}否则{
$this->errors[]='文件不存在';
}
}
公共函数setFile($path){
$this->fileData=$this->load($path);
}
公共函数到纯文本(){
如果($this->fileData){
返回条带标签($this->fileData);
}否则{
返回false;
}
}
公共函数到_html(){
如果($this->fileData){
$xml=simplexml\u load\u字符串($this->fileData);
$namespaces=$xml->getNamespaces(true);
$children=$xml->children($namespaces['w']);
$html='span.block{display:block;}';
foreach($children->body->p作为$p){
$style='';
$startTags=array();
$startAttrs=array();
如果($p->pPr->pStyle){
$objectAttrs=$p->pPr->pStyle->attributes('w',true);
$objectStyle=(字符串)$objectAttrs['val'];
if(设置($this->styles[$objectStyle])){
$startTags=$this->style[$objectStyle]['tags'];
$startAttrs=$this->style[$objectStyle]['attrs'];
}
}
如果($p->pPr->间距){
$att=$p->pPr->间距->属性('w',true);
如果(isset($att['before'])){
$style.='padding-top:'。($att['before']/10)。'px;';
}
如果(isset($att['after'])){
$style.='padding-bottom:'。($att['after']/10)。'px;';
}
}
$html.='';
$li=假;
如果($p->pPr->numPr){
$li=真;
$html.='
  • '; } foreach($p->r作为$part){ //echo$part->t; $tags=$startTags; $attrs=$startAttrs; foreach(获取对象变量($part->pPr)为$k=>$v){ 如果($k='numPr'){ $tags[]='li'; } } foreach(获取对象变量($part->rPr)作为$tag=>$style){ //打印($style->attributes()); $att=$style->attributes('w',true); 交换机($tag){ 案例“b”: $tags[]='strong'; 打破 案例“一”: $tags[]='em'; 打破 案例“颜色”: //回音(字符串)$att['val']; $attrs[]=“颜色:#”。$att['val'; 打破 案例“sz”: $attrs[]=“字体大小:”.$att['val']。'px'; 打破 } } $openTags=''; $closeTags=''; foreach($tags作为$tag){ $openTags.=''; $closeTags.=''; } $html.=''.$openTags.$part->t.$closeTags'; } 若有($li){ $html.='
  • '; }