Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/.net/22.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Java Word XML到RTF的转换_Java_.net_Python_Xml_Rtf - Fatal编程技术网

Java Word XML到RTF的转换

Java Word XML到RTF的转换,java,.net,python,xml,rtf,Java,.net,Python,Xml,Rtf,我需要通过编程将Word XML文件转换为RTF文件。由于一些第三方库的存在,它已经成为一种需求。有任何API/库可以做到这一点吗 实际上语言不是问题,因为我只需要完成工作。但是Java、.NET语言或Python是首选。Java 我过去曾使用ApachePOI进行解析。它似乎工作得很好。然后在这里写RTF .Net 关于在.Net中写入Word文档。我相信你可以用同一个图书馆来阅读 Python 对于Python 相关问题 另外,.看看。您必须自己设置它,因为我相信演示只允许您上载打开的off

我需要通过编程将Word XML文件转换为RTF文件。由于一些第三方库的存在,它已经成为一种需求。有任何API/库可以做到这一点吗

实际上语言不是问题,因为我只需要完成工作。但是Java、.NET语言或Python是首选。

Java

我过去曾使用ApachePOI进行解析。它似乎工作得很好。然后在这里写RTF

.Net

关于在.Net中写入Word文档。我相信你可以用同一个库来读取

Python

对于Python

相关问题


另外,.

看看。您必须自己搭建它,因为我相信演示站点只允许您上传OpenOffice文档。

您可以使用AutoIt自动在word中打开XML文件并执行另存为RTF

我使用Word的用户定义函数将RTF文件保存为纯文本进行转换,效果很好。语法非常简单


Python/linux方式:

import uno
from os.path import abspath, isfile, splitext
from com.sun.star.beans import PropertyValue
from com.sun.star.task import ErrorCodeIOException
from com.sun.star.connection import NoConnectException

# Port used when a DocumentConverter is created without an explicit port.
DEFAULT_OPENOFFICE_PORT = 8100

# Document family names; they are the inner keys of FILTER_MAP.
FAMILY_TEXT = "Text"
FAMILY_SPREADSHEET = "Spreadsheet"
FAMILY_PRESENTATION = "Presentation"
FAMILY_DRAWING = "Drawing"

# Output file extension -> {document family -> export filter name}.
# A family missing under an extension has no filter listed for that pair.
FILTER_MAP = {
    # Formats available to several families.
    "pdf": {
        FAMILY_TEXT: "writer_pdf_Export",
        FAMILY_SPREADSHEET: "calc_pdf_Export",
        FAMILY_PRESENTATION: "impress_pdf_Export",
        FAMILY_DRAWING: "draw_pdf_Export",
    },
    "html": {
        FAMILY_TEXT: "HTML (StarWriter)",
        FAMILY_SPREADSHEET: "HTML (StarCalc)",
        FAMILY_PRESENTATION: "impress_html_Export",
    },
    # Text-only formats.
    "odt": {
        FAMILY_TEXT: "writer8",
    },
    "doc": {
        FAMILY_TEXT: "MS Word 97",
    },
    "rtf": {
        FAMILY_TEXT: "Rich Text Format",
    },
    "txt": {
        FAMILY_TEXT: "Text",
    },
    "docx": {
        FAMILY_TEXT: "MS Word 2007 XML",
    },
    # Spreadsheet-only formats.
    "ods": {
        FAMILY_SPREADSHEET: "calc8",
    },
    "xls": {
        FAMILY_SPREADSHEET: "MS Excel 97",
    },
    # Presentation-only formats.
    "odp": {
        FAMILY_PRESENTATION: "impress8",
    },
    "ppt": {
        FAMILY_PRESENTATION: "MS PowerPoint 97",
    },
    "swf": {
        FAMILY_PRESENTATION: "impress_flash_Export",
    },
}

class DocumentConverter:
    """Convert documents through an office process reachable over UNO.

    The constructor connects to an office instance listening on a local
    socket; convert() then loads a file, picks an export filter from
    FILTER_MAP based on the output extension and the document family, and
    stores the result.
    """

    def __init__(self, port=DEFAULT_OPENOFFICE_PORT):
        """Connect to the office process listening on localhost:<port>.

        Raises:
            Exception: if the connection attempt raises NoConnectException.
        """
        localContext = uno.getComponentContext()
        resolver = localContext.ServiceManager.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", localContext)
        try:
            self.context = resolver.resolve(
                "uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext" % port)
        except NoConnectException:
            # Call-style raise: valid on both Python 2 and Python 3.
            raise Exception("failed to connect to OpenOffice.org on port %s" % port)
        self.desktop = self.context.ServiceManager.createInstanceWithContext(
            "com.sun.star.frame.Desktop", self.context)

    def convert(self, inputFile, outputFile):
        """Load inputFile and store it as outputFile.

        The export format is derived from the extension of outputFile.

        Raises:
            Exception: if the document cannot be loaded, its family is
                unknown, or no filter exists for the requested conversion.
        """
        inputUrl = self._toFileUrl(inputFile)
        outputUrl = self._toFileUrl(outputFile)

        document = self.desktop.loadComponentFromURL(
            inputUrl, "_blank", 0, self._toProperties(Hidden=True))
        #document.setPropertyValue("DocumentTitle", "saf" ) TODO: Check how this can be set and set doc update mode to  FULL_UPDATE
        if document is None:
            # NOTE(review): the UNO loader is documented to return null when
            # loading fails; report that instead of an AttributeError below.
            raise Exception("could not load input file: %s" % inputFile)

        # Everything after a successful load sits inside try/finally so the
        # document is closed even when family or filter detection fails.
        try:
            if self._detectFamily(document) == FAMILY_TEXT:
                # Indexes are updated before and after the refresh, in the
                # same order as the original sequence.
                # NOTE(review): presumably the second pass picks up layout
                # changes made by refresh() - confirm.
                self._updateIndexes(document)
                try:
                    document.refresh()
                except AttributeError:
                    # The document object does not provide refresh(); skip.
                    pass
                self._updateIndexes(document)

            outputExt = self._getFileExt(outputFile)
            filterName = self._filterName(document, outputExt)
            document.storeToURL(outputUrl, self._toProperties(FilterName=filterName))
        finally:
            document.close(True)

    def _updateIndexes(self, document):
        """Call update() on every entry of the document's index collection."""
        indexes = document.getDocumentIndexes()
        for i in range(indexes.getCount()):
            indexes.getByIndex(i).update()

    def _filterName(self, document, outputExt):
        """Return the export filter name for this document and extension.

        Raises:
            Exception: if outputExt is not a key of FILTER_MAP, or the
                document's family has no entry under that extension.
        """
        family = self._detectFamily(document)
        try:
            filterByFamily = FILTER_MAP[outputExt]
        except KeyError:
            raise Exception("unknown output format: '%s'" % outputExt)
        try:
            return filterByFamily[family]
        except KeyError:
            raise Exception("unsupported conversion: from '%s' to '%s'" % (family, outputExt))

    def _detectFamily(self, document):
        """Return the FAMILY_* constant for the first service the document supports.

        Raises:
            Exception: if none of the four known services is supported.
        """
        if document.supportsService("com.sun.star.text.GenericTextDocument"):
            # NOTE: a GenericTextDocument is either a TextDocument, a WebDocument, or a GlobalDocument
            # but this further distinction doesn't seem to matter for conversions
            return FAMILY_TEXT
        if document.supportsService("com.sun.star.sheet.SpreadsheetDocument"):
            return FAMILY_SPREADSHEET
        if document.supportsService("com.sun.star.presentation.PresentationDocument"):
            return FAMILY_PRESENTATION
        if document.supportsService("com.sun.star.drawing.DrawingDocument"):
            return FAMILY_DRAWING
        raise Exception("unknown document family: %s" % document)

    def _getFileExt(self, path):
        """Return the lower-cased extension of path without the dot.

        Returns an empty string when path has no extension.
        """
        # splitext always returns a string, so no None check is needed.
        return splitext(path)[1][1:].lower()

    def _toFileUrl(self, path):
        """Return the file URL for path after making it absolute."""
        return uno.systemPathToFileUrl(abspath(path))

    def _toProperties(self, **args):
        """Return a tuple of PropertyValue objects, one per keyword argument."""
        props = []
        for key in args:
            prop = PropertyValue()
            prop.Name = key
            prop.Value = args[key]
            props.append(prop)
        return tuple(props)

if __name__ == "__main__":
    # Command-line entry point: convert argv[1] into argv[2].
    # Exit status: 255 on wrong usage, 1 on a missing input file or a
    # failed conversion.
    from sys import argv, exit

    # print(...) with a single argument and "except ... as ..." behave the
    # same on Python 2.6+ and Python 3, unlike the statement forms.
    if len(argv) < 3:
        print("USAGE: python %s <input-file> <output-file>" % argv[0])
        exit(255)
    if not isfile(argv[1]):
        print("no such input file: %s" % argv[1])
        exit(1)

    try:
        converter = DocumentConverter()
        converter.convert(argv[1], argv[2])
    except Exception as exception:
        print("ERROR!" + str(exception))
        exit(1)
您需要OpenOffice UNO Bridge(在服务器上,您可以在无头模式下运行OO)。这样,您就可以将任何OO可读的格式转换为任何OO可写的格式:

运行示例代码

/usr/lib64/openoffice.org/program/soffice.bin -accept=socket,host=localhost,port=8100\;urp -headless
Python示例:

import uno
from os.path import abspath, isfile, splitext
from com.sun.star.beans import PropertyValue
from com.sun.star.task import ErrorCodeIOException
from com.sun.star.connection import NoConnectException

# Port used when a DocumentConverter is created without an explicit port.
DEFAULT_OPENOFFICE_PORT = 8100

# Document family names; they are the inner keys of FILTER_MAP.
FAMILY_TEXT = "Text"
FAMILY_SPREADSHEET = "Spreadsheet"
FAMILY_PRESENTATION = "Presentation"
FAMILY_DRAWING = "Drawing"

# Output file extension -> {document family -> export filter name}.
# A family missing under an extension has no filter listed for that pair.
FILTER_MAP = {
    # Formats available to several families.
    "pdf": {
        FAMILY_TEXT: "writer_pdf_Export",
        FAMILY_SPREADSHEET: "calc_pdf_Export",
        FAMILY_PRESENTATION: "impress_pdf_Export",
        FAMILY_DRAWING: "draw_pdf_Export",
    },
    "html": {
        FAMILY_TEXT: "HTML (StarWriter)",
        FAMILY_SPREADSHEET: "HTML (StarCalc)",
        FAMILY_PRESENTATION: "impress_html_Export",
    },
    # Text-only formats.
    "odt": {
        FAMILY_TEXT: "writer8",
    },
    "doc": {
        FAMILY_TEXT: "MS Word 97",
    },
    "rtf": {
        FAMILY_TEXT: "Rich Text Format",
    },
    "txt": {
        FAMILY_TEXT: "Text",
    },
    "docx": {
        FAMILY_TEXT: "MS Word 2007 XML",
    },
    # Spreadsheet-only formats.
    "ods": {
        FAMILY_SPREADSHEET: "calc8",
    },
    "xls": {
        FAMILY_SPREADSHEET: "MS Excel 97",
    },
    # Presentation-only formats.
    "odp": {
        FAMILY_PRESENTATION: "impress8",
    },
    "ppt": {
        FAMILY_PRESENTATION: "MS PowerPoint 97",
    },
    "swf": {
        FAMILY_PRESENTATION: "impress_flash_Export",
    },
}

class DocumentConverter:
    """Convert documents through an office process reachable over UNO.

    The constructor connects to an office instance listening on a local
    socket; convert() then loads a file, picks an export filter from
    FILTER_MAP based on the output extension and the document family, and
    stores the result.
    """

    def __init__(self, port=DEFAULT_OPENOFFICE_PORT):
        """Connect to the office process listening on localhost:<port>.

        Raises:
            Exception: if the connection attempt raises NoConnectException.
        """
        localContext = uno.getComponentContext()
        resolver = localContext.ServiceManager.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", localContext)
        try:
            self.context = resolver.resolve(
                "uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext" % port)
        except NoConnectException:
            # Call-style raise: valid on both Python 2 and Python 3.
            raise Exception("failed to connect to OpenOffice.org on port %s" % port)
        self.desktop = self.context.ServiceManager.createInstanceWithContext(
            "com.sun.star.frame.Desktop", self.context)

    def convert(self, inputFile, outputFile):
        """Load inputFile and store it as outputFile.

        The export format is derived from the extension of outputFile.

        Raises:
            Exception: if the document cannot be loaded, its family is
                unknown, or no filter exists for the requested conversion.
        """
        inputUrl = self._toFileUrl(inputFile)
        outputUrl = self._toFileUrl(outputFile)

        document = self.desktop.loadComponentFromURL(
            inputUrl, "_blank", 0, self._toProperties(Hidden=True))
        #document.setPropertyValue("DocumentTitle", "saf" ) TODO: Check how this can be set and set doc update mode to  FULL_UPDATE
        if document is None:
            # NOTE(review): the UNO loader is documented to return null when
            # loading fails; report that instead of an AttributeError below.
            raise Exception("could not load input file: %s" % inputFile)

        # Everything after a successful load sits inside try/finally so the
        # document is closed even when family or filter detection fails.
        try:
            if self._detectFamily(document) == FAMILY_TEXT:
                # Indexes are updated before and after the refresh, in the
                # same order as the original sequence.
                # NOTE(review): presumably the second pass picks up layout
                # changes made by refresh() - confirm.
                self._updateIndexes(document)
                try:
                    document.refresh()
                except AttributeError:
                    # The document object does not provide refresh(); skip.
                    pass
                self._updateIndexes(document)

            outputExt = self._getFileExt(outputFile)
            filterName = self._filterName(document, outputExt)
            document.storeToURL(outputUrl, self._toProperties(FilterName=filterName))
        finally:
            document.close(True)

    def _updateIndexes(self, document):
        """Call update() on every entry of the document's index collection."""
        indexes = document.getDocumentIndexes()
        for i in range(indexes.getCount()):
            indexes.getByIndex(i).update()

    def _filterName(self, document, outputExt):
        """Return the export filter name for this document and extension.

        Raises:
            Exception: if outputExt is not a key of FILTER_MAP, or the
                document's family has no entry under that extension.
        """
        family = self._detectFamily(document)
        try:
            filterByFamily = FILTER_MAP[outputExt]
        except KeyError:
            raise Exception("unknown output format: '%s'" % outputExt)
        try:
            return filterByFamily[family]
        except KeyError:
            raise Exception("unsupported conversion: from '%s' to '%s'" % (family, outputExt))

    def _detectFamily(self, document):
        """Return the FAMILY_* constant for the first service the document supports.

        Raises:
            Exception: if none of the four known services is supported.
        """
        if document.supportsService("com.sun.star.text.GenericTextDocument"):
            # NOTE: a GenericTextDocument is either a TextDocument, a WebDocument, or a GlobalDocument
            # but this further distinction doesn't seem to matter for conversions
            return FAMILY_TEXT
        if document.supportsService("com.sun.star.sheet.SpreadsheetDocument"):
            return FAMILY_SPREADSHEET
        if document.supportsService("com.sun.star.presentation.PresentationDocument"):
            return FAMILY_PRESENTATION
        if document.supportsService("com.sun.star.drawing.DrawingDocument"):
            return FAMILY_DRAWING
        raise Exception("unknown document family: %s" % document)

    def _getFileExt(self, path):
        """Return the lower-cased extension of path without the dot.

        Returns an empty string when path has no extension.
        """
        # splitext always returns a string, so no None check is needed.
        return splitext(path)[1][1:].lower()

    def _toFileUrl(self, path):
        """Return the file URL for path after making it absolute."""
        return uno.systemPathToFileUrl(abspath(path))

    def _toProperties(self, **args):
        """Return a tuple of PropertyValue objects, one per keyword argument."""
        props = []
        for key in args:
            prop = PropertyValue()
            prop.Name = key
            prop.Value = args[key]
            props.append(prop)
        return tuple(props)

if __name__ == "__main__":
    # Command-line entry point: convert argv[1] into argv[2].
    # Exit status: 255 on wrong usage, 1 on a missing input file or a
    # failed conversion.
    from sys import argv, exit

    # print(...) with a single argument and "except ... as ..." behave the
    # same on Python 2.6+ and Python 3, unlike the statement forms.
    if len(argv) < 3:
        print("USAGE: python %s <input-file> <output-file>" % argv[0])
        exit(255)
    if not isfile(argv[1]):
        print("no such input file: %s" % argv[1])
        exit(1)

    try:
        converter = DocumentConverter()
        converter.convert(argv[1], argv[2])
    except Exception as exception:
        print("ERROR!" + str(exception))
        exit(1)
导入uno 从os.path导入abspath、isfile、splitext 从com.sun.star.beans导入PropertyValue 从com.sun.star.task导入ErrorCodeIOException 从com.sun.star.connection导入NoConnectionException FAMILY_TEXT=“TEXT” 族\ u电子表格=“电子表格” 家庭展示=“展示” 族_DRAWING=“绘图” 默认\u OPENOFFICE\u端口=8100 过滤器映射={ “pdf”:{ 家庭文本:“writer\u pdf\u导出”, 家庭电子表格:“calc\u pdf\u导出”, 家庭演示:“impress\u pdf\u导出”, 族\图形:“绘制\ pdf \导出” }, “html”:{ 家庭文本:“HTML(StarWriter)”, 家庭电子表格:“HTML(StarCalc)”, 家庭演示:“印象”\u html\u导出” }, “odt”:{FAMILY_TEXT:“writer8”}, “doc”:{家庭文字:“MS Word 97”}, “rtf”:{FAMILY_TEXT:“富文本格式”}, “txt”:{FAMILY_TEXT:“TEXT”}, “docx”:{FAMILY_TEXT:“MS Word 2007 XML”}, “ods”:{家庭电子表格:“calc8”}, “xls”:{家庭电子表格:“MS Excel 97”}, “odp”:{家庭介绍:“印象8”}, “ppt”:{家庭演示文稿:“MS PowerPoint 97”}, “swf”:{家庭介绍:“印象深刻的闪光输出”} } 类文档转换器: def u u init u uuu(self,port=DEFAULT_OPENOFFICE_port): localContext=uno.getComponentContext() resolver=localContext.ServiceManager.createInstanceWithContext(“com.sun.star.bridge.UnoUrlResolver”,localContext) 尝试: self.context=resolver.resolve(“uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext”%port) 除无连接例外: 引发异常,“无法连接到端口%s“%port”上的OpenOffice.org self.desktop=self.context.ServiceManager.createInstanceWithContext(“com.sun.star.frame.desktop”,self.context) def转换(自身、输入文件、输出文件): inputUrl=self.\u文件URL(inputFile) outputUrl=self.\u toFileUrl(outputFile) document=self.desktop.loadComponentFromURL(inputUrl,“\u blank”,0,self.\u TopProperties(Hidden=True)) #setPropertyValue(“DocumentTitle”、“saf”)TODO:检查如何设置此选项,并将文档更新模式设置为完全更新 如果自检测家庭(文档)=家庭文本: index=document.getDocumentIndexes() 对于范围(0,index.getCount())中的i: index=index.getByIndex(i) index.update() 尝试: document.refresh() 除属性错误外: 通过 index=document.getDocumentIndexes() 对于范围(0,index.getCount())中的i: index=index.getByIndex(i) index.update() outputExt=self.\u getFileExt(outputFile) filterName=self.\u filterName(文档,outputExt) 尝试: document.storeToURL(outputUrl,self.\u-toProperty(FilterName=FilterName)) 最后: 文档关闭(True) 定义过滤器名称(自身、文档、输出文本): 家庭=自身。\u检测家庭(文件) 尝试: 
filterByFamily=过滤器映射[outputExt] 除KeyError外: 引发异常,“未知输出格式:'%s'%OutputText” 尝试: 返回过滤器家庭[家庭] 除KeyError外: 引发异常,“不支持的转换:从'%s'到'%s'”(系列,outputExt) def_detectFamily(自身、文档): if document.supportsService(“com.sun.star.text.GenericTextDocument”): #注意:GenericTextDocument可以是TextDocument、WebDocument或GlobalDocument #但这种进一步的区别似乎对转换并不重要 返回族文本 if document.supportsService(“com.sun.star.sheet.SpreadsheetDocument”): 返回家庭电子表格 如果document.supportsService(“com.sun.star.presentation.PresentationDocument”): 返回家庭演示文稿 if document.supportsService(“com.sun.star.drawing.DrawingDocument”): 返回族图 引发异常,“未知文档系列:%s”%document def_getFileExt(self,path): ext=拆分ext(路径)[1] 如果ext不是None: return ext[1:][.lower() def(自身,路径): 返回uno.systemPathToFileUrl(abspath(path)) 定义属性(自身,**参数): 道具=[] 对于输入参数: prop=属性值() prop.Name=key 属性值=参数[键]