Warning: file_get_contents(/data/phpspider/zhask/data//catemap/8/swift/19.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Python PyPDF合并和写入问题_Python_Pdf_Merge_Pypdf - Fatal编程技术网

Python PyPDF合并和写入问题

Python PyPDF合并和写入问题,python,pdf,merge,pypdf,Python,Pdf,Merge,Pypdf,使用此选项时出现意外错误。第一部分来自我在网上找到的一个脚本,我试图用它来提取PDF大纲中确定的特定部分。除了在output.write(outputfile1)上写着: PdfReadError:字典中有多个定义 还有人碰到这个吗?请原谅所有不必要的打印s结尾。:) 解决了。必须修改generic.py中的第534行。这里的信息:面对同样的问题,只需忽略错误并传递即可解决。if not data.get(key):data[key]=value elif pdf.strict:pass#不允许

运行下面这段代码时出现了意外错误。第一部分来自我在网上找到的一个脚本,我想用它提取 PDF 大纲(书签)中标识的特定章节。其他部分都运行正常,只是在执行
output.write(outputfile1)
时报错:

PdfReadError:字典中有多个定义

还有人碰到这个吗?请原谅结尾处所有不必要的 print 语句。:)


解决了。必须修改 generic.py 中的第 534 行。这里的信息:(原文链接缺失)

面对同样的问题,只需忽略该错误并直接 pass 即可解决:

    if not data.get(key):
        data[key] = value
    elif pdf.strict:
        pass
        # 不允许多个键定义
        # raise utils.PdfReadError("Multiple definitions in dictionary at byte %s for key %s" \
        #     % (utils.hexStr(stream.tell()), key))

是否有人发现了导致此错误的原因?
import pyPdf
import glob

class Darrell(pyPdf.PdfFileReader):
    """PDF reader that can map outline (bookmark) titles to page indexes."""

    def getDestinationPageNumbers(self):
        """Return a dict mapping each outline title to a page index.

        The outline from ``getOutlines()`` is walked recursively to find the
        id number of the page each destination points at.  The page tree is
        then walked from the document root to assign every page id a
        zero-based index (the number of ``/Page`` nodes visited before it).

        A title whose page id is not found in the page tree maps to the
        string ``'???'`` instead of an integer.  When several outline entries
        share a title, only one of them survives in the returned dict.
        """

        def collect_destinations(entries, found=None):
            # Build {(object identity, title): page id number}.  Identity is
            # part of the key so equally-titled entries stay distinct here.
            found = {} if found is None else found
            for entry in entries:
                if isinstance(entry, pyPdf.pdf.Destination):
                    found[(id(entry), entry.title)] = entry.page.idnum
                    continue
                if isinstance(entry, list):
                    # Nested lists hold the children of an outline entry.
                    collect_destinations(entry, found)
            return found

        def index_pages(node=None, index_by_id=None, seen_pages=None):
            # Build {id number: count of /Page nodes visited so far}.
            # ``seen_pages`` is a list used as a shared counter across the
            # recursion: one element is appended per /Page node.
            if index_by_id is None:
                index_by_id = {}
            if node is None:
                # First call: start from the root of the page tree.
                seen_pages = []
                node = self.trailer["/Root"].getObject()["/Pages"].getObject()
            kind = node["/Type"]
            if kind == "/Page":
                seen_pages.append(1)
            elif kind == "/Pages":
                for child in node["/Kids"]:
                    # Recorded before descending, so a page gets the index of
                    # its own position (intermediate /Pages nodes get the
                    # index of their first page).
                    index_by_id[child.idnum] = len(seen_pages)
                    index_pages(child.getObject(), index_by_id, seen_pages)
            return index_by_id

        destination_ids = collect_destinations(self.getOutlines())
        page_index_by_id = index_pages()

        # Drop the identity half of the key and translate page ids to indexes.
        return dict(
            (key[1], page_index_by_id.get(page_id, '???'))
            for key, page_id in destination_ids.iteritems()
        )

# For every PDF in the current directory: list its outline destinations, then
# copy the pages of the "CATEGORY 1" section (from the "CATEGORY 1" bookmark
# up to and including the page of the "CATEGORY 2" bookmark) into a new file
# named "<prefix>Category1_data.pdf".
for fileName in glob.glob("*.pdf"):
    output = pyPdf.PdfFileWriter()
    print(fileName)
    # The source stays open until the output has been written.
    # NOTE(review): presumably the writer still reads page data from the
    # source stream during write() - confirm against the pyPdf docs.
    with open(fileName, 'rb') as source:
        pdf = Darrell(source)
        template = '%-5s  %s'
        print(template % ('page', 'title'))

        # Computed once and reused for both the listing and the search.
        destinations = pdf.getDestinationPageNumbers()
        # Unresolved destinations carry the placeholder '???' rather than an
        # int; keep them apart so "p + 1" below never sees a string.
        resolved = sorted(
            (p, t) for t, p in destinations.items() if isinstance(p, int))
        unresolved = sorted(
            t for t, p in destinations.items() if not isinstance(p, int))

        for p, t in resolved:
            # Page indexes are zero-based; show them one-based.
            print(template % (p + 1, t))
        for t in unresolved:
            print(template % ('???', t))

        # Reset per file so a missing bookmark cannot silently reuse the
        # values found in a previous file.
        startpg = endpg = None
        for p, t in resolved:
            if t == "CATEGORY 1":
                startpg = p + 1
                print("%s is the first page of Category 1." % startpg)
            elif t == "CATEGORY 2":
                # Sibling check, not nested: a title cannot equal both names.
                endpg = p + 1
                print("%s is the last page of Category 1." % endpg)

        if startpg is None or endpg is None:
            print("%s: CATEGORY 1 / CATEGORY 2 bookmark not found, skipping."
                  % fileName)
            continue

        print("%s %s" % (startpg, endpg))
        # startpg/endpg are one-based and inclusive, while getPage() takes a
        # zero-based index: one-based pages startpg..endpg are indexes
        # startpg-1..endpg-1.
        pagenums = list(range(startpg - 1, endpg))
        print(pagenums)
        for i in pagenums:
            output.addPage(pdf.getPage(i))

        # NOTE(review): the [:-13] slice assumes a fixed 13-character tail on
        # the input file name - confirm against the real naming scheme.
        fileName2 = "%sCategory1_data.pdf" % (str(fileName[:-13]))
        print("%s has %s pages." % (fileName2, output.getNumPages()))
        # "with" closes the output even if write() raises.
        with open(fileName2, 'wb') as outputfile1:
            output.write(outputfile1)