Python 覆盖ziparchive中的文件

Python 覆盖ziparchive中的文件,python,ziparchive,Python,Ziparchive,我有archive.zip和两个文件:hello.txt和world.txt 我想用带有该代码的新文件覆盖hello.txt文件: import zipfile z = zipfile.ZipFile('archive.zip','a') z.write('hello.txt') z.close() 但它不会覆盖文件,它会以某种方式创建另一个hello.txt实例-查看winzip屏幕截图: 既然没有类似于zipfile.remove()的smth,那么处理此问题的最佳方法是什么 使用

我有
archive.zip
和两个文件:
hello.txt
world.txt

我想用带有该代码的新文件覆盖
hello.txt
文件:

import zipfile

# Open the existing archive in append mode.
z = zipfile.ZipFile('archive.zip','a')
# NOTE: as reported below, this does not replace the stored entry;
# the archive ends up with a second 'hello.txt' instance.
z.write('hello.txt')
z.close()  
但它不会覆盖文件,它会以某种方式创建另一个
hello.txt
实例-查看winzip屏幕截图:


既然没有类似于 zipfile.remove() 的方法,那么处理此问题的最佳方法是什么?

使用python的zipfile模块无法做到这一点。您必须创建一个新的zip文件,把原压缩包中的所有内容连同新修改的文件一起重新压缩进去。

下面是一些代码。但请注意,它效率不高,因为它先解压缩然后再重新压缩所有数据

import tempfile
import zipfile
import shutil
import os

def remove_from_zip(zipfname, *filenames):
    """Rewrite the archive at ``zipfname`` without the given entries.

    Every entry whose name is not listed in ``filenames`` is read from the
    original archive and written into a fresh archive built in a temporary
    directory; the fresh archive is then moved over ``zipfname``.

    Args:
        zipfname: Path of the zip archive to rewrite in place.
        *filenames: Archive member names to leave out of the rewritten file.
    """
    scratch_dir = tempfile.mkdtemp()
    try:
        rebuilt_path = os.path.join(scratch_dir, 'new.zip')
        with zipfile.ZipFile(zipfname, 'r') as source, \
                zipfile.ZipFile(rebuilt_path, 'w') as target:
            # Lazily select the members that survive the rewrite.
            survivors = (info for info in source.infolist()
                         if info.filename not in filenames)
            for info in survivors:
                # Passing the ZipInfo keeps the member's stored metadata.
                target.writestr(info, source.read(info.filename))
        # Both archives are closed here, so the original can be replaced.
        shutil.move(rebuilt_path, zipfname)
    finally:
        # Always drop the scratch directory, even when the rewrite failed.
        shutil.rmtree(scratch_dir)
用法:

# Drop the stale entry first, then append the current 'hello.txt' from disk.
remove_from_zip('archive.zip', 'hello.txt')
with zipfile.ZipFile('archive.zip', 'a') as z:
    z.write('hello.txt')

基于nosklo的答案。UpdateableZipFile是一个继承自ZipFile的类,具有相同的接口,但增加了覆盖文件(通过writestr或write)和删除文件(通过remove_file)的能力。

import os
import shutil
import tempfile
from zipfile import ZipFile, ZIP_STORED, ZipInfo


class UpdateableZipFile(ZipFile):
    """ZipFile subclass that can replace or delete entries already archived.

    Same interface as ``ZipFile``, plus:

    * ``writestr`` / ``write`` targeting a name that already exists in the
      archive record a replacement instead of appending a duplicate entry;
    * ``remove_file`` marks an entry for deletion.

    The update features are only active when the object is used in a
    ``with`` statement. On ``__exit__`` (if any update was recorded) the
    archive is rebuilt into a temporary file which then overrides the
    existing archive.
    """

    class DeleteMarker(object):
        """Sentinel stored in ``_replace`` for entries that must be dropped."""

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
        """Open the archive exactly like ``ZipFile`` and reset update state."""
        super(UpdateableZipFile, self).__init__(file, mode=mode,
                                                compression=compression,
                                                allowZip64=allowZip64)
        # Maps archive name -> temp file holding the new payload,
        # or a DeleteMarker instance when the entry must be removed.
        self._replace = {}
        # Set by __enter__; updates are only tracked inside a with statement.
        self._allow_updates = False

    def _discard_replacement(self, name):
        """Forget any pending update for ``name``, closing its temp file."""
        previous = self._replace.pop(name, None)
        if hasattr(previous, 'close'):
            previous.close()

    def _new_replacement_file(self, name):
        """Register and return an empty temp file as the payload for ``name``.

        A previous pending payload (or delete marker) for the same name is
        discarded, so the latest write wins instead of being appended.
        """
        self._discard_replacement(name)
        temp_file = self._replace[name] = tempfile.TemporaryFile()
        return temp_file

    def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
        """Write ``bytes`` under the given name, overriding an existing entry.

        Args:
            zinfo_or_arcname: Archive name or ``ZipInfo`` of the entry.
            bytes: Payload; text is encoded as UTF-8, as ``ZipFile.writestr``
                does on Python 3.
            compress_type: Forwarded to ``ZipFile.writestr`` for new entries.
                A replaced entry keeps the metadata of the entry it replaces.
        """
        if isinstance(zinfo_or_arcname, ZipInfo):
            name = zinfo_or_arcname.filename
        else:
            name = zinfo_or_arcname
        # If the entry exists it must be overridden at rebuild time: stash
        # the payload in a temp file. Only allowed inside a with statement.
        if self._allow_updates and name in self.namelist():
            payload = bytes
            # The temp file is binary; type(u"") is the text type on both
            # Python 2 and 3 (the parameter shadows the ``bytes`` builtin).
            if isinstance(payload, type(u"")):
                payload = payload.encode("utf-8")
            self._new_replacement_file(name).write(payload)
        # Otherwise just act normally
        else:
            super(UpdateableZipFile, self).writestr(zinfo_or_arcname,
                                                    bytes, compress_type=compress_type)

    def write(self, filename, arcname=None, compress_type=None):
        """Add the file ``filename``, overriding an existing entry ``arcname``.

        Args:
            filename: Path of the file on disk to store.
            arcname: Name inside the archive; defaults to ``filename``.
            compress_type: Forwarded to ``ZipFile.write`` for new entries.
        """
        arcname = arcname or filename
        # Same override rule as writestr: existing entry + with statement.
        if self._allow_updates and arcname in self.namelist():
            # Open the source first so a missing file registers nothing.
            with open(filename, "rb") as source:
                shutil.copyfileobj(source, self._new_replacement_file(arcname))
        # Otherwise just act normally
        else:
            super(UpdateableZipFile, self).write(filename,
                                                 arcname=arcname, compress_type=compress_type)

    def __enter__(self):
        """Enable update tracking and return the archive object."""
        self._allow_updates = True
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the archive, then apply the recorded updates, if any."""
        try:
            # Let the base class close the zip file first so that the
            # central directory is on disk before the archive is re-read.
            super(UpdateableZipFile, self).__exit__(exc_type, exc_val, exc_tb)
            if self._replace:
                self._rebuild_zip()
        finally:
            # In case the rebuild failed, still release all the temp files
            self._close_all_temp_files()
            self._allow_updates = False

    def _close_all_temp_files(self):
        """Close every pending replacement temp file and clear the table."""
        # .values() works on both Python 2 and 3 (itervalues is Py2-only).
        for temp_file in list(self._replace.values()):
            if hasattr(temp_file, 'close'):
                temp_file.close()
        self._replace.clear()

    def remove_file(self, path):
        """Mark the entry ``path`` for deletion at rebuild time.

        The marker is only applied when the object is used in a with
        statement, since the rebuild happens in ``__exit__``.
        """
        # Release a pending replacement payload for the same entry, if any.
        self._discard_replacement(path)
        self._replace[path] = self.DeleteMarker()

    def _rebuild_zip(self):
        """Write a new archive applying ``_replace`` and move it in place."""
        tempdir = tempfile.mkdtemp()
        try:
            temp_zip_path = os.path.join(tempdir, 'new.zip')
            with ZipFile(self.filename, 'r') as zip_read:
                # Create new zip with assigned properties
                with ZipFile(temp_zip_path, 'w', compression=self.compression,
                             allowZip64=self._allowZip64) as zip_write:
                    for item in zip_read.infolist():
                        # Consume the pending update for this entry, if any
                        replacement = self._replace.pop(item.filename, None)
                        # Marked for deletion: do not copy to the new zip
                        if isinstance(replacement, self.DeleteMarker):
                            continue
                        # Marked for replacement: use the temp file payload
                        if replacement is not None:
                            replacement.seek(0)
                            data = replacement.read()
                            # Closing the temp file also deletes it
                            replacement.close()
                        else:
                            data = zip_read.read(item.filename)
                        zip_write.writestr(item, data)
            # Override the archive with the updated one
            shutil.move(temp_zip_path, self.filename)
        finally:
            shutil.rmtree(tempdir)
用法示例:

# Raw string: the Windows path contains backslashes that must not be
# interpreted as escape sequences.
with UpdateableZipFile(r"C:\Temp\Test2.docx", "a") as o:
    # Overwrite an existing entry with in-memory data (bytes, so that it
    # also works on Python 3 where the replacement buffer is binary)
    o.writestr("word/document.xml", b"Some data")
    # Exclude an existing file from the zip
    o.remove_file("word/fontTable.xml")
    # Write a new file (with no conflict) to the zip
    o.writestr("new_file", "more data")
    # Overwrite an existing entry with the contents of a file on disk
    o.write(r"C:\Temp\example.png", "word/settings.xml")

所以,没有高效的方法来覆盖文件吗?也许可以换一个zip模块?无论如何,谢谢你的帮助。@cru3l:这正是我在回答中所说的。您可以调用外部zip工具,也可以为某个zip库编写自己的接口。它对我来说工作得很好,只是
tempfile.mkdtemp()
抛出了一个异常,这可能是因为它试图在服务器上某个脚本没有写入权限的位置写入。如果我把它替换为同一个文件夹中的一个可见的zip文件,它就可以正常工作了。您是在Py3中运行Py2代码,因此出现错误。将
itervalues
更改为
values
。考虑到赞成票数较低,我原本对该代码没有把握,但它工作得很好,代码质量看起来不错,并且符合此问题和一些重复问题的要求。正如@madpysicator所说,对于Python3.7,请用values替换itervalues,但也可能需要将用法调整为
o.writestr(r"word/document.xml", "Some data".encode('utf-8'))
,并可以选择添加
compresslevel=None
参数到writestr和write。我还将writestr中的参数"bytes"替换为"data"。
with UpdateableZipFile("C:\\Users\\mamahajan\\Downloads\\TuningLog_2018_05_17_1_1.zip", "a") as o: zf = zipfile.ZipFile("C:\\Users\\mamahajan\\Downloads\\TuningLog_2018_05_17_1_1.zip") text_files = zf.infolist() list_ = [] for text_file in text_files: print(text_file.filename) df = pd.read_csv(zf.open(text_file.filename), sep='|', low_memory=False) columns = ['actimizeTransactionKey', 'actimizeTransactionIdentity'] df.drop(columns, inplace=True, axis=1) o.writestr(text_file.filename, "这是需要传递df的地方")
我认为您应该提取csv,使用pandas重建它,然后重新插入。可能值得开始您自己的问题,参考这一个。