Source code for plmapp.utils.archive

import os.path
import tarfile
import itertools
from cStringIO import StringIO
import struct, time, sys
import binascii, stat
from zipfile import ZipInfo, ZIP_STORED, ZIP_DEFLATED, LargeZipFile, ZIP64_LIMIT

try:
    import zlib # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    zlib = None
    crc32 = binascii.crc32
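
Whichever implementation is picked, the checksums are identical; an
illustrative check (not part of the original module)::

    >>> import zlib, binascii
    >>> zlib.crc32("spam") == binascii.crc32("spam")
    True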

def get_available_name(name, existing_files):
    """
    Returns a filename built from *name* that does not clash with any
    name in *existing_files*.
    """
    dir_name, file_name = os.path.split(name)
    file_root, file_ext = os.path.splitext(file_name)
    # If the filename already exists, add an underscore and a number (before
    # the file extension, if one exists) to the filename until the generated
    # filename doesn't exist.
    count = itertools.count(1)
    while name in existing_files:
        # file_ext includes the dot.
        name = os.path.join(dir_name, "%s_%s%s" % (file_root, count.next(),
                                                   file_ext))
    return name
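
For example, the renaming scheme behaves as follows (illustrative doctest,
not part of the original module)::

    >>> existing = set(["report.pdf", "report_1.pdf"])
    >>> get_available_name("report.pdf", existing)
    'report_2.pdf'
    >>> get_available_name("new.pdf", existing)
    'new.pdf'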
#: compress files or not according to their extension
ZIP_AUTO = -1

#: formats that are stored uncompressed
STORED_FORMATS = set((
    "zip", "gz", "bz2", "tgz", "xz", "rar", "zipx", # archives
    "png", "gif", "jpg", "jpeg", "svgz", # images
    "odt", "odf", "ods", "odm", "ott", "odp", "otp", # openDocument
    "odg",
    "docx", "docm", "xlsx", "xlsm", "pptx", "pptm", "dotx", # openXML
    "flac", "ogg", "mp3", "m4a", "ace", "aac", "m4p", "mpa", # audio
    "mp2", "ra", "rm",
    "avi", "dat", "mpeg", "mpg", "mkv", "mov", "wmv", # video
    "flv", "3gp", "aaf", "ram",
))

# constants taken from the zipfile module
ZIP_FILECOUNT_LIMIT = 1 << 16
ZIP_MAX_COMMENT = (1 << 16) - 1

structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
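
When ``ZIP_AUTO`` is selected, :meth:`IterZipFile.write` (below) uses
:data:`STORED_FORMATS` to decide between storing and deflating each file.
The test it applies boils down to this sketch (the ``guess_compression``
helper is hypothetical, shown only for illustration)::

    def guess_compression(filename):
        # already-compressed formats are stored as-is, the rest is deflated
        ext = os.path.splitext(filename)[1].lower()
        return ZIP_STORED if ext and ext[1:] in STORED_FORMATS else ZIP_DEFLATED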
class IterZipFile:
    """
    A write-only ZipFile that does not write to a file but yields its
    output.

    Example::

        z = IterZipFile()
        for buf in z.write(filename, arcname):
            # do stuff with buf
        for buf in z.close():
            # do stuff with buf

    The code is mostly based on :class:`zipfile.ZipFile`.

    :param compression: ZIP_STORED (no compression) or ZIP_DEFLATED
        (requires zlib) or ZIP_AUTO (compression or not according to
        the filename)
    :param allowZip64: if True, IterZipFile will create files with ZIP64
        extensions when needed, otherwise it will raise an exception
        when this would be necessary
    """

    def __init__(self, compression=ZIP_AUTO, allowZip64=False):
        if compression == ZIP_STORED:
            pass
        elif compression in (ZIP_DEFLATED, ZIP_AUTO):
            if not zlib:
                raise RuntimeError,\
                      "Compression requires the (missing) zlib module"
        else:
            raise RuntimeError, "That compression method is not supported"

        self._allowZip64 = allowZip64
        self.debug = 0          # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = "w"
        self.comment = ''
        self.tell = 0

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive."""
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print "Duplicate name:", zinfo.filename
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError, \
                  "Compression requires the (missing) zlib module"
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError, \
                  "That compression method is not supported"
        if zinfo.file_size > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
        if zinfo.header_offset > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile(
                      "Zipfile size would require ZIP64 extensions")
    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname."""
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = (st[0] & 0xFFFF) << 16L   # Unix attributes
        if self.compression == ZIP_AUTO:
            ext = os.path.splitext(filename)[1].lower()
            compression = ZIP_STORED if ext and ext[1:] in STORED_FORMATS \
                    else ZIP_DEFLATED
        else:
            compression = self.compression
        if compress_type is None:
            zinfo.compress_type = compression
        else:
            zinfo.compress_type = compress_type

        zinfo.file_size = st.st_size
        # bit 3: the CRC and sizes are not known when the header is emitted,
        # they are written in a data descriptor that follows the file data
        zinfo.flag_bits |= 0x08
        zinfo.header_offset = self.tell     # Start of header bytes

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            header = zinfo.FileHeader()
            yield header
            self.tell += len(header)
            return

        fp = open(filename, "rb")
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        header = zinfo.FileHeader()
        yield header
        self.tell += len(header)
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
        else:
            cmpr = None
        while 1:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            yield buf
        fp.close()
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            yield buf
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        self.tell += zinfo.compress_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # write the data descriptor
        data_descriptor = struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size)
        yield data_descriptor
        self.tell += len(data_descriptor)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records."""
        count = 0
        pos1 = self.tell
        for zinfo in self.filelist:         # write central directory
            count = count + 1
            dt = zinfo.date_time
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
            extra = []
            if zinfo.file_size > ZIP64_LIMIT \
                    or zinfo.compress_size > ZIP64_LIMIT:
                extra.append(zinfo.file_size)
                extra.append(zinfo.compress_size)
                file_size = 0xffffffff
                compress_size = 0xffffffff
            else:
                file_size = zinfo.file_size
                compress_size = zinfo.compress_size

            if zinfo.header_offset > ZIP64_LIMIT:
                extra.append(zinfo.header_offset)
                header_offset = 0xffffffffL
            else:
                header_offset = zinfo.header_offset

            extra_data = zinfo.extra
            if extra:
                # Append a ZIP64 field to the extra's
                extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                extract_version = max(45, zinfo.extract_version)
                create_version = max(45, zinfo.create_version)
            else:
                extract_version = zinfo.extract_version
                create_version = zinfo.create_version

            try:
                filename, flag_bits = zinfo._encodeFilenameFlags()
                centdir = struct.pack(structCentralDir,
                 stringCentralDir, create_version,
                 zinfo.create_system, extract_version, zinfo.reserved,
                 flag_bits, zinfo.compress_type, dostime, dosdate,
                 zinfo.CRC, compress_size, file_size,
                 len(filename), len(extra_data), len(zinfo.comment),
                 0, zinfo.internal_attr, zinfo.external_attr,
                 header_offset)
            except DeprecationWarning:
                print >>sys.stderr, (structCentralDir,
                 stringCentralDir, create_version,
                 zinfo.create_system, extract_version, zinfo.reserved,
                 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                 zinfo.CRC, compress_size, file_size,
                 len(zinfo.filename), len(extra_data), len(zinfo.comment),
                 0, zinfo.internal_attr, zinfo.external_attr,
                 header_offset)
                raise
            yield centdir
            yield filename
            yield extra_data
            yield zinfo.comment
            self.tell += (len(centdir) + len(filename) + len(extra_data)
                          + len(zinfo.comment))

        pos2 = self.tell
        # Write end-of-zip-archive record
        centDirCount = count
        centDirSize = pos2 - pos1
        centDirOffset = pos1
        if (centDirCount >= ZIP_FILECOUNT_LIMIT or
            centDirOffset > ZIP64_LIMIT or
            centDirSize > ZIP64_LIMIT):
            # Need to write the ZIP64 end-of-archive records
            zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
            yield zip64endrec

            zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
            yield zip64locrec
            centDirCount = min(centDirCount, 0xFFFF)
            centDirSize = min(centDirSize, 0xFFFFFFFF)
            centDirOffset = min(centDirOffset, 0xFFFFFFFF)

        # check for valid comment length
        if len(self.comment) >= ZIP_MAX_COMMENT:
            if self.debug > 0:
                print 'Archive comment is too long; truncating to %d bytes' \
                      % ZIP_MAX_COMMENT
            self.comment = self.comment[:ZIP_MAX_COMMENT]

        endrec = struct.pack(structEndArchive, stringEndArchive,
                             0, 0, centDirCount, centDirCount,
                             centDirSize, centDirOffset, len(self.comment))
        yield endrec
        yield self.comment
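
A minimal sketch of how the class can be driven, expanding the docstring
example (the file names are hypothetical; the real callers are the
generators below)::

    z = IterZipFile()
    with open("/tmp/example.zip", "wb") as out:
        for name in ("notes.txt", "photo.jpg"):
            for buf in z.write(name):
                out.write(buf)
        for buf in z.close():
            out.write(buf)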
def generate_tarfile(files):
    """
    Returns a generator that yields *files* as a tar file.

    This generator does **not** create temporary files and is designed to
    keep memory consumption low, so that it can efficiently serve a tar
    file of large files.

    :param files: a sequence of :class:`.DocumentFile`
    """
    fake_file = StringIO()
    tf = tarfile.open(mode="w", fileobj=fake_file)
    filenames = set()
    for df in files:
        # yields the header
        filename = get_available_name(df.filename, filenames)
        filenames.add(filename)
        info = tf.gettarinfo(df.file.path, filename)
        f, size = df.document.get_leaf_object().get_content_and_size(df)
        # change the name of the owner
        info.uname = info.gname = df.document.owner.username
        info.size = size
        yield info.tobuf()
        # yields the content of the file
        try:
            s = f.read(512)
            while s:
                yield s
                s = f.read(512)
            # pad the last block with NULs up to BLOCKSIZE
            blocks, remainder = divmod(info.size, tarfile.BLOCKSIZE)
            if remainder > 0:
                yield (tarfile.NUL * (tarfile.BLOCKSIZE - remainder))
        finally:
            f.close()
    # yields the nul blocks that mark the end of the tar file
    yield (tarfile.NUL * tarfile.BLOCKSIZE * 2)
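
Because the generator yields ready-to-send chunks, it can be handed directly
to Django (openPLM is a Django application). A minimal sketch, assuming a
view that already holds the wanted sequence of ``DocumentFile`` objects in
*files*::

    from django.http import HttpResponse

    def download_tar(request, files):
        response = HttpResponse(generate_tarfile(files),
                                content_type="application/x-tar")
        response["Content-Disposition"] = 'attachment; filename="files.tar"'
        return response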
def generate_zipfile(files):
    """
    Returns a generator that yields *files* as a zip file.

    This generator does **not** create temporary files and is designed to
    keep memory consumption low, so that it can efficiently serve a zip
    file of large files.

    :param files: a sequence of :class:`.DocumentFile`
    """
    zf = IterZipFile()
    filenames = set()
    for df in files:
        filename = get_available_name(df.filename, filenames)
        filenames.add(filename)
        f, size = df.document.get_leaf_object().get_content_and_size(df)
        path = f.name
        try:
            for s in zf.write(path, filename):
                yield s
        finally:
            f.close()
    for s in zf.close():
        yield s
_generators = {
    "zip" : generate_zipfile,
    "tar" : generate_tarfile,
}

#: List of available archive formats (currently: ``zip`` and ``tar``).
ARCHIVE_FORMATS = _generators.keys()
def generate_archive(files, format):
    """
    Returns a generator that yields *files* as an archive built by the
    generator bound to *format* (one of :data:`ARCHIVE_FORMATS`).
    """
    return _generators[format](files)
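
The same dispatch serves the zip format; continuing the hypothetical Django
sketch above::

    response = HttpResponse(generate_archive(files, "zip"),
                            content_type="application/zip")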