source: main/trunk/openPLM/plmapp/archive.py @ 1505

Revision 1505, 14.4 KB checked in by zali, 9 years ago (diff)

doc : typo

Line 
1import os.path
2import tarfile
3import itertools
4from cStringIO import StringIO
5import struct, time, sys
6import binascii, stat
7from zipfile import ZipInfo, ZIP_STORED, ZIP_DEFLATED, LargeZipFile, ZIP64_LIMIT
8
# zlib is preferred: it supplies the deflate compressor as well as a CRC-32
# routine. When it cannot be imported, ``zlib`` is bound to ``None`` (tested
# before any compression is attempted) and binascii supplies the CRC-32.
try:
    import zlib
except ImportError:
    zlib = None
    crc32 = binascii.crc32
else:
    crc32 = zlib.crc32
15
def get_available_name(name, exiting_files):
    """
    Returns a file name derived from *name* that is not in *exiting_files*.

    If *name* is not in *exiting_files*, it is returned unchanged. Otherwise
    an underscore and a counter starting at 1 are inserted before the file
    extension, if one exists (``doc.txt`` becomes ``doc_1.txt``, then
    ``doc_2.txt``...), until the generated name is not in *exiting_files*.
    The directory part of *name* is kept.

    :param name: wanted file name
    :param exiting_files: container of the names that are already taken,
                          it is only tested with ``in`` and never modified
    :return: a name that is not in *exiting_files*
    """
    if name not in exiting_files:
        return name
    dir_name, file_name = os.path.split(name)
    # file_ext includes the dot.
    file_root, file_ext = os.path.splitext(file_name)
    # iterating over the counter (instead of calling its ``next`` method)
    # works with Python 2 and Python 3 iterators
    for index in itertools.count(1):
        candidate = os.path.join(
            dir_name, "%s_%s%s" % (file_root, index, file_ext))
        if candidate not in exiting_files:
            return candidate
30
#: Compression method meaning that each file is either stored or deflated
#: according to its extension (see :data:`STORED_FORMATS`)
ZIP_AUTO = -1

#: Extensions of the formats that are stored uncompressed.
#: Entries are lower case and do **not** include the leading dot since they
#: are compared with the lowercased extension of a file stripped of its dot.
STORED_FORMATS = set((
    "zip", "gz", "bz2", "tgz", "xz", "rar", "zipx", # archives
    "png", "gif", "jpg", "jpeg", "svgz", # images
    "odt", "odf", "ods", "odm", "ott", "odp", "otp", # openDocument
    "odg",
    "docx", "docm", "xlsx", "xlsm", "pptx", "pptm", "dotx", # openXML
    "flac", "ogg", "mp3", "m4a", "ace", "aac", "m4p", "mpa", # audio
    "mp2", "ra", "rm",
    "avi", "dat", "mpeg", "mpg", "mkv", "mov", "wmv", # video ("ogg": see audio)
    "flv", "3gp", "aaf", "ram",
))
46
# Limits, record layouts and magic numbers copied from the zipfile module.

# Number of entries from which the ZIP64 end-of-archive records are written
ZIP_FILECOUNT_LIMIT = 0x10000
# Maximum length of the archive comment (longer comments are truncated)
ZIP_MAX_COMMENT = 0xFFFF

# Central directory entry: structure, magic number and size
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# End-of-archive record: structure, magic number and size
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# "Zip64 end of central directory" record: structure, magic number and size
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# "Zip64 end of central directory locator": structure, magic number and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
68
class IterZipFile:
    """ A write-only ZipFile that does not write to a file but yields
    its output.

    Example::

        z = IterZipFile()
        for buf in z.write(filename, arcname):
            # do stuff with buf
        for buf in z.close():
            # do stuff with buf

    The code is mostly based on :class:`zipfile.ZipFile`.

    :meth:`write` and :meth:`close` are generators: nothing happens until
    they are iterated. The size and the CRC of a file are only known once it
    has been read, so the header of a regular file is yielded with these
    fields set to 0 and with bit 3 of its flags set, and the real values
    are yielded after the content of the file (data descriptor).

    :param compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib)
                        or ZIP_AUTO (compression or not according to the filename).
    :param allowZip64: if True ZipFile will create files with ZIP64 extensions when
                    needed, otherwise it will raise an exception when this would
                    be necessary.
    :raises RuntimeError: if *compression* is not supported or requires the
                    zlib module and this module is missing
    """

    def __init__(self, compression=ZIP_AUTO, allowZip64=False):
        if compression in (ZIP_DEFLATED, ZIP_AUTO):
            if not zlib:
                raise RuntimeError(
                    "Compression requires the (missing) zlib module")
        elif compression != ZIP_STORED:
            raise RuntimeError("That compression method is not supported")

        self._allowZip64 = allowZip64
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = "w"
        self.comment = ''
        # number of bytes yielded by write(), it is the offset of the next
        # header
        self.tell = 0

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive.

        A duplicate name is only reported (if ``self.debug`` is set), it is
        not an error.

        :param zinfo: :class:`zipfile.ZipInfo` of the entry about to be written
        :raises RuntimeError: if the compression method of *zinfo* is not
                              supported or requires the missing zlib module
        :raises LargeZipFile: if the size of the file or the offset of its
                              header requires the ZIP64 extensions and they
                              are not allowed
        """
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                sys.stdout.write("Duplicate name: %s\n" % zinfo.filename)
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError("That compression method is not supported")
        if zinfo.file_size > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
        if zinfo.header_offset > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Zipfile size would require ZIP64 extensions")

    def write(self, filename, arcname=None, compress_type=None):
        """Yield the bytes that put the file *filename* into the archive
        under the name *arcname*.

        A directory only yields its header. A regular file yields its
        header, its (compressed or not) content read by chunks of 8 KiB and
        a data descriptor (CRC, compressed size and size).

        :param filename: path of the file (or directory) to add
        :param arcname: name of the entry, *filename* if it is None; the
                        drive and the leading separators are removed
        :param compress_type: compression method of this entry, if it is
                        None, the method given to the constructor is used
                        (ZIP_AUTO is resolved with the extension of
                        *filename*)
        :raises RuntimeError: see :meth:`_writecheck`
        :raises LargeZipFile: see :meth:`_writecheck`
        """
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        date_time = time.localtime(st.st_mtime)[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
        if compress_type is not None:
            zinfo.compress_type = compress_type
        elif self.compression == ZIP_AUTO:
            # extensions are stored in STORED_FORMATS without their dot
            ext = os.path.splitext(filename)[1].lower()
            if ext and ext[1:] in STORED_FORMATS:
                zinfo.compress_type = ZIP_STORED
            else:
                zinfo.compress_type = ZIP_DEFLATED
        else:
            zinfo.compress_type = self.compression

        zinfo.file_size = st.st_size
        zinfo.header_offset = self.tell    # Start of header bytes

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            # A directory has no content and is not followed by a data
            # descriptor: bit 3 of its flags must stay unset.
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            header = zinfo.FileHeader()
            yield header
            self.tell += len(header)
            return

        # bit 3: CRC and sizes are given by the data descriptor that follows
        # the content of the file
        zinfo.flag_bits |= 0x08
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        # the with statement closes the file even if a read fails or if the
        # generator is closed before it is exhausted
        with open(filename, "rb") as fp:
            header = zinfo.FileHeader()
            yield header
            self.tell += len(header)
            if zinfo.compress_type == ZIP_DEFLATED:
                # negative window size: raw deflate stream (no zlib header)
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            while True:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size += len(buf)
                CRC = crc32(buf, CRC) & 0xffffffff
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size += len(buf)
                yield buf
        if cmpr:
            buf = cmpr.flush()
            compress_size += len(buf)
            yield buf
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        self.tell += zinfo.compress_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # write the data descriptor
        # NOTE(review): both sizes are packed as 32-bit values, this
        # presumably fails for a file larger than 4 GiB even if allowZip64
        # is True -- to be confirmed
        data_descriptor = struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size)
        yield data_descriptor
        self.tell += len(data_descriptor)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def close(self):
        """Yield the ending records of the archive: the central directory
        (one entry per written file), the ZIP64 end-of-archive records if
        they are needed, the end-of-archive record and the comment of the
        archive (truncated to ZIP_MAX_COMMENT bytes)."""

        pos1 = self.tell
        for zinfo in self.filelist:         # write central directory
            dt = zinfo.date_time
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
            # values that do not fit in 32 bits are moved to a ZIP64 extra
            # field and replaced by 0xffffffff
            extra = []
            if zinfo.file_size > ZIP64_LIMIT \
                    or zinfo.compress_size > ZIP64_LIMIT:
                extra.append(zinfo.file_size)
                extra.append(zinfo.compress_size)
                file_size = 0xffffffff
                compress_size = 0xffffffff
            else:
                file_size = zinfo.file_size
                compress_size = zinfo.compress_size

            if zinfo.header_offset > ZIP64_LIMIT:
                extra.append(zinfo.header_offset)
                header_offset = 0xffffffff
            else:
                header_offset = zinfo.header_offset

            extra_data = zinfo.extra
            if extra:
                # Append a ZIP64 field to the extra's
                extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                extract_version = max(45, zinfo.extract_version)
                create_version = max(45, zinfo.create_version)
            else:
                extract_version = zinfo.extract_version
                create_version = zinfo.create_version

            try:
                filename, flag_bits = zinfo._encodeFilenameFlags()
                centdir = struct.pack(structCentralDir,
                 stringCentralDir, create_version,
                 zinfo.create_system, extract_version, zinfo.reserved,
                 flag_bits, zinfo.compress_type, dostime, dosdate,
                 zinfo.CRC, compress_size, file_size,
                 len(filename), len(extra_data), len(zinfo.comment),
                 0, zinfo.internal_attr, zinfo.external_attr,
                 header_offset)
            except DeprecationWarning:
                # dump the values that could not be packed before re-raising
                sys.stderr.write("%s\n" % ((structCentralDir,
                 stringCentralDir, create_version,
                 zinfo.create_system, extract_version, zinfo.reserved,
                 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                 zinfo.CRC, compress_size, file_size,
                 len(zinfo.filename), len(extra_data), len(zinfo.comment),
                 0, zinfo.internal_attr, zinfo.external_attr,
                 header_offset),))
                raise
            yield centdir
            yield filename
            yield extra_data
            yield zinfo.comment
            self.tell += len(centdir) + len(filename) + len(extra_data) + len(zinfo.comment)

        pos2 = self.tell
        # Write end-of-zip-archive record
        centDirCount = len(self.filelist)
        centDirSize = pos2 - pos1
        centDirOffset = pos1
        if (centDirCount >= ZIP_FILECOUNT_LIMIT or
            centDirOffset > ZIP64_LIMIT or
            centDirSize > ZIP64_LIMIT):
            # Need to write the ZIP64 end-of-archive records
            zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
            yield zip64endrec

            zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
            yield zip64locrec
            centDirCount = min(centDirCount, 0xFFFF)
            centDirSize = min(centDirSize, 0xFFFFFFFF)
            centDirOffset = min(centDirOffset, 0xFFFFFFFF)

        # check for valid comment length
        if len(self.comment) > ZIP_MAX_COMMENT:
            if self.debug > 0:
                sys.stdout.write(
                    'Archive comment is too long; truncating to %d bytes\n'
                    % ZIP_MAX_COMMENT)
            self.comment = self.comment[:ZIP_MAX_COMMENT]

        endrec = struct.pack(structEndArchive, stringEndArchive,
                             0, 0, centDirCount, centDirCount,
                             centDirSize, centDirOffset, len(self.comment))
        yield endrec
        yield self.comment
320
def generate_tarfile(files):
    """
    Returns a generator that yields *files* as a tar file.

    This generator does **not** create temporary files and is designed to not
    consume too much memory so it can be used to serve efficiently a tar file
    of large files.

    For each file, its header is yielded, then its content (by chunks of
    8 KiB) and, if needed, the NUL bytes that complete the last block.
    The content file is always closed, even if the generator is closed
    before it is exhausted.

    :param files: a sequence of :class:`.DocumentFile`
    """
    # same chunk size as IterZipFile.write
    chunk_size = 1024 * 8
    # the TarFile is only used to build the headers (TarInfo objects),
    # it is never asked to add a member to fake_file
    fake_file = StringIO()
    tf = tarfile.open(mode="w", fileobj=fake_file)
    filenames = set()
    for df in files:
        filename = get_available_name(df.filename, filenames)
        filenames.add(filename)
        info = tf.gettarinfo(df.file.path, filename)
        f, size = df.document.get_leaf_object().get_content_and_size(df)
        # f is open from here: everything that may fail or yield is
        # protected by the try block
        try:
            # change the name of the owner
            info.uname = info.gname = df.document.owner.username
            info.size = size
            # yields the header
            yield info.tobuf()
            # yields the content of the file
            s = f.read(chunk_size)
            while s:
                yield s
                s = f.read(chunk_size)
            # pads the content to a multiple of the block size
            remainder = info.size % tarfile.BLOCKSIZE
            if remainder > 0:
                yield (tarfile.NUL * (tarfile.BLOCKSIZE - remainder))
        finally:
            f.close()
    # yields the nul blocks that mark the end of the tar file
    yield (tarfile.NUL * tarfile.BLOCKSIZE * 2)
358
359
def generate_zipfile(files):
    """
    Returns a generator that yields *files* as a zip file.

    This generator does **not** create temporary files and is designed to not
    consume too much memory so it can be used to serve efficiently a zip file
    of large files.

    The archive is built by an :class:`IterZipFile` created with its default
    arguments.

    :param files: a sequence of :class:`.DocumentFile`
    """
    zf = IterZipFile()
    filenames = set()
    for df in files:
        filename = get_available_name(df.filename, filenames)
        filenames.add(filename)
        # the size is not used: IterZipFile.write reads the file at the
        # given path and computes the size itself
        f, _size = df.document.get_leaf_object().get_content_and_size(df)
        try:
            # f.name is read inside the try block so that f is closed
            # even if this attribute is missing
            for s in zf.write(f.name, filename):
                yield s
        finally:
            f.close()
    for s in zf.close():
        yield s
385
# Maps the name of an archive format to the function that generates it.
_generators = {
    "zip": generate_zipfile,
    "tar": generate_tarfile,
}

#: List of available archive formats (currently: ``zip`` and ``tar``).
ARCHIVE_FORMATS = list(_generators)
393
def generate_archive(files, format):
    """
    Returns a generator that yields *files* as an archive.

    :param files: given as is to the function registered for *format*
    :param format: one of the keys of ``_generators`` (see
                   :data:`ARCHIVE_FORMATS`)
    :raises KeyError: if *format* is not a key of ``_generators``
    """
    build = _generators[format]
    return build(files)
396
Note: See TracBrowser for help on using the repository browser.