source: main/trunk/openPLM/plmapp/archive.py @ 939

Revision 939, 14.3 KB checked in by pcosquer, 10 years ago (diff)

add a method to get the content of a document file, useful if it is dynamically generated
document3D now implements this method instead of redefining the "files" view

Line 
1import os.path
2import tarfile
3import itertools
4from cStringIO import StringIO
5import struct, time, sys
6import binascii, stat
7from zipfile import ZipInfo, ZIP_STORED, ZIP_DEFLATED, LargeZipFile, ZIP64_LIMIT
8
9try:
10    import zlib # We may need its compression method
11    crc32 = zlib.crc32
12except ImportError:
13    zlib = None
14    crc32 = binascii.crc32
15
def get_available_name(name, exiting_files):
    """
    Returns a file name based on *name* that is not in *exiting_files*.

    If *name* is already taken, an underscore and a number (starting at 1)
    are inserted before the file extension, if one exists (``foo.txt`` ->
    ``foo_1.txt``, ``foo_2.txt``...), until the generated name is free.

    :param name: wanted file name; its directory part, if any, is kept
    :param exiting_files: container of the names that are already used,
                          it is only tested with ``in`` and is not modified
    :return: *name* if it is available, otherwise the first free variant
    """
    dir_name, file_name = os.path.split(name)
    # file_ext includes the dot (it is empty if there is no extension).
    file_root, file_ext = os.path.splitext(file_name)
    counter = itertools.count(1)
    while name in exiting_files:
        # next() works with python 2.6+ and python 3, unlike counter.next()
        candidate = "%s_%s%s" % (file_root, next(counter), file_ext)
        name = os.path.join(dir_name, candidate)
    return name
30
#: Pseudo compression method: each file is stored or deflated according to
#: its extension (see :data:`STORED_FORMATS`)
ZIP_AUTO = -1

#: Extensions (lower case, **without** the leading dot) of the formats that
#: are stored uncompressed when :data:`ZIP_AUTO` is used.
#: Lookups are done with the extension stripped of its dot, so an entry
#: written with a dot would never match.
STORED_FORMATS = set((
    "zip", "gz", "bz2", "tgz", "xz", "rar", "zipx", # archives
    "png", "gif", "jpg", "jpeg", "svgz", # images
    "odt", "odf", "ods", "odm", "ott", "odp", "otp", # openDocument
    "odg",
    "docx", "docm", "xlsx", "xlsm", "pptx", "pptm", "dotx", # openXML
    "flac", "ogg", "mp3", "m4a", "ace", "aac", "m4p", "mpa", # audio
    "mp2", "ra", "rm",
    "avi", "dat", "mpeg", "mpg", "mkv", "mov", "wmv", # video
    "flv", "3gp", "aaf", "ram",
))
46
# Constants taken from the zipfile module.
# The magic numbers are byte literals: this is the same value as a plain
# string with python 2 and it is what the "4s" struct format requires with
# python 3.

# Number of entries from which the ZIP64 end-of-archive records are written
ZIP_FILECOUNT_LIMIT = 1 << 16
# Maximum length of the archive comment (longer comments are truncated)
ZIP_MAX_COMMENT = (1 << 16) - 1
# The central directory file header structure, magic number and size
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)
# The "end of central directory" structure, magic number and size
structEndArchive = "<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
68
class IterZipFile:
    """ A write-only ZipFile that does not write to a file but yields
    its output.

    Example::

        z = IterZipFile()
        for buf in z.write(filename, arcname):
            # do stuff with buf
        for buf in z.close():
            # do stuff with buf

    :meth:`write` and :meth:`close` are generators: nothing is produced
    until they are iterated, and they must be fully consumed, in order, to
    get a valid archive.

    The code is mostly based on :class:`zipfile.ZipFile`.

    :param compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib)
                        or ZIP_AUTO (compression or not according to the filename).
    :param allowZip64: if True ZipFile will create files with ZIP64 extensions when
                    needed, otherwise it will raise an exception when this would
                    be necessary.
    :raises RuntimeError: if *compression* is not supported or requires the
                          missing zlib module
    """

    def __init__(self, compression=ZIP_AUTO, allowZip64=False):

        if compression == ZIP_STORED:
            pass
        elif compression in (ZIP_DEFLATED, ZIP_AUTO):
            if not zlib:
                raise RuntimeError(
                      "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")

        self._allowZip64 = allowZip64
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = "w"
        self.comment = b''      # Archive comment, yielded by close()
        # Number of bytes accounted for so far, it is used as the offset
        # of the next header
        self.tell = 0

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive.

        :raises RuntimeError: if the compression method of *zinfo* is not
                              supported or requires the missing zlib module
        :raises LargeZipFile: if ZIP64 extensions would be required but are
                              not allowed
        """
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                sys.stdout.write("Duplicate name: %s\n" % zinfo.filename)
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError(
                  "That compression method is not supported")
        if zinfo.file_size > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
        if zinfo.header_offset > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Zipfile size would require ZIP64 extensions")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        This is a generator that yields the local file header, the
        (possibly compressed) content of the file and a data descriptor.
        For a directory, only the header is yielded.

        :param filename: path of the file (or directory) to add
        :param arcname: name of the entry in the archive (default: *filename*)
        :param compress_type: compression method of this entry, one of
                              ZIP_STORED, ZIP_DEFLATED and ZIP_AUTO
                              (default: the method given to the constructor)
        """

        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes

        if compress_type is None:
            compress_type = self.compression
        if compress_type == ZIP_AUTO:
            # the choice is based on the extension of the file on disk
            ext = os.path.splitext(filename)[1].lower()
            if ext and ext[1:] in STORED_FORMATS:
                compress_type = ZIP_STORED
            else:
                compress_type = ZIP_DEFLATED
        zinfo.compress_type = compress_type

        # the size reported by stat is only used by the ZIP64 check
        zinfo.file_size = st.st_size
        zinfo.header_offset = self.tell    # Start of header bytes

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            # No data and no data descriptor follow the header of a
            # directory, so the "data descriptor" flag must not be set.
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            header = zinfo.FileHeader()
            self.tell += len(header)
            yield header
            return

        # The CRC and the sizes are only known once the file has been read:
        # bit 3 tells that they are given by a data descriptor written
        # after the data.
        zinfo.flag_bits |= 0x08
        CRC = 0
        compress_size = 0
        file_size = 0
        fp = open(filename, "rb")
        try:
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = zinfo.compress_size = zinfo.file_size = 0
            header = zinfo.FileHeader()
            self.tell += len(header)
            yield header
            if zinfo.compress_type == ZIP_DEFLATED:
                # negative window size: raw deflate stream, without the
                # zlib header and checksum
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size += len(buf)
                CRC = crc32(buf, CRC) & 0xffffffff
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size += len(buf)
                # the compressor may buffer its input and return nothing
                if buf:
                    yield buf
        finally:
            # also executed if the generator is closed before its end
            fp.close()
        if cmpr:
            buf = cmpr.flush()
            compress_size += len(buf)
            if buf:
                yield buf
        else:
            compress_size = file_size
        zinfo.compress_size = compress_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # write the data descriptor
        # NOTE(review): "<LLL" can not hold sizes over 4 GiB, struct.pack
        # would fail for such files even if allowZip64 is set -- to confirm.
        data_descriptor = struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size)
        self.tell += zinfo.compress_size + len(data_descriptor)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        yield data_descriptor

    def close(self):
        """Yield the ending records of the archive: the central directory,
        the ZIP64 end-of-archive records if they are needed, the
        end-of-archive record and the archive comment."""

        count = 0
        pos1 = self.tell
        for zinfo in self.filelist:         # write central directory
            count = count + 1
            dt = zinfo.date_time
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
            extra = []
            if zinfo.file_size > ZIP64_LIMIT \
                    or zinfo.compress_size > ZIP64_LIMIT:
                extra.append(zinfo.file_size)
                extra.append(zinfo.compress_size)
                file_size = 0xffffffff
                compress_size = 0xffffffff
            else:
                file_size = zinfo.file_size
                compress_size = zinfo.compress_size

            if zinfo.header_offset > ZIP64_LIMIT:
                extra.append(zinfo.header_offset)
                header_offset = 0xffffffff
            else:
                header_offset = zinfo.header_offset

            extra_data = zinfo.extra
            if extra:
                # Append a ZIP64 field to the extra's
                extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                extract_version = max(45, zinfo.extract_version)
                create_version = max(45, zinfo.create_version)
            else:
                extract_version = zinfo.extract_version
                create_version = zinfo.create_version

            filename, flag_bits = zinfo._encodeFilenameFlags()
            fields = (stringCentralDir, create_version,
                 zinfo.create_system, extract_version, zinfo.reserved,
                 flag_bits, zinfo.compress_type, dostime, dosdate,
                 zinfo.CRC, compress_size, file_size,
                 len(filename), len(extra_data), len(zinfo.comment),
                 0, zinfo.internal_attr, zinfo.external_attr,
                 header_offset)
            try:
                centdir = struct.pack(structCentralDir, *fields)
            except DeprecationWarning:
                # dump the offending values before re-raising
                sys.stderr.write("%s\n" % ((structCentralDir,) + fields,))
                raise
            self.tell += (len(centdir) + len(filename) + len(extra_data)
                          + len(zinfo.comment))
            for chunk in (centdir, filename, extra_data, zinfo.comment):
                # extra data and comment are often empty: skip them
                if chunk:
                    yield chunk

        pos2 = self.tell
        # Write end-of-zip-archive record
        centDirCount = count
        centDirSize = pos2 - pos1
        centDirOffset = pos1
        if (centDirCount >= ZIP_FILECOUNT_LIMIT or
            centDirOffset > ZIP64_LIMIT or
            centDirSize > ZIP64_LIMIT):
            # Need to write the ZIP64 end-of-archive records
            zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
            yield zip64endrec

            # pos2 is the offset of the ZIP64 end-of-archive record
            zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
            yield zip64locrec
            centDirCount = min(centDirCount, 0xFFFF)
            centDirSize = min(centDirSize, 0xFFFFFFFF)
            centDirOffset = min(centDirOffset, 0xFFFFFFFF)

        # check for valid comment length
        if len(self.comment) > ZIP_MAX_COMMENT:
            if self.debug > 0:
                sys.stderr.write(
                    'Archive comment is too long; truncating to %d bytes\n'
                    % ZIP_MAX_COMMENT)
            self.comment = self.comment[:ZIP_MAX_COMMENT]

        endrec = struct.pack(structEndArchive, stringEndArchive,
                             0, 0, centDirCount, centDirCount,
                             centDirSize, centDirOffset, len(self.comment))
        yield endrec
        if self.comment:
            yield self.comment
319
320
def generate_tarfile(files):
    """
    Returns a generator that yields *files* as a tar file.

    This generator does **not** create temporary files and is designed to not
    consume too much memory so it can be used to serve efficiently a tar file
    of large files.

    For each file, the header is yielded first, then the content (read by
    chunks) and, if needed, the NUL bytes that pad the content to a whole
    number of tar blocks. Files that share the same name are renamed with
    :func:`get_available_name`.

    :param files: a sequence of :class:`.DocumentFile`
    """
    # this TarFile is only used to build the headers (gettarinfo)
    fake_file = StringIO()
    tf = tarfile.open(mode="w", fileobj=fake_file)
    filenames = set()
    for df in files:
        filename = get_available_name(df.filename, filenames)
        filenames.add(filename)
        info = tf.gettarinfo(df.file.path, filename)
        f, size = df.document.get_leaf_object().get_content_and_size(df)
        try:
            # change the name of the owner
            info.uname = info.gname = df.document.owner.username
            # the content may be dynamically generated: trust the size
            # returned with it rather than the size of the stored file
            info.size = size
            # yields the header
            yield info.tobuf()
            # yields the content of the file
            while True:
                s = f.read(1024 * 8)
                if not s:
                    break
                yield s
            # pads the content with NUL bytes up to the next block boundary
            remainder = info.size % tarfile.BLOCKSIZE
            if remainder > 0:
                yield (tarfile.NUL * (tarfile.BLOCKSIZE - remainder))
        finally:
            f.close()
    # yields the nul blocks that mark the end of the tar file
    yield (tarfile.NUL * tarfile.BLOCKSIZE * 2)
358
359
def generate_zipfile(files):
    """
    Returns a generator that yields *files* as a zip file.

    This generator does **not** create temporary files and is designed to not
    consume too much memory so it can be used to serve efficiently a zip file
    of large files.

    The archive is built by an :class:`IterZipFile` created with its default
    arguments. Files that share the same name are renamed with
    :func:`get_available_name`.

    :param files: a sequence of :class:`.DocumentFile`
    """
    zf = IterZipFile()
    filenames = set()
    for df in files:
        filename = get_available_name(df.filename, filenames)
        filenames.add(filename)
        # the size is not needed: IterZipFile reads the file from its path
        f, _size = df.document.get_leaf_object().get_content_and_size(df)
        try:
            # f must have a name attribute that is the path of its content
            for s in zf.write(f.name, filename):
                yield s
        finally:
            f.close()
    # yields the central directory and the end-of-archive records
    for s in zf.close():
        yield s
385
# maps the name of an archive format to the function that generates it
_generators = {
    "zip" : generate_zipfile,
    "tar" : generate_tarfile,
}

#: List of available archive formats (currently: ``zip`` and ``tar``).
# list() returns a real list with python 2 and python 3 (keys() is a view
# with python 3)
ARCHIVE_FORMATS = list(_generators)
393
def generate_archive(files, format):
    """
    Returns the result of the function registered for *format* in
    ``_generators``, called with *files*.

    :param files: files to archive, passed unchanged to that function
    :param format: a key of ``_generators``
    :raises KeyError: if *format* is not a key of ``_generators``
    """
    build_archive = _generators[format]
    return build_archive(files)
396
Note: See TracBrowser for help on using the repository browser.