1 "Read and write ZIP files."
5 import struct, os, time
# Names exported by a star-import of this module.
__all__ = [
    "BadZipfile",
    "error",
    "ZIP_STORED",
    "ZIP_DEFLATED",
    "is_zipfile",
    "ZipInfo",
    "ZipFile",
    "PyZipFile",
]
# struct formats and 4-byte signatures for the three ZIP records this module
# reads and writes.  The packed sizes are fixed (22, 46 and 30 bytes), which
# is why the readers in this file fetch exactly that many bytes per record.
structEndArchive = "<4s4H2lH"       # end-of-archive record, 22 bytes
stringEndArchive = "PK\005\006"     # signature of the end-of-archive record
structCentralDir = "<4s4B4H3l5H2l"  # central directory entry, 46 bytes
stringCentralDir = "PK\001\002"     # signature of a central directory entry
structFileHeader = "<4s2B4H3l2H"    # per-file header, 30 bytes
stringFileHeader = "PK\003\004"     # signature of a per-file header

# Positions of fields in the tuple returned by
# struct.unpack(structCentralDir, ...).
_CD_CREATE_VERSION = 1
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# Positions of fields in the tuple returned by
# struct.unpack(structFileHeader, ...).
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
# Types treated as a path name; ZipFile.__init__ tests type(file) against
# this tuple to tell a path from a file-like object.
_STRING_TYPES = (types.StringType,)
if hasattr(types, "UnicodeType"):
    # Builds that define a unicode type accept unicode path names as well.
    _STRING_TYPES = _STRING_TYPES + (types.UnicodeType,)
76 """Quickly see if file is a ZIP file by checking the magic number.
78 Will not accept a ZIP archive with an ending comment.
81 fpin =
open(filename,
"rb")
85 if endrec[0:4] ==
"PK\005\006" and endrec[-2:] ==
"\000\000":
92 """Class with attributes describing each file in the ZIP archive."""
94 def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
117 """Return the per-file header as a string."""
119 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
120 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
123 CRC = compress_size = file_size = 0
126 compress_size = self.compress_size
127 file_size = self.file_size
128 header = struct.pack(structFileHeader, stringFileHeader,
131 compress_size, file_size,
141 return path.replace(os.sep,
"/")
148 """ Class with methods to open, read, write, close, list zip files.
150 z = ZipFile(file, mode="r", compression=ZIP_STORED)
152 file: Either the path to the file, or a file-like object.
153 If it
is a path, the file will be opened
and closed by ZipFile.
154 mode: The mode can be either read
"r", write "w" or append "a".
155 compression: ZIP_STORED (no compression)
or ZIP_DEFLATED (requires zlib).
158 fp = None # Set here since __del__ checks it
160 def __init__(self, file, mode="r", compression=ZIP_STORED):
161 """Open the ZIP file with mode read "r", write "w" or append "a"."""
162 if compression == ZIP_STORED:
164 elif compression == ZIP_DEFLATED:
167 "Compression requires the (missing) zlib module"
169 raise RuntimeError, "That compression method is not supported"
170 self.debug = 0 # Level of printing: 0 through 3
171 self.NameToInfo = {} # Find file info given name
172 self.filelist = [] # List of ZipInfo instances for archive
173 self.compression = compression # Method of compression
174 self.mode = key = mode[0]
176 # Check if we were passed a file-like object
177 if type(file) in _STRING_TYPES:
180 modeDict = {'
r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
181 self.fp = open(file, modeDict[mode])
185 self.filename = getattr(file, 'name', None)
195 if endrec[0:4] == stringEndArchive
and \
196 endrec[-2:] ==
"\000\000":
206 raise RuntimeError,
'Mode must be "r", "w" or "a"'
208 def _GetContents(self):
209 """Read the directory, making sure we close the file if the format
219 def _RealGetContents(self):
220 """Read in the table of contents for the ZIP file."""
223 filesize = fp.tell() + 22
225 if endrec[0:4] != stringEndArchive
or endrec[-2:] !=
"\000\000":
226 raise BadZipfile,
"File is not a zip file, or ends with a comment"
227 endrec = struct.unpack(structEndArchive, endrec)
231 offset_cd = endrec[6]
232 x = filesize - 22 - size_cd
234 concat = x - offset_cd
236 print "given, inferred, offset", offset_cd, x, concat
241 while total < size_cd:
242 centdir = fp.read(46)
244 if centdir[0:4] != stringCentralDir:
245 raise BadZipfile,
"Bad magic number for central directory"
246 centdir = struct.unpack(structCentralDir, centdir)
249 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
252 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
253 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
254 total = (total + centdir[_CD_FILENAME_LENGTH]
255 + centdir[_CD_EXTRA_FIELD_LENGTH]
256 + centdir[_CD_COMMENT_LENGTH])
257 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
259 (x.create_version, x.create_system, x.extract_version, x.reserved,
260 x.flag_bits, x.compress_type, t, d,
261 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
262 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
264 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
265 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
266 self.filelist.append(x)
271 fp.seek(data.header_offset, 0)
272 fheader = fp.read(30)
273 if fheader[0:4] != stringFileHeader:
274 raise BadZipfile,
"Bad magic number for file header"
275 fheader = struct.unpack(structFileHeader, fheader)
280 data.file_offset = (data.header_offset + 30
281 + fheader[_FH_FILENAME_LENGTH]
282 + fheader[_FH_EXTRA_FIELD_LENGTH])
283 fname = fp.read(fheader[_FH_FILENAME_LENGTH])
284 if fname != data.filename:
285 raise RuntimeError, \
286 'File name in directory "%s" and header "%s" differ.' % (
287 data.filename, fname)
290 """Return a list of file names in the archive."""
293 l.append(data.filename)
297 """Return a list of class ZipInfo instances for files in the
302 """Print a table of contents for the zip file."""
303 print "%-46s %19s %12s" % (
"File Name",
"Modified ",
"Size")
305 date =
"%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
306 print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
309 """Read all the files and check the CRC."""
312 self.
read(zinfo.filename)
314 return zinfo.filename
317 """Return the instance of ZipInfo given 'name'."""
321 """Return file bytes (as a string) for name."""
322 if self.
mode not in (
"r", "a"):
323 raise RuntimeError,
'read() requires mode "r" or "a"'
325 raise RuntimeError, \
326 "Attempt to read ZIP archive that was already closed"
328 filepos = self.fp.tell()
329 self.fp.seek(zinfo.file_offset, 0)
330 bytes = self.fp.read(zinfo.compress_size)
331 self.fp.seek(filepos, 0)
332 if zinfo.compress_type == ZIP_STORED:
334 elif zinfo.compress_type == ZIP_DEFLATED:
336 raise RuntimeError, \
337 "De-compression requires the (missing) zlib module"
339 dc = zlib.decompressobj(-15)
340 bytes = dc.decompress(bytes)
342 ex = dc.decompress(
'Z') + dc.flush()
347 "Unsupported compression method %d for file %s" % \
348 (zinfo.compress_type, name)
349 crc = binascii.crc32(bytes)
351 raise BadZipfile,
"Bad CRC-32 for file %s" % name
354 def _writecheck(self, zinfo):
355 """Check for errors before writing a file to the archive."""
356 if self.NameToInfo.has_key(zinfo.filename):
358 print "Duplicate name:", zinfo.filename
359 if self.
mode not in (
"w",
"a"):
360 raise RuntimeError,
'write() requires mode "w" or "a"'
362 raise RuntimeError, \
363 "Attempt to write ZIP archive that was already closed"
364 if zinfo.compress_type == ZIP_DEFLATED
and not zlib:
365 raise RuntimeError, \
366 "Compression requires the (missing) zlib module"
367 if zinfo.compress_type
not in (ZIP_STORED, ZIP_DEFLATED):
368 raise RuntimeError, \
369 "That compression method is not supported"
371 def write(self, filename, arcname=None, compress_type=None):
372 """Put the bytes from filename into the archive under the name
374 st = os.stat(filename)
375 mtime = time.localtime(st[8])
376 date_time = mtime[0:6]
379 zinfo =
ZipInfo(filename, date_time)
381 zinfo =
ZipInfo(arcname, date_time)
382 zinfo.external_attr = st[0] << 16
383 if compress_type
is None:
386 zinfo.compress_type = compress_type
388 fp =
open(filename,
"rb")
389 zinfo.flag_bits = 0x00
390 zinfo.header_offset = self.fp.tell()
393 zinfo.compress_size = compress_size = 0
394 zinfo.file_size = file_size = 0
395 self.fp.write(zinfo.FileHeader())
396 zinfo.file_offset = self.fp.tell()
397 if zinfo.compress_type == ZIP_DEFLATED:
398 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
403 buf = fp.read(1024 * 8)
406 file_size = file_size + len(buf)
407 CRC = binascii.crc32(buf, CRC)
409 buf = cmpr.compress(buf)
410 compress_size = compress_size + len(buf)
415 compress_size = compress_size + len(buf)
417 zinfo.compress_size = compress_size
419 zinfo.compress_size = file_size
421 zinfo.file_size = file_size
423 position = self.fp.tell()
424 self.fp.seek(zinfo.header_offset + 14, 0)
425 self.fp.write(struct.pack(
"<lll", zinfo.CRC, zinfo.compress_size,
427 self.fp.seek(position, 0)
428 self.filelist.append(zinfo)
432 """Write a file into the archive. The contents is the string
435 zinfo.file_size = len(bytes)
436 zinfo.CRC = binascii.crc32(bytes)
437 if zinfo.compress_type == ZIP_DEFLATED:
438 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
440 bytes = co.compress(bytes) + co.flush()
441 zinfo.compress_size = len(bytes)
443 zinfo.compress_size = zinfo.file_size
444 zinfo.header_offset = self.fp.tell()
445 self.fp.write(zinfo.FileHeader())
446 zinfo.file_offset = self.fp.tell()
448 if zinfo.flag_bits & 0x08:
450 self.fp.write(struct.pack(
"<lll", zinfo.CRC, zinfo.compress_size,
452 self.filelist.append(zinfo)
456 """Call the "close()" method in case the user forgot."""
460 """Close the file, and for mode "w" and "a" write the ending
464 if self.
mode in (
"w",
"a"):
466 pos1 = self.fp.tell()
470 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
471 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
472 centdir = struct.pack(structCentralDir,
473 stringCentralDir, zinfo.create_version,
474 zinfo.create_system, zinfo.extract_version, zinfo.reserved,
475 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
476 zinfo.CRC, zinfo.compress_size, zinfo.file_size,
477 len(zinfo.filename), len(zinfo.extra), len(zinfo.comment),
478 0, zinfo.internal_attr, zinfo.external_attr,
480 self.fp.write(centdir)
481 self.fp.write(zinfo.filename)
482 self.fp.write(zinfo.extra)
483 self.fp.write(zinfo.comment)
484 pos2 = self.fp.tell()
486 endrec = struct.pack(structEndArchive, stringEndArchive,
487 0, 0, count, count, pos2 - pos1, pos1, 0)
488 self.fp.write(endrec)
496 """Class to create ZIP archives with Python library files and packages."""
499 """Add all files from "pathname" to the ZIP archive.
501 If pathname is a package directory, search the directory and
502 all package subdirectories recursively for all *.py and enter
503 the modules into the archive. If pathname is a plain
504 directory, listdir *.py and enter all modules. Else, pathname
505 must be a Python *.py file and the module will be put into the
506 archive. Added modules are always module.pyo or module.pyc.
507 This method will compile the module.py into module.pyc if
510 dir, name = os.path.split(pathname)
511 if os.path.isdir(pathname):
512 initname = os.path.join(pathname,
"__init__.py")
513 if os.path.isfile(initname):
516 basename =
"%s/%s" % (basename, name)
520 print "Adding package in", pathname,
"as", basename
521 fname, arcname = self.
_get_codename(initname[0:-3], basename)
523 print "Adding", arcname
524 self.
write(fname, arcname)
525 dirlist = os.listdir(pathname)
526 dirlist.remove(
"__init__.py")
528 for filename
in dirlist:
529 path = os.path.join(pathname, filename)
530 root, ext = os.path.splitext(filename)
531 if os.path.isdir(path):
532 if os.path.isfile(os.path.join(path,
"__init__.py")):
539 print "Adding", arcname
540 self.
write(fname, arcname)
544 print "Adding files from directory", pathname
545 for filename
in os.listdir(pathname):
546 path = os.path.join(pathname, filename)
547 root, ext = os.path.splitext(filename)
552 print "Adding", arcname
553 self.
write(fname, arcname)
555 if pathname[-3:] !=
".py":
556 raise RuntimeError, \
557 'Files added with writepy() must end with ".py"'
558 fname, arcname = self.
_get_codename(pathname[0:-3], basename)
560 print "Adding file", arcname
561 self.
write(fname, arcname)
563 def _get_codename(self, pathname, basename):
564 """Return (filename, archivename) for the path.
566 Given a module name path, return the correct file path and
567 archive name, compiling if necessary. For example, given
568 /python/lib/string, return (/python/lib/string.pyc, string).
570 file_py = pathname +
".py"
571 file_pyc = pathname +
".pyc"
572 file_pyo = pathname +
".pyo"
573 if os.path.isfile(file_pyo)
and \
574 os.stat(file_pyo)[8] >= os.stat(file_py)[8]:
576 elif not os.path.isfile(file_pyc)
or \
577 os.stat(file_pyc)[8] < os.stat(file_py)[8]:
580 print "Compiling", file_py
585 archivename = os.path.split(fname)[1]
587 archivename =
"%s/%s" % (basename, archivename)
588 return (fname, archivename)