1 """Functions that read and write gzipped files.
3 The user of the file doesn't have to worry about the compression,
4 but random access is not allowed."""
8 import struct, sys, time
12 __all__ = [
"GzipFile",
"open"]
14 FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
19 output.write(struct.pack(
"<l", value))
23 value = value + 0x100000000L
24 output.write(struct.pack(
"<L", value))
27 return struct.unpack(
"<l", input.read(4))[0]
def open(filename, mode="rb", compresslevel=9):
    """Open the gzipped file *filename* and return a GzipFile object.

    Convenience shorthand for calling the GzipFile constructor directly:
    *filename*, *mode* (default "rb") and *compresslevel* (default 9) are
    handed to GzipFile unchanged, in that order.
    """
    gzip_file = GzipFile(filename, mode, compresslevel)
    return gzip_file
36 def __init__(self, filename=None, mode=None,
37 compresslevel=9, fileobj=
None):
39 fileobj = self.
myfileobj = __builtin__.open(filename, mode
or 'rb')
41 if hasattr(fileobj,
'name'): filename = fileobj.name
44 if hasattr(fileobj,
'mode'): mode = fileobj.mode
55 elif mode[0:1] ==
'w' or mode[0:1] ==
'a':
58 self.
compress = zlib.compressobj(compresslevel,
64 raise ValueError,
"Mode " + mode +
" not supported"
69 if self.
mode == WRITE:
74 return '<gzip ' + s[1:-1] +
' ' + hex(id(self)) +
'>'
76 def _init_write(self, filename):
77 if filename[-3:] !=
'.gz':
78 filename = filename +
'.gz'
80 self.
crc = zlib.crc32(
"")
85 def _write_gzip_header(self):
86 self.fileobj.write(
'\037\213')
87 self.fileobj.write(
'\010')
92 self.fileobj.write(chr(flags))
94 self.fileobj.write(
'\002')
95 self.fileobj.write(
'\377')
97 self.fileobj.write(fname +
'\000')
100 self.
crc = zlib.crc32(
"")
103 def _read_gzip_header(self):
104 magic = self.fileobj.read(2)
105 if magic !=
'\037\213':
106 raise IOError,
'Not a gzipped file'
107 method = ord( self.fileobj.read(1) )
109 raise IOError,
'Unknown compression method'
110 flag = ord( self.fileobj.read(1) )
118 xlen=ord(self.fileobj.read(1))
119 xlen=xlen+256*ord(self.fileobj.read(1))
120 self.fileobj.read(xlen)
124 s=self.fileobj.read(1)
125 if not s
or s==
'\000':
break
129 s=self.fileobj.read(1)
130 if not s
or s==
'\000':
break
137 raise ValueError,
"write() on closed GzipFile object"
140 self.
crc = zlib.crc32(data, self.
crc)
141 self.fileobj.write( self.compress.compress(data) )
153 readsize = readsize * 2
160 readsize = readsize * 2
172 def _unread(self, buf):
177 def _read(self, size=1024):
178 if self.
fileobj is None:
raise EOFError,
"Reached EOF"
186 pos = self.fileobj.tell()
187 self.fileobj.seek(0, 2)
188 if pos == self.fileobj.tell():
189 raise EOFError,
"Reached EOF"
191 self.fileobj.seek( pos )
199 buf = self.fileobj.read(size)
205 uncompress = self.decompress.flush()
208 raise EOFError,
'Reached EOF'
210 uncompress = self.decompress.decompress(buf)
213 if self.decompress.unused_data !=
"":
219 self.fileobj.seek( -len(self.decompress.unused_data)+8, 1)
226 def _add_read_data(self, data):
227 self.
crc = zlib.crc32(data, self.
crc)
237 self.fileobj.seek(-8, 1)
240 if crc32%0x100000000L != self.
crc%0x100000000L:
241 raise ValueError,
"CRC check failed"
242 elif isize != self.
size:
243 raise ValueError,
"Incorrect length of data produced"
246 if self.
mode == WRITE:
247 self.fileobj.write(self.compress.flush())
251 elif self.
mode == READ:
254 self.myfileobj.close()
262 except AttributeError:
276 '''Return the uncompressed stream file position indicator to the
277 beginning of the file'''
278 if self.
mode != READ:
279 raise IOError(
"Can't rewind in write mode")
287 if self.
mode == WRITE:
289 raise IOError(
'Negative seek in write mode')
290 count = offset - self.
offset
291 for i
in range(count/1024):
292 self.
write(1024*
'\0')
293 self.
write((count%1024)*
'\0')
294 elif self.
mode == READ:
298 count = offset - self.
offset
299 for i
in range(count/1024): self.
read(1024)
300 self.
read(count % 1024)
303 if size < 0: size = sys.maxint
305 readsize =
min(100, size)
310 c = self.
read(readsize)
317 if i==-1
and len(c) > size: i=size-1
318 elif size <= i: i = size -1
320 if i >= 0
or c ==
'':
328 readsize =
min(size, readsize * 2)
332 if sizehint <= 0: sizehint = sys.maxint
338 sizehint = sizehint - len(line)
352 decompress = args
and args[0] ==
"-d"
360 f =
GzipFile(filename=
"", mode=
"rb", fileobj=sys.stdin)
363 if arg[-3:] !=
".gz":
364 print "filename doesn't end in .gz:", `arg`
367 g = __builtin__.open(arg[:-3],
"wb")
371 g =
GzipFile(filename=
"", mode=
"wb", fileobj=sys.stdout)
373 f = __builtin__.open(arg,
"rb")
374 g =
open(arg +
".gz",
"wb")
380 if g
is not sys.stdout:
382 if f
is not sys.stdin:
385 if __name__ ==
'__main__':