Vega Strike Python Modules doc 0.5.1
Documentation of the "Modules" folder of Vega Strike
 All Data Structures Namespaces Files Functions Variables
gzip.py
Go to the documentation of this file.
1 """Functions that read and write gzipped files.
2 
3 The user of the file doesn't have to worry about the compression,
4 but random access is not allowed."""
5 
6 # based on Andrew Kuchling's minigzip.py distributed with the zlib module
7 
8 import struct, sys, time
9 import zlib
10 import __builtin__
11 
# Names exported by a star-import of this module.
__all__ = ["GzipFile","open"]
13 
# Bit masks tested against (or written into) the flag byte of a gzip
# member header.
FTEXT = 1
FHCRC = 2
FEXTRA = 4
FNAME = 8
FCOMMENT = 16

# Values stored in GzipFile.mode to record the direction of the stream.
READ = 1
WRITE = 2
17 
def write32(output, value):
    """Write ``value`` to ``output`` as a little-endian signed 32-bit integer.

    ``output`` only needs a ``write()`` method.  ``struct.error`` is raised
    by ``struct.pack`` when ``value`` does not fit the signed 32-bit format.
    """
    packed = struct.pack("<l", value)
    output.write(packed)
20 
def write32u(output, value):
    """Write ``value`` to ``output`` as a little-endian unsigned 32-bit integer.

    A negative ``value`` is taken to be the signed view of an unsigned
    32-bit quantity and is shifted up by 2**32 before packing, so ``-1``
    is written as ``0xFFFFFFFF``.

    ``output`` only needs a ``write()`` method.  ``struct.pack`` rejects
    values that still fall outside the unsigned 32-bit range.
    """
    if value < 0:
        # 2 ** 32 replaces the old ``0x100000000L`` literal: same value,
        # but it does not rely on the Python-2-only ``L`` suffix.
        value = value + 2 ** 32
    output.write(struct.pack("<L", value))
25 
def read32(input):
    """Read four bytes from ``input`` and return them as a signed integer.

    The bytes are interpreted as a little-endian signed 32-bit value.
    ``input`` only needs a ``read()`` method.
    """
    raw = input.read(4)
    (value,) = struct.unpack("<l", raw)
    return value
28 
def open(filename, mode="rb", compresslevel=9):
    """Return a GzipFile for ``filename``.

    The three arguments are handed to the GzipFile constructor unchanged,
    in the same order.
    """
    gzfile = GzipFile(filename, mode, compresslevel)
    return gzfile
31 
class GzipFile:
    """File-like object that reads or writes gzip-compressed data.

    Data passes through zlib on its way to or from an underlying file
    object.  In read mode every member of the file is decompressed in
    turn and its CRC-32/size trailer is verified.  In write mode a gzip
    header is emitted by the constructor and the trailer by close().

    Reading relies on seek() and tell() of the underlying file object.
    """

    # File object opened by this instance itself (stays None when the
    # caller supplied ``fileobj``); it is the only one close() closes.
    myfileobj = None

    # The CRC-32 and size fields of the gzip trailer are unsigned 32-bit
    # quantities, so both are handled modulo 2**32.
    _UINT32_MOD = 2 ** 32

    def __init__(self, filename=None, mode=None,
                 compresslevel=9, fileobj=None):
        """Set up reading from, or writing to, a gzip stream.

        filename      -- path opened when ``fileobj`` is None; otherwise
                         it only supplies the stored name and defaults to
                         ``fileobj.name`` (or '' if there is none).
        mode          -- must start with 'r' (read) or 'w'/'a' (write);
                         defaults to ``fileobj.mode`` or 'rb'.
        compresslevel -- level handed to zlib.compressobj() when writing.
        fileobj       -- already-open file object to use instead of
                         opening ``filename``.

        Raises ValueError when ``mode`` starts with any other letter.
        """
        if fileobj is None:
            # gzip data is binary; a text-mode file would translate line
            # endings on some platforms and corrupt the stream.
            open_mode = mode or 'rb'
            if 'b' not in open_mode:
                open_mode = open_mode + 'b'
            fileobj = self.myfileobj = __builtin__.open(filename, open_mode)
        if filename is None:
            if hasattr(fileobj, 'name'):
                filename = fileobj.name
            else:
                filename = ''
        if mode is None:
            if hasattr(fileobj, 'mode'):
                mode = fileobj.mode
            else:
                mode = 'rb'

        if mode[0:1] == 'r':
            self.mode = READ
            # Set flag indicating start of a new member
            self._new_member = 1
            # Decompressed data not yet handed to the caller.
            self.extrabuf = ""
            self.extrasize = 0
            self.filename = filename

        elif mode[0:1] == 'w' or mode[0:1] == 'a':
            self.mode = WRITE
            self._init_write(filename)
            # Negative window size: raw deflate stream, because the gzip
            # header and trailer are produced by this class itself.
            self.compress = zlib.compressobj(compresslevel,
                                             zlib.DEFLATED,
                                             -zlib.MAX_WBITS,
                                             zlib.DEF_MEM_LEVEL,
                                             0)
        else:
            raise ValueError("Mode " + mode + " not supported")

        self.fileobj = fileobj
        # Position in the uncompressed stream, reported by tell().
        self.offset = 0

        if self.mode == WRITE:
            self._write_gzip_header()

    def __repr__(self):
        """Return '<gzip ...>' built from the underlying file's repr."""
        s = repr(self.fileobj)
        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def _init_write(self, filename):
        """Reset the write-side state; ensure the name ends in '.gz'."""
        if filename[-3:] != '.gz':
            filename = filename + '.gz'
        self.filename = filename
        self.crc = zlib.crc32("")
        self.size = 0
        self.writebuf = []
        self.bufsize = 0

    def _write_gzip_header(self):
        """Write the gzip member header to the underlying file."""
        self.fileobj.write('\037\213')             # magic header
        self.fileobj.write('\010')                 # compression method
        # Stored name is the file name without its '.gz' suffix.
        fname = self.filename[:-3]
        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(chr(flags))
        write32u(self.fileobj, long(time.time()))
        self.fileobj.write('\002')
        self.fileobj.write('\377')
        if fname:
            self.fileobj.write(fname + '\000')

    def _init_read(self):
        """Reset the running CRC and size before reading a member."""
        self.crc = zlib.crc32("")
        self.size = 0

    def _read_gzip_header(self):
        """Consume a gzip member header from the underlying file.

        Raises IOError if the magic bytes are wrong or the compression
        method is not 8.  Optional header fields are read and discarded.
        """
        magic = self.fileobj.read(2)
        if magic != '\037\213':
            raise IOError('Not a gzipped file')
        method = ord(self.fileobj.read(1))
        if method != 8:
            raise IOError('Unknown compression method')
        flag = ord(self.fileobj.read(1))
        # Skip modtime (4 bytes), extra flags (1 byte) and OS (1 byte).
        self.fileobj.read(6)

        if flag & FEXTRA:
            # Read & discard the extra field, if present; its length is a
            # little-endian 16-bit integer.
            xlen = ord(self.fileobj.read(1))
            xlen = xlen + 256 * ord(self.fileobj.read(1))
            self.fileobj.read(xlen)
        if flag & FNAME:
            # Read and discard a null-terminated string containing the
            # filename
            while 1:
                s = self.fileobj.read(1)
                if not s or s == '\000':
                    break
        if flag & FCOMMENT:
            # Read and discard a null-terminated string containing a comment
            while 1:
                s = self.fileobj.read(1)
                if not s or s == '\000':
                    break
        if flag & FHCRC:
            self.fileobj.read(2)     # Read & discard the 16-bit header CRC

    def write(self, data):
        """Compress ``data`` and write it to the underlying file.

        Raises ValueError if the object has been closed.
        """
        if self.fileobj is None:
            raise ValueError("write() on closed GzipFile object")
        if len(data) > 0:
            self.size = self.size + len(data)
            self.crc = zlib.crc32(data, self.crc)
            self.fileobj.write(self.compress.compress(data))
            self.offset += len(data)

    def read(self, size=-1):
        """Return up to ``size`` decompressed bytes; all of them if negative.

        Returns '' once the object is closed and nothing is buffered.
        """
        if self.extrasize <= 0 and self.fileobj is None:
            return ''

        readsize = 1024
        if size < 0:        # get the whole thing
            try:
                while 1:
                    self._read(readsize)
                    readsize = readsize * 2
            except EOFError:
                size = self.extrasize
        else:               # just get some more of it
            try:
                while size > self.extrasize:
                    self._read(readsize)
                    readsize = readsize * 2
            except EOFError:
                if size > self.extrasize:
                    size = self.extrasize

        chunk = self.extrabuf[:size]
        self.extrabuf = self.extrabuf[size:]
        self.extrasize = self.extrasize - size

        self.offset += size
        return chunk

    def _unread(self, buf):
        """Push ``buf`` back onto the front of the read buffer."""
        self.extrabuf = buf + self.extrabuf
        self.extrasize = len(buf) + self.extrasize
        self.offset -= len(buf)

    def _read(self, size=1024):
        """Read ``size`` compressed bytes and buffer what they decompress to.

        Raises EOFError when the object is closed or no data is left.
        """
        if self.fileobj is None:
            raise EOFError("Reached EOF")

        if self._new_member:
            # If the _new_member flag is set, we have to
            # jump to the next member, if there is one.
            #
            # First, check if we're at the end of the file;
            # if so, it's time to stop; no more members to read.
            pos = self.fileobj.tell()   # Save current position
            self.fileobj.seek(0, 2)     # Seek to end of file
            if pos == self.fileobj.tell():
                raise EOFError("Reached EOF")
            else:
                self.fileobj.seek(pos)  # Return to original position

            self._init_read()
            self._read_gzip_header()
            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
            self._new_member = 0

        # Read a chunk of data from the file
        buf = self.fileobj.read(size)

        # If the EOF has been reached, flush the decompression object
        # and mark this object as finished.

        if buf == "":
            uncompress = self.decompress.flush()
            self._read_eof()
            self._add_read_data(uncompress)
            raise EOFError('Reached EOF')

        uncompress = self.decompress.decompress(buf)
        self._add_read_data(uncompress)

        if self.decompress.unused_data != "":
            # Ending case: we've come to the end of a member in the file,
            # so seek back to the start of the unused data, finish up
            # this member, and read a new gzip header.
            # (The number of bytes to seek back is the length of the unused
            # data, minus 8 because _read_eof() will rewind a further 8 bytes)
            self.fileobj.seek(-len(self.decompress.unused_data) + 8, 1)

            # Check the CRC and file size, and set the flag so we read
            # a new member on the next call
            self._read_eof()
            self._new_member = 1

    def _add_read_data(self, data):
        """Append decompressed ``data`` to the buffer; update CRC and size."""
        self.crc = zlib.crc32(data, self.crc)
        self.extrabuf = self.extrabuf + data
        self.extrasize = self.extrasize + len(data)
        self.size = self.size + len(data)

    def _read_eof(self):
        """Verify the CRC-32 and size stored in the member trailer.

        Raises ValueError when either value disagrees with what was
        computed from the decompressed data.
        """
        # We've read to the end of the file, so we have to rewind in order
        # to reread the 8 bytes containing the CRC and the file size.
        # We check that the computed CRC and size of the
        # uncompressed data match the stored values.
        self.fileobj.seek(-8, 1)
        crc32 = read32(self.fileobj)
        isize = read32(self.fileobj)
        # Both fields are unsigned 32-bit values but read32() returns a
        # signed one, and the stored size is the real size modulo 2**32,
        # so compare everything modulo 2**32.
        modulus = self._UINT32_MOD
        if crc32 % modulus != self.crc % modulus:
            raise ValueError("CRC check failed")
        elif isize % modulus != self.size % modulus:
            raise ValueError("Incorrect length of data produced")

    def close(self):
        """Finish the stream and release the underlying file object.

        In write mode the remaining compressed data and the CRC-32/size
        trailer are written first.  Only a file opened by this object is
        closed.  Calling close() again does nothing.
        """
        if self.fileobj is not None:
            if self.mode == WRITE:
                self.fileobj.write(self.compress.flush())
                # Unsigned fields, size stored modulo 2**32; a signed
                # write would fail from 2 GiB of input onwards.
                modulus = self._UINT32_MOD
                write32u(self.fileobj, self.crc % modulus)
                write32u(self.fileobj, self.size % modulus)
            self.fileobj = None
        if self.myfileobj:
            self.myfileobj.close()
            self.myfileobj = None

    def __del__(self):
        """Close the stream on destruction unless it is already closed."""
        try:
            if (self.myfileobj is None and
                    self.fileobj is None):
                return
        except AttributeError:
            # __init__ failed before the attributes were assigned.
            return
        self.close()

    def flush(self):
        """Flush the underlying file object."""
        self.fileobj.flush()

    def isatty(self):
        """Always return 0."""
        return 0

    def tell(self):
        """Return the current position in the uncompressed stream."""
        return self.offset

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file'''
        if self.mode != READ:
            raise IOError("Can't rewind in write mode")
        self.fileobj.seek(0)
        self._new_member = 1
        self.extrabuf = ""
        self.extrasize = 0
        self.offset = 0

    def seek(self, offset):
        """Move to absolute position ``offset`` in the uncompressed stream.

        In write mode the gap is filled with zero bytes and a backward
        seek raises IOError.  In read mode a backward seek rewinds and
        reads forward again; a forward seek reads and discards data.
        """
        if self.mode == WRITE:
            if offset < self.offset:
                raise IOError('Negative seek in write mode')
            count = offset - self.offset
            # Floor division keeps the block count an integer.
            for i in range(count // 1024):
                self.write(1024 * '\0')
            self.write((count % 1024) * '\0')
        elif self.mode == READ:
            if offset < self.offset:
                # for negative seek, rewind and do positive seek
                self.rewind()
            count = offset - self.offset
            for i in range(count // 1024):
                self.read(1024)
            self.read(count % 1024)

    def readline(self, size=-1):
        """Return the next line, including its newline if one was found.

        At most ``size`` bytes are returned; a negative ``size`` means no
        limit.  Returns '' at end of data.
        """
        if size < 0:
            size = sys.maxint
        bufs = []
        readsize = min(100, size)    # Read from the file in small chunks
        while 1:
            if size == 0:
                return "".join(bufs)     # Return resulting line

            c = self.read(readsize)
            i = c.find('\n')
            # We set i=size-1 to break out of the loop under two
            # conditions: 1) there's no newline, and the chunk is
            # larger than size, or 2) there is a newline, but the
            # resulting line would be longer than 'size'.
            if i == -1 and len(c) > size:
                i = size - 1
            elif size <= i:
                i = size - 1

            if i >= 0 or c == '':
                bufs.append(c[:i + 1])       # Add portion of last chunk
                self._unread(c[i + 1:])      # Push back rest of chunk
                return ''.join(bufs)         # Return resulting line

            # Append chunk to list, decrease 'size',
            bufs.append(c)
            size = size - len(c)
            readsize = min(size, readsize * 2)

    def readlines(self, sizehint=0):
        """Return a list of lines.

        Reading stops at end of data or once the lines collected total
        at least ``sizehint`` bytes; ``sizehint`` <= 0 reads all lines.
        """
        # Negative numbers result in reading all the lines
        if sizehint <= 0:
            sizehint = sys.maxint
        L = []
        while sizehint > 0:
            line = self.readline()
            if line == "":
                break
            L.append(line)
            sizehint = sizehint - len(line)

        return L

    def writelines(self, L):
        """Write every string in the sequence ``L``."""
        for line in L:
            self.write(line)
345 
346 
def _test():
    """Command-line driver: compress the named files, or decompress with -d.

    Acts like gzip; with -d, acts like gunzip.  The input file is not
    deleted, however, nor are any other gzip options or features
    supported.  The argument "-" (also the default when no file is given)
    selects standard input and standard output.
    """
    args = sys.argv[1:]
    decompress = args and args[0] == "-d"
    if decompress:
        args = args[1:]
    if not args:
        args = ["-"]
    for arg in args:
        if decompress:
            if arg == "-":
                f = GzipFile(filename="", mode="rb", fileobj=sys.stdin)
                g = sys.stdout
            else:
                if arg[-3:] != ".gz":
                    # Same text the print statement produced: the message,
                    # a space, repr(arg) and a newline.
                    sys.stdout.write("filename doesn't end in .gz: %r\n"
                                     % (arg,))
                    continue
                f = open(arg, "rb")
                g = __builtin__.open(arg[:-3], "wb")
        else:
            if arg == "-":
                f = sys.stdin
                g = GzipFile(filename="", mode="wb", fileobj=sys.stdout)
            else:
                f = __builtin__.open(arg, "rb")
                g = open(arg + ".gz", "wb")
        try:
            while 1:
                chunk = f.read(1024)
                if not chunk:
                    break
                g.write(chunk)
        finally:
            # Release both ends even when the copy fails part-way; the
            # standard streams are left open for the caller.
            if g is not sys.stdout:
                g.close()
            if f is not sys.stdin:
                f.close()
384 
# Run the command-line driver only when this file is executed as a script.
if __name__ == '__main__':
    _test()