Vega Strike Python Modules doc 0.5.1
Documentation of the "Modules" folder of Vega Strike
 All Data Structures Namespaces Files Functions Variables
mimetypes.py
Go to the documentation of this file.
1 """Guess the MIME type of a file.
2 
3 This module defines two useful functions:
4 
5 guess_type(url, strict=1) -- guess the MIME type and encoding of a URL.
6 
7 guess_extension(type, strict=1) -- guess the extension for a given MIME type.
8 
9 It also contains the following, for tuning the behavior:
10 
Data:

knownfiles -- list of files to parse
inited -- flag set when init() has been called
suffix_map -- dictionary mapping suffixes to suffixes
encodings_map -- dictionary mapping suffixes to encodings
types_map -- dictionary mapping suffixes to types
common_types -- dictionary mapping suffixes to non-standard types
                (only consulted when strict is false)
18 
19 Functions:
20 
21 init([files]) -- parse a list of files, default knownfiles
22 read_mime_types(file) -- parse one file, return a dictionary or None
23 """
24 
25 import os
26 import posixpath
27 import urllib
28 
# Names exported by "from mimetypes import *".
__all__ = [
    "guess_type",
    "guess_extension",
    "read_mime_types",
    "init",
]
30 
# mime.types-style files that init() parses by default, in this order.
# Files are read into one shared table, so an entry in a later file
# overrides the same suffix from an earlier file.  Each path is listed
# once: parsing the same file twice only repeats work.
knownfiles = [
    "/usr/local/lib/netscape/mime.types",
    "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
    "/usr/local/etc/mime.types",                # Apache 1.3
    ]

# Set to 1 by init(); MimeTypes.__init__ calls init() while this is false.
inited = 0
39 
40 
class MimeTypes:
    """MIME-types datastore.

    This datastore can handle information from mime.types-style files
    and supports basic determination of MIME type from a filename or
    URL, and can guess a reasonable extension given a MIME type.

    Each instance owns private copies of the four lookup tables
    (encodings_map, suffix_map, types_map, common_types), so reading
    extra files into one instance does not affect another.
    """

    def __init__(self, filenames=()):
        """Copy the module-level tables, then read each file in `filenames'.

        The module is initialised first (via init()) if that has not
        happened yet, so the copied tables are the populated ones.
        """
        if not inited:
            init()
        self.encodings_map = encodings_map.copy()
        self.suffix_map = suffix_map.copy()
        self.types_map = types_map.copy()
        self.common_types = common_types.copy()
        for name in filenames:
            self.read(name)

    def guess_type(self, url, strict=1):
        """Guess the type of a file based on its URL.

        Return value is a tuple (type, encoding) where type is None if
        the type can't be guessed (no or unknown suffix) or a string
        of the form type/subtype, usable for a MIME Content-type
        header; and encoding is None for no encoding or the name of
        the program used to encode (e.g. compress or gzip).  The
        mappings are table driven.  Encoding suffixes are case
        sensitive; type suffixes are first tried case sensitive, then
        case insensitive.

        The suffixes .tgz, .taz and .tz (case sensitive!) are all
        mapped to '.tar.gz'.  (This is table-driven too, using the
        dictionary suffix_map.)

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        scheme, url = urllib.splittype(url)
        if scheme == 'data':
            # syntax of data URLs:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            # type/subtype defaults to "text/plain"
            comma = url.find(',')
            if comma < 0:
                # bad data URL
                return None, None
            semi = url.find(';', 0, comma)
            if semi >= 0:
                mediatype = url[:semi]
            else:
                mediatype = url[:comma]
            # A leading "attr=value" parameter, or nothing that looks like
            # type/subtype, means the media type was omitted.
            if '=' in mediatype or '/' not in mediatype:
                mediatype = 'text/plain'
            return mediatype, None  # never compressed, so encoding is None

        base, ext = posixpath.splitext(url)
        # Expand shorthand suffixes (e.g. .tgz -> .tar.gz) until none apply.
        while ext in self.suffix_map:
            base, ext = posixpath.splitext(base + self.suffix_map[ext])
        # Peel off one encoding suffix so the type suffix underneath is seen.
        if ext in self.encodings_map:
            encoding = self.encodings_map[ext]
            base, ext = posixpath.splitext(base)
        else:
            encoding = None

        # Exact-case match first, then the lower-cased suffix.
        candidates = (ext, ext.lower())
        for suffix in candidates:
            if suffix in self.types_map:
                return self.types_map[suffix], encoding
        if not strict:
            for suffix in candidates:
                if suffix in self.common_types:
                    return self.common_types[suffix], encoding
        return None, encoding

    def guess_extension(self, type, strict=1):
        """Guess the extension for a file based on its MIME type.

        Return value is a string giving a filename extension,
        including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data
        stream, but would be mapped to the MIME type `type' by
        guess_type().  If no extension can be guessed for `type', None
        is returned.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        type = type.lower()
        for ext, stype in self.types_map.items():
            if type == stype:
                return ext
        if not strict:
            # Use this instance's table, not the module-level one, so that
            # per-instance data is honoured just as it is in guess_type().
            for ext, stype in self.common_types.items():
                if type == stype:
                    return ext
        return None

    def read(self, filename):
        """Read a single mime.types-format file, specified by pathname."""
        fp = open(filename)
        try:
            self.readfp(fp)
        finally:
            # Close the file even when parsing raises.
            fp.close()

    def readfp(self, fp):
        """Read a single mime.types-format file.

        `fp' only needs a readline() method.  Each non-empty line has the
        form "type suffix [suffix ...]"; a word starting with '#' begins a
        comment that runs to the end of the line.  Every suffix is stored
        in self.types_map with a leading dot, replacing any earlier entry.
        """
        types_map = self.types_map
        while 1:
            line = fp.readline()
            if not line:
                break
            words = []
            for word in line.split():
                if word.startswith('#'):
                    break
                words.append(word)
            if not words:
                continue
            mimetype, suffixes = words[0], words[1:]
            for suff in suffixes:
                types_map['.' + suff] = mimetype
168 
def guess_type(url, strict=1):
    """Guess the type of a file based on its URL.

    Returns a tuple (type, encoding).  type is a string of the form
    type/subtype, usable for a MIME Content-type header, or None when
    the type can't be guessed (no or unknown suffix).  encoding is the
    name of the program used to encode (e.g. compress or gzip), or
    None for no encoding.  The mappings are table driven.  Encoding
    suffixes are case sensitive; type suffixes are first tried case
    sensitive, then case insensitive.

    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
    to ".tar.gz".  (This is table-driven too, using the dictionary
    suffix_map).

    Optional `strict' argument when false adds a bunch of commonly found, but
    non-standard types.
    """
    # init() rebinds the module-level name guess_type to the bound method
    # of a freshly built MimeTypes instance, so the lookup below resolves
    # to that method rather than to this stub.
    init()
    resolved = guess_type
    return resolved(url, strict)
189 
190 
def guess_extension(type, strict=1):
    """Guess the extension for a file based on its MIME type.

    Returns a filename extension string, including the leading dot
    ('.'), or None if no extension can be guessed for `type'.  The
    extension is not guaranteed to have been associated with any
    particular data stream, but would be mapped to the MIME type
    `type' by guess_type().

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    # init() rebinds the module-level name guess_extension to the bound
    # method of a freshly built MimeTypes instance, so the lookup below
    # resolves to that method rather than to this stub.
    init()
    resolved = guess_extension
    return resolved(type, strict)
205 
206 
def init(files=None):
    """Build the module-wide MIME database and publish it.

    `files' is a sequence of mime.types-style pathnames to parse; it
    defaults to knownfiles.  Entries that are not existing regular files
    are skipped.  Afterwards the module-level tables (encodings_map,
    suffix_map, types_map, common_types) and the functions guess_type
    and guess_extension all refer to the new database.
    """
    global guess_extension, guess_type
    global suffix_map, types_map, encodings_map, common_types
    global inited
    # Set the flag before creating the database: MimeTypes.__init__ calls
    # init() while the flag is false, which would otherwise recurse.
    inited = 1
    db = MimeTypes()
    if files is None:
        files = knownfiles
    for path in files:
        if os.path.isfile(path):
            # read() closes the file when done; the handle is not leaked.
            db.read(path)
    encodings_map = db.encodings_map
    suffix_map = db.suffix_map
    types_map = db.types_map
    common_types = db.common_types
    guess_extension = db.guess_extension
    guess_type = db.guess_type
224 
225 
def read_mime_types(file):
    """Parse one mime.types-format file and return its suffix table.

    `file' is the pathname to read.  Returns None if the file cannot be
    opened; otherwise returns the types_map dictionary of a new MimeTypes
    instance after the file has been read into it.
    """
    try:
        f = open(file)
    except IOError:
        return None
    try:
        db = MimeTypes()
        db.readfp(f)
    finally:
        # Always release the handle, even if parsing fails.
        f.close()
    return db.types_map
234 
235 
# Shorthand suffixes and the full suffix chain each one stands for.
suffix_map = {
    '.tgz': '.tar.gz',
    '.taz': '.tar.gz',
    '.tz':  '.tar.gz',
}

# Suffixes that name an encoding program rather than a content type.
encodings_map = {
    '.gz': 'gzip',
    '.Z':  'compress',
}
246 
# Before adding new types, make sure they are either registered with IANA, at
# http://www.isi.edu/in-notes/iana/assignments/media-types
# or extensions, i.e. using the x- prefix

# If you add to these, please keep them sorted!
#
# Each suffix appears exactly once.  Two suffixes have a second plausible
# type that is deliberately not listed, because a repeated key in a dict
# literal is silently overridden by the later entry:
#   '.cdf' could also be 'application/x-cdf'
#   '.xls' could also be 'application/excel'
types_map = {
    '.a'      : 'application/octet-stream',
    '.ai'     : 'application/postscript',
    '.aif'    : 'audio/x-aiff',
    '.aifc'   : 'audio/x-aiff',
    '.aiff'   : 'audio/x-aiff',
    '.au'     : 'audio/basic',
    '.avi'    : 'video/x-msvideo',
    '.bat'    : 'text/plain',
    '.bcpio'  : 'application/x-bcpio',
    '.bin'    : 'application/octet-stream',
    '.bmp'    : 'image/x-ms-bmp',
    '.c'      : 'text/plain',
    '.cdf'    : 'application/x-netcdf',
    '.cpio'   : 'application/x-cpio',
    '.csh'    : 'application/x-csh',
    '.css'    : 'text/css',
    '.dll'    : 'application/octet-stream',
    '.doc'    : 'application/msword',
    '.dot'    : 'application/msword',
    '.dvi'    : 'application/x-dvi',
    '.eml'    : 'message/rfc822',
    '.eps'    : 'application/postscript',
    '.etx'    : 'text/x-setext',
    '.exe'    : 'application/octet-stream',
    '.gif'    : 'image/gif',
    '.gtar'   : 'application/x-gtar',
    '.h'      : 'text/plain',
    '.hdf'    : 'application/x-hdf',
    '.htm'    : 'text/html',
    '.html'   : 'text/html',
    '.ief'    : 'image/ief',
    '.jpe'    : 'image/jpeg',
    '.jpeg'   : 'image/jpeg',
    '.jpg'    : 'image/jpeg',
    '.js'     : 'application/x-javascript',
    '.ksh'    : 'text/plain',
    '.latex'  : 'application/x-latex',
    '.m1v'    : 'video/mpeg',
    '.man'    : 'application/x-troff-man',
    '.me'     : 'application/x-troff-me',
    '.mht'    : 'message/rfc822',
    '.mhtml'  : 'message/rfc822',
    '.mif'    : 'application/x-mif',
    '.mov'    : 'video/quicktime',
    '.movie'  : 'video/x-sgi-movie',
    '.mp2'    : 'audio/mpeg',
    '.mp3'    : 'audio/mpeg',
    '.mpa'    : 'video/mpeg',
    '.mpe'    : 'video/mpeg',
    '.mpeg'   : 'video/mpeg',
    '.mpg'    : 'video/mpeg',
    '.ms'     : 'application/x-troff-ms',
    '.nc'     : 'application/x-netcdf',
    '.nws'    : 'message/rfc822',
    '.o'      : 'application/octet-stream',
    '.obj'    : 'application/octet-stream',
    '.oda'    : 'application/oda',
    '.p12'    : 'application/x-pkcs12',
    '.p7c'    : 'application/pkcs7-mime',
    '.pbm'    : 'image/x-portable-bitmap',
    '.pdf'    : 'application/pdf',
    '.pfx'    : 'application/x-pkcs12',
    '.pgm'    : 'image/x-portable-graymap',
    '.pl'     : 'text/plain',
    '.png'    : 'image/png',
    '.pnm'    : 'image/x-portable-anymap',
    '.pot'    : 'application/vnd.ms-powerpoint',
    '.ppa'    : 'application/vnd.ms-powerpoint',
    '.ppm'    : 'image/x-portable-pixmap',
    '.pps'    : 'application/vnd.ms-powerpoint',
    '.ppt'    : 'application/vnd.ms-powerpoint',
    '.ps'     : 'application/postscript',
    '.pwz'    : 'application/vnd.ms-powerpoint',
    '.py'     : 'text/x-python',
    '.pyc'    : 'application/x-python-code',
    '.pyo'    : 'application/x-python-code',
    '.qt'     : 'video/quicktime',
    '.ra'     : 'audio/x-pn-realaudio',
    '.ram'    : 'application/x-pn-realaudio',
    '.ras'    : 'image/x-cmu-raster',
    '.rdf'    : 'application/xml',
    '.rgb'    : 'image/x-rgb',
    '.roff'   : 'application/x-troff',
    '.rtx'    : 'text/richtext',
    '.sgm'    : 'text/x-sgml',
    '.sgml'   : 'text/x-sgml',
    '.sh'     : 'application/x-sh',
    '.shar'   : 'application/x-shar',
    '.snd'    : 'audio/basic',
    '.so'     : 'application/octet-stream',
    '.src'    : 'application/x-wais-source',
    '.sv4cpio': 'application/x-sv4cpio',
    '.sv4crc' : 'application/x-sv4crc',
    '.t'      : 'application/x-troff',
    '.tar'    : 'application/x-tar',
    '.tcl'    : 'application/x-tcl',
    '.tex'    : 'application/x-tex',
    '.texi'   : 'application/x-texinfo',
    '.texinfo': 'application/x-texinfo',
    '.tif'    : 'image/tiff',
    '.tiff'   : 'image/tiff',
    '.tr'     : 'application/x-troff',
    '.tsv'    : 'text/tab-separated-values',
    '.txt'    : 'text/plain',
    '.ustar'  : 'application/x-ustar',
    '.vcf'    : 'text/x-vcard',
    '.wav'    : 'audio/x-wav',
    '.wiz'    : 'application/msword',
    '.xbm'    : 'image/x-xbitmap',
    '.xlb'    : 'application/vnd.ms-excel',
    '.xls'    : 'application/vnd.ms-excel',
    '.xml'    : 'text/xml',
    '.xpm'    : 'image/x-xpixmap',
    '.xsl'    : 'application/xml',
    '.xwd'    : 'image/x-xwindowdump',
    '.zip'    : 'application/zip',
    }
374 
# Non-standard types that are nevertheless common in the wild.  They are
# only matched when the strict=0 flag is given to the API methods.

# Please keep these sorted as well.
common_types = {
    '.jpg':  'image/jpg',
    '.mid':  'audio/midi',
    '.midi': 'audio/midi',
    '.pct':  'image/pict',
    '.pic':  'image/pict',
    '.pict': 'image/pict',
    '.rtf':  'application/rtf',
    '.xul':  'text/xul',
}
389 
390 
if __name__ == '__main__':
    # Command-line front end: guess a type (or, with -e, an extension)
    # for every positional argument and print one result line per argument.
    import sys
    import getopt

    USAGE = """\
Usage: mimetypes.py [options] type

Options:
 --help / -h -- print this message and exit
 --lenient / -l -- additionally search of some common, but non-standard
 types.
 --extension / -e -- guess extension instead of type

More than one type argument may be given.
"""

    def usage(code, msg=''):
        """Print the usage text, then `msg' if given, and exit with `code'."""
        print(USAGE)
        if msg:
            print(msg)
        sys.exit(code)

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hle',
                                   ['help', 'lenient', 'extension'])
    except getopt.error:
        # Fetch the active exception via sys.exc_info() instead of binding
        # it in the except clause.
        usage(1, sys.exc_info()[1])

    strict = 1
    extension = 0
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-l', '--lenient'):
            strict = 0
        elif opt in ('-e', '--extension'):
            extension = 1

    for gtype in args:
        if extension:
            guess = guess_extension(gtype, strict)
            if guess:
                print(guess)
            else:
                print("I don't know anything about type" + ' ' + gtype)
        else:
            guess, encoding = guess_type(gtype, strict)
            if guess:
                print(' '.join(['type:', str(guess),
                                'encoding:', str(encoding)]))
            else:
                print("I don't know anything about type" + ' ' + gtype)