Vega Strike Python Modules doc  0.5.1
Documentation of the "Modules" folder of Vega Strike
 All Data Structures Namespaces Files Functions Variables
URLopener Class Reference
Inheritance diagram for URLopener:
FancyURLopener URLopener

Public Member Functions

def __init__
 
def __del__
 
def close
 
def cleanup
 
def addheader
 
def open
 
def open_unknown
 
def open_unknown_proxy
 
def retrieve
 
def open_http
 
def http_error
 
def http_error_default
 
def open_https
 
def open_gopher
 
def open_file
 
def open_local_file
 
def open_ftp
 
def open_data
 

Data Fields

 proxies
 
 key_file
 
 cert_file
 
 addheaders
 
 tempcache
 
 ftpcache
 
 type
 

Static Public Attributes

string version = "Python-urllib/%s"
 

Detailed Description

Class to open URLs.
This is a class rather than just a subroutine because we may need
more than one set of global protocol-specific options.
Note -- this is a base class for those who don't want the
automatic handling of errors of type 302 (relocated) and 401
(authorization needed).

Definition at line 87 of file urllib.py.

Constructor & Destructor Documentation

def __init__ (   self,
  proxies = None,
  x509 
)

Definition at line 100 of file urllib.py.

References urllib.getproxies.

101  def __init__(self, proxies=None, **x509):
102  if proxies is None:
103  proxies = getproxies()
104  assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
105  self.proxies = proxies
106  self.key_file = x509.get('key_file')
107  self.cert_file = x509.get('cert_file')
108  self.addheaders = [('User-agent', self.version)]
109  self.__tempfiles = []
110  self.__unlink = os.unlink # See cleanup()
111  self.tempcache = None
112  # Undocumented feature: if you assign {} to tempcache,
113  # it is used to cache files retrieved with
114  # self.retrieve(). This is not enabled by default
115  # since it does not work for changing documents (and I
116  # haven't got the logic to check expiration headers
117  # yet).
118  self.ftpcache = ftpcache
119  # Undocumented feature: you can use a different
120  # ftp cache by assigning to the .ftpcache member;
121  # in case you want logically independent URL openers
122  # XXX This is not threadsafe. Bah.
def __del__ (   self)

Member Function Documentation

def addheader (   self,
  args 
)
Add a header to be used by the HTTP interface only
e.g. u.addheader('Accept', 'sound/basic')

Definition at line 143 of file urllib.py.

144  def addheader(self, *args):
145  """Add a header to be used by the HTTP interface only
146  e.g. u.addheader('Accept', 'sound/basic')"""
147  self.addheaders.append(args)
def cleanup (   self)

Definition at line 129 of file urllib.py.

References URLopener.__tempfiles, URLopener.__unlink, and URLopener.tempcache.

130  def cleanup(self):
131  # This code sometimes runs when the rest of this module
132  # has already been deleted, so it can't use any globals
133  # or import anything.
134  if self.__tempfiles:
135  for file in self.__tempfiles:
136  try:
137  self.__unlink(file)
138  except OSError:
139  pass
140  del self.__tempfiles[:]
141  if self.tempcache:
142  self.tempcache.clear()
def close (   self)

Definition at line 126 of file urllib.py.

References URLopener.cleanup().

127  def close(self):
128  self.cleanup()
def http_error (   self,
  url,
  fp,
  errcode,
  errmsg,
  headers,
  data = None 
)
Handle http errors.
Derived class can override this, or provide specific handlers
named http_error_DDD where DDD is the 3-digit error code.

Definition at line 305 of file urllib.py.

References URLopener.http_error_default().

306  def http_error(self, url, fp, errcode, errmsg, headers, data=None):
307  """Handle http errors.
308  Derived class can override this, or provide specific handlers
309  named http_error_DDD where DDD is the 3-digit error code."""
310  # First check if there's a specific handler for this error
311  name = 'http_error_%d' % errcode
312  if hasattr(self, name):
313  method = getattr(self, name)
314  if data is None:
315  result = method(url, fp, errcode, errmsg, headers)
316  else:
317  result = method(url, fp, errcode, errmsg, headers, data)
318  if result: return result
319  return self.http_error_default(url, fp, errcode, errmsg, headers)
def http_error_default (   self,
  url,
  fp,
  errcode,
  errmsg,
  headers 
)
Default error handler: close the connection and raise IOError.

Definition at line 320 of file urllib.py.

321  def http_error_default(self, url, fp, errcode, errmsg, headers):
322  """Default error handler: close the connection and raise IOError."""
323  void = fp.read()
324  fp.close()
325  raise IOError, ('http error', errcode, errmsg, headers)
def open (   self,
  fullurl,
  data = None 
)
Use URLopener().open(file) instead of open(file, 'r').

Definition at line 149 of file urllib.py.

References URLopener.proxies, urllib.splithost(), urllib.splittype(), URLopener.tempcache, urllib.toBytes(), and urllib.unwrap().

150  def open(self, fullurl, data=None):
151  """Use URLopener().open(file) instead of open(file, 'r')."""
152  fullurl = unwrap(toBytes(fullurl))
153  if self.tempcache and self.tempcache.has_key(fullurl):
154  filename, headers = self.tempcache[fullurl]
155  fp = open(filename, 'rb')
156  return addinfourl(fp, headers, fullurl)
157  urltype, url = splittype(fullurl)
158  if not urltype:
159  urltype = 'file'
160  if self.proxies.has_key(urltype):
161  proxy = self.proxies[urltype]
162  urltype, proxyhost = splittype(proxy)
163  host, selector = splithost(proxyhost)
164  url = (host, fullurl) # Signal special case to open_*()
165  else:
166  proxy = None
167  name = 'open_' + urltype
168  self.type = urltype
169  if '-' in name:
170  # replace - with _
171  name = '_'.join(name.split('-'))
172  if not hasattr(self, name):
173  if proxy:
174  return self.open_unknown_proxy(proxy, fullurl, data)
175  else:
176  return self.open_unknown(fullurl, data)
177  try:
178  if data is None:
179  return getattr(self, name)(url)
180  else:
181  return getattr(self, name)(url, data)
182  except socket.error, msg:
183  raise IOError, ('socket error', msg), sys.exc_info()[2]
def open_data (   self,
  url,
  data = None 
)
Use "data" URL.

Definition at line 491 of file urllib.py.

References base64.decodestring(), dospath.join(), and urllib.unquote().

492  def open_data(self, url, data=None):
493  """Use "data" URL."""
494  # ignore POSTed data
495  #
496  # syntax of data URLs:
497  # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
498  # mediatype := [ type "/" subtype ] *( ";" parameter )
499  # data := *urlchar
500  # parameter := attribute "=" value
501  import StringIO, mimetools, time
502  try:
503  [type, data] = url.split(',', 1)
504  except ValueError:
505  raise IOError, ('data error', 'bad data URL')
506  if not type:
507  type = 'text/plain;charset=US-ASCII'
508  semi = type.rfind(';')
509  if semi >= 0 and '=' not in type[semi:]:
510  encoding = type[semi+1:]
511  type = type[:semi]
512  else:
513  encoding = ''
514  msg = []
515  msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
516  time.gmtime(time.time())))
517  msg.append('Content-type: %s' % type)
518  if encoding == 'base64':
519  import base64
520  data = base64.decodestring(data)
521  else:
522  data = unquote(data)
523  msg.append('Content-length: %d' % len(data))
524  msg.append('')
525  msg.append(data)
526  msg = '\n'.join(msg)
527  f = StringIO.StringIO(msg)
528  headers = mimetools.Message(f, 0)
529  f.fileno = None # needed for addinfourl
530  return addinfourl(f, headers, url)
531 
def open_file (   self,
  url 
)
Use local file or FTP depending on form of URL.

Definition at line 400 of file urllib.py.

References URLopener.open_ftp(), and URLopener.open_local_file().

401  def open_file(self, url):
402  """Use local file or FTP depending on form of URL."""
403  if url[:2] == '//' and url[2:3] != '/':
404  return self.open_ftp(url)
405  else:
406  return self.open_local_file(url)
def open_ftp (   self,
  url 
)
Use FTP protocol.

Definition at line 435 of file urllib.py.

References URLopener.ftpcache, urllib.ftperrors(), mimetypes.guess_type(), dospath.join(), urllib.splitattr(), urllib.splithost(), urllib.splitpasswd(), urllib.splitport(), urllib.splituser(), urllib.splitvalue(), and urllib.unquote().

436  def open_ftp(self, url):
437  """Use FTP protocol."""
438  import mimetypes, mimetools, StringIO
439  host, path = splithost(url)
440  if not host: raise IOError, ('ftp error', 'no host given')
441  host, port = splitport(host)
442  user, host = splituser(host)
443  if user: user, passwd = splitpasswd(user)
444  else: passwd = None
445  host = unquote(host)
446  user = unquote(user or '')
447  passwd = unquote(passwd or '')
448  host = socket.gethostbyname(host)
449  if not port:
450  import ftplib
451  port = ftplib.FTP_PORT
452  else:
453  port = int(port)
454  path, attrs = splitattr(path)
455  path = unquote(path)
456  dirs = path.split('/')
457  dirs, file = dirs[:-1], dirs[-1]
458  if dirs and not dirs[0]: dirs = dirs[1:]
459  if dirs and not dirs[0]: dirs[0] = '/'
460  key = user, host, port, '/'.join(dirs)
461  # XXX thread unsafe!
462  if len(self.ftpcache) > MAXFTPCACHE:
463  # Prune the cache, rather arbitrarily
464  for k in self.ftpcache.keys():
465  if k != key:
466  v = self.ftpcache[k]
467  del self.ftpcache[k]
468  v.close()
469  try:
470  if not self.ftpcache.has_key(key):
471  self.ftpcache[key] = \
472  ftpwrapper(user, passwd, host, port, dirs)
473  if not file: type = 'D'
474  else: type = 'I'
475  for attr in attrs:
476  attr, value = splitvalue(attr)
477  if attr.lower() == 'type' and \
478  value in ('a', 'A', 'i', 'I', 'd', 'D'):
479  type = value.upper()
480  (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
481  mtype = mimetypes.guess_type("ftp:" + url)[0]
482  headers = ""
483  if mtype:
484  headers += "Content-Type: %s\n" % mtype
485  if retrlen is not None and retrlen >= 0:
486  headers += "Content-Length: %d\n" % retrlen
487  headers = mimetools.Message(StringIO.StringIO(headers))
488  return addinfourl(fp, headers, "ftp:" + url)
489  except ftperrors(), msg:
490  raise IOError, ('ftp error', msg), sys.exc_info()[2]
def open_gopher (   self,
  url 
)
Use Gopher protocol.

Definition at line 384 of file urllib.py.

References urllib.noheaders(), gopherlib.send_query(), gopherlib.send_selector(), urllib.splitgophertype(), urllib.splithost(), urllib.splitquery(), and urllib.unquote().

385  def open_gopher(self, url):
386  """Use Gopher protocol."""
387  import gopherlib
388  host, selector = splithost(url)
389  if not host: raise IOError, ('gopher error', 'no host given')
390  host = unquote(host)
391  type, selector = splitgophertype(selector)
392  selector, query = splitquery(selector)
393  selector = unquote(selector)
394  if query:
395  query = unquote(query)
396  fp = gopherlib.send_query(selector, query, host)
397  else:
398  fp = gopherlib.send_selector(selector, host)
399  return addinfourl(fp, noheaders(), "gopher:" + url)
def open_http (   self,
  url,
  data = None 
)
Use HTTP protocol.

Definition at line 249 of file urllib.py.

References URLopener.addheaders, base64.encodestring(), URLopener.http_error(), urllib.proxy_bypass(), urllib.splithost(), urllib.splittype(), urllib.splituser(), string.strip(), URLopener.type, and urllib.unquote().

250  def open_http(self, url, data=None):
251  """Use HTTP protocol."""
252  import httplib
253  user_passwd = None
254  if type(url) is types.StringType:
255  host, selector = splithost(url)
256  if host:
257  user_passwd, host = splituser(host)
258  host = unquote(host)
259  realhost = host
260  else:
261  host, selector = url
262  urltype, rest = splittype(selector)
263  url = rest
264  user_passwd = None
265  if urltype.lower() != 'http':
266  realhost = None
267  else:
268  realhost, rest = splithost(rest)
269  if realhost:
270  user_passwd, realhost = splituser(realhost)
271  if user_passwd:
272  selector = "%s://%s%s" % (urltype, realhost, rest)
273  if proxy_bypass(realhost):
274  host = realhost
275 
276  #print "proxy via http:", host, selector
277  if not host: raise IOError, ('http error', 'no host given')
278  if user_passwd:
279  import base64
280  auth = base64.encodestring(user_passwd).strip()
281  else:
282  auth = None
283  h = httplib.HTTP(host)
284  if data is not None:
285  h.putrequest('POST', selector)
286  h.putheader('Content-type', 'application/x-www-form-urlencoded')
287  h.putheader('Content-length', '%d' % len(data))
288  else:
289  h.putrequest('GET', selector)
290  if auth: h.putheader('Authorization', 'Basic %s' % auth)
291  if realhost: h.putheader('Host', realhost)
292  for args in self.addheaders: apply(h.putheader, args)
293  h.endheaders()
294  if data is not None:
295  h.send(data)
296  errcode, errmsg, headers = h.getreply()
297  fp = h.getfile()
298  if errcode == 200:
299  return addinfourl(fp, headers, "http:" + url)
300  else:
301  if data is None:
302  return self.http_error(url, fp, errcode, errmsg, headers)
303  else:
304  return self.http_error(url, fp, errcode, errmsg, headers, data)
def open_https (   self,
  url,
  data = None 
)
Use HTTPS protocol.

Definition at line 327 of file urllib.py.

References URLopener.addheaders, URLopener.cert_file, HTTPSConnection.cert_file, HTTPS.cert_file, base64.encodestring(), URLopener.http_error(), URLopener.key_file, HTTPSConnection.key_file, HTTPS.key_file, urllib.splithost(), urllib.splittype(), urllib.splituser(), string.strip(), URLopener.type, and urllib.unquote().

328  def open_https(self, url, data=None):
329  """Use HTTPS protocol."""
330  import httplib
331  user_passwd = None
332  if type(url) is types.StringType:
333  host, selector = splithost(url)
334  if host:
335  user_passwd, host = splituser(host)
336  host = unquote(host)
337  realhost = host
338  else:
339  host, selector = url
340  urltype, rest = splittype(selector)
341  url = rest
342  user_passwd = None
343  if urltype.lower() != 'https':
344  realhost = None
345  else:
346  realhost, rest = splithost(rest)
347  if realhost:
348  user_passwd, realhost = splituser(realhost)
349  if user_passwd:
350  selector = "%s://%s%s" % (urltype, realhost, rest)
351  #print "proxy via https:", host, selector
352  if not host: raise IOError, ('https error', 'no host given')
353  if user_passwd:
354  import base64
355  auth = base64.encodestring(user_passwd).strip()
356  else:
357  auth = None
358  h = httplib.HTTPS(host, 0,
359  key_file=self.key_file,
360  cert_file=self.cert_file)
361  if data is not None:
362  h.putrequest('POST', selector)
363  h.putheader('Content-type',
364  'application/x-www-form-urlencoded')
365  h.putheader('Content-length', '%d' % len(data))
366  else:
367  h.putrequest('GET', selector)
368  if auth: h.putheader('Authorization: Basic %s' % auth)
369  if realhost: h.putheader('Host', realhost)
370  for args in self.addheaders: apply(h.putheader, args)
371  h.endheaders()
372  if data is not None:
373  h.send(data)
374  errcode, errmsg, headers = h.getreply()
375  fp = h.getfile()
376  if errcode == 200:
377  return addinfourl(fp, headers, "https:" + url)
378  else:
379  if data is None:
380  return self.http_error(url, fp, errcode, errmsg, headers)
381  else:
382  return self.http_error(url, fp, errcode, errmsg, headers,
383  data)
def open_local_file (   self,
  url 
)
Use local file.

Definition at line 407 of file urllib.py.

References rfc822.formatdate(), mimetypes.guess_type(), urllib.localhost(), URLopener.open(), urllib.splithost(), urllib.splitport(), urllib.thishost(), and urllib.url2pathname().

408  def open_local_file(self, url):
409  """Use local file."""
410  import mimetypes, mimetools, rfc822, StringIO
411  host, file = splithost(url)
412  localname = url2pathname(file)
413  stats = os.stat(localname)
414  size = stats[stat.ST_SIZE]
415  modified = rfc822.formatdate(stats[stat.ST_MTIME])
416  mtype = mimetypes.guess_type(url)[0]
417  headers = mimetools.Message(StringIO.StringIO(
418  'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
419  (mtype or 'text/plain', size, modified)))
420  if not host:
421  urlfile = file
422  if file[:1] == '/':
423  urlfile = 'file://' + file
424  return addinfourl(open(localname, 'rb'),
425  headers, urlfile)
426  host, port = splitport(host)
427  if not port \
428  and socket.gethostbyname(host) in (localhost(), thishost()):
429  urlfile = file
430  if file[:1] == '/':
431  urlfile = 'file://' + file
432  return addinfourl(open(localname, 'rb'),
433  headers, urlfile)
434  raise IOError, ('local file error', 'not on local host')
def open_unknown (   self,
  fullurl,
  data = None 
)
Overridable interface to open unknown URL type.

Definition at line 184 of file urllib.py.

References urllib.splittype().

185  def open_unknown(self, fullurl, data=None):
186  """Overridable interface to open unknown URL type."""
187  type, url = splittype(fullurl)
188  raise IOError, ('url error', 'unknown url type', type)
def open_unknown_proxy (   self,
  proxy,
  fullurl,
  data = None 
)
Overridable interface to open a URL when the configured proxy uses an unknown (unsupported) URL type.

Definition at line 189 of file urllib.py.

References urllib.splittype().

190  def open_unknown_proxy(self, proxy, fullurl, data=None):
191  """Overridable interface to open unknown URL type."""
192  type, url = splittype(fullurl)
193  raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
def retrieve (   self,
  url,
  filename = None,
  reporthook = None,
  data = None 
)
retrieve(url) returns (filename, headers) for a local object
or (tempfilename, headers) for a remote object.

Definition at line 195 of file urllib.py.

References tempfile.mktemp(), Pattern.open, _posixfile_.open(), URLopener.open(), Template.open(), Telnet.open(), URLopener.open_local_file(), urllib.reporthook(), urllib.splitattr(), urllib.splithost(), urllib.splitquery(), urllib.splittype(), URLopener.tempcache, urllib.toBytes(), urllib.unwrap(), and urllib.url2pathname().

196  def retrieve(self, url, filename=None, reporthook=None, data=None):
197  """retrieve(url) returns (filename, None) for a local object
198  or (tempfilename, headers) for a remote object."""
199  url = unwrap(toBytes(url))
200  if self.tempcache and self.tempcache.has_key(url):
201  return self.tempcache[url]
202  type, url1 = splittype(url)
203  if not filename and (not type or type == 'file'):
204  try:
205  fp = self.open_local_file(url1)
206  hdrs = fp.info()
207  del fp
208  return url2pathname(splithost(url1)[1]), hdrs
209  except IOError, msg:
210  pass
211  fp = self.open(url, data)
212  headers = fp.info()
213  if not filename:
214  import tempfile
215  garbage, path = splittype(url)
216  garbage, path = splithost(path or "")
217  path, garbage = splitquery(path or "")
218  path, garbage = splitattr(path or "")
219  suffix = os.path.splitext(path)[1]
220  filename = tempfile.mktemp(suffix)
221  self.__tempfiles.append(filename)
222  result = filename, headers
223  if self.tempcache is not None:
224  self.tempcache[url] = result
225  tfp = open(filename, 'wb')
226  bs = 1024*8
227  size = -1
228  blocknum = 1
229  if reporthook:
230  if headers.has_key("content-length"):
231  size = int(headers["Content-Length"])
232  reporthook(0, bs, size)
233  block = fp.read(bs)
234  if reporthook:
235  reporthook(1, bs, size)
236  while block:
237  tfp.write(block)
238  block = fp.read(bs)
239  blocknum = blocknum + 1
240  if reporthook:
241  reporthook(blocknum, bs, size)
242  fp.close()
243  tfp.close()
244  del fp
245  del tfp
246  return result

Field Documentation

addheaders

Definition at line 107 of file urllib.py.

cert_file

Definition at line 106 of file urllib.py.

ftpcache

Definition at line 117 of file urllib.py.

key_file

Definition at line 105 of file urllib.py.

proxies

Definition at line 104 of file urllib.py.

tempcache

Definition at line 110 of file urllib.py.

type

Definition at line 167 of file urllib.py.

string version = "Python-urllib/%s"
static

Definition at line 97 of file urllib.py.


The documentation for this class was generated from the following file: