Vega strike Python Modules doc  0.5.1
Documentation of the " Modules " folder of Vega strike
 All Data Structures Namespaces Files Functions Variables
urllib2.py
Go to the documentation of this file.
1 """An extensible library for opening URLs using a variety of protocols
2 
3 The simplest way to use this module is to call the urlopen function,
4 which accepts a string containing a URL or a Request object (described
5 below). It opens the URL and returns the results as file-like
6 object; the returned object has some extra methods described below.
7 
8 The OpenerDirector manages a collection of Handler objects that do
9 all the actual work. Each Handler implements a particular protocol or
10 option. The OpenerDirector is a composite object that invokes the
11 Handlers needed to open the requested URL. For example, the
12 HTTPHandler performs HTTP GET and POST requests and deals with
13 non-error returns. The HTTPRedirectHandler automatically deals with
14 HTTP 301 & 302 redirect errors, and the HTTPDigestAuthHandler deals
15 with digest authentication.
16 
17 urlopen(url, data=None) -- basic usage is the same as original
18 urllib. Pass the url and optionally data to post to an HTTP URL, and
19 get a file-like object back. One difference is that you can also pass
20 a Request instance instead of URL. Raises a URLError (subclass of
21 IOError); for HTTP errors, raises an HTTPError, which can also be
22 treated as a valid response.
23 
24 build_opener -- function that creates a new OpenerDirector instance.
25 will install the default handlers. accepts one or more Handlers as
26 arguments, either instances or Handler classes that it will
27 instantiate. If one of the arguments is a subclass of the default
28 handler, the argument will be installed instead of the default.
29 
30 install_opener -- installs a new opener as the default opener.
31 
32 objects of interest:
33 OpenerDirector --
34 
35 Request -- an object that encapsulates the state of a request. The
36 state can be as simple as the URL. It can also include extra HTTP
37 headers, e.g. a User-Agent.
38 
39 BaseHandler --
40 
41 exceptions:
42 URLError-- a subclass of IOError, individual protocols have their own
43 specific subclass
44 
45 HTTPError-- also a valid HTTP response, so you can treat an HTTP error
46 as an exceptional event or valid response
47 
48 internals:
49 BaseHandler and parent
50 _call_chain conventions
51 
52 Example usage:
53 
54 import urllib2
55 
56 # set up authentication info
57 authinfo = urllib2.HTTPBasicAuthHandler()
58 authinfo.add_password('realm', 'host', 'username', 'password')
59 
60 proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})
61 
62 # build a new opener that adds authentication and caching FTP handlers
63 opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)
64 
65 # install it
66 urllib2.install_opener(opener)
67 
68 f = urllib2.urlopen('http://www.python.org/')
69 
70 
71 """
72 
73 # XXX issues:
74 # If an authentication error handler tries to perform
75 # authentication but fails for some reason, how should the error be
76 # signalled? The client needs to know the HTTP error code. But if
77 # the handler knows what the problem was, e.g., that it didn't know
78 # the hash algo requested in the challenge, it would be good to
79 # pass that information along to the client, too.
80 
81 # XXX to do:
82 # name!
83 # documentation (getting there)
84 # complex proxies
85 # abstract factory for opener
86 # ftp errors aren't handled cleanly
87 # gopher can return a socket.error
88 # check digest against correct (i.e. non-apache) implementation
89 
90 import socket
91 import httplib
92 import inspect
93 import re
94 import base64
95 import types
96 import urlparse
97 import md5
98 import mimetypes
99 import mimetools
100 import rfc822
101 import ftplib
102 import sys
103 import time
104 import os
105 import stat
106 import gopherlib
107 import posixpath
108 
109 try:
110  from cStringIO import StringIO
111 except ImportError:
112  from StringIO import StringIO
113 
114 try:
115  import sha
116 except ImportError:
117  # need 1.5.2 final
118  sha = None
119 
120 # not sure how many of these need to be gotten rid of
121 from urllib import unwrap, unquote, splittype, splithost, \
122  addinfourl, splitport, splitgophertype, splitquery, \
123  splitattr, ftpwrapper, noheaders
124 
125 # support for proxies via environment variables
126 from urllib import getproxies
127 
128 # support for FileHandler
129 from urllib import localhost, url2pathname
130 
131 __version__ = "2.0a1"
132 
# Module-wide default opener; created lazily by urlopen() or replaced
# through install_opener().
_opener = None


def urlopen(url, data=None):
    """Open *url* with the module's default opener and return the result.

    url and data are handed unchanged to the opener's open() method.
    The default opener is created with build_opener() the first time it
    is needed and then reused.
    """
    global _opener
    opener = _opener
    if opener is None:
        opener = _opener = build_opener()
    return opener.open(url, data)
139 
def install_opener(opener):
    """Make *opener* the module-wide default used by urlopen()."""
    globals()['_opener'] = opener
143 
144 # do these error classes make sense?
145 # make sure all of the IOError stuff is overridden. we just want to be
146  # subtypes.
147 
class URLError(IOError):
    """IOError sub-type carrying the cause of a failed open in ``reason``.

    Only the type relationship with IOError is wanted; none of its
    implementation is shared, so __init__ and __str__ are both overridden.
    """

    def __init__(self, reason):
        # IOError.__init__ is intentionally not invoked.
        self.reason = reason

    def __str__(self):
        template = '<urlopen error %s>'
        return template % self.reason
156 
class HTTPError(URLError, addinfourl):
    """Raised when HTTP error occurs, but also acts like non-error return"""
    __super_init = addinfourl.__init__

    def __init__(self, url, code, msg, hdrs, fp):
        # Initialise the addinfourl half first so the instance can be
        # used like a normal response object.
        self.__super_init(fp, hdrs, url)
        self.fp = fp
        self.hdrs = hdrs
        self.msg = msg
        self.code = code
        # XXX
        self.filename = url

    def __str__(self):
        return 'HTTP Error %s: %s' % (self.code, self.msg)

    def __del__(self):
        # XXX is this safe? what if user catches exception, then
        # extracts fp and discards exception?
        fp = self.fp
        if fp:
            fp.close()
178 
180  pass
181 
182 
class Request:
    """State of one URL request: the URL, optional data and extra headers.

    The URL is split lazily: get_type() and get_host() parse it on first
    use and cache the remainder in private ``__r_*`` attributes.
    """

    def __init__(self, url, data=None, headers={}):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = unwrap(url)
        # self.__r_type is what's left after doing the splittype
        self.type = self.host = self.port = None
        self.data = data
        # Copy into a fresh dict so the caller's mapping (and the shared
        # default argument) is never modified by add_header().
        self.headers = {}
        self.headers.update(headers)

    def __getattr__(self, attr):
        # XXX this is a fallback mechanism to guard against these
        # methods getting called in a non-standard order.  this may be
        # too complicated and/or unnecessary.
        # XXX should the __r_XXX attributes be public?
        prefix = '_Request__r_'
        if attr.startswith(prefix):
            getter = 'get_' + attr[len(prefix):]
            if hasattr(Request, getter):
                # Running the getter is expected to set the attribute.
                getattr(self, getter)()
                return getattr(self, attr)
        raise AttributeError(attr)

    def add_data(self, data):
        """Attach *data* to the request."""
        self.data = data

    def has_data(self):
        """Return true when data has been attached."""
        return self.data is not None

    def get_data(self):
        """Return the attached data (None when there is none)."""
        return self.data

    def get_full_url(self):
        """Return the unwrapped URL the request was created with."""
        return self.__original

    def get_type(self):
        """Return the URL scheme; raise ValueError when the URL has none."""
        if self.type is None:
            self.type, self.__r_type = splittype(self.__original)
            if self.type is None:
                raise ValueError("unknown url type: %s" % self.__original)
        return self.type

    def get_host(self):
        """Return the unquoted host part of the URL."""
        if self.host is None:
            self.host, self.__r_host = splithost(self.__r_type)
            if self.host:
                self.host = unquote(self.host)
        return self.host

    def get_selector(self):
        """Return what follows the host in the URL."""
        return self.__r_host

    def set_proxy(self, host, type):
        """Route the request through *host*; the selector becomes the full URL."""
        self.host, self.type = host, type
        self.__r_host = self.__original

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key] = val
244 
def __init__(self):
    """Start with no handlers and a single default User-agent header."""
    # manage the individual handlers
    self.handlers = []
    self.handle_open = {}
    self.handle_error = {}
    self.addheaders = [('User-agent', "Python-urllib/%s" % __version__)]
253 
def add_handler(self, handler):
    """Register *handler* under every protocol/error hook it implements.

    A method named ``<proto>_open`` is recorded in handle_open; a method
    named ``<proto>_error_<kind>`` is recorded in handle_error[proto],
    with <kind> turned into an int when it looks like one.  The handler
    is added to self.handlers (and given this object as parent) only if
    at least one method matched.
    """
    matched = 0
    for name in dir(handler):
        if name[-5:] == '_open':
            self.handle_open.setdefault(name[:-5], []).append(handler)
            matched = 1
            continue
        # <proto>_error_<kind>: the text between the first two
        # underscores must be exactly 'error'.
        pieces = name.split('_', 2)
        if len(pieces) != 3 or pieces[1] != 'error':
            continue
        proto, kind = pieces[0], pieces[2]
        try:
            kind = int(kind)
        except ValueError:
            pass
        by_kind = self.handle_error.setdefault(proto, {})
        by_kind.setdefault(kind, []).append(handler)
        matched = 1
    if matched:
        self.handlers.append(handler)
        handler.add_parent(self)
285 
def __del__(self):
    """Release the handlers when the object is collected."""
    self.close()
288 
def close(self):
    """Close every registered handler, then forget them all."""
    for registered in self.handlers:
        registered.close()
    self.handlers = []
293 
def _call_chain(self, chain, kind, meth_name, *args):
    """Call meth_name(*args) on each handler in chain[kind] in turn.

    Returns the first result that is not None, or None when no handler
    produced one (or none is registered for *kind*).
    """
    # XXX raise an exception if no one else should try to handle
    # this url.  return None if you can't but someone else could.
    for handler in chain.get(kind, ()):
        outcome = getattr(handler, meth_name)(*args)
        if outcome is not None:
            return outcome
    return None
304 
def open(self, fullurl, data=None):
    """Open *fullurl* (a URL string or a Request) and return the result.

    The 'default' handlers are tried first, then the handlers for the
    URL's own type, and finally the 'unknown' handlers.
    """
    # accept a URL or a Request object
    if isinstance(fullurl, (type(''), type(u''))):
        req = Request(fullurl, data)
    else:
        req = fullurl
        if data is not None:
            req.add_data(data)
    assert isinstance(req, Request) # really only care about interface

    response = self._call_chain(self.handle_open, 'default',
                                'default_open', req)
    if not response:
        scheme = req.get_type()
        response = self._call_chain(self.handle_open, scheme,
                                    scheme + '_open', req)
    if not response:
        response = self._call_chain(self.handle_open, 'unknown',
                                    'unknown_open', req)
    return response
328 
def error(self, proto, *args):
    """Dispatch an error for *proto* to the registered error handlers.

    For 'http' and 'https' the third extra argument is taken as the
    status code: handlers for ``http_error_<code>`` run first and, if
    none returns a true result, the ``http_error_default`` handlers run.
    Other protocols use their ``<proto>_error`` handlers only.
    """
    is_http = proto in ['http', 'https']
    if is_http:
        # XXX http[s] protocols are special-cased
        table = self.handle_error['http'] # https is not different than http
        key = args[2] # YUCK!
        meth_name = 'http_error_%d' % key
    else:
        table = self.handle_error
        key = proto
        meth_name = proto + '_error'

    outcome = self._call_chain(table, key, meth_name, *args)
    if outcome:
        return outcome

    if is_http:
        return self._call_chain(table, 'default', 'http_error_default',
                                *args)
349 
350 # XXX probably also want an abstract factory that knows things like
351  # the fact that a ProxyHandler needs to get inserted first.
352 # would also know when it makes sense to skip a superclass in favor of
353  # a subclass and when it might make sense to include both
354 
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP and FTP.  If there is a ProxyHandler, it must be at the
    front of the list of handlers.  (Yuck.)

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.

    Each argument may be a handler instance or a handler class; classes
    are instantiated without arguments.
    """

    def replaces(candidate, klass):
        # A class argument replaces a default it subclasses; an instance
        # argument replaces a default it is an instance of.
        if inspect.isclass(candidate):
            return issubclass(candidate, klass)
        return isinstance(candidate, klass)

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler]
    if hasattr(httplib, 'HTTPS'):
        default_classes.append(HTTPSHandler)

    # Drop each replaced default exactly once.  Collecting the class once
    # per matching argument and removing it that many times raised
    # ValueError when two arguments replaced the same default.
    for klass in default_classes[:]:
        for check in handlers:
            if replaces(check, klass):
                default_classes.remove(klass)
                break

    for klass in default_classes:
        opener.add_handler(klass())

    for h in handlers:
        if inspect.isclass(h):
            h = h()
        opener.add_handler(h)
    return opener
391 
def add_parent(self, parent):
    """Remember the object this handler has been registered with."""
    self.parent = parent
def close(self):
    """Drop the reference to the parent set by add_parent()."""
    self.parent = None
397 
def http_error_default(self, req, fp, code, msg, hdrs):
    """Turn an HTTP error response into a raised HTTPError."""
    url = req.get_full_url()
    raise HTTPError(url, code, msg, hdrs, fp)
401 
# Implementation note: To avoid the server sending us into an
# infinite loop, the request object needs to track what URLs we
# have already seen.  Do this by adding a handler-specific
# attribute to the Request object.
def http_error_302(self, req, fp, code, msg, headers):
    """Follow a redirect by opening the URL named in the response.

    The target comes from the 'location' header or, failing that, the
    'uri' header; without either nothing is done and None is returned.
    Raises HTTPError when the target was already visited in this chain
    or more than ten redirects have been recorded.
    """
    if headers.has_key('location'):
        newurl = headers['location']
    elif headers.has_key('uri'):
        newurl = headers['uri']
    else:
        return
    # The header may hold a relative reference.
    newurl = urlparse.urljoin(req.get_full_url(), newurl)

    # XXX Probably want to forget about the state of the current
    # request, although that might interact poorly with other
    # handlers that also use handler-specific request attributes
    new = Request(newurl, req.get_data(), req.headers)
    new.error_302_dict = {}
    if hasattr(req, 'error_302_dict'):
        if len(req.error_302_dict) > 10 or \
           newurl in req.error_302_dict:
            raise HTTPError(req.get_full_url(), code,
                            self.inf_msg + msg, headers, fp)
        new.error_302_dict.update(req.error_302_dict)
    new.error_302_dict[newurl] = newurl

    # Don't close the fp until we are sure that we won't use it
    # with HTTPError.
    fp.read()
    fp.close()

    return self.parent.open(new)

# 301 responses are handled exactly like 302 responses.
http_error_301 = http_error_302

# A space was missing between the first two fragments, which made the
# message read "...that wouldlead to an infinite loop."
inf_msg = "The HTTP server returned a redirect error that would " \
          "lead to an infinite loop.\n" \
          "The last 302 error message was:\n"
441 
def __init__(self, proxies=None):
    """Create one ``<type>_open`` method per entry of *proxies*.

    proxies maps a URL type to a proxy URL; when omitted, the mapping
    returned by getproxies() is used.
    """
    if proxies is None:
        proxies = getproxies()
    assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
    self.proxies = proxies
    for type, url in proxies.items():
        # Bind the loop values as defaults so each generated method keeps
        # its own proxy URL and type.
        def opener(r, proxy=url, type=type, meth=self.proxy_open):
            return meth(r, proxy, type)
        setattr(self, '%s_open' % type, opener)
452 
def proxy_open(self, req, proxy, type):
    """Point *req* at the proxy URL *proxy*.

    Credentials embedded in the proxy host (``user:pass@host``) are sent
    as a Basic Proxy-Authorization header.  Returns None when the proxy
    uses the request's own URL type, so that the remaining handlers
    process the request; otherwise the request is re-opened through the
    parent.
    """
    request_type = req.get_type()
    type, remainder = splittype(proxy)
    host, XXX = splithost(remainder)
    if '@' in host:
        credentials, host = host.split('@', 1)
        encoded = base64.encodestring(unquote(credentials)).strip()
        req.add_header('Proxy-Authorization', 'Basic '+encoded)
    req.set_proxy(unquote(host), type)
    if request_type != type:
        # need to start over, because the other handlers don't
        # grok the proxy's URL type
        return self.parent.open(req)
    # let other handlers take care of it
    # XXX this only makes sense if the proxy is before the
    # other handlers
    return None
472 
473 # feature suggested by Duncan Booth
474 # XXX custom is not a good name
# either pass a function to the constructor or override handle
def __init__(self, proto, func=None, proxy_addr=None):
    """Store the protocol, the optional predicate and the proxy address."""
    self.proto, self.func, self.addr = proto, func, proxy_addr
481 
def handle(self, req):
    """Return 1 when a predicate was supplied and accepts *req*, else None."""
    predicate = self.func
    if not predicate:
        return None
    if predicate(req):
        return 1
    return None
485 
def get_proxy(self):
    """Return the proxy address given to the constructor."""
    address = self.addr
    return address
488 
def __init__(self, *proxies):
    """Register every proxy object passed to the constructor.

    Each argument is handed to add_proxy(), which files it under its
    ``proto`` attribute.  The arguments used to be ignored, leaving the
    handler without any proxies.
    """
    self.proxies = {}
    for cpo in proxies:
        self.add_proxy(cpo)
492 
def proxy_open(self, req):
    """Re-open *req* through the first registered proxy that accepts it.

    Only proxies registered for the request's URL type are consulted.
    Returns None when none is registered or none accepts the request.
    """
    proto = req.get_type()
    try:
        proxies = self.proxies[proto]
    except KeyError:
        return None
    for p in proxies:
        if p.handle(req):
            # set_proxy() takes the proxy host and the URL type; calling
            # it with the host alone raised TypeError.
            req.set_proxy(p.get_proxy(), proto)
            return self.parent.open(req)
    return None
504 
def do_proxy(self, p, req):
    """Re-open *req* through the parent; *p* is not used."""
    parent = self.parent
    return parent.open(req)
507 
def add_proxy(self, cpo):
    """File the proxy object *cpo* under its ``proto`` attribute."""
    self.proxies.setdefault(cpo.proto, []).append(cpo)
513 
def __init__(self):
    """Start with an empty realm -> {uris: (user, password)} table."""
    self.passwd = {}
517 
def add_password(self, realm, uri, user, passwd):
    """Record (user, passwd) for *realm* and the given URI or URIs."""
    # uri could be a single URI or a sequence
    if isinstance(uri, (type(''), type(u''))):
        uri = [uri]
    reduced = tuple([self.reduce_uri(one) for one in uri])
    self.passwd.setdefault(realm, {})[reduced] = (user, passwd)
526 
def find_user_password(self, realm, authuri):
    """Return the (user, password) stored for *realm* that covers authuri.

    Returns (None, None) when no stored URI is a base of *authuri*.
    """
    target = self.reduce_uri(authuri)
    for uris, authinfo in self.passwd.get(realm, {}).items():
        for candidate in uris:
            if self.is_suburi(candidate, target):
                return authinfo
    return None, None
535 
def reduce_uri(self, uri):
    """Accept netloc or URI and extract only the netloc and path"""
    netloc, path = urlparse.urlparse(uri)[1:3]
    if netloc:
        return netloc, path or '/'
    # A bare netloc is parsed as a path; treat it as the host.
    return path, '/'
543 
def is_suburi(self, base, test):
    """Check if test is below base in a URI tree

    Both args must be URIs in reduced form.
    """
    if base == test:
        return 1
    same_host = base[0] == test[0]
    if same_host:
        # test is below base when base's whole path is a prefix of it.
        shared = posixpath.commonprefix((base[1], test[1]))
        if len(shared) == len(base[1]):
            return 1
    return 0
557 
558 
560 
def find_user_password(self, realm, authuri):
    """Look up *realm* first and fall back to the entries stored for None."""
    found = HTTPPasswordMgr.find_user_password(self, realm, authuri)
    user, password = found
    if user is None:
        return HTTPPasswordMgr.find_user_password(self, None, authuri)
    return user, password
566 
567 
569 
570  rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"')
571 
572  # XXX there can actually be multiple auth-schemes in a
573  # www-authenticate header. should probably be a lot more careful
574  # in parsing them to extract multiple alternatives
575 
def __init__(self, password_mgr=None):
    """Use *password_mgr* (a new HTTPPasswordMgr by default) for lookups.

    The manager's add_password method is also exposed on the handler.
    """
    if password_mgr is None:
        password_mgr = HTTPPasswordMgr()
    self.passwd = password_mgr
    self.add_password = password_mgr.add_password
581 
def http_error_auth_reqed(self, authreq, host, req, headers):
    """Retry with Basic credentials when the challenge asks for them.

    authreq names the response header holding the challenge.  Returns
    None when the header is missing, cannot be parsed, or names another
    scheme.
    """
    # XXX could be multiple headers
    challenge = headers.get(authreq, None)
    if not challenge:
        return None
    mo = AbstractBasicAuthHandler.rx.match(challenge)
    if not mo:
        return None
    scheme, realm = mo.groups()
    if scheme.lower() == 'basic':
        return self.retry_http_basic_auth(host, req, realm)
    return None
591 
def retry_http_basic_auth(self, host, req, realm):
    """Re-open *req* with a Basic credentials header for *realm*.

    Returns None when no password is known, or when the request already
    carries exactly these credentials (they were evidently rejected).
    """
    user, pw = self.passwd.find_user_password(realm, host)
    if not pw:
        return None
    credentials = base64.encodestring("%s:%s" % (user, pw)).strip()
    auth = 'Basic %s' % credentials
    if req.headers.get(self.auth_header, None) == auth:
        return None
    req.add_header(self.auth_header, auth)
    return self.parent.open(req)
603 
605 
606  auth_header = 'Authorization'
607 
def http_error_401(self, req, fp, code, msg, headers):
    """Answer a 401 using the 'www-authenticate' challenge header."""
    netloc = urlparse.urlparse(req.get_full_url())[1]
    return self.http_error_auth_reqed('www-authenticate',
                                      netloc, req, headers)
612 
613 
615 
616  auth_header = 'Proxy-Authorization'
617 
def http_error_407(self, req, fp, code, msg, headers):
    """Answer a 407 using the 'proxy-authenticate' challenge header."""
    proxy_host = req.get_host()
    return self.http_error_auth_reqed('proxy-authenticate',
                                      proxy_host, req, headers)
622 
623 
625 
def __init__(self, passwd=None):
    """Use *passwd* (a new HTTPPasswordMgr by default) for lookups.

    The manager's add_password method is also exposed on the handler.
    """
    if passwd is None:
        passwd = HTTPPasswordMgr()
    self.passwd = passwd
    self.add_password = passwd.add_password
631 
def http_error_auth_reqed(self, authreq, host, req, headers):
    """Retry with Digest credentials when the challenge asks for them.

    authreq names the response header that holds the challenge (for
    example 'www-authenticate').  Returns None when that header is
    missing or names another scheme.
    """
    # The challenge lives in the header the caller named; self.auth_header
    # is the request header the credentials are later written to.
    challenge = headers.get(authreq, None)
    if not challenge:
        return None
    words = challenge.split()
    # Compare the scheme without regard to case.
    if words and words[0].lower() == 'digest':
        return self.retry_http_digest_auth(req, challenge)
    return None
638 
def retry_http_digest_auth(self, req, auth):
    """Re-open *req* with credentials computed from the challenge *auth*.

    Returns None when no credentials can be computed, or when the request
    already carries exactly these credentials.
    """
    token, challenge = auth.split(' ', 1)
    chal = parse_keqv_list(parse_http_list(challenge))
    credentials = self.get_authorization(req, chal)
    if not credentials:
        return None
    auth_val = 'Digest %s' % credentials
    if req.headers.get(self.auth_header, None) == auth_val:
        return None
    req.add_header(self.auth_header, auth_val)
    return self.parent.open(req)
650 
def get_authorization(self, req, chal):
    """Return the credentials string answering the challenge *chal*.

    chal is the parsed challenge (a mapping of parameter names to
    values).  Returns None when it lacks a realm or nonce, when the
    algorithm is not supported, or when no user is known for the realm.
    """
    try:
        realm = chal['realm']
        nonce = chal['nonce']
    except KeyError:
        return None
    algorithm = chal.get('algorithm', 'MD5')
    # mod_digest doesn't send an opaque, even though it isn't
    # supposed to be optional
    opaque = chal.get('opaque', None)

    H, KD = self.get_algorithm_impls(algorithm)
    if H is None:
        return None

    user, pw = self.passwd.find_user_password(realm,
                                              req.get_full_url())
    if user is None:
        return None

    # XXX not implemented yet
    entdig = None
    if req.has_data():
        entdig = self.get_entity_digest(req.get_data(), chal)

    if req.has_data():
        method = 'POST'
    else:
        method = 'GET'
    A1 = "%s:%s:%s" % (user, realm, pw)
    # XXX selector: what about proxies and full urls
    A2 = "%s:%s" % (method, req.get_selector())
    respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
    # XXX should the partial digests be encoded too?

    # Mandatory fields first, then the optional ones that apply.
    fields = ['username="%s", realm="%s", nonce="%s", uri="%s", '
              'response="%s"' % (user, realm, nonce, req.get_selector(),
                                 respdig)]
    if opaque:
        fields.append('opaque="%s"' % opaque)
    if entdig:
        fields.append('digest="%s"' % entdig)
    if algorithm != 'MD5':
        fields.append('algorithm="%s"' % algorithm)
    return ', '.join(fields)
694 
def get_algorithm_impls(self, algorithm):
    """Return the (H, KD) digest functions for *algorithm*.

    H(x) is the encoded digest of x and KD(s, d) is H("s:d").  Returns
    (None, None) for an algorithm other than 'MD5' or 'SHA', or when the
    module implementing it could not be imported; the caller treats a
    None H as "cannot answer this challenge".
    """
    # digest modules are expected to be imported at the top level
    if algorithm == 'MD5':
        digest_module = md5
    elif algorithm == 'SHA':
        digest_module = sha
    else:
        # XXX MD5-sess
        digest_module = None
    if digest_module is None:
        # Previously H stayed unbound here and building KD raised
        # NameError instead of reporting "unsupported".
        return None, None
    H = lambda x, e=encode_digest, new=digest_module.new: e(new(x).digest())
    KD = lambda s, d, H=H: H("%s:%s" % (s, d))
    return H, KD
704 
def get_entity_digest(self, data, chal):
    """Placeholder for the entity digest; always returns None."""
    # XXX not implemented yet
    entity_digest = None
    return entity_digest
708 
709 
711  """An authentication protocol defined by RFC 2069
712 
713  Digest authentication improves on basic authentication because it
714  does not transmit passwords in the clear.
715  """
716 
# Request header that carries the digest credentials.  The shared retry
# code reads ``self.auth_header``; ``header`` is kept as an alias for
# code that used the old attribute name.
auth_header = 'Authorization'
header = auth_header

def http_error_401(self, req, fp, code, msg, headers):
    """Answer a 401 using the 'www-authenticate' challenge header.

    Returns whatever the retry produced (None when no retry happened) so
    the caller receives the authenticated response.
    """
    host = urlparse.urlparse(req.get_full_url())[1]
    return self.http_error_auth_reqed('www-authenticate', host, req,
                                      headers)
722 
723 
725 
# Request header that carries the digest credentials for a proxy.  The
# shared retry code reads ``self.auth_header``; ``header`` is kept as an
# alias for code that used the old attribute name.
auth_header = 'Proxy-Authorization'
header = auth_header

def http_error_407(self, req, fp, code, msg, headers):
    """Answer a 407 using the 'proxy-authenticate' challenge header.

    Returns whatever the retry produced (None when no retry happened) so
    the caller receives the authenticated response.
    """
    host = req.get_host()
    return self.http_error_auth_reqed('proxy-authenticate', host, req,
                                      headers)
731 
732 
def encode_digest(digest):
    """Return *digest* as lowercase hex, two digits per character."""
    hexdigits = '0123456789abcdef'
    pairs = []
    for ch in digest:
        value = ord(ch)
        # high nibble first, then low nibble
        pairs.append(hexdigits[(value >> 4) & 0xf] + hexdigits[value & 0xf])
    return ''.join(pairs)
741 
742 
744 
def do_open(self, http_class, req):
    """Send *req* with a connection made by *http_class*.

    A request with data is sent as POST, otherwise as GET.  A 200 reply
    is returned as an addinfourl object; any other status is passed to
    the parent's error() method.  Raises URLError when the request has
    no host or a socket error occurs while the request is started.
    """
    host = req.get_host()
    if not host:
        raise URLError('no host given')

    sending_data = req.has_data()
    try:
        h = http_class(host) # will parse host:port
        if sending_data:
            data = req.get_data()
            h.putrequest('POST', req.get_selector())
            # Supply defaults only for headers the request did not set.
            if 'Content-type' not in req.headers:
                h.putheader('Content-type',
                            'application/x-www-form-urlencoded')
            if 'Content-length' not in req.headers:
                h.putheader('Content-length', '%d' % len(data))
        else:
            h.putrequest('GET', req.get_selector())
    except socket.error:
        raise URLError(sys.exc_info()[1])

    h.putheader('Host', host)
    for header in self.parent.addheaders:
        h.putheader(*header)
    for name, value in req.headers.items():
        h.putheader(name, value)
    h.endheaders()
    if req.has_data():
        h.send(data)

    code, msg, hdrs = h.getreply()
    fp = h.getfile()
    if code != 200:
        return self.parent.error('http', req, fp, code, msg, hdrs)
    return addinfourl(fp, hdrs, req.get_full_url())
780 
781 
783 
def http_open(self, req):
    """Open *req* over plain HTTP."""
    connection_class = httplib.HTTP
    return self.do_open(connection_class, req)
786 
787 
788 if hasattr(httplib, 'HTTPS'):
790 
def https_open(self, req):
    """Open *req* over HTTPS."""
    connection_class = httplib.HTTPS
    return self.do_open(connection_class, req)
793 
794 
def unknown_open(self, req):
    """Reject a request whose URL type no handler opened."""
    raise URLError('unknown url type: %s' % req.get_type())
799 
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Returns a dict; a value enclosed in double quotes is stored without
    them.  An element without '=' raises ValueError.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices instead of indexes: an empty value ('key=') must not
        # raise IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
809 
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.

    Returns the elements as a list of strings with surrounding
    whitespace removed; quotes are kept.  Raises ValueError when a
    quoted-string containing a comma is never closed.
    """
    items = []
    start = 0            # index where the current element begins
    in_quote = 0
    comma_in_quote = 0   # did the currently open quote contain a comma?
    end = len(s)
    i = 0
    while i < end:
        ch = s[i]
        if in_quote:
            if ch == '\\':
                i = i + 1            # the escaped character is literal
            elif ch == '"':
                in_quote = 0
            elif ch == ',':
                comma_in_quote = 1
        elif ch == '"':
            in_quote = 1
            comma_in_quote = 0
        elif ch == ',':
            # Every element starts right after the previous comma; the
            # old code forgot to advance the start for unquoted elements
            # and returned overlapping slices.
            items.append(s[start:i])
            start = i + 1
        i = i + 1
    if in_quote and comma_in_quote:
        raise ValueError("unbalanced quotes")
    if start < end:
        items.append(s[start:])
    return [item.strip() for item in items]
855 
# Use local file or FTP depending on form of URL
def file_open(self, req):
    """Open a file: URL locally, or as FTP when it names a host.

    A selector of the form ``//host/...`` is retyped to 'ftp' and
    re-opened through the parent.
    """
    url = req.get_selector()
    names_host = url[:2] == '//' and url[2:3] != '/'
    if not names_host:
        return self.open_local_file(req)
    req.type = 'ftp'
    return self.parent.open(req)
865 
# names for the localhost
names = None

def get_names(self):
    """Return the addresses of 'localhost' and of this machine's host name.

    The pair is looked up once and cached on the FileHandler class.
    """
    if FileHandler.names is None:
        loopback = socket.gethostbyname('localhost')
        own = socket.gethostbyname(socket.gethostname())
        FileHandler.names = (loopback, own)
    return FileHandler.names
873 
# not entirely sure what the rules are here
def open_local_file(self, req):
    """Open the local file named by *req* and return it with headers.

    The result carries Content-Type, Content-Length and Last-modified
    headers built from the file's name and stat data.  Raises URLError
    when the file cannot be stat'ed or when the URL names a host that is
    not this machine.
    """
    host = req.get_host()
    file = req.get_selector()
    localfile = url2pathname(file)
    # stat once (the file used to be stat'ed twice) and report failure
    # through URLError, the error type this module documents.
    try:
        stats = os.stat(localfile)
    except OSError:
        raise URLError(sys.exc_info()[1])
    size = stats[stat.ST_SIZE]
    modified = rfc822.formatdate(stats[stat.ST_MTIME])
    mtype = mimetypes.guess_type(file)[0]
    headers = mimetools.Message(StringIO(
        'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
        (mtype or 'text/plain', size, modified)))
    if host:
        host, port = splitport(host)
    # Serve the file only when no host is given, or the host (without a
    # port) resolves to one of this machine's addresses.
    if not host or \
       (not port and socket.gethostbyname(host) in self.get_names()):
        return addinfourl(open(localfile, 'rb'),
                          headers, 'file:'+file)
    raise URLError('file not on local host')
894 
896  def ftp_open(self, req):
897  host = req.get_host()
898  if not host:
899  raise IOError, ('ftp error', 'no host given')
900  # XXX handle custom username & password
901  try:
902  host = socket.gethostbyname(host)
903  except socket.error, msg:
904  raise URLError(msg)
905  host, port = splitport(host)
906  if port is None:
907  port = ftplib.FTP_PORT
908  path, attrs = splitattr(req.get_selector())
909  path = unquote(path)
910  dirs = path.split('/')
911  dirs, file = dirs[:-1], dirs[-1]
912  if dirs and not dirs[0]:
913  dirs = dirs[1:]
914  user = passwd = '' # XXX
915  try:
916  fw = self.connect_ftp(user, passwd, host, port, dirs)
917  type = file and 'I' or 'D'
918  for attr in attrs:
919  attr, value = splitattr(attr)
920  if attr.lower() == 'type' and \
921  value in ('a', 'A', 'i', 'I', 'd', 'D'):
922  type = value.upper()
923  fp, retrlen = fw.retrfile(file, type)
924  headers = ""
925  mtype = mimetypes.guess_type(req.get_full_url())[0]
926  if mtype:
927  headers += "Content-Type: %s\n" % mtype
928  if retrlen is not None and retrlen >= 0:
929  headers += "Content-Length: %d\n" % retrlen
930  sf = StringIO(headers)
931  headers = mimetools.Message(sf)
932  return addinfourl(fp, headers, req.get_full_url())
933  except ftplib.all_errors, msg:
934  raise IOError, ('ftp error', msg), sys.exc_info()[2]
935 
def connect_ftp(self, user, passwd, host, port, dirs):
    """Return a new ftpwrapper for the given login, host and directories."""
    wrapper = ftpwrapper(user, passwd, host, port, dirs)
##    wrapper.ftp.set_debuglevel(1)
    return wrapper
940 
# XXX would be nice to have pluggable cache strategies
# XXX this stuff is definitely not thread safe
def __init__(self):
    """Start with an empty connection cache and the default limits."""
    self.cache = {}      # connection key -> cached connection
    self.timeout = {}    # connection key -> expiry time
    self.soonest = 0     # earliest expiry time currently recorded
    self.delay = 60      # added to time.time() to get an expiry time
    self.max_conns = 16  # cache size at which an entry is evicted
950 
def setTimeout(self, t):
    """Set the delay added to the current time when a connection is used."""
    self.delay = t
953 
def setMaxConns(self, m):
    """Set the cache size at which check_cache() evicts an entry."""
    self.max_conns = m
956 
def connect_ftp(self, user, passwd, host, port, dirs):
    """Return the cached connection for this login and host.

    A new ftpwrapper is created when none is cached.  Every call renews
    the entry's expiry time and then runs check_cache().
    """
    key = user, passwd, host, port
    if key not in self.cache:
        self.cache[key] = ftpwrapper(user, passwd, host, port, dirs)
    self.timeout[key] = time.time() + self.delay
    self.check_cache()
    return self.cache[key]
966 
def check_cache(self):
    """Expire old connections and keep the cache below its size limit.

    Connections whose expiry time has passed are closed and removed.  If
    the cache then holds exactly max_conns entries, the entry with the
    earliest expiry time is removed as well.  self.soonest is refreshed
    to the earliest remaining expiry time, or 0 when nothing is cached.
    """
    # first check for old ones
    now = time.time()
    if self.soonest <= now:
        # Iterate over a copy: entries are deleted inside the loop.
        for key, expiry in list(self.timeout.items()):
            if expiry < now:
                self.cache[key].close()
                del self.cache[key]
                del self.timeout[key]
    self.soonest = self._earliest_expiry()

    # then check the size
    if len(self.cache) == self.max_conns:
        for key, expiry in list(self.timeout.items()):
            if expiry == self.soonest:
                del self.cache[key]
                del self.timeout[key]
                break
        self.soonest = self._earliest_expiry()

def _earliest_expiry(self):
    # min() raises ValueError on an empty sequence; 0 is the initial
    # value of self.soonest and means "nothing cached".
    if self.timeout:
        return min(self.timeout.values())
    return 0
986 
def gopher_open(self, req):
    """Fetch a gopher: URL and return the reply without headers.

    A selector with a query part is sent with gopherlib.send_query,
    otherwise with gopherlib.send_selector.  Raises GopherError when the
    request has no host.
    """
    host = req.get_host()
    if not host:
        raise GopherError('no host given')
    host = unquote(host)
    type, selector = splitgophertype(req.get_selector())
    selector, query = splitquery(selector)
    selector = unquote(selector)
    if not query:
        fp = gopherlib.send_selector(selector, host)
    else:
        fp = gopherlib.send_query(selector, unquote(query), host)
    return addinfourl(fp, noheaders(), req.get_full_url())
1003 
1004 #bleck! don't use this yet
1006 
1007  default_handlers = [UnknownHandler, HTTPHandler,
1008  HTTPDefaultErrorHandler, HTTPRedirectHandler,
1009  FTPHandler, FileHandler]
1010  proxy_handlers = [ProxyHandler]
1011  handlers = []
1012  replacement_handlers = []
1013 
def add_proxy_handler(self, ph):
    """Append *ph* to a new list of proxy handlers for this object."""
    # Build a new list so the list shared at class level is not changed.
    extended = self.proxy_handlers + [ph]
    self.proxy_handlers = extended
1016 
def add_handler(self, h):
    """Append *h* to a new list of handlers for this object."""
    # Build a new list so the list shared at class level is not changed.
    extended = self.handlers + [h]
    self.handlers = extended
1019 
def replace_handler(self, h):
    """Not implemented; does nothing and returns None."""
    return None
1022 
def build_opener(self):
    """Create an opener holding this factory's proxy handlers.

    Entries of self.proxy_handlers may be handler classes or instances;
    classes are instantiated without arguments.  The opener is returned;
    it used to be built and then discarded.
    """
    opener = OpenerDirector()
    for ph in self.proxy_handlers:
        if inspect.isclass(ph):
            ph = ph()
        opener.add_handler(ph)
    return opener
1029 
if __name__ == "__main__":
    # Self-test: fetch a fixed list of URLs with the default opener and
    # report the number of bytes read, or the error raised, for each.

    # XXX some of the test code depends on machine configurations that
    # are internal to CNRI.   Need to set up a public server with the
    # right authentication configuration for test purposes.
    if socket.gethostname() == 'bitdiddle':
        localhost = 'bitdiddle.cnri.reston.va.us'
    elif socket.gethostname() == 'bitdiddle.concentric.net':
        localhost = 'localhost'
    else:
        localhost = None
    urls = [
        # Thanks to Fred for finding these!
        'gopher://gopher.lib.ncsu.edu/11/library/stacks/Alex',
        'gopher://gopher.vt.edu:10010/10/33',

        'file:/etc/passwd',
        'file://nonsensename/etc/passwd',
        'ftp://www.python.org/pub/python/misc/sousa.au',
        'ftp://www.python.org/pub/tmp/blat',
        'http://www.espn.com/', # redirect
        'http://www.python.org/Spanish/Inquistion/',
        ('http://www.python.org/cgi-bin/faqw.py',
         'query=pythonistas&querytype=simple&casefold=yes&req=search'),
        'http://www.python.org/',
        'ftp://gatekeeper.research.compaq.com/pub/DEC/SRC/research-reports/00README-Legal-Rules-Regs',
            ]

##    if localhost is not None:
##        urls = urls + [
##            'file://%s/etc/passwd' % localhost,
##            'http://%s/simple/' % localhost,
##            'http://%s/digest/' % localhost,
##            'http://%s/not/found.h' % localhost,
##            ]

##    bauth = HTTPBasicAuthHandler()
##    bauth.add_password('basic_test_realm', localhost, 'jhylton',
##                       'password')
##    dauth = HTTPDigestAuthHandler()
##    dauth.add_password('digest_test_realm', localhost, 'jhylton',
##                       'password')

    # cfh was used below without ever being assigned.  It is the caching
    # FTP handler: the only handler in this module with setTimeout().
    cfh = CacheFTPHandler()
    cfh.setTimeout(1)

##    # XXX try out some custom proxy objects too!
##    def at_cnri(req):
##        host = req.get_host()
##        print host
##        if host[-18:] == '.cnri.reston.va.us':
##            return 1
##    p = CustomProxy('http', at_cnri, 'proxy.cnri.reston.va.us')
##    ph = CustomProxyHandler(p)

##    install_opener(build_opener(dauth, bauth, cfh, GopherHandler, ph))
    install_opener(build_opener(cfh, GopherHandler))

    for url in urls:
        # A tuple entry is (url, data to post).
        if isinstance(url, type(())):
            url, req = url
        else:
            req = None
        print(url)
        try:
            f = urlopen(url, req)
        except IOError:
            print("IOError: %s" % (sys.exc_info()[1],))
        except socket.error:
            print("socket.error: %s" % (sys.exc_info()[1],))
        else:
            buf = f.read()
            f.close()
            print("read %d bytes" % len(buf))
        print("")
        time.sleep(0.1)
1105  time.sleep(0.1)