Vega Strike Python Modules doc  0.5.1
Documentation of the "Modules" folder of Vega Strike
 All Data Structures Namespaces Files Functions Variables
urlparse Namespace Reference

Functions

def clear_cache
 
def urlparse
 
def urlsplit
 
def urlunparse
 
def urlunsplit
 
def urljoin
 
def urldefrag
 
def test
 

Variables

list __all__ = ["urlparse", "urlunparse", "urljoin"]
 
list uses_relative
 
list uses_netloc
 
list non_hierarchical
 
list uses_params
 
list uses_query
 
list uses_fragment
 
tuple scheme_chars
 
int MAX_CACHE_SIZE = 20
 
dictionary _parse_cache = {}
 
string test_input
 

Detailed Description

Parse (absolute and relative) URLs.

See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding,
UC Irvine, June 1995.

Function Documentation

def urlparse.clear_cache ( )
Clear the parse cache.

Definition at line 40 of file urlparse.py.

40 
41 def clear_cache():
42  """Clear the parse cache."""
43  global _parse_cache
44  _parse_cache = {}
45 
def urlparse.test ( )

Definition at line 244 of file urlparse.py.

References urljoin(), urlparse(), and the built-in open().

245 def test():
246  import sys
247  base = ''
248  if sys.argv[1:]:
249  fn = sys.argv[1]
250  if fn == '-':
251  fp = sys.stdin
252  else:
253  fp = open(fn)
254  else:
255  import StringIO
256  fp = StringIO.StringIO(test_input)
257  while 1:
258  line = fp.readline()
259  if not line: break
260  words = line.split()
261  if not words:
262  continue
263  url = words[0]
264  parts = urlparse(url)
265  print '%-10s : %s' % (url, parts)
266  abs = urljoin(base, url)
267  if not base:
268  base = abs
269  wrapped = '<URL:%s>' % abs
270  print '%-10s = %s' % (url, wrapped)
271  if len(words) == 3 and words[1] == '=':
272  if wrapped != words[2]:
273  print 'EXPECTED', words[2], '!!!!!!!!!!'
def urlparse.urldefrag (   url)
Removes any existing fragment from URL.

Returns a tuple of the defragmented URL and the fragment.  If
the URL contained no fragments, the second element is the
empty string.

Definition at line 194 of file urlparse.py.

References urlparse() and urlunparse().

195 def urldefrag(url):
196  """Removes any existing fragment from URL.
197 
198  Returns a tuple of the defragmented URL and the fragment. If
199  the URL contained no fragments, the second element is the
200  empty string.
201  """
202  if '#' in url:
203  s, n, p, a, q, frag = urlparse(url)
204  defrag = urlunparse((s, n, p, a, q, ''))
205  return defrag, frag
206  else:
207  return url, ''
208 
def urlparse.urljoin (   base,
  url,
  allow_fragments = 1 
)
Join a base URL and a possibly relative URL to form an absolute
interpretation of the latter.

Definition at line 142 of file urlparse.py.

References urlparse(), urlunparse(), and the string method join().

143 def urljoin(base, url, allow_fragments = 1):
144  """Join a base URL and a possibly relative URL to form an absolute
145  interpretation of the latter."""
146  if not base:
147  return url
148  if not url:
149  return base
150  bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
151  urlparse(base, '', allow_fragments)
152  scheme, netloc, path, params, query, fragment = \
153  urlparse(url, bscheme, allow_fragments)
154  if scheme != bscheme or scheme not in uses_relative:
155  return url
156  if scheme in uses_netloc:
157  if netloc:
158  return urlunparse((scheme, netloc, path,
159  params, query, fragment))
160  netloc = bnetloc
161  if path[:1] == '/':
162  return urlunparse((scheme, netloc, path,
163  params, query, fragment))
164  if not path:
165  if not params:
166  params = bparams
167  if not query:
168  query = bquery
169  return urlunparse((scheme, netloc, bpath,
170  params, query, fragment))
171  segments = bpath.split('/')[:-1] + path.split('/')
172  # XXX The stuff below is bogus in various ways...
173  if segments[-1] == '.':
174  segments[-1] = ''
175  while '.' in segments:
176  segments.remove('.')
177  while 1:
178  i = 1
179  n = len(segments) - 1
180  while i < n:
181  if (segments[i] == '..'
182  and segments[i-1] not in ('', '..')):
183  del segments[i-1:i+1]
184  break
185  i = i+1
186  else:
187  break
188  if segments == ['', '..']:
189  segments[-1] = ''
190  elif len(segments) >= 2 and segments[-1] == '..':
191  segments[-2:] = ['']
192  return urlunparse((scheme, netloc, '/'.join(segments),
193  params, query, fragment))
def urlparse.urlparse (   url,
  scheme = '',
  allow_fragments = 1 
)
Parse a URL into 6 components:
<scheme>://<netloc>/<path>;<params>?<query>#<fragment>
Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
Note that we don't break the components up in smaller bits
(e.g. netloc is a single string) and we don't expand % escapes.

Definition at line 46 of file urlparse.py.

References urlsplit().

46 
47 def urlparse(url, scheme='', allow_fragments=1):
48  """Parse a URL into 6 components:
49  <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
50  Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
51  Note that we don't break the components up in smaller bits
52  (e.g. netloc is a single string) and we don't expand % escapes."""
53  tuple = urlsplit(url, scheme, allow_fragments)
54  scheme, netloc, url, query, fragment = tuple
55  if scheme in uses_params and ';' in url:
56  url, params = _splitparams(url)
57  else:
58  params = ''
59  return scheme, netloc, url, params, query, fragment
def urlparse.urlsplit (   url,
  scheme = '',
  allow_fragments = 1 
)
Parse a URL into 5 components:
<scheme>://<netloc>/<path>?<query>#<fragment>
Return a 5-tuple: (scheme, netloc, path, query, fragment).
Note that we don't break the components up in smaller bits
(e.g. netloc is a single string) and we don't expand % escapes.

Definition at line 69 of file urlparse.py.

References clear_cache() and the string method lower().

69 
70 def urlsplit(url, scheme='', allow_fragments=1):
71  """Parse a URL into 5 components:
72  <scheme>://<netloc>/<path>?<query>#<fragment>
73  Return a 5-tuple: (scheme, netloc, path, query, fragment).
74  Note that we don't break the components up in smaller bits
75  (e.g. netloc is a single string) and we don't expand % escapes."""
76  key = url, scheme, allow_fragments
77  cached = _parse_cache.get(key, None)
78  if cached:
79  return cached
80  if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
81  clear_cache()
82  netloc = query = fragment = ''
83  i = url.find(':')
84  if i > 0:
85  if url[:i] == 'http': # optimize the common case
86  scheme = url[:i].lower()
87  url = url[i+1:]
88  if url[:2] == '//':
89  i = url.find('/', 2)
90  if i < 0:
91  i = url.find('#')
92  if i < 0:
93  i = len(url)
94  netloc = url[2:i]
95  url = url[i:]
96  if allow_fragments and '#' in url:
97  url, fragment = url.split('#', 1)
98  if '?' in url:
99  url, query = url.split('?', 1)
100  tuple = scheme, netloc, url, query, fragment
101  _parse_cache[key] = tuple
102  return tuple
103  for c in url[:i]:
104  if c not in scheme_chars:
105  break
106  else:
107  scheme, url = url[:i].lower(), url[i+1:]
108  if scheme in uses_netloc:
109  if url[:2] == '//':
110  i = url.find('/', 2)
111  if i < 0:
112  i = len(url)
113  netloc, url = url[2:i], url[i:]
114  if allow_fragments and scheme in uses_fragment and '#' in url:
115  url, fragment = url.split('#', 1)
116  if scheme in uses_query and '?' in url:
117  url, query = url.split('?', 1)
118  tuple = scheme, netloc, url, query, fragment
119  _parse_cache[key] = tuple
120  return tuple
def urlparse.urlunparse (   (scheme, netloc, url, params, query, fragment) )
Put a parsed URL back together again.  This may result in a
slightly different, but equivalent URL, if the URL that was parsed
originally had redundant delimiters, e.g. a ? with an empty query
(the draft states that these are equivalent).

Definition at line 121 of file urlparse.py.

References urlunsplit().

122 def urlunparse((scheme, netloc, url, params, query, fragment)):
123  """Put a parsed URL back together again. This may result in a
124  slightly different, but equivalent URL, if the URL that was parsed
125  originally had redundant delimiters, e.g. a ? with an empty query
126  (the draft states that these are equivalent)."""
127  if params:
128  url = "%s;%s" % (url, params)
129  return urlunsplit((scheme, netloc, url, query, fragment))
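def urlparse.urlunsplit (   (scheme, netloc, url, query, fragment) )
Put a 5-tuple (scheme, netloc, url, query, fragment), as returned by
urlsplit(), back together into a URL string.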

Definition at line 130 of file urlparse.py.

131 def urlunsplit((scheme, netloc, url, query, fragment)):
132  if netloc or (scheme in uses_netloc and url[:2] == '//'):
133  if url and url[:1] != '/': url = '/' + url
134  url = '//' + (netloc or '') + url
135  if scheme:
136  url = scheme + ':' + url
137  if query:
138  url = url + '?' + query
139  if fragment:
140  url = url + '#' + fragment
141  return url

Variable Documentation

list __all__ = ["urlparse", "urlunparse", "urljoin"]

Definition at line 7 of file urlparse.py.

dictionary _parse_cache = {}

Definition at line 38 of file urlparse.py.

int MAX_CACHE_SIZE = 20

Definition at line 37 of file urlparse.py.

list non_hierarchical
Initial value:
1 = ['gopher', 'hdl', 'mailto', 'news', 'telnet', 'wais',
2  'snews', 'sip',
3  ]

Definition at line 17 of file urlparse.py.

tuple scheme_chars
Initial value:
1 = ('abcdefghijklmnopqrstuvwxyz'
2  'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
3  '0123456789'
4  '+-.')

Definition at line 32 of file urlparse.py.

string test_input

Definition at line 209 of file urlparse.py.

list uses_fragment
Initial value:
1 = ['ftp', 'hdl', 'http', 'gopher', 'news', 'nntp', 'wais',
2  'https', 'shttp', 'snews',
3  'file', 'prospero', '']

Definition at line 27 of file urlparse.py.

list uses_netloc
Initial value:
1 = ['ftp', 'http', 'gopher', 'nntp', 'telnet', 'wais',
2  'file',
3  'https', 'shttp', 'snews',
4  'prospero', 'rtsp', 'rtspu', '']

Definition at line 13 of file urlparse.py.

list uses_params
Initial value:
1 = ['ftp', 'hdl', 'prospero', 'http',
2  'https', 'shttp', 'rtsp', 'rtspu', 'sip',
3  '']

Definition at line 20 of file urlparse.py.

list uses_query
Initial value:
1 = ['http', 'wais',
2  'https', 'shttp',
3  'gopher', 'rtsp', 'rtspu', 'sip',
4  '']

Definition at line 23 of file urlparse.py.

list uses_relative
Initial value:
1 = ['ftp', 'http', 'gopher', 'nntp', 'wais', 'file',
2  'https', 'shttp',
3  'prospero', 'rtsp', 'rtspu', '']

Definition at line 10 of file urlparse.py.