Vega strike Python Modules doc: urlparse Namespace Reference

def urlparse.urldefrag ( url)

Removes any existing fragment from URL.

Returns a tuple of the defragmented URL and the fragment.  If
the URL contained no fragments, the second element is the
empty string.

Definition at line 194 of file urlparse.py.

References urlunparse().

 
 def urldefrag(url):
     """Removes any existing fragment from URL.
 
     Returns a tuple of the defragmented URL and the fragment.  If
     the URL contained no fragments, the second element is the
     empty string.
     """
     if '#' in url:
         s, n, p, a, q, frag = urlparse(url)
         defrag = urlunparse((s, n, p, a, q, ''))
         return defrag, frag
     else:
         return url, ''
 

def urlparse.urljoin	(	base,
		url,
		allow_fragments = `1`
	)

Join a base URL and a possibly relative URL to form an absolute
interpretation of the latter.

Definition at line 142 of file urlparse.py.

References dospath.join(), and urlunparse().

 
 def urljoin(base, url, allow_fragments = 1):
     """Join a base URL and a possibly relative URL to form an absolute
     interpretation of the latter."""
     if not base:
         return url
     if not url:
         return base
     bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
             urlparse(base, '', allow_fragments)
     scheme, netloc, path, params, query, fragment = \
             urlparse(url, bscheme, allow_fragments)
     if scheme != bscheme or scheme not in uses_relative:
         return url
     if scheme in uses_netloc:
         if netloc:
             return urlunparse((scheme, netloc, path,
                                params, query, fragment))
         netloc = bnetloc
     if path[:1] == '/':
         return urlunparse((scheme, netloc, path,
                            params, query, fragment))
     if not path:
         if not params:
             params = bparams
             if not query:
                 query = bquery
         return urlunparse((scheme, netloc, bpath,
                            params, query, fragment))
     segments = bpath.split('/')[:-1] + path.split('/')
     # XXX The stuff below is bogus in various ways...
     if segments[-1] == '.':
         segments[-1] = ''
     while '.' in segments:
         segments.remove('.')
     while 1:
         i = 1
         n = len(segments) - 1
         while i < n:
             if (segments[i] == '..'
                 and segments[i-1] not in ('', '..')):
                 del segments[i-1:i+1]
                 break
             i = i+1
         else:
             break
     if segments == ['', '..']:
         segments[-1] = ''
     elif len(segments) >= 2 and segments[-1] == '..':
         segments[-2:] = ['']
     return urlunparse((scheme, netloc, '/'.join(segments),
                        params, query, fragment))

def urlparse.urlparse	(	url,
		scheme = `''`,
		allow_fragments = `1`
	)

Parse a URL into 6 components:
<scheme>://<netloc>/<path>;<params>?<query>#<fragment>
Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
Note that we don't break the components up in smaller bits
(e.g. netloc is a single string) and we don't expand % escapes.

Definition at line 46 of file urlparse.py.

References urlsplit().

 
 def urlparse(url, scheme='', allow_fragments=1):
     """Parse a URL into 6 components:
     <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
     Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
     Note that we don't break the components up in smaller bits
     (e.g. netloc is a single string) and we don't expand % escapes."""
     tuple = urlsplit(url, scheme, allow_fragments)
     scheme, netloc, url, query, fragment = tuple
     if scheme in uses_params and ';' in url:
         url, params = _splitparams(url)
     else:
         params = ''
     return scheme, netloc, url, params, query, fragment

def urlparse.urlsplit	(	url,
		scheme = `''`,
		allow_fragments = `1`
	)

Parse a URL into 5 components:
<scheme>://<netloc>/<path>?<query>#<fragment>
Return a 5-tuple: (scheme, netloc, path, query, fragment).
Note that we don't break the components up in smaller bits
(e.g. netloc is a single string) and we don't expand % escapes.

Definition at line 69 of file urlparse.py.

References clear_cache(), and string.lower().

 
 def urlsplit(url, scheme='', allow_fragments=1):
     """Parse a URL into 5 components:
     <scheme>://<netloc>/<path>?<query>#<fragment>
     Return a 5-tuple: (scheme, netloc, path, query, fragment).
     Note that we don't break the components up in smaller bits
     (e.g. netloc is a single string) and we don't expand % escapes."""
     key = url, scheme, allow_fragments
     cached = _parse_cache.get(key, None)
     if cached:
         return cached
     if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
         clear_cache()
     netloc = query = fragment = ''
     i = url.find(':')
     if i > 0:
         if url[:i] == 'http': # optimize the common case
             scheme = url[:i].lower()
             url = url[i+1:]
             if url[:2] == '//':
                 i = url.find('/', 2)
                 if i < 0:
                     i = url.find('#')
                     if i < 0:
                         i = len(url)
                 netloc = url[2:i]
                 url = url[i:]
             if allow_fragments and '#' in url:
                 url, fragment = url.split('#', 1)
             if '?' in url:
                 url, query = url.split('?', 1)
             tuple = scheme, netloc, url, query, fragment
             _parse_cache[key] = tuple
             return tuple
         for c in url[:i]:
             if c not in scheme_chars:
                 break
         else:
             scheme, url = url[:i].lower(), url[i+1:]
     if scheme in uses_netloc:
         if url[:2] == '//':
             i = url.find('/', 2)
             if i < 0:
                 i = len(url)
             netloc, url = url[2:i], url[i:]
     if allow_fragments and scheme in uses_fragment and '#' in url:
         url, fragment = url.split('#', 1)
     if scheme in uses_query and '?' in url:
         url, query = url.split('?', 1)
     tuple = scheme, netloc, url, query, fragment
     _parse_cache[key] = tuple
     return tuple

def urlparse.urlunparse	(	scheme,
		netloc,
		url,
		params,
		query,
		fragment
	)

Put a parsed URL back together again.  This may result in a
slightly different, but equivalent URL, if the URL that was parsed
originally had redundant delimiters, e.g. a ? with an empty query
(the draft states that these are equivalent).

Definition at line 121 of file urlparse.py.

References urlunsplit().

 
 def urlunparse((scheme, netloc, url, params, query, fragment)):
     """Put a parsed URL back together again.  This may result in a
     slightly different, but equivalent URL, if the URL that was parsed
     originally had redundant delimiters, e.g. a ? with an empty query
     (the draft states that these are equivalent)."""
     if params:
         url = "%s;%s" % (url, params)
     return urlunsplit((scheme, netloc, url, query, fragment))

def urlparse.urlunsplit	(	scheme,
		netloc,
		url,
		query,
		fragment
	)

Definition at line 130 of file urlparse.py.

 
 def urlunsplit((scheme, netloc, url, query, fragment)):
     if netloc or (scheme in uses_netloc and url[:2] == '//'):
         if url and url[:1] != '/': url = '/' + url
         url = '//' + (netloc or '') + url
     if scheme:
         url = scheme + ':' + url
     if query:
         url = url + '?' + query
     if fragment:
         url = url + '#' + fragment
     return url

list __all__ = ["urlparse", "urlunparse", "urljoin"]

Definition at line 7 of file urlparse.py.

dictionary _parse_cache = {}

Definition at line 38 of file urlparse.py.

int MAX_CACHE_SIZE = 20

Definition at line 37 of file urlparse.py.

list non_hierarchical

Initial value:

 = ['gopher', 'hdl', 'mailto', 'news', 'telnet', 'wais',
                     'snews', 'sip',
                     ]

Definition at line 17 of file urlparse.py.

string scheme_chars

Initial value:

 = ('abcdefghijklmnopqrstuvwxyz'
                 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                 '0123456789'
                 '+-.')

Definition at line 32 of file urlparse.py.

string test_input

Definition at line 209 of file urlparse.py.

list uses_fragment

Initial value:

 = ['ftp', 'hdl', 'http', 'gopher', 'news', 'nntp', 'wais',
                  'https', 'shttp', 'snews',
                  'file', 'prospero', '']

Definition at line 27 of file urlparse.py.

list uses_netloc

Initial value:

 = ['ftp', 'http', 'gopher', 'nntp', 'telnet', 'wais',
                'file',
                'https', 'shttp', 'snews',
                'prospero', 'rtsp', 'rtspu', '']

Definition at line 13 of file urlparse.py.

list uses_params

Initial value:

 = ['ftp', 'hdl', 'prospero', 'http',
                'https', 'shttp', 'rtsp', 'rtspu', 'sip',
                '']

Definition at line 20 of file urlparse.py.

list uses_query

Initial value:

 = ['http', 'wais',
               'https', 'shttp',
               'gopher', 'rtsp', 'rtspu', 'sip',
               '']

Definition at line 23 of file urlparse.py.

list uses_relative

Initial value:

 = ['ftp', 'http', 'gopher', 'nntp', 'wais', 'file',
                  'https', 'shttp',
                  'prospero', 'rtsp', 'rtspu', '']

Definition at line 10 of file urlparse.py.

Functions

Variables

Detailed Description

Function Documentation

Variable Documentation

Functions
def	clear_cache

def	urlparse

def	urlsplit

def	urlunparse

def	urlunsplit

def	urljoin

def	urldefrag

def	test

Variables
list	__all__ = ["urlparse", "urlunparse", "urljoin"]

list	uses_relative

list	uses_netloc

list	non_hierarchical

list	uses_params

list	uses_query

list	uses_fragment

string	scheme_chars

int	MAX_CACHE_SIZE = 20

dictionary	_parse_cache = {}

string	test_input