Vega Strike Python Modules doc  0.5.1
Documentation of the "Modules" folder of Vega Strike
 All Data Structures Namespaces Files Functions Variables
regsub.py
Go to the documentation of this file.
1 """Regexp-based split and replace using the obsolete regex module.
2 
3 This module is only for backward compatibility. These operations
4 are now provided by the new regular expression module, "re".
5 
6 sub(pat, repl, str): replace first occurrence of pattern in string
7 gsub(pat, repl, str): replace all occurrences of pattern in string
8 split(str, pat, maxsplit): split string using pattern as delimiter
9 splitx(str, pat, maxsplit): split string using pattern as delimiter plus
10  return delimiters
11 """
12 
13 import warnings
# Tell importers that this module is obsolete.  stacklevel=2 attributes the
# warning to the module that imported regsub rather than to regsub itself,
# so the reported location is the code that actually needs updating.
warnings.warn("the regsub module is deprecated; please use re.sub()",
              DeprecationWarning, stacklevel=2)

# Ignore further deprecation warnings about this module
warnings.filterwarnings("ignore", "", DeprecationWarning, __name__)
19 
20 import regex
21 
22 __all__ = ["sub","gsub","split","splitx","capwords"]
23 
24 # Replace first occurrence of pattern pat in string str by replacement
25 # repl. If the pattern isn't found, the string is returned unchanged.
26 # The replacement may contain references \digit to subpatterns and
27 # escaped backslashes. The pattern may be a string or an already
28 # compiled pattern.
29 
def sub(pat, repl, str):
    """Replace the first match of pat in str by the expansion of repl.

    pat is either a pattern string or an already compiled pattern.
    repl may contain backslash-digit references to subpatterns and
    escaped backslashes.  If pat matches nowhere, str is returned
    unchanged.
    """
    matcher = compile(pat)
    if matcher.search(str) < 0:
        # No match: hand the input back untouched.
        return str
    groups = matcher.regs
    begin, end = groups[0]
    return str[:begin] + expand(repl, groups, str) + str[end:]
37 
38 
39 # Replace all (non-overlapping) occurrences of pattern pat in string
40 # str by replacement repl. The same rules as for sub() apply.
41 # Empty matches for the pattern are replaced only when not adjacent to
42 # a previous match, so e.g. gsub('', '-', 'abc') returns '-a-b-c-'.
43 
def gsub(pat, repl, str):
    """Replace every non-overlapping match of pat in str by repl.

    The same rules as for sub() apply to pat and repl.  An empty match
    is replaced only when it is not adjacent to the previous match, so
    gsub('', '-', 'abc') returns '-a-b-c-'.
    """
    matcher = compile(pat)
    pieces = []
    pos = 0
    seen_match = 0
    while matcher.search(str, pos) >= 0:
        groups = matcher.regs
        begin, end = groups[0]
        if seen_match and begin == end == pos:
            # Empty match right where the previous match ended: look for
            # the next match one character further on, or stop if there
            # is nothing left to search.
            if pos >= len(str) or matcher.search(str, pos + 1) < 0:
                break
            groups = matcher.regs
            begin, end = groups[0]
        pieces.append(str[pos:begin])
        pieces.append(expand(repl, groups, str))
        pos = end
        seen_match = 1
    # Whatever follows the last replaced match is copied verbatim.
    pieces.append(str[pos:])
    return ''.join(pieces)
62 
63 
64 # Split string str in fields separated by delimiters matching pattern
65 # pat. Only non-empty matches for the pattern are considered, so e.g.
66 # split('abc', '') returns ['abc'].
67 # The optional 3rd argument sets the number of splits that are performed.
68 
def split(str, pat, maxsplit = 0):
    """Split str into fields separated by non-empty matches of pat.

    The delimiters themselves are not included in the returned list.
    A non-zero maxsplit limits the number of splits performed.
    """
    keep_delimiters = 0
    return intsplit(str, pat, maxsplit, keep_delimiters)
71 
72 # Split string str in fields separated by delimiters matching pattern
73 # pat. Only non-empty matches for the pattern are considered, so e.g.
74 # splitx('abc', '') returns ['abc']. The delimiters are also included
75 # in the list.
76 # The optional 3rd argument sets the number of splits that are performed.
77 
78 
def splitx(str, pat, maxsplit = 0):
    """Split str like split(), but keep the delimiters in the result.

    The returned list alternates fields and the delimiter text that
    separated them.  A non-zero maxsplit limits the number of splits
    performed.
    """
    keep_delimiters = 1
    return intsplit(str, pat, maxsplit, keep_delimiters)
81 
82 # Internal function used to implement split() and splitx().
83 
def intsplit(str, pat, maxsplit, retain):
    """Internal function used to implement split() and splitx().

    Returns the list of fields of str delimited by non-empty matches of
    pat.  When retain is true, each delimiter is appended after the
    field it terminates.  A non-zero maxsplit stops splitting after
    that many delimiters; the rest of the string becomes the last field.
    """
    matcher = compile(pat)
    fields = []
    field_start = scan_from = 0
    splits_done = 0
    while matcher.search(str, scan_from) >= 0:
        begin, end = matcher.regs[0]
        if begin == end:
            # An empty match never counts as a delimiter: resume the
            # search one character further on, unless the string is
            # exhausted.
            scan_from += 1
            if scan_from >= len(str):
                break
            continue
        fields.append(str[field_start:begin])
        if retain:
            fields.append(str[begin:end])
        field_start = scan_from = end
        splits_done += 1
        if maxsplit and splits_done >= maxsplit:
            break
    fields.append(str[field_start:])
    return fields
106 
107 
108 # Capitalize words split using a pattern
109 
def capwords(str, pat='[^a-zA-Z0-9_]+'):
    """Capitalize the words of str, where pat matches the separators.

    The string is split with splitx(), so the result list alternates
    words (even indexes) and separators (odd indexes).  Only the words
    are capitalized; the separators are kept as they were.
    """
    pieces = splitx(str, pat)
    index = 0
    while index < len(pieces):
        pieces[index] = pieces[index].capitalize()
        index += 2
    return "".join(pieces)
115 
116 
117 # Internal subroutines:
118 # compile(pat): compile a pattern, caching already compiled patterns
119 # expand(repl, regs, str): expand \digit escapes in replacement string
120 
121 
122 # Manage a cache of compiled regular expressions.
123 #
124 # If the pattern is a string a compiled version of it is returned. If
125 # the pattern has been used before we return an already compiled
126 # version from the cache; otherwise we compile it now and save the
127 # compiled version in the cache, along with the syntax it was compiled
128 # with. Instead of a string, a compiled regular expression can also
129 # be passed.
130 
131 cache = {}
132 
def compile(pat):
    """Return a compiled pattern for pat, reusing earlier compilations.

    If pat is not a string it is assumed to be an already compiled
    pattern and is returned as is.  Otherwise the compiled pattern is
    looked up in the module-level cache under the key
    (pat, current regex syntax) and compiled and stored on a miss.

    Note that this function intentionally carries the same name as the
    builtin compile(); within this module the name refers to this
    function.
    """
    if not isinstance(pat, str):
        return pat  # Assume it is a compiled regex
    # The syntax is part of the key because the same pattern text can
    # compile differently under a different regex syntax setting.
    key = (pat, regex.get_syntax())
    if key not in cache:
        cache[key] = regex.compile(pat)
    return cache[key]
142 
143 
def clear_cache():
    """Discard all cached compiled patterns.

    Rebinds the module-level cache to a fresh empty dictionary.
    """
    global cache
    cache = {}
147 
148 
149 # Expand \digit in the replacement.
150 # Each occurrence of \digit is replaced by the substring of str
151 # indicated by regs[digit]. To include a literal \ in the
152 # replacement, double it; other \ escapes are left unchanged (i.e.
153 # the \ and the following character are both copied).
154 
def expand(repl, regs, str):
    """Expand backslash escapes in the replacement string repl.

    Each backslash followed by a digit is replaced by the substring of
    str selected by regs[digit], where regs is a sequence of
    (start, end) index pairs.  A doubled backslash yields one literal
    backslash.  Any other backslash escape is left unchanged, i.e. the
    backslash and the following character are both copied.  A single
    backslash at the very end of repl is copied as is.

    If repl contains no backslash at all it is returned unchanged.
    """
    if '\\' not in repl:
        return repl
    # Collect the output in a list and join once at the end, instead of
    # growing a string piece by piece.
    pieces = []
    i = 0
    n = len(repl)
    while i < n:
        c = repl[i]
        i += 1
        if c != '\\' or i >= n:
            # Ordinary character, or a lone trailing backslash.
            pieces.append(c)
            continue
        c = repl[i]
        i += 1
        if '0' <= c <= '9':
            a, b = regs[int(c)]
            pieces.append(str[a:b])
        elif c == '\\':
            pieces.append(c)
        else:
            pieces.append('\\' + c)
    return ''.join(pieces)
175 
176 
177 # Test program, reads sequences "pat repl str" from stdin.
178 # Optional argument specifies pattern used to split lines.
179 
def test():
    """Test program: read sequences "pat repl str" from stdin.

    Each input line is split into fields with the delimiter pattern
    given as the first command line argument, or runs of blanks, tabs
    and newlines by default.  A line with exactly three fields is
    treated as pattern, replacement and subject string, and the results
    of sub() and gsub() are printed.  Any other line is reported
    together with the fields it was split into.  Reading stops at end
    of input.
    """
    import sys
    if sys.argv[1:]:
        delpat = sys.argv[1]
    else:
        delpat = '[ \t\n]+'
    while 1:
        # Only prompt when a person is typing the input.
        if sys.stdin.isatty():
            sys.stderr.write('--> ')
        line = sys.stdin.readline()
        if not line:
            break
        if line[-1] == '\n':
            line = line[:-1]
        fields = split(line, delpat)
        if len(fields) != 3:
            print('Sorry, not three fields')
            print('split: ' + repr(fields))
            continue
        # Reuse the fields computed above rather than splitting again.
        pat, repl, text = fields
        print('sub : ' + repr(sub(pat, repl, text)))
        print('gsub: ' + repr(gsub(pat, repl, text)))