11 """Internal support module for sre"""
15 from sre_constants
import *
17 assert _sre.MAGIC == MAGIC,
"SRE module mismatch"
21 def _compile(code, pattern, flags):
24 for op, av
in pattern:
25 if op
in (LITERAL, NOT_LITERAL):
26 if flags & SRE_FLAG_IGNORECASE:
27 emit(OPCODES[OP_IGNORE[op]])
28 emit(_sre.getlower(av, flags))
33 if flags & SRE_FLAG_IGNORECASE:
34 emit(OPCODES[OP_IGNORE[op]])
35 def fixup(literal, flags=flags):
36 return _sre.getlower(literal, flags)
40 skip = len(code);
emit(0)
41 _compile_charset(av, flags, code, fixup)
42 code[skip] = len(code) - skip
44 if flags & SRE_FLAG_DOTALL:
45 emit(OPCODES[ANY_ALL])
48 elif op
in (REPEAT, MIN_REPEAT, MAX_REPEAT):
49 if flags & SRE_FLAG_TEMPLATE:
50 raise error,
"internal: unsupported template operator"
52 skip = len(code);
emit(0)
55 _compile(code, av[2], flags)
56 emit(OPCODES[SUCCESS])
57 code[skip] = len(code) - skip
58 elif _simple(av)
and op == MAX_REPEAT:
59 emit(OPCODES[REPEAT_ONE])
60 skip = len(code);
emit(0)
63 _compile(code, av[2], flags)
64 emit(OPCODES[SUCCESS])
65 code[skip] = len(code) - skip
68 skip = len(code);
emit(0)
71 _compile(code, av[2], flags)
72 code[skip] = len(code) - skip
74 emit(OPCODES[MAX_UNTIL])
76 emit(OPCODES[MIN_UNTIL])
77 elif op
is SUBPATTERN:
82 _compile(code, av[1], flags)
86 elif op
in (SUCCESS, FAILURE):
88 elif op
in (ASSERT, ASSERT_NOT):
90 skip = len(code);
emit(0)
94 lo, hi = av[1].getwidth()
96 raise error,
"look-behind requires fixed-width pattern"
98 _compile(code, av[1], flags)
99 emit(OPCODES[SUCCESS])
100 code[skip] = len(code) - skip
103 skip = len(code);
emit(0)
104 _compile(code, av, flags)
105 emit(OPCODES[SUCCESS])
106 code[skip] = len(code) - skip
109 if flags & SRE_FLAG_MULTILINE:
110 av = AT_MULTILINE.get(av, av)
111 if flags & SRE_FLAG_LOCALE:
112 av = AT_LOCALE.get(av, av)
113 elif flags & SRE_FLAG_UNICODE:
114 av = AT_UNICODE.get(av, av)
120 skip = len(code);
emit(0)
122 _compile(code, av, flags)
124 tail.append(len(code));
emit(0)
125 code[skip] = len(code) - skip
128 code[tail] = len(code) - tail
131 if flags & SRE_FLAG_LOCALE:
133 elif flags & SRE_FLAG_UNICODE:
137 if flags & SRE_FLAG_IGNORECASE:
138 emit(OPCODES[OP_IGNORE[op]])
143 raise ValueError, (
"unsupported operand type", op)
145 def _compile_charset(charset, flags, code, fixup=None):
150 for op, av
in _optimize_charset(charset, fixup):
161 elif op
is BIGCHARSET:
164 if flags & SRE_FLAG_LOCALE:
165 emit(CHCODES[CH_LOCALE[av]])
166 elif flags & SRE_FLAG_UNICODE:
167 emit(CHCODES[CH_UNICODE[av]])
171 raise error,
"internal: unsupported set operator"
172 emit(OPCODES[FAILURE])
174 def _optimize_charset(charset, fixup):
179 for op, av
in charset:
183 charmap[fixup(av)] = 1
185 for i
in range(fixup(av[0]), fixup(av[1])+1):
192 return _optimize_unicode(charset, fixup)
211 out.append((LITERAL, p))
213 out.append((RANGE, (p, p+n-1)))
214 if len(out) < len(charset):
218 data = _mk_bitmap(charmap)
219 out.append((CHARSET, data))
223 def _mk_bitmap(bits):
255 def _optimize_unicode(charset, fixup):
258 for op, av
in charset:
262 charmap[fixup(av)] = 1
264 for i
in range(fixup(av[0]), fixup(av[1])+1):
270 for i
in range(65536):
271 charmap[i] =
not charmap[i]
277 chunk =
tuple(charmap[i*256:(i+1)*256])
278 new = comps.setdefault(chunk, block)
282 data += _mk_bitmap(chunk)
284 assert MAXCODE == 65535
286 if sys.byteorder ==
'big':
287 header.append(256*mapping[2*i]+mapping[2*i+1])
289 header.append(mapping[2*i]+256*mapping[2*i+1])
291 return [(BIGCHARSET, data)]
295 lo, hi = av[2].getwidth()
296 if lo == 0
and hi == MAXREPEAT:
297 raise error,
"nothing to repeat"
298 return lo == hi == 1
and av[2][0][0] != SUBPATTERN
300 def _compile_info(code, pattern, flags):
304 lo, hi = pattern.getwidth()
311 if not (flags & SRE_FLAG_IGNORECASE):
313 for op, av
in pattern.data:
315 if len(prefix) == prefix_skip:
316 prefix_skip = prefix_skip + 1
318 elif op
is SUBPATTERN
and len(av[1]) == 1:
327 if not prefix
and pattern.data:
328 op, av = pattern.data[0]
329 if op
is SUBPATTERN
and av[1]:
332 charset.append((op, av))
370 mask = SRE_INFO_PREFIX
371 if len(prefix) == prefix_skip == len(pattern.data):
372 mask = mask + SRE_INFO_LITERAL
374 mask = mask + SRE_INFO_CHARSET
381 prefix = prefix[:MAXCODE]
392 table = [-1] + ([0]*len(prefix))
393 for i
in range(len(prefix)):
394 table[i+1] = table[i]+1
395 while table[i+1] > 0
and prefix[i] != prefix[table[i+1]-1]:
396 table[i+1] = table[table[i+1]-1]+1
397 code.extend(table[1:])
399 _compile_charset(charset, 0, code)
400 code[skip] = len(code) - skip
402 STRING_TYPES = [type(
"")]
405 STRING_TYPES.append(type(
unicode(
"")))
411 flags = p.pattern.flags | flags
415 _compile_info(code, p, flags)
418 _compile(code, p.data, flags)
420 code.append(OPCODES[SUCCESS])
427 if type(p)
in STRING_TYPES:
434 code = _code(p, flags)
439 assert p.pattern.groups <= 100,\
440 "sorry, but this version only supports 100 named groups"
443 groupindex = p.pattern.groupdict
444 indexgroup = [
None] * p.pattern.groups
445 for k, i
in groupindex.items():
449 pattern, flags, code,
451 groupindex, indexgroup