Vega Strike Python Modules doc 0.5.1
Documentation of the "Modules" folder of Vega Strike
 All Data Structures Namespaces Files Functions Variables
SGMLParser Class Reference
Inheritance diagram for SGMLParser:
SGMLParser is inherited by HTMLParser and TestSGMLParser.

Public Member Functions

def __init__
 
def reset
 
def setnomoretags
 
def setliteral
 
def feed
 
def close
 
def error
 
def goahead
 
def parse_comment
 
def parse_pi
 
def get_starttag_text
 
def parse_starttag
 
def parse_endtag
 
def finish_shorttag
 
def finish_starttag
 
def finish_endtag
 
def handle_starttag
 
def handle_endtag
 
def report_unbalanced
 
def handle_charref
 
def handle_entityref
 
def handle_data
 
def handle_comment
 
def handle_decl
 
def handle_pi
 
def unknown_starttag
 
def unknown_endtag
 
def unknown_charref
 
def unknown_entityref
 

Data Fields

 verbose
 
 rawdata
 
 stack
 
 lasttag
 
 nomoretags
 
 literal
 

Static Public Attributes

 entitydefs = \
 

Detailed Description

Definition at line 56 of file sgmllib.py.

Constructor & Destructor Documentation

def __init__ (   self,
  verbose = 0 
)
Initialize and reset this instance.

Definition at line 58 of file sgmllib.py.

58 
59  def __init__(self, verbose=0):
60  """Initialize and reset this instance."""
61  self.verbose = verbose
62  self.reset()

Member Function Documentation

def close (   self)
Handle the remaining data.

Definition at line 97 of file sgmllib.py.

References SGMLParser.goahead().

97 
98  def close(self):
99  """Handle the remaining data."""
100  self.goahead(1)
def error (   self,
  message 
)

Definition at line 101 of file sgmllib.py.

102  def error(self, message):
103  raise SGMLParseError(message)
def feed (   self,
  data 
)
Feed some data to the parser.

Call this as often as you want, with as little or as much text
as you want (may include '\n').  (This just saves the text,
all the processing is done by goahead().)

Definition at line 86 of file sgmllib.py.

References SGMLParser.goahead(), and SGMLParser.rawdata.

86 
87  def feed(self, data):
88  """Feed some data to the parser.
89 
90  Call this as often as you want, with as little or as much text
91  as you want (may include '\n'). (This just saves the text,
92  all the processing is done by goahead().)
93  """
94 
95  self.rawdata = self.rawdata + data
96  self.goahead(0)
def finish_endtag (   self,
  tag 
)

Definition at line 332 of file sgmllib.py.

References SGMLParser.handle_endtag(), SGMLParser.report_unbalanced(), MultiFile.stack, SGMLParser.stack, Pdb.stack, Unpickler.stack, HTMLParser.unknown_endtag(), SlowParser.unknown_endtag, and SGMLParser.unknown_endtag().

333  def finish_endtag(self, tag):
334  if not tag:
335  found = len(self.stack) - 1
336  if found < 0:
337  self.unknown_endtag(tag)
338  return
339  else:
340  if tag not in self.stack:
341  try:
342  method = getattr(self, 'end_' + tag)
343  except AttributeError:
344  self.unknown_endtag(tag)
345  else:
346  self.report_unbalanced(tag)
347  return
348  found = len(self.stack)
349  for i in range(found):
350  if self.stack[i] == tag: found = i
351  while len(self.stack) > found:
352  tag = self.stack[-1]
353  try:
354  method = getattr(self, 'end_' + tag)
355  except AttributeError:
356  method = None
357  if method:
358  self.handle_endtag(tag, method)
359  else:
360  self.unknown_endtag(tag)
361  del self.stack[-1]
def finish_shorttag (   self,
  tag,
  data 
)

Definition at line 307 of file sgmllib.py.

References SGMLParser.finish_endtag(), SgmlopParser.finish_endtag, SGMLParser.finish_starttag(), SgmlopParser.finish_starttag, HTMLParser.handle_data(), SgmlopParser.handle_data, SlowParser.handle_data, and SGMLParser.handle_data().

308  def finish_shorttag(self, tag, data):
309  self.finish_starttag(tag, [])
310  self.handle_data(data)
311  self.finish_endtag(tag)
def finish_starttag (   self,
  tag,
  attrs 
)

Definition at line 314 of file sgmllib.py.

References SGMLParser.handle_starttag(), HTMLParser.unknown_starttag(), SlowParser.unknown_starttag, and SGMLParser.unknown_starttag().

315  def finish_starttag(self, tag, attrs):
316  try:
317  method = getattr(self, 'start_' + tag)
318  except AttributeError:
319  try:
320  method = getattr(self, 'do_' + tag)
321  except AttributeError:
322  self.unknown_starttag(tag, attrs)
323  return -1
324  else:
325  self.handle_starttag(tag, method, attrs)
326  return 0
327  else:
328  self.stack.append(tag)
329  self.handle_starttag(tag, method, attrs)
330  return 1
def get_starttag_text (   self)

Definition at line 235 of file sgmllib.py.

References SGMLParser.__starttag_text.

236  def get_starttag_text(self):
237  return self.__starttag_text
def goahead (   self,
  end 
)

Definition at line 107 of file sgmllib.py.

References SGMLParser.error(), MH.error(), Folder.error(), SGMLParser.handle_charref(), HTMLParser.handle_data(), SgmlopParser.handle_data, SlowParser.handle_data, SGMLParser.handle_data(), SGMLParser.handle_entityref(), SGMLParser.literal, SGMLParser.nomoretags, SGMLParser.parse_comment(), SGMLParser.parse_endtag(), SGMLParser.parse_pi(), SGMLParser.parse_starttag(), and SGMLParser.rawdata.

108  def goahead(self, end):
109  rawdata = self.rawdata
110  i = 0
111  n = len(rawdata)
112  while i < n:
113  if self.nomoretags:
114  self.handle_data(rawdata[i:n])
115  i = n
116  break
117  match = interesting.search(rawdata, i)
118  if match: j = match.start()
119  else: j = n
120  if i < j:
121  self.handle_data(rawdata[i:j])
122  i = j
123  if i == n: break
124  if rawdata[i] == '<':
125  if starttagopen.match(rawdata, i):
126  if self.literal:
127  self.handle_data(rawdata[i])
128  i = i+1
129  continue
130  k = self.parse_starttag(i)
131  if k < 0: break
132  i = k
133  continue
134  if rawdata.startswith("</", i):
135  k = self.parse_endtag(i)
136  if k < 0: break
137  i = k
138  self.literal = 0
139  continue
140  if self.literal:
141  if n > (i + 1):
142  self.handle_data("<")
143  i = i+1
144  else:
145  # incomplete
146  break
147  continue
148  if rawdata.startswith("<!--", i):
149  k = self.parse_comment(i)
150  if k < 0: break
151  i = k
152  continue
153  if rawdata.startswith("<?", i):
154  k = self.parse_pi(i)
155  if k < 0: break
156  i = i+k
157  continue
158  if rawdata.startswith("<!", i):
159  # This is some sort of declaration; in "HTML as
160  # deployed," this should only be the document type
161  # declaration ("<!DOCTYPE html...>").
162  k = self.parse_declaration(i)
163  if k < 0: break
164  i = k
165  continue
166  elif rawdata[i] == '&':
167  if self.literal:
168  self.handle_data(rawdata[i])
169  i = i+1
170  continue
171  match = charref.match(rawdata, i)
172  if match:
173  name = match.group(1)
174  self.handle_charref(name)
175  i = match.end(0)
176  if rawdata[i-1] != ';': i = i-1
177  continue
178  match = entityref.match(rawdata, i)
179  if match:
180  name = match.group(1)
181  self.handle_entityref(name)
182  i = match.end(0)
183  if rawdata[i-1] != ';': i = i-1
184  continue
185  else:
186  self.error('neither < nor & ??')
187  # We get here only if incomplete matches but
188  # nothing else
189  match = incomplete.match(rawdata, i)
190  if not match:
191  self.handle_data(rawdata[i])
192  i = i+1
193  continue
194  j = match.end(0)
195  if j == n:
196  break # Really incomplete
197  self.handle_data(rawdata[i:j])
198  i = j
199  # end while
200  if end and i < n:
201  self.handle_data(rawdata[i:n])
202  i = n
203  self.rawdata = rawdata[i:]
204  # XXX if end: check for empty stack
def handle_charref (   self,
  name 
)
Handle character reference, no need to override.

Definition at line 376 of file sgmllib.py.

References HTMLParser.handle_data(), SgmlopParser.handle_data, SGMLParser.handle_data(), SlowParser.handle_data, and SGMLParser.unknown_charref().

377  def handle_charref(self, name):
378  """Handle character reference, no need to override."""
379  try:
380  n = int(name)
381  except ValueError:
382  self.unknown_charref(name)
383  return
384  if not 0 <= n <= 255:
385  self.unknown_charref(name)
386  return
387  self.handle_data(chr(n))
def handle_comment (   self,
  data 
)

Definition at line 410 of file sgmllib.py.

411  def handle_comment(self, data):
412  pass
def handle_data (   self,
  data 
)

Definition at line 406 of file sgmllib.py.

407  def handle_data(self, data):
408  pass
def handle_decl (   self,
  decl 
)

Definition at line 414 of file sgmllib.py.

415  def handle_decl(self, decl):
416  pass
def handle_endtag (   self,
  tag,
  method 
)

Definition at line 367 of file sgmllib.py.

368  def handle_endtag(self, tag, method):
369  method()
def handle_entityref (   self,
  name 
)
Handle entity references.

There should be no need to override this method; it can be
tailored by setting up the self.entitydefs mapping appropriately.

Definition at line 392 of file sgmllib.py.

References SGMLParser.entitydefs, HTMLParser.handle_data(), SgmlopParser.handle_data, SGMLParser.handle_data(), SlowParser.handle_data, and SGMLParser.unknown_entityref().

393  def handle_entityref(self, name):
394  """Handle entity references.
395 
396  There should be no need to override this method; it can be
397  tailored by setting up the self.entitydefs mapping appropriately.
398  """
399  table = self.entitydefs
400  if table.has_key(name):
401  self.handle_data(table[name])
402  else:
403  self.unknown_entityref(name)
404  return
def handle_pi (   self,
  data 
)

Definition at line 418 of file sgmllib.py.

419  def handle_pi(self, data):
420  pass
def handle_starttag (   self,
  tag,
  method,
  attrs 
)

Definition at line 363 of file sgmllib.py.

364  def handle_starttag(self, tag, method, attrs):
365  method(attrs)
def parse_comment (   self,
  i,
  report = 1 
)

Definition at line 206 of file sgmllib.py.

References SGMLParser.error(), MH.error(), Folder.error(), SGMLParser.handle_comment(), and SGMLParser.rawdata.

207  def parse_comment(self, i, report=1):
208  rawdata = self.rawdata
209  if rawdata[i:i+4] != '<!--':
210  self.error('unexpected call to parse_comment()')
211  match = commentclose.search(rawdata, i+4)
212  if not match:
213  return -1
214  if report:
215  j = match.start(0)
216  self.handle_comment(rawdata[i+4: j])
217  return match.end(0)
def parse_endtag (   self,
  i 
)

Definition at line 294 of file sgmllib.py.

References SGMLParser.finish_endtag(), SgmlopParser.finish_endtag, string.lower(), SGMLParser.rawdata, and string.strip().

295  def parse_endtag(self, i):
296  rawdata = self.rawdata
297  match = endbracket.search(rawdata, i+1)
298  if not match:
299  return -1
300  j = match.start(0)
301  tag = rawdata[i+2:j].strip().lower()
302  if rawdata[j] == '>':
303  j = j+1
304  self.finish_endtag(tag)
305  return j
def parse_pi (   self,
  i 
)

Definition at line 222 of file sgmllib.py.

References SGMLParser.error(), MH.error(), Folder.error(), SGMLParser.handle_pi(), and SGMLParser.rawdata.

223  def parse_pi(self, i):
224  rawdata = self.rawdata
225  if rawdata[i:i+2] != '<?':
226  self.error('unexpected call to parse_pi()')
227  match = piclose.search(rawdata, i+2)
228  if not match:
229  return -1
230  j = match.start(0)
231  self.handle_pi(rawdata[i+2: j])
232  j = match.end(0)
233  return j-i
def parse_starttag (   self,
  i 
)

Definition at line 239 of file sgmllib.py.

References SGMLParser.__starttag_text, SGMLParser.error(), MH.error(), Folder.error(), SGMLParser.finish_shorttag(), SGMLParser.finish_starttag(), SgmlopParser.finish_starttag, SGMLParser.lasttag, string.lower(), and SGMLParser.rawdata.

240  def parse_starttag(self, i):
241  self.__starttag_text = None
242  start_pos = i
243  rawdata = self.rawdata
244  if shorttagopen.match(rawdata, i):
245  # SGML shorthand: <tag/data/ == <tag>data</tag>
246  # XXX Can data contain &... (entity or char refs)?
247  # XXX Can data contain < or > (tag characters)?
248  # XXX Can there be whitespace before the first /?
249  match = shorttag.match(rawdata, i)
250  if not match:
251  return -1
252  tag, data = match.group(1, 2)
253  self.__starttag_text = '<%s/' % tag
254  tag = tag.lower()
255  k = match.end(0)
256  self.finish_shorttag(tag, data)
257  self.__starttag_text = rawdata[start_pos:match.end(1) + 1]
258  return k
259  # XXX The following should skip matching quotes (' or ")
260  match = endbracket.search(rawdata, i+1)
261  if not match:
262  return -1
263  j = match.start(0)
264  # Now parse the data between i+1 and j into a tag and attrs
265  attrs = []
266  if rawdata[i:i+2] == '<>':
267  # SGML shorthand: <> == <last open tag seen>
268  k = j
269  tag = self.lasttag
270  else:
271  match = tagfind.match(rawdata, i+1)
272  if not match:
273  self.error('unexpected call to parse_starttag')
274  k = match.end(0)
275  tag = rawdata[i+1:k].lower()
276  self.lasttag = tag
277  while k < j:
278  match = attrfind.match(rawdata, k)
279  if not match: break
280  attrname, rest, attrvalue = match.group(1, 2, 3)
281  if not rest:
282  attrvalue = attrname
283  elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
284  attrvalue[:1] == '"' == attrvalue[-1:]:
285  attrvalue = attrvalue[1:-1]
286  attrs.append((attrname.lower(), attrvalue))
287  k = match.end(0)
288  if rawdata[j] == '>':
289  j = j+1
290  self.__starttag_text = rawdata[start_pos:j]
291  self.finish_starttag(tag, attrs)
292  return j
def report_unbalanced (   self,
  tag 
)

Definition at line 371 of file sgmllib.py.

References MultiFile.stack, SGMLParser.stack, Pdb.stack, Unpickler.stack, SGMLParser.verbose, _Verbose.verbose, and Tester.verbose.

372  def report_unbalanced(self, tag):
373  if self.verbose:
374  print '*** Unbalanced </' + tag + '>'
375  print '*** Stack:', self.stack
def reset (   self)
Reset this instance. Loses all unprocessed data.

Definition at line 63 of file sgmllib.py.

63 
64  def reset(self):
65  """Reset this instance. Loses all unprocessed data."""
66  self.rawdata = ''
67  self.stack = []
68  self.lasttag = '???'
69  self.nomoretags = 0
70  self.literal = 0
71  markupbase.ParserBase.reset(self)
def setliteral (   self,
  args 
)
Enter literal mode (CDATA).

Intended for derived classes only.

Definition at line 79 of file sgmllib.py.

References SGMLParser.literal.

79 
80  def setliteral(self, *args):
81  """Enter literal mode (CDATA).
82 
83  Intended for derived classes only.
84  """
85  self.literal = 1
def setnomoretags (   self)
Enter literal mode (CDATA) till EOF.

Intended for derived classes only.

Definition at line 72 of file sgmllib.py.

References SGMLParser.literal, and SGMLParser.nomoretags.

72 
73  def setnomoretags(self):
74  """Enter literal mode (CDATA) till EOF.
75 
76  Intended for derived classes only.
77  """
78  self.nomoretags = self.literal = 1
def unknown_charref (   self,
  ref 
)

Definition at line 424 of file sgmllib.py.

def unknown_charref(self, ref): pass
def unknown_endtag (   self,
  tag 
)

Definition at line 423 of file sgmllib.py.

def unknown_endtag(self, tag): pass
def unknown_entityref (   self,
  ref 
)

Definition at line 425 of file sgmllib.py.

426  def unknown_entityref(self, ref): pass
427 
def unknown_starttag (   self,
  tag,
  attrs 
)

Definition at line 422 of file sgmllib.py.

def unknown_starttag(self, tag, attrs): pass

Field Documentation

entitydefs = \
static

Definition at line 389 of file sgmllib.py.

lasttag

Definition at line 67 of file sgmllib.py.

literal

Definition at line 69 of file sgmllib.py.

nomoretags

Definition at line 68 of file sgmllib.py.

rawdata

Definition at line 65 of file sgmllib.py.

stack

Definition at line 66 of file sgmllib.py.

verbose

Definition at line 60 of file sgmllib.py.


The documentation for this class was generated from the following file: