Vega Strike Python Modules doc  0.5.1
Documentation of the "Modules" folder of Vega Strike
 All Data Structures Namespaces Files Functions Variables
HTMLParser Class Reference
Inheritance diagram for HTMLParser:
SGMLParser

Public Member Functions

def __init__
 
def handle_data
 
def save_bgn
 
def save_end
 
def anchor_bgn
 
def anchor_end
 
def handle_image
 
def start_html
 
def end_html
 
def start_head
 
def end_head
 
def start_body
 
def end_body
 
def start_title
 
def end_title
 
def do_base
 
def do_isindex
 
def do_link
 
def do_meta
 
def do_nextid
 
def start_h1
 
def end_h1
 
def start_h2
 
def end_h2
 
def start_h3
 
def end_h3
 
def start_h4
 
def end_h4
 
def start_h5
 
def end_h5
 
def start_h6
 
def end_h6
 
def do_p
 
def start_pre
 
def end_pre
 
def start_xmp
 
def end_xmp
 
def start_listing
 
def end_listing
 
def start_address
 
def end_address
 
def start_blockquote
 
def end_blockquote
 
def start_ul
 
def end_ul
 
def do_li
 
def start_ol
 
def end_ol
 
def start_menu
 
def end_menu
 
def start_dir
 
def end_dir
 
def start_dl
 
def end_dl
 
def do_dt
 
def do_dd
 
def ddpop
 
def start_cite
 
def end_cite
 
def start_code
 
def end_code
 
def start_em
 
def end_em
 
def start_kbd
 
def end_kbd
 
def start_samp
 
def end_samp
 
def start_strong
 
def end_strong
 
def start_var
 
def end_var
 
def start_i
 
def end_i
 
def start_b
 
def end_b
 
def start_tt
 
def end_tt
 
def start_a
 
def end_a
 
def do_br
 
def do_hr
 
def do_img
 
def do_plaintext
 
def unknown_starttag
 
def unknown_endtag
 
- Public Member Functions inherited from SGMLParser
def __init__
 
def reset
 
def setnomoretags
 
def setliteral
 
def feed
 
def close
 
def error
 
def goahead
 
def parse_comment
 
def parse_pi
 
def get_starttag_text
 
def parse_starttag
 
def parse_endtag
 
def finish_shorttag
 
def finish_starttag
 
def finish_endtag
 
def handle_starttag
 
def handle_endtag
 
def report_unbalanced
 
def handle_charref
 
def handle_entityref
 
def handle_data
 
def handle_comment
 
def handle_decl
 
def handle_pi
 
def unknown_starttag
 
def unknown_endtag
 
def unknown_charref
 
def unknown_entityref
 

Data Fields

 formatter
 
 savedata
 
 isindex
 
 title
 
 base
 
 anchor
 
 anchorlist
 
 nofill
 
 list_stack
 
- Data Fields inherited from SGMLParser
 verbose
 
 rawdata
 
 stack
 
 lasttag
 
 nomoretags
 
 literal
 

Additional Inherited Members

- Static Public Attributes inherited from SGMLParser
 entitydefs = \
 

Detailed Description

Definition at line 13 of file htmllib.py.

Constructor & Destructor Documentation

def __init__ (   self,
  formatter,
  verbose = 0 
)

Definition at line 17 of file htmllib.py.

17 
18  def __init__(self, formatter, verbose=0):
19  SGMLParser.__init__(self, verbose)
20  self.formatter = formatter
21  self.savedata = None
22  self.isindex = 0
23  self.title = None
24  self.base = None
25  self.anchor = None
26  self.anchorlist = []
27  self.nofill = 0
28  self.list_stack = []

Member Function Documentation

def anchor_bgn (   self,
  href,
  name,
  type 
)

Definition at line 57 of file htmllib.py.

References HTMLParser.anchor.

57 
58  def anchor_bgn(self, href, name, type):
59  self.anchor = href
60  if self.anchor:
61  self.anchorlist.append(href)
def anchor_end (   self)

Definition at line 62 of file htmllib.py.

References HTMLParser.anchor, HTMLParser.anchorlist, HTMLParser.handle_data(), SgmlopParser.handle_data, and SlowParser.handle_data.

62 
63  def anchor_end(self):
64  if self.anchor:
65  self.handle_data("[%d]" % len(self.anchorlist))
66  self.anchor = None
def ddpop (   self,
  bl = 0 
)

Definition at line 269 of file htmllib.py.

References HTMLParser.list_stack.

270  def ddpop(self, bl=0):
271  self.formatter.end_paragraph(bl)
272  if self.list_stack:
273  if self.list_stack[-1][0] == 'dd':
274  del self.list_stack[-1]
275  self.formatter.pop_margin()
def do_base (   self,
  attrs 
)

Definition at line 91 of file htmllib.py.

References HTMLParser.base.

91 
92  def do_base(self, attrs):
93  for a, v in attrs:
94  if a == 'href':
95  self.base = v
def do_br (   self,
  attrs 
)

Definition at line 337 of file htmllib.py.

338  def do_br(self, attrs):
339  self.formatter.add_line_break()
def do_dd (   self,
  attrs 
)

Definition at line 264 of file htmllib.py.

References HTMLParser.ddpop().

265  def do_dd(self, attrs):
266  self.ddpop()
267  self.formatter.push_margin('dd')
268  self.list_stack.append(['dd', '', 0])
def do_dt (   self,
  attrs 
)

Definition at line 261 of file htmllib.py.

References HTMLParser.ddpop().

262  def do_dt(self, attrs):
263  self.ddpop()
def do_hr (   self,
  attrs 
)

Definition at line 342 of file htmllib.py.

343  def do_hr(self, attrs):
344  self.formatter.add_hor_rule()
def do_img (   self,
  attrs 
)

Definition at line 347 of file htmllib.py.

References HTMLParser.handle_image().

348  def do_img(self, attrs):
349  align = ''
350  alt = '(image)'
351  ismap = ''
352  src = ''
353  width = 0
354  height = 0
355  for attrname, value in attrs:
356  if attrname == 'align':
357  align = value
358  if attrname == 'alt':
359  alt = value
360  if attrname == 'ismap':
361  ismap = value
362  if attrname == 'src':
363  src = value
364  if attrname == 'width':
365  try: width = int(value)
366  except ValueError: pass
367  if attrname == 'height':
368  try: height = int(value)
369  except ValueError: pass
370  self.handle_image(src, alt, ismap, align, width, height)
def do_isindex (   self,
  attrs 
)

Definition at line 96 of file htmllib.py.

References HTMLParser.isindex.

96 
97  def do_isindex(self, attrs):
98  self.isindex = 1
def do_li (   self,
  attrs 
)

Definition at line 217 of file htmllib.py.

References HTMLParser.list_stack.

218  def do_li(self, attrs):
219  self.formatter.end_paragraph(0)
220  if self.list_stack:
221  [dummy, label, counter] = top = self.list_stack[-1]
222  top[2] = counter = counter+1
223  else:
224  label, counter = '*', 0
225  self.formatter.add_label_data(label, counter)
def do_link (   self,
  attrs 
)

Definition at line 99 of file htmllib.py.

99 
100  def do_link(self, attrs):
101  pass
def do_meta (   self,
  attrs 
)

Definition at line 102 of file htmllib.py.

103  def do_meta(self, attrs):
104  pass
def do_nextid (   self,
  attrs 
)

Definition at line 105 of file htmllib.py.

106  def do_nextid(self, attrs): # Deprecated
107  pass
def do_p (   self,
  attrs 
)

Definition at line 162 of file htmllib.py.

163  def do_p(self, attrs):
164  self.formatter.end_paragraph(1)
def do_plaintext (   self,
  attrs 
)

Definition at line 373 of file htmllib.py.

References SGMLParser.setnomoretags(), and HTMLParser.start_pre().

374  def do_plaintext(self, attrs):
375  self.start_pre(attrs)
376  self.setnomoretags() # Tell SGML parser
def end_a (   self)

Definition at line 332 of file htmllib.py.

References HTMLParser.anchor_end().

333  def end_a(self):
334  self.anchor_end()
def end_address (   self)

Definition at line 193 of file htmllib.py.

194  def end_address(self):
195  self.formatter.end_paragraph(0)
196  self.formatter.pop_font()
def end_b (   self)

Definition at line 310 of file htmllib.py.

311  def end_b(self):
312  self.formatter.pop_font()
def end_blockquote (   self)

Definition at line 201 of file htmllib.py.

202  def end_blockquote(self):
203  self.formatter.end_paragraph(1)
204  self.formatter.pop_margin()
def end_body (   self)

Definition at line 81 of file htmllib.py.

81 
82  def end_body(self): pass
def end_cite (   self)

Definition at line 281 of file htmllib.py.

References HTMLParser.end_i().

282  def end_cite(self): self.end_i()
def end_code (   self)

Definition at line 284 of file htmllib.py.

References HTMLParser.end_tt().

285  def end_code(self): self.end_tt()
def end_dir (   self)

Definition at line 250 of file htmllib.py.

References HTMLParser.end_ul().

251  def end_dir(self):
252  self.end_ul()
def end_dl (   self)

Definition at line 257 of file htmllib.py.

References HTMLParser.ddpop(), and HTMLParser.list_stack.

258  def end_dl(self):
259  self.ddpop(1)
260  if self.list_stack: del self.list_stack[-1]
def end_em (   self)

Definition at line 287 of file htmllib.py.

References HTMLParser.end_i().

288  def end_em(self): self.end_i()
def end_h1 (   self)

Definition at line 116 of file htmllib.py.

117  def end_h1(self):
118  self.formatter.end_paragraph(1)
119  self.formatter.pop_font()
def end_h2 (   self)

Definition at line 124 of file htmllib.py.

125  def end_h2(self):
126  self.formatter.end_paragraph(1)
127  self.formatter.pop_font()
def end_h3 (   self)

Definition at line 132 of file htmllib.py.

133  def end_h3(self):
134  self.formatter.end_paragraph(1)
135  self.formatter.pop_font()
def end_h4 (   self)

Definition at line 140 of file htmllib.py.

141  def end_h4(self):
142  self.formatter.end_paragraph(1)
143  self.formatter.pop_font()
def end_h5 (   self)

Definition at line 148 of file htmllib.py.

149  def end_h5(self):
150  self.formatter.end_paragraph(1)
151  self.formatter.pop_font()
def end_h6 (   self)

Definition at line 156 of file htmllib.py.

157  def end_h6(self):
158  self.formatter.end_paragraph(1)
159  self.formatter.pop_font()
def end_head (   self)

Definition at line 78 of file htmllib.py.

78 
79  def end_head(self): pass
def end_html (   self)

Definition at line 75 of file htmllib.py.

75 
76  def end_html(self): pass
def end_i (   self)

Definition at line 305 of file htmllib.py.

306  def end_i(self):
307  self.formatter.pop_font()
def end_kbd (   self)

Definition at line 290 of file htmllib.py.

References HTMLParser.end_tt().

291  def end_kbd(self): self.end_tt()
def end_listing (   self)

Definition at line 186 of file htmllib.py.

References HTMLParser.end_pre().

187  def end_listing(self):
188  self.end_pre()
def end_menu (   self)

Definition at line 244 of file htmllib.py.

References HTMLParser.end_ul().

245  def end_menu(self):
246  self.end_ul()
def end_ol (   self)

Definition at line 236 of file htmllib.py.

References HTMLParser.list_stack.

237  def end_ol(self):
238  if self.list_stack: del self.list_stack[-1]
239  self.formatter.end_paragraph(not self.list_stack)
240  self.formatter.pop_margin()
def end_pre (   self)

Definition at line 170 of file htmllib.py.

References sre_parse.max, and HTMLParser.nofill.

171  def end_pre(self):
172  self.formatter.end_paragraph(1)
173  self.formatter.pop_font()
174  self.nofill = max(0, self.nofill - 1)
def end_samp (   self)

Definition at line 293 of file htmllib.py.

References HTMLParser.end_tt().

294  def end_samp(self): self.end_tt()
def end_strong (   self)

Definition at line 296 of file htmllib.py.

References HTMLParser.end_b().

297  def end_strong(self): self.end_b()
def end_title (   self)

Definition at line 88 of file htmllib.py.

References HTMLParser.save_end(), and HTMLParser.title.

88 
89  def end_title(self):
90  self.title = self.save_end()
def end_tt (   self)

Definition at line 315 of file htmllib.py.

316  def end_tt(self):
317  self.formatter.pop_font()
def end_ul (   self)

Definition at line 212 of file htmllib.py.

References HTMLParser.list_stack.

213  def end_ul(self):
214  if self.list_stack: del self.list_stack[-1]
215  self.formatter.end_paragraph(not self.list_stack)
216  self.formatter.pop_margin()
def end_var (   self)

Definition at line 299 of file htmllib.py.

References HTMLParser.end_i().

300  def end_var(self): self.end_i()
def end_xmp (   self)

Definition at line 179 of file htmllib.py.

References HTMLParser.end_pre().

180  def end_xmp(self):
181  self.end_pre()
def handle_data (   self,
  data 
)

Definition at line 34 of file htmllib.py.

References HTMLParser.nofill, and HTMLParser.savedata.

34 
35  def handle_data(self, data):
36  if self.savedata is not None:
37  self.savedata = self.savedata + data
38  else:
39  if self.nofill:
40  self.formatter.add_literal_data(data)
41  else:
42  self.formatter.add_flowing_data(data)
def handle_image (   self,
  src,
  alt,
  *args 
)

Definition at line 69 of file htmllib.py.

References HTMLParser.handle_data(), SgmlopParser.handle_data, and SlowParser.handle_data.

69 
70  def handle_image(self, src, alt, *args):
71  self.handle_data(alt)
def save_bgn (   self)

Definition at line 45 of file htmllib.py.

References HTMLParser.savedata.

45 
46  def save_bgn(self):
47  self.savedata = ''
def save_end (   self)

Definition at line 48 of file htmllib.py.

References dospath.join(), HTMLParser.nofill, and HTMLParser.savedata.

48 
49  def save_end(self):
50  data = self.savedata
51  self.savedata = None
52  if not self.nofill:
53  data = ' '.join(data.split())
54  return data
def start_a (   self,
  attrs 
)

Definition at line 318 of file htmllib.py.

References HTMLParser.anchor_bgn().

319  def start_a(self, attrs):
320  href = ''
321  name = ''
322  type = ''
323  for attrname, value in attrs:
324  value = value.strip()
325  if attrname == 'href':
326  href = value
327  if attrname == 'name':
328  name = value
329  if attrname == 'type':
330  type = value.lower()
331  self.anchor_bgn(href, name, type)
def start_address (   self,
  attrs 
)

Definition at line 189 of file htmllib.py.

190  def start_address(self, attrs):
191  self.formatter.end_paragraph(0)
192  self.formatter.push_font((AS_IS, 1, AS_IS, AS_IS))
def start_b (   self,
  attrs 
)

Definition at line 308 of file htmllib.py.

309  def start_b(self, attrs):
310  self.formatter.push_font((AS_IS, AS_IS, 1, AS_IS))
def start_blockquote (   self,
  attrs 
)

Definition at line 197 of file htmllib.py.

198  def start_blockquote(self, attrs):
199  self.formatter.end_paragraph(1)
200  self.formatter.push_margin('blockquote')
def start_body (   self,
  attrs 
)

Definition at line 80 of file htmllib.py.

80 
81  def start_body(self, attrs): pass
def start_cite (   self,
  attrs 
)

Definition at line 280 of file htmllib.py.

References HTMLParser.start_i().

281  def start_cite(self, attrs): self.start_i(attrs)
def start_code (   self,
  attrs 
)

Definition at line 283 of file htmllib.py.

References HTMLParser.start_tt().

284  def start_code(self, attrs): self.start_tt(attrs)
def start_dir (   self,
  attrs 
)

Definition at line 247 of file htmllib.py.

References HTMLParser.start_ul().

248  def start_dir(self, attrs):
249  self.start_ul(attrs)
def start_dl (   self,
  attrs 
)

Definition at line 253 of file htmllib.py.

254  def start_dl(self, attrs):
255  self.formatter.end_paragraph(1)
256  self.list_stack.append(['dl', '', 0])
def start_em (   self,
  attrs 
)

Definition at line 286 of file htmllib.py.

References HTMLParser.start_i().

287  def start_em(self, attrs): self.start_i(attrs)
def start_h1 (   self,
  attrs 
)

Definition at line 112 of file htmllib.py.

113  def start_h1(self, attrs):
114  self.formatter.end_paragraph(1)
115  self.formatter.push_font(('h1', 0, 1, 0))
def start_h2 (   self,
  attrs 
)

Definition at line 120 of file htmllib.py.

121  def start_h2(self, attrs):
122  self.formatter.end_paragraph(1)
123  self.formatter.push_font(('h2', 0, 1, 0))
def start_h3 (   self,
  attrs 
)

Definition at line 128 of file htmllib.py.

129  def start_h3(self, attrs):
130  self.formatter.end_paragraph(1)
131  self.formatter.push_font(('h3', 0, 1, 0))
def start_h4 (   self,
  attrs 
)

Definition at line 136 of file htmllib.py.

137  def start_h4(self, attrs):
138  self.formatter.end_paragraph(1)
139  self.formatter.push_font(('h4', 0, 1, 0))
def start_h5 (   self,
  attrs 
)

Definition at line 144 of file htmllib.py.

145  def start_h5(self, attrs):
146  self.formatter.end_paragraph(1)
147  self.formatter.push_font(('h5', 0, 1, 0))
def start_h6 (   self,
  attrs 
)

Definition at line 152 of file htmllib.py.

153  def start_h6(self, attrs):
154  self.formatter.end_paragraph(1)
155  self.formatter.push_font(('h6', 0, 1, 0))
def start_head (   self,
  attrs 
)

Definition at line 77 of file htmllib.py.

77 
78  def start_head(self, attrs): pass
def start_html (   self,
  attrs 
)

Definition at line 74 of file htmllib.py.

74 
75  def start_html(self, attrs): pass
def start_i (   self,
  attrs 
)

Definition at line 303 of file htmllib.py.

304  def start_i(self, attrs):
305  self.formatter.push_font((AS_IS, 1, AS_IS, AS_IS))
def start_kbd (   self,
  attrs 
)

Definition at line 289 of file htmllib.py.

References HTMLParser.start_tt().

290  def start_kbd(self, attrs): self.start_tt(attrs)
def start_listing (   self,
  attrs 
)

Definition at line 182 of file htmllib.py.

References SGMLParser.setliteral(), and HTMLParser.start_pre().

183  def start_listing(self, attrs):
184  self.start_pre(attrs)
185  self.setliteral('listing') # Tell SGML parser
def start_menu (   self,
  attrs 
)

Definition at line 241 of file htmllib.py.

References HTMLParser.start_ul().

242  def start_menu(self, attrs):
243  self.start_ul(attrs)
def start_ol (   self,
  attrs 
)

Definition at line 226 of file htmllib.py.

References HTMLParser.list_stack.

227  def start_ol(self, attrs):
228  self.formatter.end_paragraph(not self.list_stack)
229  self.formatter.push_margin('ol')
230  label = '1.'
231  for a, v in attrs:
232  if a == 'type':
233  if len(v) == 1: v = v + '.'
234  label = v
235  self.list_stack.append(['ol', label, 0])
def start_pre (   self,
  attrs 
)

Definition at line 165 of file htmllib.py.

References HTMLParser.nofill.

166  def start_pre(self, attrs):
167  self.formatter.end_paragraph(1)
168  self.formatter.push_font((AS_IS, AS_IS, AS_IS, 1))
169  self.nofill = self.nofill + 1
def start_samp (   self,
  attrs 
)

Definition at line 292 of file htmllib.py.

References HTMLParser.start_tt().

293  def start_samp(self, attrs): self.start_tt(attrs)
def start_strong (   self,
  attrs 
)

Definition at line 295 of file htmllib.py.

References HTMLParser.start_b().

296  def start_strong(self, attrs): self.start_b(attrs)
def start_title (   self,
  attrs 
)

Definition at line 85 of file htmllib.py.

References HTMLParser.save_bgn().

85 
86  def start_title(self, attrs):
87  self.save_bgn()
def start_tt (   self,
  attrs 
)

Definition at line 313 of file htmllib.py.

314  def start_tt(self, attrs):
315  self.formatter.push_font((AS_IS, AS_IS, AS_IS, 1))
def start_ul (   self,
  attrs 
)

Definition at line 207 of file htmllib.py.

References HTMLParser.list_stack.

208  def start_ul(self, attrs):
209  self.formatter.end_paragraph(not self.list_stack)
210  self.formatter.push_margin('ul')
211  self.list_stack.append(['ul', '*', 0])
def start_var (   self,
  attrs 
)

Definition at line 298 of file htmllib.py.

References HTMLParser.start_i().

299  def start_var(self, attrs): self.start_i(attrs)
def start_xmp (   self,
  attrs 
)

Definition at line 175 of file htmllib.py.

References SGMLParser.setliteral(), and HTMLParser.start_pre().

176  def start_xmp(self, attrs):
177  self.start_pre(attrs)
178  self.setliteral('xmp') # Tell SGML parser
def unknown_endtag (   self,
  tag 
)

Definition at line 382 of file htmllib.py.

383  def unknown_endtag(self, tag):
384  pass
385 
def unknown_starttag (   self,
  tag,
  attrs 
)

Definition at line 379 of file htmllib.py.

380  def unknown_starttag(self, tag, attrs):
381  pass

Field Documentation

anchor

Definition at line 24 of file htmllib.py.

anchorlist

Definition at line 25 of file htmllib.py.

base

Definition at line 23 of file htmllib.py.

formatter

Definition at line 19 of file htmllib.py.

isindex

Definition at line 21 of file htmllib.py.

list_stack

Definition at line 27 of file htmllib.py.

nofill

Definition at line 26 of file htmllib.py.

savedata

Definition at line 20 of file htmllib.py.

title

Definition at line 22 of file htmllib.py.


The documentation for this class was generated from the following file: