• R/O
  • HTTP
  • SSH
  • HTTPS

htmltree: 提交

メインリポジトリ


Commit MetaInfo

修订版: ec0ba778324d7a92b215c990692fa6d08263a187 (tree)
时间: 2011-07-28 19:59:54
作者: Hiromichi MATSUSHIMA <hirom@offi...>
Committer: Hiromichi MATSUSHIMA

Log Message

add entityref handler

更改概述

差异

--- a/htmltree.py
+++ b/htmltree.py
@@ -118,6 +118,10 @@ class HTMLElement(list):
118118 """returns given attribute's value."""
119119 return self.attrs.get(attr, default)
120120
121+ def attr(self, attr, default=None):
122+ """returns given attribute's value."""
123+ return self.attrs.get(attr, default)
124+
121125 def has_attribute(self, attr):
122126 """returns True if element has "attr" attribute."""
123127 return attr in self.attrs
@@ -281,7 +285,14 @@ class HTMLTreeError(Exception):
281285 def __repr__(self):
282286 str = "HTML Parse Error: %s , line: %d, char: %d" % (self.msg, self.lineno, self.offset)
283287 return str
284-
288+
289+
290+def parse(data, charset=None, option=0):
291+ "parse HTML and returns HTMLTree object"
292+ tree = HTMLTree()
293+ tree.parse(data, charset, option)
294+ return tree
295+
285296
286297 class HTMLTree(HTMLParser.HTMLParser):
287298 "HTML Tree Builder"
@@ -415,6 +426,14 @@ class HTMLTree(HTMLParser.HTMLParser):
415426 elem._text = data
416427 self._cursor.append(elem)
417428
429+ def handle_entityref(self, name):
430+ data = "&" + name + ";"
431+ self.handle_data(data)
432+
433+ def handle_charref(self, ref):
434+ data = "&#" + ref + ";"
435+ self.handle_data(data)
436+
418437 def handle_decl(self, decl):
419438 elem = HTMLElement(HTMLElement.DECL, decl)
420439 elem._parent = self._cursor
Show on old repository browser