sfjplib for python
Rev. | f6bf83ccbc242faf3fc6fcd7916144eaee220512 |
---|---|
大小 | 2,507 字节 |
时间 | 2011-08-25 20:46:40 |
作者 | Hiromichi MATSUSHIMA |
Log Message | add some files |
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""form_retriver.py"""
import HTMLParser
import re
import htmltree
class Form(list):
    """One parsed HTML form.

    The list part holds the ``(name, value)`` tuples appended while a form
    is walked; the attributes below carry the ``<form>`` tag's own settings
    and the elements that produced those tuples.
    """

    def __init__(self):
        # Elements that contributed an entry to the list.
        self.elements = []
        # Attributes of the <form> tag; all unset until a parser fills them.
        self.action = self.target = self.enctype = self.method = None
class FormRetriver(object):
    """Collect the forms found in an HTML document.

    Every ``<form>`` element is converted into a ``Form``: a list of
    ``(name, value)`` tuples for its named controls, plus the ``action``,
    ``target``, ``enctype`` and ``method`` attributes of the form tag.
    """

    # Named character references understood by convert_ref().
    _ENTITIES = {
        "lt": "<",
        "gt": ">",
        "amp": "&",
        "quot": '"',
        "nbsp": " ",
    }

    # Controls whose entry is simply (name attribute, value attribute).
    _VALUE_CONTROLS = ("input", "button")

    # Every tag handled as a form control rather than descended into.
    _CONTROLS = _VALUE_CONTROLS + ("textarea", "select")

    def __init__(self):
        """Start with an empty list of parsed forms."""
        self._forms = []

    def parse(self, data):
        """Parse *data* with ``htmltree`` and record every form it contains.

        Results accumulate across calls; read them back with forms().
        """
        root = htmltree.parse(data).root()
        for form_elem in root.get_elements_by_name("form"):
            self._forms.append(self.form_parse(form_elem))

    def convert_ref(self, text):
        """Return *text* with known ``&name;`` references replaced.

        Only the names in ``_ENTITIES`` are converted; any other reference
        is left exactly as written.
        """
        return re.sub(r"&(\w+);", self._convert_ref, text)

    def _convert_ref(self, m):
        """Map one ``&name;`` match to its character.

        Unknown names fall back to the whole matched text so that an
        unexpected reference cannot abort parsing with a KeyError.
        """
        return self._ENTITIES.get(m.group(1), m.group(0))

    def form_parse(self, elem):
        """Build a ``Form`` from the form element *elem* and return it."""
        f = Form()
        f.action = elem.attr("action")
        f.target = elem.attr("target")
        f.enctype = elem.attr("enctype")
        f.method = elem.attr("method")
        self._r_form_parse(elem, f)
        return f

    def _r_form_parse(self, elem, f):
        """Walk the children of *elem* and add their controls to *f*.

        Named ``input``, ``button``, ``textarea`` and ``select`` elements
        append ``(name, value)`` tuples to *f* and are recorded in
        ``f.elements``. Any other element is searched recursively.
        """
        for e in elem:
            tag = e.name
            if tag not in self._CONTROLS:
                # Not a control itself: look for controls nested inside it.
                self._r_form_parse(e, f)
                continue

            name = e.attr("name")
            if name is None:
                # Controls without a name are skipped and not descended into.
                continue

            if tag in self._VALUE_CONTROLS:
                f.append((name, e.attr("value")))
                f.elements.append(e)
            elif tag == "textarea":
                # The value is the element's content with references decoded.
                # NOTE(review): encoding before the str-pattern re.sub is
                # Python 2 style (the file imports HTMLParser); confirm
                # before running under Python 3.
                t = e.inner_html().encode("utf-8")
                f.append((name, self.convert_ref(t)))
                f.elements.append(e)
            else:
                # select: one entry per child carrying a "selected" attribute.
                for opt in e:
                    if opt.has_attribute("selected"):
                        f.append((name, opt.attr("value")))
                        f.elements.append(opt)

    def forms(self):
        """Return the list of forms collected so far."""
        return self._forms