• R/O
  • HTTP
  • SSH
  • HTTPS

Frequently used words (click to add to your profile)

java c++ android linux c# windows objective-c cocoa 誰得 qt python php ruby game gui bathyscaphe c 計画中(planning stage) 翻訳 omegat framework twitter dom test vb.net directx ゲームエンジン btron arduino previewer

sfjplib for python


File Info

Rev. f6bf83ccbc242faf3fc6fcd7916144eaee220512
大小 2,507 字节
时间 2011-08-25 20:46:40
作者 Hiromichi MATSUSHIMA
Log Message

add some files

Content

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""form_retriver.py"""

import HTMLParser
import re
import htmltree

class Form(list):
    """A list of (name, value) pairs describing one HTML form.

    Besides the list contents, an instance carries the elements that
    produced the pairs and the form's action/target/enctype/method
    attributes, all of which start out unset (None).
    """

    def __init__(self):
        # Elements that contributed a (name, value) pair to this list.
        self.elements = []
        # Form-level attributes; filled in by whoever builds the form.
        for attr_name in ("action", "target", "enctype", "method"):
            setattr(self, attr_name, None)

class FormRetriver(object):
    """Collect the forms found in parsed HTML.

    Every call to parse() appends one Form per <form> element to an
    internal list; forms() returns that list.
    """

    # Named character references expanded by convert_ref().
    _ENTITIES = {
        "lt": "<",
        "gt": ">",
        "amp": "&",
        "quot": '"',
        "apos": "'",
        "nbsp": " ",
    }

    # Element names treated as form controls; anything else is descended
    # into while looking for controls.
    _CONTROLS = ("input", "textarea", "select", "button")

    def __init__(self):
        """Create a retriever with no parsed forms."""
        self._forms = []

    def parse(self, data):
        """Parse `data` with htmltree and record each <form> element.

        Results accumulate across calls; nothing is returned. Use forms()
        to read them.
        """
        root = htmltree.parse(data).root()
        for form_elem in root.get_elements_by_name("form"):
            self._forms.append(self.form_parse(form_elem))

    def convert_ref(self, text):
        """Return `text` with known named character references expanded.

        References that are not listed in _ENTITIES are left untouched.
        """
        return re.sub(r"&(\w+);", self._convert_ref, text)

    def _convert_ref(self, m):
        """Return the replacement for one "&name;" regex match `m`."""
        # Fall back to the matched text so that unknown references such as
        # "&copy;" pass through instead of raising KeyError.
        return self._ENTITIES.get(m.group(1), m.group(0))

    def form_parse(self, elem):
        """Build a Form from the <form> element `elem`.

        Copies the action/target/enctype/method attributes and then
        collects the (name, value) pairs of the controls below `elem`.
        """
        f = Form()
        f.action = elem.attr("action")
        f.target = elem.attr("target")
        f.enctype = elem.attr("enctype")
        f.method = elem.attr("method")
        self._r_form_parse(elem, f)
        return f

    def _r_form_parse(self, elem, f):
        """Recursively append the controls below `elem` to `f`.

        For every named input, textarea, selected option and button a
        (name, value) pair is appended to `f` and the originating element
        to `f.elements`. Controls without a name attribute are skipped.
        """
        for e in elem:
            tag = e.name
            if tag not in self._CONTROLS:
                # Not a control: look for controls among its children.
                self._r_form_parse(e, f)
                continue

            name = e.attr("name")
            if name is None:
                continue

            if tag == "select":
                # Only direct children marked "selected" contribute, each
                # under the name of the enclosing select.
                for opt in e:
                    if opt.has_attribute("selected"):
                        f.append((name, opt.attr("value")))
                        f.elements.append(opt)
            elif tag == "textarea":
                # NOTE(review): the encoded value is a byte string, which
                # convert_ref's text pattern only accepts on Python 2 --
                # confirm before porting to Python 3.
                t = e.inner_html().encode("utf-8")
                f.append((name, self.convert_ref(t)))
                f.elements.append(e)
            else:
                # input and button both take their value attribute as-is.
                f.append((name, e.attr("value")))
                f.elements.append(e)

    def forms(self):
        """Return the list of Form objects collected so far."""
        return self._forms