• R/O
  • SSH
  • HTTPS

提交

标签

Frequently used words (click to add to your profile)

java, c++, android, linux, c#, windows, objective-c, cocoa, 誰得, qt, python, php, ruby, game, gui, bathyscaphe, c, 計画中(planning stage), 翻訳, omegat, framework, twitter, dom, test, vb.net, directx, ゲームエンジン, btron, arduino, previewer

作業部屋の使い方を試しています。


Commit MetaInfo

修订版: 132 (tree)
时间: 2015-03-31 11:44:57
作者: tuna_p

Log Message

branches/b3/WebScraping をマージ

更改概述

差异

--- trunk/HtmlTest2/data/Yahoo!天気.xml (revision 131)
+++ trunk/HtmlTest2/data/Yahoo!天気.xml (revision 132)
@@ -1,108 +1,71 @@
11 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2-<searchdata>
3- <url>http://weather.yahoo.co.jp/weather/</url>
4- <searchlist>
5- <item>天気01</item>
6- <htmltag>li</htmltag>
7- <htmlid/>
8- <htmlclass>point pt1400</htmlclass>
9- <around/>
10- <regexp/>
11- </searchlist>
12- <searchlist>
13- <item>天気02</item>
14- <htmltag>li</htmltag>
15- <htmlid/>
16- <htmlclass>point pt1900</htmlclass>
17- <around/>
18- <regexp/>
19- </searchlist>
20- <searchlist>
21- <item>天気03</item>
22- <htmltag>li</htmltag>
23- <htmlid/>
24- <htmlclass>point pt3410</htmlclass>
25- <around/>
26- <regexp/>
27- </searchlist>
28- <searchlist>
29- <item>天気04</item>
30- <htmltag>li</htmltag>
31- <htmlid/>
32- <htmlclass>point pt4410</htmlclass>
33- <around/>
34- <regexp/>
35- </searchlist>
36- <searchlist>
37- <item>天気05</item>
38- <htmltag>li</htmltag>
39- <htmlid/>
40- <htmlclass>point pt5110</htmlclass>
41- <around/>
42- <regexp/>
43- </searchlist>
44- <searchlist>
45- <item>天気06</item>
46- <htmltag>li</htmltag>
47- <htmlid/>
48- <htmlclass>point pt5410</htmlclass>
49- <around/>
50- <regexp/>
51- </searchlist>
52- <searchlist>
53- <item>天気07</item>
54- <htmltag>li</htmltag>
55- <htmlid/>
56- <htmlclass>point pt5610</htmlclass>
57- <around/>
58- <regexp/>
59- </searchlist>
60- <searchlist>
61- <item>天気08</item>
62- <htmltag>li</htmltag>
63- <htmlid/>
64- <htmlclass>point pt6200</htmlclass>
65- <around/>
66- <regexp/>
67- </searchlist>
68- <searchlist>
69- <item>天気09</item>
70- <htmltag>li</htmltag>
71- <htmlid/>
72- <htmlclass>point pt6710</htmlclass>
73- <around/>
74- <regexp/>
75- </searchlist>
76- <searchlist>
77- <item>天気10</item>
78- <htmltag>li</htmltag>
79- <htmlid/>
80- <htmlclass>point pt7410</htmlclass>
81- <around/>
82- <regexp/>
83- </searchlist>
84- <searchlist>
85- <item>天気11</item>
86- <htmltag>li</htmltag>
87- <htmlid/>
88- <htmlclass>point pt8210</htmlclass>
89- <around/>
90- <regexp/>
91- </searchlist>
92- <searchlist>
93- <item>天気12</item>
94- <htmltag>li</htmltag>
95- <htmlid/>
96- <htmlclass>point pt8810</htmlclass>
97- <around/>
98- <regexp/>
99- </searchlist>
100- <searchlist>
101- <item>天気13</item>
102- <htmltag>li</htmltag>
103- <htmlid/>
104- <htmlclass>point pt9110</htmlclass>
105- <around/>
106- <regexp/>
107- </searchlist>
108-</searchdata>
\ No newline at end of file
2+<xmlcontainer>
3+<webscraping>
4+<url>http://weather.yahoo.co.jp/weather/</url>
5+<searchlist listNo="1">
6+<item>天気01</item>
7+<htmltag>li</htmltag>
8+<htmlclass>point pt1400</htmlclass>
9+</searchlist>
10+<searchlist listNo="2">
11+<item>天気02</item>
12+<htmltag>li</htmltag>
13+<htmlclass>point pt1900</htmlclass>
14+</searchlist>
15+<searchlist listNo="3">
16+<item>天気03</item>
17+<htmltag>li</htmltag>
18+<htmlclass>point pt3410</htmlclass>
19+</searchlist>
20+<searchlist listNo="4">
21+<item>天気04</item>
22+<htmltag>li</htmltag>
23+<htmlclass>point pt4410</htmlclass>
24+</searchlist>
25+<searchlist listNo="5">
26+<item>天気05</item>
27+<htmltag>li</htmltag>
28+<htmlclass>point pt5110</htmlclass>
29+</searchlist>
30+<searchlist listNo="6">
31+<item>天気06</item>
32+<htmltag>li</htmltag>
33+<htmlclass>point pt5410</htmlclass>
34+</searchlist>
35+<searchlist listNo="7">
36+<item>天気07</item>
37+<htmltag>li</htmltag>
38+<htmlclass>point pt5610</htmlclass>
39+</searchlist>
40+<searchlist listNo="8">
41+<item>天気08</item>
42+<htmltag>li</htmltag>
43+<htmlclass>point pt6200</htmlclass>
44+</searchlist>
45+<searchlist listNo="9">
46+<item>天気09</item>
47+<htmltag>li</htmltag>
48+<htmlclass>point pt6710</htmlclass>
49+</searchlist>
50+<searchlist listNo="10">
51+<item>天気10</item>
52+<htmltag>li</htmltag>
53+<htmlclass>point pt7410</htmlclass>
54+</searchlist>
55+<searchlist listNo="11">
56+<item>天気11</item>
57+<htmltag>li</htmltag>
58+<htmlclass>point pt8210</htmlclass>
59+</searchlist>
60+<searchlist listNo="12">
61+<item>天気12</item>
62+<htmltag>li</htmltag>
63+<htmlclass>point pt8810</htmlclass>
64+</searchlist>
65+<searchlist listNo="13">
66+<item>天気13</item>
67+<htmltag>li</htmltag>
68+<htmlclass>point pt9110</htmlclass>
69+</searchlist>
70+</webscraping>
71+</xmlcontainer>
--- trunk/HtmlTest2/data/Yahoo!ファイナンス.xml (revision 131)
+++ trunk/HtmlTest2/data/Yahoo!ファイナンス.xml (revision 132)
@@ -1 +1,217 @@
1-<?xml version="1.0" encoding="UTF-8" standalone="no"?><searchdata><url>http://stocks.finance.yahoo.co.jp/stocks/detail/?code=9984.T</url><searchlist><item>銘柄コード</item><htmltag>dl</htmltag><htmlid/><htmlclass>stocksInfo clearFix</htmlclass><around/><regexp>(^\d{4})</regexp></searchlist><searchlist><item>カテゴリ</item><htmltag>div</htmltag><htmlid/><htmlclass>stockMainTabParts stockMainTabPartsCurrent</htmlclass><around/><regexp/></searchlist><searchlist><item>業種</item><htmltag>dd</htmltag><htmlid/><htmlclass>category yjSb</htmlclass><around/><regexp/></searchlist><searchlist><item>取得時間</item><htmltag>dd</htmltag><htmlid/><htmlclass>yjSb real</htmlclass><around/><regexp>^(.*)\t</regexp></searchlist><searchlist><item>銘柄名</item><htmltag>th</htmltag><htmlid/><htmlclass>symbol</htmlclass><around/><regexp/></searchlist><searchlist><item>株価</item><htmltag>td</htmltag><htmlid/><htmlclass>stoksPrice</htmlclass><around/><regexp/></searchlist><searchlist><item>前日比</item><htmltag>td</htmltag><htmlid/><htmlclass>change</htmlclass><around/><regexp>\t(.*)(.*%)</regexp></searchlist><searchlist><item>前日比%</item><htmltag>td</htmltag><htmlid/><htmlclass>change</htmlclass><around/><regexp>\t.*((.*)%)</regexp></searchlist><searchlist><item>前日終値</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi clearfix</htmlclass><around>0</around><regexp>^([,0-9]+)\t</regexp></searchlist><searchlist><item>始値</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi clearfix</htmlclass><around>1</around><regexp>^([,0-9]+|-{3})\t</regexp></searchlist><searchlist><item>高値</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi clearfix</htmlclass><around>2</around><regexp>^([,0-9]+|-{3})\t</regexp></searchlist><searchlist><item>安値</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi clearfix</htmlclass><around>3</around><regexp>^([,0-9]+|-{3})\t</regexp></searchlist><searchlist><item>出来高</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi 
clearfix</htmlclass><around>4</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>売買代金</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi clearfix</htmlclass><around>5</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>値幅制限</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi clearfix</htmlclass><around>6</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>時価総額</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>0</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>発行済株式数</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>1</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>配当利回り</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>2</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>1株配当</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>3</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>PER</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>4</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>PBR</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>5</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>EPS</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>6</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>BPS</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>7</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>最低購入代金</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>8</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>単元株数</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS 
clearfix</htmlclass><around>9</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>年初来高値</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>10</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>年初来安値</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>11</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>信用買残</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>12</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>信用買残前週比</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>13</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>信用売残</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>14</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>信用売残前週比</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>15</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>貸借倍率</item><htmltag>div</htmltag><htmlid/><htmlclass>yjMS clearfix</htmlclass><around/><regexp>^(.*?)\t</regexp></searchlist></searchdata>
\ No newline at end of file
1+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
2+<xmlcontainer>
3+<webscraping>
4+<url>http://stocks.finance.yahoo.co.jp/stocks/detail/?code=5020.T</url>
5+<searchlist listNo="1">
6+<item>銘柄コード</item>
7+<htmltag>dl</htmltag>
8+<htmlclass>stocksInfo clearFix</htmlclass>
9+<regexp>(^\d{4})</regexp>
10+</searchlist>
11+<searchlist listNo="2">
12+<item>カテゴリ</item>
13+<htmltag>div</htmltag>
14+<htmlclass>stockMainTabParts stockMainTabPartsCurrent</htmlclass>
15+</searchlist>
16+<searchlist listNo="3">
17+<item>業種</item>
18+<htmltag>dd</htmltag>
19+<htmlclass>category yjSb</htmlclass>
20+</searchlist>
21+<searchlist listNo="4">
22+<item>取得時間</item>
23+<htmltag>dd</htmltag>
24+<htmlclass>yjSb real</htmlclass>
25+<regexp>^(.*)\t</regexp>
26+</searchlist>
27+<searchlist listNo="5">
28+<item>銘柄名</item>
29+<htmltag>th</htmltag>
30+<htmlclass>symbol</htmlclass>
31+</searchlist>
32+<searchlist listNo="6">
33+<item>株価</item>
34+<htmltag>td</htmltag>
35+<htmlclass>stoksPrice</htmlclass>
36+</searchlist>
37+<searchlist listNo="7">
38+<item>前日比</item>
39+<htmltag>td</htmltag>
40+<htmlclass>change</htmlclass>
41+<regexp>\t(.*)(.*%)</regexp>
42+</searchlist>
43+<searchlist listNo="8">
44+<item>前日比%</item>
45+<htmltag>td</htmltag>
46+<htmlclass>change</htmlclass>
47+<regexp>\t.*((.*)%)</regexp>
48+</searchlist>
49+<searchlist listNo="9">
50+<item>前日終値</item>
51+<htmltag>div</htmltag>
52+<htmlclass>lineFi clearfix</htmlclass>
53+<around>0</around>
54+<regexp>^([,.0-9]+)\t</regexp>
55+</searchlist>
56+<searchlist listNo="10">
57+<item>始値</item>
58+<htmltag>div</htmltag>
59+<htmlclass>lineFi clearfix</htmlclass>
60+<around>1</around>
61+<regexp>^([,.0-9]+|-{3})\t</regexp>
62+</searchlist>
63+<searchlist listNo="11">
64+<item>高値</item>
65+<htmltag>div</htmltag>
66+<htmlclass>lineFi clearfix</htmlclass>
67+<around>2</around>
68+<regexp>^((ストップ高\t|ストップ安\t)?[,0-9]+|-{3})</regexp>
69+</searchlist>
70+<searchlist listNo="12">
71+<item>安値</item>
72+<htmltag>div</htmltag>
73+<htmlclass>lineFi clearfix</htmlclass>
74+<around>3</around>
75+<regexp>^((ストップ高\t|ストップ安\t)?[,0-9]+|-{3})</regexp>
76+</searchlist>
77+<searchlist listNo="13">
78+<item>出来高</item>
79+<htmltag>div</htmltag>
80+<htmlclass>lineFi clearfix</htmlclass>
81+<around>4</around>
82+<regexp>^(.*?)\t</regexp>
83+</searchlist>
84+<searchlist listNo="14">
85+<item>売買代金</item>
86+<htmltag>div</htmltag>
87+<htmlclass>lineFi clearfix</htmlclass>
88+<around>5</around>
89+<regexp>^(.*?)\t</regexp>
90+</searchlist>
91+<searchlist listNo="15">
92+<item>値幅制限</item>
93+<htmltag>div</htmltag>
94+<htmlclass>lineFi clearfix</htmlclass>
95+<around>6</around>
96+<regexp>^(.*?)\t</regexp>
97+</searchlist>
98+<searchlist listNo="16">
99+<item>時価総額</item>
100+<htmltag>div</htmltag>
101+<htmlclass>lineFi yjMS clearfix</htmlclass>
102+<around>0</around>
103+<regexp>^(.*?)\t</regexp>
104+</searchlist>
105+<searchlist listNo="17">
106+<item>発行済株式数</item>
107+<htmltag>div</htmltag>
108+<htmlclass>lineFi yjMS clearfix</htmlclass>
109+<around>1</around>
110+<regexp>^(.*?)\t</regexp>
111+</searchlist>
112+<searchlist listNo="18">
113+<item>配当利回り</item>
114+<htmltag>div</htmltag>
115+<htmlclass>lineFi yjMS clearfix</htmlclass>
116+<around>2</around>
117+<regexp>^(.*?)\t</regexp>
118+</searchlist>
119+<searchlist listNo="19">
120+<item>1株配当</item>
121+<htmltag>div</htmltag>
122+<htmlclass>lineFi yjMS clearfix</htmlclass>
123+<around>3</around>
124+<regexp>^(.*?)\t</regexp>
125+</searchlist>
126+<searchlist listNo="20">
127+<item>PER</item>
128+<htmltag>div</htmltag>
129+<htmlclass>lineFi yjMS clearfix</htmlclass>
130+<around>4</around>
131+<regexp>^(.*?)\t</regexp>
132+</searchlist>
133+<searchlist listNo="21">
134+<item>PBR</item>
135+<htmltag>div</htmltag>
136+<htmlclass>lineFi yjMS clearfix</htmlclass>
137+<around>5</around>
138+<regexp>^(.*?)\t</regexp>
139+</searchlist>
140+<searchlist listNo="22">
141+<item>EPS</item>
142+<htmltag>div</htmltag>
143+<htmlclass>lineFi yjMS clearfix</htmlclass>
144+<around>6</around>
145+<regexp>^(.*?)\t</regexp>
146+</searchlist>
147+<searchlist listNo="23">
148+<item>BPS</item>
149+<htmltag>div</htmltag>
150+<htmlclass>lineFi yjMS clearfix</htmlclass>
151+<around>7</around>
152+<regexp>^(.*?)\t</regexp>
153+</searchlist>
154+<searchlist listNo="24">
155+<item>最低購入代金</item>
156+<htmltag>div</htmltag>
157+<htmlclass>lineFi yjMS clearfix</htmlclass>
158+<around>8</around>
159+<regexp>^(.*?)\t</regexp>
160+</searchlist>
161+<searchlist listNo="25">
162+<item>単元株数</item>
163+<htmltag>div</htmltag>
164+<htmlclass>lineFi yjMS clearfix</htmlclass>
165+<around>9</around>
166+<regexp>^(.*?)\t</regexp>
167+</searchlist>
168+<searchlist listNo="26">
169+<item>年初来高値</item>
170+<htmltag>div</htmltag>
171+<htmlclass>lineFi yjMS clearfix</htmlclass>
172+<around>10</around>
173+<regexp>^(.*?)\t</regexp>
174+</searchlist>
175+<searchlist listNo="27">
176+<item>年初来安値</item>
177+<htmltag>div</htmltag>
178+<htmlclass>lineFi yjMS clearfix</htmlclass>
179+<around>11</around>
180+<regexp>^(.*?)\t</regexp>
181+</searchlist>
182+<searchlist listNo="28">
183+<item>信用買残</item>
184+<htmltag>div</htmltag>
185+<htmlclass>lineFi yjMS clearfix</htmlclass>
186+<around>12</around>
187+<regexp>^(.*?)\t</regexp>
188+</searchlist>
189+<searchlist listNo="29">
190+<item>信用買残前週比</item>
191+<htmltag>div</htmltag>
192+<htmlclass>lineFi yjMS clearfix</htmlclass>
193+<around>13</around>
194+<regexp>^(.*?)\t</regexp>
195+</searchlist>
196+<searchlist listNo="30">
197+<item>信用売残</item>
198+<htmltag>div</htmltag>
199+<htmlclass>lineFi yjMS clearfix</htmlclass>
200+<around>14</around>
201+<regexp>^(.*?)\t</regexp>
202+</searchlist>
203+<searchlist listNo="31">
204+<item>信用売残前週比</item>
205+<htmltag>div</htmltag>
206+<htmlclass>lineFi yjMS clearfix</htmlclass>
207+<around>15</around>
208+<regexp>^(.*?)\t</regexp>
209+</searchlist>
210+<searchlist listNo="32">
211+<item>貸借倍率</item>
212+<htmltag>div</htmltag>
213+<htmlclass>yjMS clearfix</htmlclass>
214+<regexp>^(.*?)\t</regexp>
215+</searchlist>
216+</webscraping>
217+</xmlcontainer>
--- trunk/HtmlTest2/test/utility/test1/SearchDataRW.java (nonexistent)
+++ trunk/HtmlTest2/test/utility/test1/SearchDataRW.java (revision 132)
@@ -0,0 +1,314 @@
1+/*
2+ * Copyright (C) 2014 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id$
21+ */
22+
23+package utility.test1;
24+
25+import webScraping.core.SearchData;
26+import java.io.File;
27+import java.io.FileNotFoundException;
28+import java.io.FileOutputStream;
29+import java.io.IOException;
30+import java.util.logging.Level;
31+import java.util.logging.Logger;
32+import javax.xml.parsers.DocumentBuilder;
33+import javax.xml.parsers.DocumentBuilderFactory;
34+import javax.xml.parsers.ParserConfigurationException;
35+import javax.xml.transform.Transformer;
36+import javax.xml.transform.TransformerConfigurationException;
37+import javax.xml.transform.TransformerException;
38+import javax.xml.transform.TransformerFactory;
39+import javax.xml.transform.dom.DOMSource;
40+import javax.xml.transform.stream.StreamResult;
41+import org.w3c.dom.DOMImplementation;
42+import org.w3c.dom.Document;
43+import org.w3c.dom.Element;
44+import org.w3c.dom.Node;
45+import org.w3c.dom.NodeList;
46+import org.xml.sax.SAXException;
47+
48+/**
49+ * 検索データ読込・保存.
50+ * @author kgto
51+ */
52+public class SearchDataRW {
53+ /* ---------------------------------------------------------------------- *
54+ * フィールド
55+ * ---------------------------------------------------------------------- */
56+ private String UrlAdress;
57+
58+ DocumentBuilder builder;
59+ public Document document;
60+ Element root;
61+
62+ /* ---------------------------------------------------------------------- *
63+ * コンストラクタ
64+ * ---------------------------------------------------------------------- */
65+ public SearchDataRW() {
66+ try {
67+ DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
68+ builder = factory.newDocumentBuilder();
69+
70+ } catch (ParserConfigurationException ex) {
71+ Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
72+ }
73+ }
74+
75+ /* ---------------------------------------------------------------------- *
76+ * Setter
77+ * ---------------------------------------------------------------------- */
78+ public void seturl(String UrlAdress) {
79+ this.UrlAdress = UrlAdress;
80+ }
81+
82+ /* ---------------------------------------------------------------------- *
83+ * Getter
84+ * ---------------------------------------------------------------------- */
85+ public String geturl() {
86+ return UrlAdress;
87+ }
88+
89+ /* ---------------------------------------------------------------------- *
90+ * メソッド
91+ * ---------------------------------------------------------------------- */
92+ /**
93+ * 保存.
94+ * @param file
95+ */
96+ public void save(File file) {
97+ saveUrl(UrlAdress);
98+ saveSearchList();
99+ write(file);
100+ }
101+
102+ /**
103+ * 読込.
104+ * @param file
105+ */
106+ public void load(File file) {
107+ read(file);
108+ loadUrl();
109+ loadSearchList();
110+ }
111+
112+ /* ---------------------------------------------------------------------- */
113+
114+ void loadUrl() {
115+ NodeList nodelist = root.getElementsByTagName("url");
116+ Node node = nodelist.item(0);
117+ UrlAdress = node.getFirstChild().getNodeValue();
118+ }
119+
120+ public void loadSearchList() {
121+ SearchData.clear();
122+
123+ NodeList nodelist = root.getElementsByTagName("searchlist");
124+ for(int i = 0; i < nodelist.getLength(); i++) {
125+ Node childnode = nodelist.item(i);
126+
127+ boolean sdatflg = false;
128+ SearchData sdat = new SearchData();
129+ for (Node child = childnode.getFirstChild(); child != null; child = child.getNextSibling()) {
130+ if(child.getNodeType() == Node.ELEMENT_NODE) {
131+ String tag = child.getNodeName();
132+ String rtn = "";
133+ if(child.getFirstChild() != null) {
134+ rtn = child.getFirstChild().getNodeValue();
135+ }
136+ switch (tag) {
137+ case "item" :
138+ sdat.setitem(rtn);
139+ sdatflg = true;
140+ break;
141+ case "htmltag" :
142+ sdat.setHtmltag(rtn);
143+ sdatflg = true;
144+ break;
145+ case "htmlid" :
146+ sdat.setHtmlid(rtn);
147+ sdatflg = true;
148+ break;
149+ case "htmlclass" :
150+ sdat.setHtmlclass(rtn);
151+ sdatflg = true;
152+ break;
153+ case "around" :
154+ sdat.setaround(rtn);
155+ sdatflg = true;
156+ break;
157+ case "regexp" :
158+ sdat.setregexp(rtn);
159+ sdatflg = true;
160+ break;
161+ }
162+ }
163+ }
164+ if(sdatflg) SearchData.add(sdat);
165+ }
166+ }
167+
168+ public String loadMsg404() {
169+ StringBuilder strbuf = new StringBuilder();
170+ NodeList nodelist = root.getElementsByTagName("msg404");
171+ for(int i = 0; i < nodelist.getLength(); i++) {
172+ Node childnode = nodelist.item(i);
173+ String str = childnode.getFirstChild().getNodeValue();
174+ if(strbuf.length() > 0) {
175+ strbuf.append("\n");
176+ }
177+ strbuf.append(str);
178+ }
179+ return strbuf.toString();
180+ }
181+
182+ public Element loadElement(String elementTagName) {
183+ NodeList nodelist = root.getElementsByTagName(elementTagName);
184+ Element element = (Element)nodelist.item(0);
185+
186+ return element;
187+ }
188+
189+ /* ---------------------------------------------------------------------- */
190+
191+ void saveUrl(String urladdress) {
192+ checkdoc();
193+ removeElement("url"); // 既にElementが存在してた場合、一度削除
194+
195+ Element url = document.createElement("url");
196+ url.appendChild(document.createTextNode(urladdress));
197+ root.appendChild(url);
198+ }
199+
200+ void saveSearchList() {
201+ checkdoc();
202+ removeElement("searchlist"); // 既にElementが存在してた場合、一度削除
203+
204+ int count = 0;
205+ for(int i = 0; i < SearchData.size(); i++) {
206+ SearchData sdat = SearchData.get(i);
207+
208+ Element cslist = document.createElement("searchlist");
209+ cslist.setAttribute("listNo", String.valueOf(++count));
210+
211+ addChild(cslist, "item", sdat.getitem());
212+ addChild(cslist, "htmltag", sdat.getHtmltag());
213+ addChild(cslist, "htmlid", sdat.getHtmlid());
214+ addChild(cslist, "htmlclass", sdat.getHtmlclass());
215+ addChild(cslist, "around", sdat.getaround());
216+ addChild(cslist, "regexp", sdat.getregexp());
217+
218+ root.appendChild(cslist);
219+ }
220+ }
221+
222+ void saveMsg404(String msg) {
223+ checkdoc();
224+ removeElement("msg404"); // 既にElementが存在してた場合、一度削除
225+
226+ String[] msgs = msg.split("\n");
227+ int count = 0;
228+ for(String msgOne : msgs) {
229+ Element msgElement = document.createElement("msg404");
230+ msgElement.setAttribute("No", String.valueOf(++count));
231+ msgElement.appendChild(document.createTextNode(msgOne));
232+
233+ root.appendChild(msgElement);
234+ }
235+ }
236+
237+ public void saveElement(Element element) {
238+ checkdoc();
239+ removeElement(element.getTagName()); // 既にElementが存在してた場合、一度削除
240+
241+ root.appendChild(element);
242+ }
243+
244+ /* ---------------------------------------------------------------------- */
245+
246+ private void addChild(Element cslist, String keyword, String data) {
247+ if(!data.isEmpty()) {
248+ Element element = document.createElement(keyword);
249+ element.appendChild(document.createTextNode(data));
250+ cslist.appendChild(element);
251+ }
252+ }
253+
254+ private void removeElement(String elementTagName) {
255+ int nodeSize;
256+ do {
257+ NodeList nodelist = document.getElementsByTagName(elementTagName);
258+ nodeSize = nodelist.getLength();
259+ for(int i = 0; i < nodelist.getLength(); i++) {
260+ Node node = nodelist.item(i);
261+ root.removeChild(node);
262+ }
263+ } while(nodeSize > 0);
264+ }
265+
266+ /**
267+ * ドキュメントチェック.
268+ * 新規の場合やXMLファイルの読込みが行われていない状態時、新たにルートエレメントを作成する。
269+ * 既読の場合、ルートエレメントの取得を行う。
270+ */
271+ public void checkdoc() {
272+ if(document == null) {
273+ DOMImplementation domImpl = builder.getDOMImplementation();
274+ document = domImpl.createDocument("","searchdata",null);
275+ }
276+ root = document.getDocumentElement();
277+ }
278+
279+ /**
280+ * XML読込み.
281+ * @param file
282+ */
283+ public void read(File file) {
284+ try {
285+ document = builder.parse(file);
286+ root = document.getDocumentElement();
287+
288+ } catch (SAXException | IOException ex) {
289+ Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
290+ }
291+ }
292+
293+ /**
294+ * XML書込み.
295+ * @param file
296+ */
297+ public void write(File file) {
298+ try {
299+ TransformerFactory transFactory = TransformerFactory.newInstance();
300+ Transformer transformer = transFactory.newTransformer();
301+
302+ DOMSource source = new DOMSource(document);
303+ FileOutputStream os = new FileOutputStream(file);
304+ StreamResult result = new StreamResult(os);
305+ transformer.transform(source, result);
306+
307+ } catch (TransformerConfigurationException ex) {
308+ Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
309+ } catch (FileNotFoundException | TransformerException ex) {
310+ Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
311+ }
312+ }
313+
314+}
Added: svn:keywords
## -0,0 +1 ##
+Id
\ No newline at end of property
--- trunk/HtmlTest2/test/utility/test1/SearchDataRWT01.java (revision 131)
+++ trunk/HtmlTest2/test/utility/test1/SearchDataRWT01.java (revision 132)
@@ -1,7 +1,6 @@
11
22 package utility.test1;
33
4-import webScraping.utility.SearchDataRW;
54 import java.io.File;
65 import java.lang.reflect.InvocationTargetException;
76 import java.lang.reflect.Method;
--- trunk/HtmlTest2/test/utility/test1/SearchDataRWT02.java (revision 131)
+++ trunk/HtmlTest2/test/utility/test1/SearchDataRWT02.java (revision 132)
@@ -1,7 +1,6 @@
11
22 package utility.test1;
33
4-import webScraping.utility.SearchDataRW;
54 import java.io.File;
65 import webScraping.core.SearchData;
76
--- trunk/HtmlTest2/test/utility/test1/ConvertXml01.java (nonexistent)
+++ trunk/HtmlTest2/test/utility/test1/ConvertXml01.java (revision 132)
@@ -0,0 +1,42 @@
1+
2+package utility.test1;
3+
4+import java.io.File;
5+import webScraping.utility.ScrapingXml;
6+
7+/**
8+ * XMLコンバータ
9+ * 旧:SearchDataRW.java → 新:ScrapingXml.java
10+ * @author kgto
11+ */
12+public class ConvertXml01 {
13+
14+ private String UrlAdress;
15+ File file = new File("test1.xml");
16+
17+ /**
18+ * @param args the command line arguments
19+ */
20+ public static void main(String[] args) {
21+ ConvertXml01 conv = new ConvertXml01();
22+
23+ conv.readold();
24+ conv.writenew();
25+
26+ System.exit(0);
27+ }
28+
29+ void readold() {
30+ SearchDataRW sdatrw = new SearchDataRW();
31+ sdatrw.load(file);
32+ UrlAdress = sdatrw.geturl();
33+ }
34+
35+ void writenew() {
36+ ScrapingXml xmlwriter = new ScrapingXml();
37+ xmlwriter.setTestUrl(UrlAdress);
38+ xmlwriter.setSdata();
39+ xmlwriter.save(file);
40+ }
41+
42+}
--- trunk/HtmlTest2/test1.xml (revision 131)
+++ trunk/HtmlTest2/test1.xml (revision 132)
@@ -1,16 +1,71 @@
1-<?xml version="1.0" encoding="UTF-8" standalone="no"?><searchdata>
2-
3-
4-
5-
6-
7-
8-
9-
10-
11-
12-
13-
14-
15-
16-<url>http://weather.yahoo.co.jp/weather/</url><searchlist listNo="1"><item>天気01</item><htmltag>li</htmltag><htmlclass>point pt1400</htmlclass></searchlist><searchlist listNo="2"><item>天気02</item><htmltag>li</htmltag><htmlclass>point pt1900</htmlclass></searchlist><searchlist listNo="3"><item>天気03</item><htmltag>li</htmltag><htmlclass>point pt3410</htmlclass></searchlist><searchlist listNo="4"><item>天気04</item><htmltag>li</htmltag><htmlclass>point pt4410</htmlclass></searchlist><searchlist listNo="5"><item>天気05</item><htmltag>li</htmltag><htmlclass>point pt5110</htmlclass></searchlist><searchlist listNo="6"><item>天気06</item><htmltag>li</htmltag><htmlclass>point pt5410</htmlclass></searchlist><searchlist listNo="7"><item>天気07</item><htmltag>li</htmltag><htmlclass>point pt5610</htmlclass></searchlist><searchlist listNo="8"><item>天気08</item><htmltag>li</htmltag><htmlclass>point pt6200</htmlclass></searchlist><searchlist listNo="9"><item>天気09</item><htmltag>li</htmltag><htmlclass>point pt6710</htmlclass></searchlist><searchlist listNo="10"><item>天気10</item><htmltag>li</htmltag><htmlclass>point pt7410</htmlclass></searchlist><searchlist listNo="11"><item>天気11</item><htmltag>li</htmltag><htmlclass>point pt8210</htmlclass></searchlist><searchlist listNo="12"><item>天気12</item><htmltag>li</htmltag><htmlclass>point pt8810</htmlclass></searchlist><searchlist listNo="13"><item>天気13</item><htmltag>li</htmltag><htmlclass>point pt9110</htmlclass></searchlist></searchdata>
\ No newline at end of file
1+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
2+<xmlcontainer>
3+<webscraping>
4+<url>http://weather.yahoo.co.jp/weather/</url>
5+<searchlist listNo="1">
6+<item>天気01</item>
7+<htmltag>li</htmltag>
8+<htmlclass>point pt1400</htmlclass>
9+</searchlist>
10+<searchlist listNo="2">
11+<item>天気02</item>
12+<htmltag>li</htmltag>
13+<htmlclass>point pt1900</htmlclass>
14+</searchlist>
15+<searchlist listNo="3">
16+<item>天気03</item>
17+<htmltag>li</htmltag>
18+<htmlclass>point pt3410</htmlclass>
19+</searchlist>
20+<searchlist listNo="4">
21+<item>天気04</item>
22+<htmltag>li</htmltag>
23+<htmlclass>point pt4410</htmlclass>
24+</searchlist>
25+<searchlist listNo="5">
26+<item>天気05</item>
27+<htmltag>li</htmltag>
28+<htmlclass>point pt5110</htmlclass>
29+</searchlist>
30+<searchlist listNo="6">
31+<item>天気06</item>
32+<htmltag>li</htmltag>
33+<htmlclass>point pt5410</htmlclass>
34+</searchlist>
35+<searchlist listNo="7">
36+<item>天気07</item>
37+<htmltag>li</htmltag>
38+<htmlclass>point pt5610</htmlclass>
39+</searchlist>
40+<searchlist listNo="8">
41+<item>天気08</item>
42+<htmltag>li</htmltag>
43+<htmlclass>point pt6200</htmlclass>
44+</searchlist>
45+<searchlist listNo="9">
46+<item>天気09</item>
47+<htmltag>li</htmltag>
48+<htmlclass>point pt6710</htmlclass>
49+</searchlist>
50+<searchlist listNo="10">
51+<item>天気10</item>
52+<htmltag>li</htmltag>
53+<htmlclass>point pt7410</htmlclass>
54+</searchlist>
55+<searchlist listNo="11">
56+<item>天気11</item>
57+<htmltag>li</htmltag>
58+<htmlclass>point pt8210</htmlclass>
59+</searchlist>
60+<searchlist listNo="12">
61+<item>天気12</item>
62+<htmltag>li</htmltag>
63+<htmlclass>point pt8810</htmlclass>
64+</searchlist>
65+<searchlist listNo="13">
66+<item>天気13</item>
67+<htmltag>li</htmltag>
68+<htmlclass>point pt9110</htmlclass>
69+</searchlist>
70+</webscraping>
71+</xmlcontainer>
--- trunk/HtmlTest2/src/WebScraping/utility/SearchDataRW.java (revision 131)
+++ trunk/HtmlTest2/src/WebScraping/utility/SearchDataRW.java (nonexistent)
@@ -1,547 +0,0 @@
1-/*
2- * Copyright (C) 2014 kgto.
3- *
4- * This library is free software; you can redistribute it and/or
5- * modify it under the terms of the GNU Lesser General Public
6- * License as published by the Free Software Foundation; either
7- * version 2.1 of the License, or (at your option) any later version.
8- *
9- * This library is distributed in the hope that it will be useful,
10- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12- * Lesser General Public License for more details.
13- *
14- * You should have received a copy of the GNU Lesser General Public
15- * License along with this library; if not, write to the Free Software
16- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17- * MA 02110-1301 USA
18- */
19-/*
20- * $Id$
21- */
22-
23-package webScraping.utility;
24-
25-import webScraping.core.SearchData;
26-import java.io.BufferedReader;
27-import java.io.BufferedWriter;
28-import java.io.File;
29-import java.io.FileInputStream;
30-import java.io.FileNotFoundException;
31-import java.io.FileOutputStream;
32-import java.io.IOException;
33-import java.io.InputStreamReader;
34-import java.io.OutputStreamWriter;
35-import java.util.ArrayList;
36-import java.util.logging.Level;
37-import java.util.logging.Logger;
38-import javax.xml.parsers.DocumentBuilder;
39-import javax.xml.parsers.DocumentBuilderFactory;
40-import javax.xml.parsers.ParserConfigurationException;
41-import javax.xml.transform.Transformer;
42-import javax.xml.transform.TransformerConfigurationException;
43-import javax.xml.transform.TransformerException;
44-import javax.xml.transform.TransformerFactory;
45-import javax.xml.transform.dom.DOMSource;
46-import javax.xml.transform.stream.StreamResult;
47-import org.w3c.dom.DOMImplementation;
48-import org.w3c.dom.Document;
49-import org.w3c.dom.Element;
50-import org.w3c.dom.Node;
51-import org.w3c.dom.NodeList;
52-import org.xml.sax.SAXException;
53-
54-/**
55- *
56- * @author kgto
57- */
58-public class SearchDataRW {
59-
60- DocumentBuilder builder;
61- public Document document;
62- Element root;
63-
64- private final String splitchar = "\t";
65-
66- private String UrlAdress;
67- private ArrayList<SearchData> slist = new ArrayList<>();
68-
69- public SearchDataRW() {
70- try {
71- DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
72- builder = factory.newDocumentBuilder();
73-
74- } catch (ParserConfigurationException ex) {
75- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
76- }
77- }
78-
79- public void seturl(String UrlAdress) {
80- this.UrlAdress = UrlAdress;
81- }
82-
83- public void setslist(ArrayList slist) {
84- this.slist = slist;
85- }
86-
87- public String geturl() {
88- return UrlAdress;
89- }
90-
91- public ArrayList getslist() {
92- return slist;
93- }
94-
95- /**
96- * 保存.
97- * @param file
98- */
99- public void save(File file) {
100- //saveCsv(file);
101- //saveXml(file);
102-
103- saveUrl(UrlAdress);
104- saveSearchList(slist);
105- write(file);
106- }
107-
108- /**
109- * 読込.
110- * @param file
111- */
112- public void load(File file) {
113- //loadCsv(file);
114- //loadXml(file);
115-
116- read(file);
117- loadUrl();
118- loadSearchList();
119- }
120-
121- /* ---------------------------------------------------------------------- */
122- /**
123- * 保存(CSV形式).
124- * @param file
125- */
126- public void saveCsv(File file) {
127- BufferedWriter bufferedwriter = null;
128- try {
129- //空のファイルを作成
130- file.createNewFile();
131- FileOutputStream fileoutputstream = new FileOutputStream(file);
132- OutputStreamWriter outputstreamwriter = new OutputStreamWriter(fileoutputstream, "UTF-8");
133- bufferedwriter = new BufferedWriter(outputstreamwriter);
134-
135- // URL
136- bufferedwriter.write(UrlAdress);
137- bufferedwriter.write("\n");
138- // 検索情報
139- for(Object slist1 : slist) {
140- SearchData sdat = (SearchData)slist1;
141- //
142- StringBuilder str = new StringBuilder();
143- str.append(sdat.getitem()).append(splitchar);
144- str.append(sdat.getHtmltag()).append(splitchar);
145- str.append(sdat.getHtmlid()).append(splitchar);
146- str.append(sdat.getHtmlclass()).append(splitchar);
147- str.append(sdat.getaround()).append(splitchar);
148- str.append(sdat.getregexp()).append("\n");
149- // 書込み
150- bufferedwriter.write(str.toString());
151- }
152-
153- } catch (IOException ex) {
154- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
155- } finally {
156- try {
157- if(bufferedwriter != null) {
158- bufferedwriter.close();
159- }
160-
161- } catch (IOException ex) {
162- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
163- }
164- }
165- }
166-
167- /**
168- * 読込(CSV形式).
169- * @param file
170- */
171- public void loadCsv(File file) {
172- slist = new ArrayList();
173-
174- BufferedReader bufferedreader = null;
175- try {
176- FileInputStream fileinputstream = new FileInputStream(file);
177- InputStreamReader inputstreamreader = new InputStreamReader(fileinputstream, "UTF-8");
178- bufferedreader = new BufferedReader(inputstreamreader);
179-
180- // URL
181- UrlAdress = bufferedreader.readLine();
182- // 検索情報
183- String rec;
184- while((rec = bufferedreader.readLine()) != null) {
185- String[] recary = rec.split(splitchar, -1);
186- SearchData sdat = new SearchData();
187- sdat.setitem(recary[0]);
188- sdat.setHtmltag(recary[1]);
189- sdat.setHtmlid(recary[2]);
190- sdat.setHtmlclass(recary[3]);
191- sdat.setaround(recary[4]);
192- sdat.setregexp(recary[5]);
193-
194- slist.add(sdat);
195- }
196-
197- } catch(IOException ex) {
198- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
199-
200- } finally {
201- try {
202- if(bufferedreader != null) {
203- bufferedreader.close();
204- }
205-
206- } catch (IOException ex) {
207- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
208- }
209- }
210- }
211-
212- /* ---------------------------------------------------------------------- */
213- /**
214- * 保存(XML形式).
215- * @param file
216- */
217- public void saveXml(File file) {
218- try {
219- DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
220- DocumentBuilder wkBuilder = factory.newDocumentBuilder();
221- DOMImplementation domImpl = wkBuilder.getDOMImplementation();
222-
223- Document doc = domImpl.createDocument("","searchdata",null);
224- Element wkRoot = doc.getDocumentElement();
225-
226- // URL
227- Element url = doc.createElement("url");
228- url.appendChild(doc.createTextNode(UrlAdress));
229- wkRoot.appendChild(url);
230-
231- // 検索情報
232- for (Object slist1 : slist) {
233- SearchData sdat = (SearchData) slist1;
234-
235- Element cslist = doc.createElement("searchlist");
236- Element item = doc.createElement("item");
237- Element htmltag = doc.createElement("htmltag");
238- Element htmlid = doc.createElement("htmlid");
239- Element htmlclass = doc.createElement("htmlclass");
240- Element around = doc.createElement("around");
241- Element regexp = doc.createElement("regexp");
242-
243- item.appendChild(doc.createTextNode(sdat.getitem()));
244- htmltag.appendChild(doc.createTextNode(sdat.getHtmltag()));
245- htmlid.appendChild(doc.createTextNode(sdat.getHtmlid()));
246- htmlclass.appendChild(doc.createTextNode(sdat.getHtmlclass()));
247- around.appendChild(doc.createTextNode(sdat.getaround()));
248- regexp.appendChild(doc.createTextNode(sdat.getregexp()));
249-
250- cslist.appendChild(item);
251- cslist.appendChild(htmltag);
252- cslist.appendChild(htmlid);
253- cslist.appendChild(htmlclass);
254- cslist.appendChild(around);
255- cslist.appendChild(regexp);
256-
257- wkRoot.appendChild(cslist);
258- }
259- // 出力
260- TransformerFactory transFactory = TransformerFactory.newInstance();
261- Transformer transformer = transFactory.newTransformer();
262-
263- DOMSource source = new DOMSource(doc);
264- FileOutputStream os = new FileOutputStream(file);
265- StreamResult result = new StreamResult(os);
266- transformer.transform(source, result);
267-
268- } catch (ParserConfigurationException | FileNotFoundException ex) {
269- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
270- } catch (TransformerConfigurationException ex) {
271- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
272- } catch (TransformerException ex) {
273- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
274- }
275- }
276-
277- /**
278- * 読込(XML形式).
279- * @param file
280- */
281- public void loadXml(File file) {
282- slist = new ArrayList();
283-
284- try {
285- DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
286- DocumentBuilder wkBuilder = factory.newDocumentBuilder();
287- Document doc = wkBuilder.parse(file);
288-
289- // ルート要素の取得
290- Element wkRoot = doc.getDocumentElement();
291-
292- // URL
293- NodeList url = wkRoot.getElementsByTagName("url");
294- Node urlnode = url.item(0);
295- UrlAdress = urlnode.getFirstChild().getNodeValue();
296-
297- // 検索情報
298- NodeList cslist = wkRoot.getElementsByTagName("searchlist");
299- for(int i = 0; i < cslist.getLength(); i++) {
300- SearchData sdat = new SearchData();
301-
302- Node slistnode = cslist.item(i);
303- Node child;
304- for (child = slistnode.getFirstChild(); child != null; child = child.getNextSibling()) {
305- if(child.getNodeType() == Node.ELEMENT_NODE) {
306-
307- String tag = child.getNodeName();
308- String rtn = "";
309- if(child.getFirstChild() != null) {
310- rtn = child.getFirstChild().getNodeValue();
311- }
312-
313- switch (tag) {
314- case "item" :
315- sdat.setitem(rtn);
316- break;
317- case "htmltag" :
318- sdat.setHtmltag(rtn);
319- break;
320- case "htmlid" :
321- sdat.setHtmlid(rtn);
322- break;
323- case "htmlclass" :
324- sdat.setHtmlclass(rtn);
325- break;
326- case "around" :
327- sdat.setaround(rtn);
328- break;
329- case "regexp" :
330- sdat.setregexp(rtn);
331- break;
332- }
333- }
334- }
335- slist.add(sdat);
336- }
337-
338- } catch (ParserConfigurationException | SAXException | IOException ex) {
339- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
340- }
341- }
342-
343- /* ---------------------------------------------------------------------- */
344-
345- void loadUrl() {
346- NodeList nodelist = root.getElementsByTagName("url");
347- Node node = nodelist.item(0);
348- UrlAdress = node.getFirstChild().getNodeValue();
349- }
350-
351- public void loadSearchList() {
352- slist.clear();
353- SearchData.clear();
354-
355- NodeList nodelist = root.getElementsByTagName("searchlist");
356- for(int i = 0; i < nodelist.getLength(); i++) {
357- Node childnode = nodelist.item(i);
358-
359- boolean sdatflg = false;
360- SearchData sdat = new SearchData();
361- for (Node child = childnode.getFirstChild(); child != null; child = child.getNextSibling()) {
362- if(child.getNodeType() == Node.ELEMENT_NODE) {
363- String tag = child.getNodeName();
364- String rtn = "";
365- if(child.getFirstChild() != null) {
366- rtn = child.getFirstChild().getNodeValue();
367- }
368- switch (tag) {
369- case "item" :
370- sdat.setitem(rtn);
371- sdatflg = true;
372- break;
373- case "htmltag" :
374- sdat.setHtmltag(rtn);
375- sdatflg = true;
376- break;
377- case "htmlid" :
378- sdat.setHtmlid(rtn);
379- sdatflg = true;
380- break;
381- case "htmlclass" :
382- sdat.setHtmlclass(rtn);
383- sdatflg = true;
384- break;
385- case "around" :
386- sdat.setaround(rtn);
387- sdatflg = true;
388- break;
389- case "regexp" :
390- sdat.setregexp(rtn);
391- sdatflg = true;
392- break;
393- }
394- }
395- }
396- if(sdatflg) slist.add(sdat);
397- if(sdatflg) SearchData.add(sdat);
398- }
399- }
400-
401- public String loadMsg404() {
402- StringBuilder strbuf = new StringBuilder();
403- NodeList nodelist = root.getElementsByTagName("msg404");
404- for(int i = 0; i < nodelist.getLength(); i++) {
405- Node childnode = nodelist.item(i);
406- String str = childnode.getFirstChild().getNodeValue();
407- if(strbuf.length() > 0) {
408- strbuf.append("\n");
409- }
410- strbuf.append(str);
411- }
412- return strbuf.toString();
413- }
414-
415- public Element loadElement(String elementTagName) {
416- NodeList nodelist = root.getElementsByTagName(elementTagName);
417- Element element = (Element)nodelist.item(0);
418-
419- return element;
420- }
421-
422- /* ---------------------------------------------------------------------- */
423-
424- void saveUrl(String urladdress) {
425- checkdoc();
426- removeElement("url"); // 既にElementが存在してた場合、一度削除
427-
428- Element url = document.createElement("url");
429- url.appendChild(document.createTextNode(urladdress));
430- root.appendChild(url);
431- }
432-
433- void saveSearchList(ArrayList slist) {
434- checkdoc();
435- removeElement("searchlist"); // 既にElementが存在してた場合、一度削除
436-
437- int count = 0;
438- for (Object slist1 : slist) {
439- SearchData sdat = (SearchData) slist1;
440-
441- Element cslist = document.createElement("searchlist");
442- cslist.setAttribute("listNo", String.valueOf(++count));
443-
444- addChild(cslist, "item", sdat.getitem());
445- addChild(cslist, "htmltag", sdat.getHtmltag());
446- addChild(cslist, "htmlid", sdat.getHtmlid());
447- addChild(cslist, "htmlclass", sdat.getHtmlclass());
448- addChild(cslist, "around", sdat.getaround());
449- addChild(cslist, "regexp", sdat.getregexp());
450-
451- root.appendChild(cslist);
452- }
453- }
454-
455- void saveMsg404(String msg) {
456- checkdoc();
457- removeElement("msg404"); // 既にElementが存在してた場合、一度削除
458-
459- String[] msgs = msg.split("\n");
460- int count = 0;
461- for(String msgOne : msgs) {
462- Element msgElement = document.createElement("msg404");
463- msgElement.setAttribute("No", String.valueOf(++count));
464- msgElement.appendChild(document.createTextNode(msgOne));
465-
466- root.appendChild(msgElement);
467- }
468- }
469-
470- public void saveElement(Element element) {
471- checkdoc();
472- removeElement(element.getTagName()); // 既にElementが存在してた場合、一度削除
473-
474- root.appendChild(element);
475- }
476-
477- /* ---------------------------------------------------------------------- */
478-
479- private void addChild(Element cslist, String keyword, String data) {
480- if(!data.isEmpty()) {
481- Element element = document.createElement(keyword);
482- element.appendChild(document.createTextNode(data));
483- cslist.appendChild(element);
484- }
485- }
486-
487- private void removeElement(String elementTagName) {
488- int nodeSize;
489- do {
490- NodeList nodelist = document.getElementsByTagName(elementTagName);
491- nodeSize = nodelist.getLength();
492- for(int i = 0; i < nodelist.getLength(); i++) {
493- Node node = nodelist.item(i);
494- root.removeChild(node);
495- }
496- } while(nodeSize > 0);
497- }
498-
499- /**
500- * ドキュメントチェック.
501- * 新規の場合やXMLファイルの読込みが行われていない状態時、新たにルートエレメントを作成する。
502- * 既読の場合、ルートエレメントの取得を行う。
503- */
504- public void checkdoc() {
505- if(document == null) {
506- DOMImplementation domImpl = builder.getDOMImplementation();
507- document = domImpl.createDocument("","searchdata",null);
508- }
509- root = document.getDocumentElement();
510- }
511-
512- /**
513- * XML読込み.
514- * @param file
515- */
516- public void read(File file) {
517- try {
518- document = builder.parse(file);
519- root = document.getDocumentElement();
520-
521- } catch (SAXException | IOException ex) {
522- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
523- }
524- }
525-
526- /**
527- * XML書込み.
528- * @param file
529- */
530- public void write(File file) {
531- try {
532- TransformerFactory transFactory = TransformerFactory.newInstance();
533- Transformer transformer = transFactory.newTransformer();
534-
535- DOMSource source = new DOMSource(document);
536- FileOutputStream os = new FileOutputStream(file);
537- StreamResult result = new StreamResult(os);
538- transformer.transform(source, result);
539-
540- } catch (TransformerConfigurationException ex) {
541- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
542- } catch (FileNotFoundException | TransformerException ex) {
543- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
544- }
545- }
546-
547-}
Deleted: svn:keywords
## -1 +0,0 ##
-Id
\ No newline at end of property
--- trunk/HtmlTest2/src/WebScraping/utility/LibraryXml.java (nonexistent)
+++ trunk/HtmlTest2/src/WebScraping/utility/LibraryXml.java (revision 132)
@@ -0,0 +1,142 @@
1+/*
2+ * Copyright (C) 2014-2015 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id$
21+ */
22+
23+package webScraping.utility;
24+
25+import java.io.File;
26+import java.io.FileNotFoundException;
27+import java.io.FileOutputStream;
28+import java.io.IOException;
29+import java.util.logging.Level;
30+import java.util.logging.Logger;
31+
32+import javax.xml.parsers.DocumentBuilder;
33+import javax.xml.parsers.DocumentBuilderFactory;
34+import javax.xml.parsers.ParserConfigurationException;
35+import javax.xml.transform.Transformer;
36+import javax.xml.transform.TransformerConfigurationException;
37+import javax.xml.transform.TransformerException;
38+import javax.xml.transform.TransformerFactory;
39+import javax.xml.transform.dom.DOMSource;
40+import javax.xml.transform.stream.StreamResult;
41+
42+import org.w3c.dom.DOMImplementation;
43+import org.w3c.dom.Document;
44+import org.w3c.dom.Element;
45+import org.w3c.dom.Node;
46+import org.w3c.dom.NodeList;
47+import org.xml.sax.SAXException;
48+
49+public class LibraryXml {
50+
51+ String xmlrootname = "xmlcontainer";
52+
53+ DocumentBuilder builder;
54+ public Document readdoc, writedoc;
55+ Element xmlroot;
56+
57+ /* ---------------------------------------------------------------------- *
58+ * コンストラクタ
59+ * ---------------------------------------------------------------------- */
60+ public LibraryXml() {
61+ try {
62+ DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
63+ builder = factory.newDocumentBuilder();
64+
65+ } catch (ParserConfigurationException ex) {
66+ Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
67+ }
68+ }
69+
70+ /* ---------------------------------------------------------------------- *
71+ * メソッド
72+ * ---------------------------------------------------------------------- */
73+ /* 書込み処理 */
74+ public Element getwriteRoot(String elementName) {
75+ mainElement();
76+ Element element = writedoc.createElement(elementName);
77+ xmlroot.appendChild(element);
78+ return element;
79+ }
80+
81+ private void mainElement() {
82+ if(writedoc == null) {
83+ DOMImplementation domImpl = builder.getDOMImplementation();
84+ writedoc = domImpl.createDocument("", xmlrootname, null);
85+ xmlroot = writedoc.getDocumentElement();
86+ }
87+ }
88+
89+ /**
90+ * XML書込み.
91+ * @param file
92+ */
93+ public void write(File file) {
94+ try (FileOutputStream os = new FileOutputStream(file)) {
95+ TransformerFactory transFactory = TransformerFactory.newInstance();
96+ Transformer transformer = transFactory.newTransformer();
97+
98+ transformer.setOutputProperty("indent", "yes"); // 改行指定
99+ transformer.setOutputProperty("method", "xml");
100+
101+ DOMSource source = new DOMSource(writedoc);
102+ StreamResult result = new StreamResult(os);
103+ transformer.transform(source, result);
104+
105+ // 作成したXMLをクリア
106+ writedoc = null;
107+
108+ } catch (TransformerConfigurationException ex) {
109+ Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
110+ } catch (FileNotFoundException | TransformerException ex) {
111+ Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
112+ } catch (IOException ex) {
113+ Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
114+ }
115+ }
116+
117+ /* ---------------------------------------------------------------------- */
118+ /* 読込み処理 */
119+
120+ public Element getreadRoot(String elementName) {
121+ NodeList nodelist = xmlroot.getElementsByTagName(elementName);
122+ Node node = nodelist.item(0);
123+ return (node.getNodeType() == Node.ELEMENT_NODE ? (Element)node : null);
124+ }
125+
126+ /**
127+ * XML読込み.
128+ * @param file
129+ */
130+ public void read(File file) {
131+ try {
132+ readdoc = builder.parse(file);
133+ xmlroot = readdoc.getDocumentElement();
134+
135+ } catch (SAXException | IOException ex) {
136+ Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
137+ }
138+ }
139+
140+ /* ---------------------------------------------------------------------- */
141+
142+}
Added: svn:keywords
## -0,0 +1 ##
+Id
\ No newline at end of property
--- trunk/HtmlTest2/src/WebScraping/utility/ScrapingXml.java (nonexistent)
+++ trunk/HtmlTest2/src/WebScraping/utility/ScrapingXml.java (revision 132)
@@ -0,0 +1,198 @@
1+/*
2+ * Copyright (C) 2014-2015 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id$
21+ */
22+
23+package webScraping.utility;
24+
25+import webScraping.core.SearchData;
26+import java.io.File;
27+import java.util.ArrayList;
28+import org.w3c.dom.Element;
29+import org.w3c.dom.Node;
30+import org.w3c.dom.NodeList;
31+
32+public class ScrapingXml {
33+ /* ---------------------------------------------------------------------- *
34+ * フィールド
35+ * ---------------------------------------------------------------------- */
36+ String rootnameScraping = "webscraping";
37+
38+ private String testUrl;
39+ private SearchData[] sdata;
40+
41+ public LibraryXml xlib = new LibraryXml();
42+ public Element root;
43+
44+ /* ---------------------------------------------------------------------- *
45+ * コンストラクタ
46+ * ---------------------------------------------------------------------- */
47+ public ScrapingXml() {
48+ }
49+
50+ /* ---------------------------------------------------------------------- *
51+ * Setter
52+ * ---------------------------------------------------------------------- */
53+ public void setTestUrl(String testUrl) {
54+ this.testUrl = testUrl;
55+ }
56+
57+ public void setSdata() {
58+ this.sdata = new SearchData[SearchData.size()];
59+ for(int i = 0; i < SearchData.size(); i++) {
60+ this.sdata[i] = SearchData.get(i);
61+ }
62+ }
63+
64+ /* ---------------------------------------------------------------------- *
65+ * Getter
66+ * ---------------------------------------------------------------------- */
67+ public String getTestUrl() {
68+ return testUrl;
69+ }
70+
71+ public void getSdata() {
72+ SearchData.clear();
73+ for(SearchData sdata1 : sdata) {
74+ SearchData.add(sdata1);
75+ }
76+ }
77+
78+ /* ---------------------------------------------------------------------- *
79+ * メソッド
80+ * ---------------------------------------------------------------------- */
81+ public void save(File file) {
82+
83+ elementset();
84+
85+ xlib.write(file);
86+ }
87+
88+ public void elementset() {
89+ root = xlib.getwriteRoot(rootnameScraping);
90+ elementsetUrl();
91+ elementsetSearchdata();
92+ System.out.println("elementset XmlScraping");
93+ }
94+
95+ private void elementsetUrl() {
96+ Element url = xlib.writedoc.createElement("url");
97+ url.appendChild(xlib.writedoc.createTextNode(testUrl));
98+ root.appendChild(url);
99+ }
100+
101+ private void elementsetSearchdata() {
102+ int count = 0;
103+ for(SearchData sdat : sdata) {
104+ Element cslist = xlib.writedoc.createElement("searchlist");
105+ cslist.setAttribute("listNo", String.valueOf(++count));
106+
107+ addChild(cslist, "item" , sdat.getitem());
108+ addChild(cslist, "htmltag" , sdat.getHtmltag());
109+ addChild(cslist, "htmlid" , sdat.getHtmlid());
110+ addChild(cslist, "htmlclass", sdat.getHtmlclass());
111+ addChild(cslist, "around" , sdat.getaround());
112+ addChild(cslist, "regexp" , sdat.getregexp());
113+
114+ root.appendChild(cslist);
115+ }
116+ }
117+
118+ private void addChild(Element cslist, String keyword, String data) {
119+ if(!data.isEmpty()) {
120+ Element element = xlib.writedoc.createElement(keyword);
121+ element.appendChild(xlib.writedoc.createTextNode(data));
122+ cslist.appendChild(element);
123+ }
124+ }
125+
126+ /* ---------------------------------------------------------------------- */
127+
128+ void load(File file) {
129+ xlib.read(file);
130+ elementget();
131+ }
132+
133+ public void elementget() {
134+ root = xlib.getreadRoot(rootnameScraping);
135+ elementgetUrl();
136+ elementgetSearchdata();
137+ }
138+
139+ private void elementgetUrl() {
140+ NodeList nodelist = root.getElementsByTagName("url");
141+ Node node = nodelist.item(0);
142+ testUrl = node.getFirstChild().getNodeValue();
143+ }
144+
145+ private void elementgetSearchdata() {
146+ ArrayList<SearchData> slist = new ArrayList<>();
147+
148+ NodeList nodelist = root.getElementsByTagName("searchlist");
149+ for(int i = 0; i < nodelist.getLength(); i++) {
150+ Node childnode = nodelist.item(i);
151+
152+ boolean sdatflg = false;
153+ SearchData sdat = new SearchData();
154+ for (Node child = childnode.getFirstChild(); child != null; child = child.getNextSibling()) {
155+ if(child.getNodeType() == Node.ELEMENT_NODE) {
156+ String tag = child.getNodeName();
157+ String rtn = "";
158+ if(child.getFirstChild() != null) {
159+ rtn = child.getFirstChild().getNodeValue();
160+ }
161+ switch (tag) {
162+ case "item" :
163+ sdat.setitem(rtn);
164+ sdatflg = true;
165+ break;
166+ case "htmltag" :
167+ sdat.setHtmltag(rtn);
168+ sdatflg = true;
169+ break;
170+ case "htmlid" :
171+ sdat.setHtmlid(rtn);
172+ sdatflg = true;
173+ break;
174+ case "htmlclass" :
175+ sdat.setHtmlclass(rtn);
176+ sdatflg = true;
177+ break;
178+ case "around" :
179+ sdat.setaround(rtn);
180+ sdatflg = true;
181+ break;
182+ case "regexp" :
183+ sdat.setregexp(rtn);
184+ sdatflg = true;
185+ break;
186+ }
187+ }
188+ }
189+ if(sdatflg) slist.add(sdat);
190+ }
191+ // 配列化
192+ sdata = new SearchData[slist.size()];
193+ for(int i = 0; i < slist.size(); i++) {
194+ sdata[i] = slist.get(i);
195+ }
196+ }
197+
198+}
Added: svn:keywords
## -0,0 +1 ##
+Id
\ No newline at end of property
--- trunk/HtmlTest2/src/WebScraping/utility/HtmlSearch.java (revision 131)
+++ trunk/HtmlTest2/src/WebScraping/utility/HtmlSearch.java (revision 132)
@@ -40,7 +40,7 @@
4040 * @author kgto
4141 */
4242 public class HtmlSearch extends javax.swing.JFrame {
43- private final SearchDataRW sio = new SearchDataRW();
43+ private final ScrapingXml xmlwriter = new ScrapingXml();
4444
4545 SearchDataTableModel sdatatblmodel;
4646
@@ -332,8 +332,9 @@
332332 int selected = jFileChooser1.showOpenDialog(this);
333333 if (selected == JFileChooser.APPROVE_OPTION) {
334334 File file = jFileChooser1.getSelectedFile();
335- sio.load(file);
336- jTxtUrl.setText(sio.geturl());
335+ xmlwriter.load(file);
336+ jTxtUrl.setText(xmlwriter.getTestUrl());
337+ xmlwriter.getSdata();
337338 sdatatblmodel.setRowCount(0);
338339 for(int i = 0; i < SearchData.size(); i++) {
339340 SearchData sdata = SearchData.get(i);
@@ -347,7 +348,7 @@
347348 int selected = jFileChooser1.showSaveDialog(this);
348349 if (selected == JFileChooser.APPROVE_OPTION) {
349350 File file = jFileChooser1.getSelectedFile();
350- sio.seturl(jTxtUrl.getText());
351+ xmlwriter.setTestUrl(jTxtUrl.getText());
351352
352353 SearchData.clear();
353354 for(int row = 0; row < sdatatblmodel.getRowCount(); row++) {
@@ -354,7 +355,8 @@
354355 SearchData sdata = sdatatblmodel.getSearchData(row);
355356 SearchData.add(sdata);
356357 }
357- sio.save(file);
358+ xmlwriter.setSdata();
359+ xmlwriter.save(file);
358360 }
359361 }//GEN-LAST:event_jMenuSaveActionPerformed
360362
--- trunk/HtmlTest2/src/WebScraping/core/AttributeData.java (revision 131)
+++ trunk/HtmlTest2/src/WebScraping/core/AttributeData.java (revision 132)
@@ -76,7 +76,8 @@
7676 for (Object AttrList1 : AttrList) {
7777 AttrData a = (AttrData)AttrList1;
7878 if(a.tag == tag) {
79- if(a.attrname.equals(attrname) && a.attrvalue.equals(attrvalue)) {
79+ //if(a.attrname.equals(attrname) && a.attrvalue.equals(attrvalue)) {
80+ if(a.attrname.equals(attrname) && a.attrvalue.startsWith(attrvalue)) {
8081 ret = true;
8182 }
8283 }
--- trunk/HtmlTest2/src/WebScraping/core/HtmlParserCallback.java (revision 131)
+++ trunk/HtmlTest2/src/WebScraping/core/HtmlParserCallback.java (revision 132)
@@ -33,7 +33,9 @@
3333 * @author kgto
3434 */
3535 class HtmlParserCallback extends HTMLEditorKit.ParserCallback {
36-
36+ /* ---------------------------------------------------------------------- *
37+ * フィールド
38+ * ---------------------------------------------------------------------- */
3739 // Tag毎の階層
3840 HashMap<HTML.Tag,Integer> tagMap = new HashMap<>();
3941
@@ -54,6 +56,9 @@
5456 // 属性データ
5557 AttributeData attrdata;
5658
59+ /* ---------------------------------------------------------------------- *
60+ * コンストラクタ
61+ * ---------------------------------------------------------------------- */
5762 protected HtmlParserCallback(SearchData skey) {
5863
5964 // キー情報展開
@@ -64,10 +69,16 @@
6469 sData = new ArrayList();
6570 }
6671
72+ /* ---------------------------------------------------------------------- *
73+ * Getter
74+ * ---------------------------------------------------------------------- */
6775 ArrayList getrtnData() {
6876 return this.sData;
6977 }
7078
79+ /* ---------------------------------------------------------------------- *
80+ * メソッド
81+ * ---------------------------------------------------------------------- */
7182 @Override
7283 public void handleStartTag(HTML.Tag tag, MutableAttributeSet attr, int pos){
7384 // Tag毎の階層を保持
--- trunk/HtmlTest2/src/WebScraping/core/SearchData.java (revision 131)
+++ trunk/HtmlTest2/src/WebScraping/core/SearchData.java (revision 132)
@@ -42,6 +42,28 @@
4242 /* ---------------------------------------------------------------------- *
4343 * static 処理
4444 * ---------------------------------------------------------------------- */
45+ public static class Context {
46+ public Class columnClass;
47+ public String columnName;
48+ public String columnNameJp;
49+
50+ public Context(Class columnClass, String columnName, String columnNameJp) {
51+ this.columnClass = columnClass;
52+ this.columnName = columnName;
53+ this.columnNameJp = columnNameJp;
54+ }
55+ }
56+
57+ public static final Context[] context = {
58+ /* 0 */ new Context(String.class , "item" , "項目名"),
59+ /* 1 */ new Context(String.class , "htmltag" , "タグ"),
60+ /* 2 */ new Context(String.class , "htmlid" , "ID"),
61+ /* 3 */ new Context(String.class , "htmlclass" , "クラス"),
62+ /* 4 */ new Context(String.class , "around" , "位置"),
63+ /* 5 */ new Context(String.class , "regexp" , "抽出条件")
64+ };
65+
66+ /* ---------------------------------------------------------------------- */
4567 private static ArrayList<SearchData> slist = new ArrayList<>();
4668
4769 public static void addSearchData(
@@ -162,5 +184,17 @@
162184 this.around = "";
163185 this.regexp = "";
164186 }
165-
187+
188+ public Object[] getObjData() { // packs this entry's six getter values into a new array, indexed 0-5
189+ Object[] obj = {
190+ /* 0 */ getitem(), // item name
191+ /* 1 */ getHtmltag(), // tag
192+ /* 2 */ getHtmlid(), // ID
193+ /* 3 */ getHtmlclass(), // class
194+ /* 4 */ getaround(), // position
195+ /* 5 */ getregexp() // extraction condition
196+ };
197+ return obj;
198+ }
199+
166200 }
--- trunk/HtmlTest2/src/WebScraping/core/HtmlParser.java (revision 131)
+++ trunk/HtmlTest2/src/WebScraping/core/HtmlParser.java (revision 132)
@@ -32,20 +32,25 @@
3232 import javax.swing.text.html.parser.ParserDelegator;
3333
3434 /**
35- *
35+ * HTMLパーサ.
3636 * @author kgto
3737 */
3838 public class HtmlParser {
39-
39+ /* ---------------------------------------------------------------------- *
40+ * フィールド
41+ * ---------------------------------------------------------------------- */
4042 URL url;
4143 String pageData;
4244 ArrayList sData;
4345
4446 // 作業ワーク
45- String htmltag;
46- String htmlid;
47- String htmlclass;
47+ private String htmltag;
48+ private String htmlid;
49+ private String htmlclass;
4850
51+ /* ---------------------------------------------------------------------- *
52+ * コンストラクタ
53+ * ---------------------------------------------------------------------- */
4954 public HtmlParser(URL UrlAdress) {
5055 DebugProcess.debuglog_set();
5156 this.url = UrlAdress;
@@ -68,15 +73,24 @@
6873 url = null;
6974 }
7075
76+ /* ---------------------------------------------------------------------- *
77+ * Getter
78+ * ---------------------------------------------------------------------- */
7179 public String getStringPageData() { // Getter: returns the pageData field as-is
7280 return pageData;
7381 }
7482
83+ /* ---------------------------------------------------------------------- *
84+ * Setter
85+ * ---------------------------------------------------------------------- */
7586 public void seturl(URL UrlAdress) { // replaces the stored URL, then calls getPageData()
7687 this.url = UrlAdress;
7788 getPageData(); // NOTE(review): presumably re-fetches the page for the new URL - getPageData() is not visible here, confirm
7889 }
7990
91+ /* ---------------------------------------------------------------------- *
92+ * メソッド
93+ * ---------------------------------------------------------------------- */
8094 public void seturl(String UrlAdress) {
8195 try {
8296 url = new URL(UrlAdress);