OSDN > Developer >

tuna_p > Chamber > testroom1 > 提交

tuna_p

お試し作業部屋

(Original repository, No fork origin)

R/O
SSH
HTTPS

提交

Commit MetaInfo

修订版	132 (tree)
时间	2015-03-31 11:44:57
作者	tuna_p

Log Message

branches/b3/WebScraping をマージ

更改概述

modified: trunk/HtmlTest2/data/Yahoo!天気.xml (diff)
modified: trunk/HtmlTest2/data/Yahoo!ファイナンス.xml (diff)
added: trunk/HtmlTest2/test/utility/test1/SearchDataRW.java (diff)
modified: trunk/HtmlTest2/test/utility/test1/SearchDataRWT01.java (diff)
modified: trunk/HtmlTest2/test/utility/test1/SearchDataRWT02.java (diff)
added: trunk/HtmlTest2/test/utility/test1/ConvertXml01.java (diff)
modified: trunk/HtmlTest2/test1.xml (diff)
delete: trunk/HtmlTest2/src/WebScraping/utility/SearchDataRW.java
added: trunk/HtmlTest2/src/WebScraping/utility/LibraryXml.java (diff)
added: trunk/HtmlTest2/src/WebScraping/utility/ScrapingXml.java (diff)
modified: trunk/HtmlTest2/src/WebScraping/utility/HtmlSearch.java (diff)
modified: trunk/HtmlTest2/src/WebScraping/core/AttributeData.java (diff)
modified: trunk/HtmlTest2/src/WebScraping/core/HtmlParserCallback.java (diff)
modified: trunk/HtmlTest2/src/WebScraping/core/SearchData.java (diff)
modified: trunk/HtmlTest2/src/WebScraping/core/HtmlParser.java (diff)
normal: trunk/HtmlTest2

差异

--- trunk/HtmlTest2/data/Yahoo!天気.xml (revision 131)

+++ trunk/HtmlTest2/data/Yahoo!天気.xml (revision 132)

		@@ -1,108 +1,71 @@
1	1	<?xml version="1.0" encoding="UTF-8" standalone="no"?>
2		-<searchdata>
3		- <url>http://weather.yahoo.co.jp/weather/</url>
4		- <searchlist>
5		- <item>天気０１</item>
6		- <htmltag>li</htmltag>
7		- <htmlid/>
8		- <htmlclass>point pt1400</htmlclass>
9		- <around/>
10		- <regexp/>
11		- </searchlist>
12		- <searchlist>
13		- <item>天気０２</item>
14		- <htmltag>li</htmltag>
15		- <htmlid/>
16		- <htmlclass>point pt1900</htmlclass>
17		- <around/>
18		- <regexp/>
19		- </searchlist>
20		- <searchlist>
21		- <item>天気０３</item>
22		- <htmltag>li</htmltag>
23		- <htmlid/>
24		- <htmlclass>point pt3410</htmlclass>
25		- <around/>
26		- <regexp/>
27		- </searchlist>
28		- <searchlist>
29		- <item>天気０４</item>
30		- <htmltag>li</htmltag>
31		- <htmlid/>
32		- <htmlclass>point pt4410</htmlclass>
33		- <around/>
34		- <regexp/>
35		- </searchlist>
36		- <searchlist>
37		- <item>天気０５</item>
38		- <htmltag>li</htmltag>
39		- <htmlid/>
40		- <htmlclass>point pt5110</htmlclass>
41		- <around/>
42		- <regexp/>
43		- </searchlist>
44		- <searchlist>
45		- <item>天気０６</item>
46		- <htmltag>li</htmltag>
47		- <htmlid/>
48		- <htmlclass>point pt5410</htmlclass>
49		- <around/>
50		- <regexp/>
51		- </searchlist>
52		- <searchlist>
53		- <item>天気０７</item>
54		- <htmltag>li</htmltag>
55		- <htmlid/>
56		- <htmlclass>point pt5610</htmlclass>
57		- <around/>
58		- <regexp/>
59		- </searchlist>
60		- <searchlist>
61		- <item>天気０８</item>
62		- <htmltag>li</htmltag>
63		- <htmlid/>
64		- <htmlclass>point pt6200</htmlclass>
65		- <around/>
66		- <regexp/>
67		- </searchlist>
68		- <searchlist>
69		- <item>天気０９</item>
70		- <htmltag>li</htmltag>
71		- <htmlid/>
72		- <htmlclass>point pt6710</htmlclass>
73		- <around/>
74		- <regexp/>
75		- </searchlist>
76		- <searchlist>
77		- <item>天気１０</item>
78		- <htmltag>li</htmltag>
79		- <htmlid/>
80		- <htmlclass>point pt7410</htmlclass>
81		- <around/>
82		- <regexp/>
83		- </searchlist>
84		- <searchlist>
85		- <item>天気１１</item>
86		- <htmltag>li</htmltag>
87		- <htmlid/>
88		- <htmlclass>point pt8210</htmlclass>
89		- <around/>
90		- <regexp/>
91		- </searchlist>
92		- <searchlist>
93		- <item>天気１２</item>
94		- <htmltag>li</htmltag>
95		- <htmlid/>
96		- <htmlclass>point pt8810</htmlclass>
97		- <around/>
98		- <regexp/>
99		- </searchlist>
100		- <searchlist>
101		- <item>天気１３</item>
102		- <htmltag>li</htmltag>
103		- <htmlid/>
104		- <htmlclass>point pt9110</htmlclass>
105		- <around/>
106		- <regexp/>
107		- </searchlist>
108		-</searchdata>
		\ No newline at end of file
	2	+<xmlcontainer>
	3	+<webscraping>
	4	+<url>http://weather.yahoo.co.jp/weather/</url>
	5	+<searchlist listNo="1">
	6	+<item>天気０１</item>
	7	+<htmltag>li</htmltag>
	8	+<htmlclass>point pt1400</htmlclass>
	9	+</searchlist>
	10	+<searchlist listNo="2">
	11	+<item>天気０２</item>
	12	+<htmltag>li</htmltag>
	13	+<htmlclass>point pt1900</htmlclass>
	14	+</searchlist>
	15	+<searchlist listNo="3">
	16	+<item>天気０３</item>
	17	+<htmltag>li</htmltag>
	18	+<htmlclass>point pt3410</htmlclass>
	19	+</searchlist>
	20	+<searchlist listNo="4">
	21	+<item>天気０４</item>
	22	+<htmltag>li</htmltag>
	23	+<htmlclass>point pt4410</htmlclass>
	24	+</searchlist>
	25	+<searchlist listNo="5">
	26	+<item>天気０５</item>
	27	+<htmltag>li</htmltag>
	28	+<htmlclass>point pt5110</htmlclass>
	29	+</searchlist>
	30	+<searchlist listNo="6">
	31	+<item>天気０６</item>
	32	+<htmltag>li</htmltag>
	33	+<htmlclass>point pt5410</htmlclass>
	34	+</searchlist>
	35	+<searchlist listNo="7">
	36	+<item>天気０７</item>
	37	+<htmltag>li</htmltag>
	38	+<htmlclass>point pt5610</htmlclass>
	39	+</searchlist>
	40	+<searchlist listNo="8">
	41	+<item>天気０８</item>
	42	+<htmltag>li</htmltag>
	43	+<htmlclass>point pt6200</htmlclass>
	44	+</searchlist>
	45	+<searchlist listNo="9">
	46	+<item>天気０９</item>
	47	+<htmltag>li</htmltag>
	48	+<htmlclass>point pt6710</htmlclass>
	49	+</searchlist>
	50	+<searchlist listNo="10">
	51	+<item>天気１０</item>
	52	+<htmltag>li</htmltag>
	53	+<htmlclass>point pt7410</htmlclass>
	54	+</searchlist>
	55	+<searchlist listNo="11">
	56	+<item>天気１１</item>
	57	+<htmltag>li</htmltag>
	58	+<htmlclass>point pt8210</htmlclass>
	59	+</searchlist>
	60	+<searchlist listNo="12">
	61	+<item>天気１２</item>
	62	+<htmltag>li</htmltag>
	63	+<htmlclass>point pt8810</htmlclass>
	64	+</searchlist>
	65	+<searchlist listNo="13">
	66	+<item>天気１３</item>
	67	+<htmltag>li</htmltag>
	68	+<htmlclass>point pt9110</htmlclass>
	69	+</searchlist>
	70	+</webscraping>
	71	+</xmlcontainer>

--- trunk/HtmlTest2/data/Yahoo!ファイナンス.xml (revision 131)

+++ trunk/HtmlTest2/data/Yahoo!ファイナンス.xml (revision 132)

		@@ -1 +1,217 @@
1		-<?xml version="1.0" encoding="UTF-8" standalone="no"?><searchdata><url>http://stocks.finance.yahoo.co.jp/stocks/detail/?code=9984.T</url><searchlist><item>銘柄コード</item><htmltag>dl</htmltag><htmlid/><htmlclass>stocksInfo clearFix</htmlclass><around/><regexp>(^\d{4})</regexp></searchlist><searchlist><item>カテゴリ</item><htmltag>div</htmltag><htmlid/><htmlclass>stockMainTabParts stockMainTabPartsCurrent</htmlclass><around/><regexp/></searchlist><searchlist><item>業種</item><htmltag>dd</htmltag><htmlid/><htmlclass>category yjSb</htmlclass><around/><regexp/></searchlist><searchlist><item>取得時間</item><htmltag>dd</htmltag><htmlid/><htmlclass>yjSb real</htmlclass><around/><regexp>^(.)\t</regexp></searchlist><searchlist><item>銘柄名</item><htmltag>th</htmltag><htmlid/><htmlclass>symbol</htmlclass><around/><regexp/></searchlist><searchlist><item>株価</item><htmltag>td</htmltag><htmlid/><htmlclass>stoksPrice</htmlclass><around/><regexp/></searchlist><searchlist><item>前日比</item><htmltag>td</htmltag><htmlid/><htmlclass>change</htmlclass><around/><regexp>\t(.)（.%）</regexp></searchlist><searchlist><item>前日比％</item><htmltag>td</htmltag><htmlid/><htmlclass>change</htmlclass><around/><regexp>\t.（(.)%）</regexp></searchlist><searchlist><item>前日終値</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi clearfix</htmlclass><around>0</around><regexp>^([,0-9]+)\t</regexp></searchlist><searchlist><item>始値</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi clearfix</htmlclass><around>1</around><regexp>^([,0-9]+\|-{3})\t</regexp></searchlist><searchlist><item>高値</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi clearfix</htmlclass><around>2</around><regexp>^([,0-9]+\|-{3})\t</regexp></searchlist><searchlist><item>安値</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi clearfix</htmlclass><around>3</around><regexp>^([,0-9]+\|-{3})\t</regexp></searchlist><searchlist><item>出来高</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi 
clearfix</htmlclass><around>4</around><regexp>^(.?)\t</regexp></searchlist><searchlist><item>売買代金</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi clearfix</htmlclass><around>5</around><regexp>^(.?)\t</regexp></searchlist><searchlist><item>値幅制限</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi clearfix</htmlclass><around>6</around><regexp>^(.?)\t</regexp></searchlist><searchlist><item>時価総額</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>0</around><regexp>^(.?)\t</regexp></searchlist><searchlist><item>発行済株式数</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>1</around><regexp>^(.?)\t</regexp></searchlist><searchlist><item>配当利回り</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>2</around><regexp>^(.?)\t</regexp></searchlist><searchlist><item>１株配当</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>3</around><regexp>^(.?)\t</regexp></searchlist><searchlist><item>PER</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>4</around><regexp>^(.?)\t</regexp></searchlist><searchlist><item>PBR</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>5</around><regexp>^(.?)\t</regexp></searchlist><searchlist><item>EPS</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>6</around><regexp>^(.?)\t</regexp></searchlist><searchlist><item>BPS</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>7</around><regexp>^(.?)\t</regexp></searchlist><searchlist><item>最低購入代金</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>8</around><regexp>^(.?)\t</regexp></searchlist><searchlist><item>単元株数</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS 
clearfix</htmlclass><around>9</around><regexp>^(.?)\t</regexp></searchlist><searchlist><item>年初来高値</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>10</around><regexp>^(.?)\t</regexp></searchlist><searchlist><item>年初来安値</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>11</around><regexp>^(.?)\t</regexp></searchlist><searchlist><item>信用買残</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>12</around><regexp>^(.?)\t</regexp></searchlist><searchlist><item>信用買残前週比</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>13</around><regexp>^(.?)\t</regexp></searchlist><searchlist><item>信用売残</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>14</around><regexp>^(.?)\t</regexp></searchlist><searchlist><item>信用売残前週比</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>15</around><regexp>^(.?)\t</regexp></searchlist><searchlist><item>貸借倍率</item><htmltag>div</htmltag><htmlid/><htmlclass>yjMS clearfix</htmlclass><around/><regexp>^(.*?)\t</regexp></searchlist></searchdata>
		\ No newline at end of file
	1	+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
	2	+<xmlcontainer>
	3	+<webscraping>
	4	+<url>http://stocks.finance.yahoo.co.jp/stocks/detail/?code=5020.T</url>
	5	+<searchlist listNo="1">
	6	+<item>銘柄コード</item>
	7	+<htmltag>dl</htmltag>
	8	+<htmlclass>stocksInfo clearFix</htmlclass>
	9	+<regexp>(^\d{4})</regexp>
	10	+</searchlist>
	11	+<searchlist listNo="2">
	12	+<item>カテゴリ</item>
	13	+<htmltag>div</htmltag>
	14	+<htmlclass>stockMainTabParts stockMainTabPartsCurrent</htmlclass>
	15	+</searchlist>
	16	+<searchlist listNo="3">
	17	+<item>業種</item>
	18	+<htmltag>dd</htmltag>
	19	+<htmlclass>category yjSb</htmlclass>
	20	+</searchlist>
	21	+<searchlist listNo="4">
	22	+<item>取得時間</item>
	23	+<htmltag>dd</htmltag>
	24	+<htmlclass>yjSb real</htmlclass>
	25	+<regexp>^(.*)\t</regexp>
	26	+</searchlist>
	27	+<searchlist listNo="5">
	28	+<item>銘柄名</item>
	29	+<htmltag>th</htmltag>
	30	+<htmlclass>symbol</htmlclass>
	31	+</searchlist>
	32	+<searchlist listNo="6">
	33	+<item>株価</item>
	34	+<htmltag>td</htmltag>
	35	+<htmlclass>stoksPrice</htmlclass>
	36	+</searchlist>
	37	+<searchlist listNo="7">
	38	+<item>前日比</item>
	39	+<htmltag>td</htmltag>
	40	+<htmlclass>change</htmlclass>
	41	+<regexp>\t(.)（.%）</regexp>
	42	+</searchlist>
	43	+<searchlist listNo="8">
	44	+<item>前日比％</item>
	45	+<htmltag>td</htmltag>
	46	+<htmlclass>change</htmlclass>
	47	+<regexp>\t.（(.)%）</regexp>
	48	+</searchlist>
	49	+<searchlist listNo="9">
	50	+<item>前日終値</item>
	51	+<htmltag>div</htmltag>
	52	+<htmlclass>lineFi clearfix</htmlclass>
	53	+<around>0</around>
	54	+<regexp>^([,.0-9]+)\t</regexp>
	55	+</searchlist>
	56	+<searchlist listNo="10">
	57	+<item>始値</item>
	58	+<htmltag>div</htmltag>
	59	+<htmlclass>lineFi clearfix</htmlclass>
	60	+<around>1</around>
	61	+<regexp>^([,.0-9]+\|-{3})\t</regexp>
	62	+</searchlist>
	63	+<searchlist listNo="11">
	64	+<item>高値</item>
	65	+<htmltag>div</htmltag>
	66	+<htmlclass>lineFi clearfix</htmlclass>
	67	+<around>2</around>
	68	+<regexp>^((ストップ高\t\|ストップ安\t)?[,0-9]+\|-{3})</regexp>
	69	+</searchlist>
	70	+<searchlist listNo="12">
	71	+<item>安値</item>
	72	+<htmltag>div</htmltag>
	73	+<htmlclass>lineFi clearfix</htmlclass>
	74	+<around>3</around>
	75	+<regexp>^((ストップ高\t\|ストップ安\t)?[,0-9]+\|-{3})</regexp>
	76	+</searchlist>
	77	+<searchlist listNo="13">
	78	+<item>出来高</item>
	79	+<htmltag>div</htmltag>
	80	+<htmlclass>lineFi clearfix</htmlclass>
	81	+<around>4</around>
	82	+<regexp>^(.*?)\t</regexp>
	83	+</searchlist>
	84	+<searchlist listNo="14">
	85	+<item>売買代金</item>
	86	+<htmltag>div</htmltag>
	87	+<htmlclass>lineFi clearfix</htmlclass>
	88	+<around>5</around>
	89	+<regexp>^(.*?)\t</regexp>
	90	+</searchlist>
	91	+<searchlist listNo="15">
	92	+<item>値幅制限</item>
	93	+<htmltag>div</htmltag>
	94	+<htmlclass>lineFi clearfix</htmlclass>
	95	+<around>6</around>
	96	+<regexp>^(.*?)\t</regexp>
	97	+</searchlist>
	98	+<searchlist listNo="16">
	99	+<item>時価総額</item>
	100	+<htmltag>div</htmltag>
	101	+<htmlclass>lineFi yjMS clearfix</htmlclass>
	102	+<around>0</around>
	103	+<regexp>^(.*?)\t</regexp>
	104	+</searchlist>
	105	+<searchlist listNo="17">
	106	+<item>発行済株式数</item>
	107	+<htmltag>div</htmltag>
	108	+<htmlclass>lineFi yjMS clearfix</htmlclass>
	109	+<around>1</around>
	110	+<regexp>^(.*?)\t</regexp>
	111	+</searchlist>
	112	+<searchlist listNo="18">
	113	+<item>配当利回り</item>
	114	+<htmltag>div</htmltag>
	115	+<htmlclass>lineFi yjMS clearfix</htmlclass>
	116	+<around>2</around>
	117	+<regexp>^(.*?)\t</regexp>
	118	+</searchlist>
	119	+<searchlist listNo="19">
	120	+<item>１株配当</item>
	121	+<htmltag>div</htmltag>
	122	+<htmlclass>lineFi yjMS clearfix</htmlclass>
	123	+<around>3</around>
	124	+<regexp>^(.*?)\t</regexp>
	125	+</searchlist>
	126	+<searchlist listNo="20">
	127	+<item>PER</item>
	128	+<htmltag>div</htmltag>
	129	+<htmlclass>lineFi yjMS clearfix</htmlclass>
	130	+<around>4</around>
	131	+<regexp>^(.*?)\t</regexp>
	132	+</searchlist>
	133	+<searchlist listNo="21">
	134	+<item>PBR</item>
	135	+<htmltag>div</htmltag>
	136	+<htmlclass>lineFi yjMS clearfix</htmlclass>
	137	+<around>5</around>
	138	+<regexp>^(.*?)\t</regexp>
	139	+</searchlist>
	140	+<searchlist listNo="22">
	141	+<item>EPS</item>
	142	+<htmltag>div</htmltag>
	143	+<htmlclass>lineFi yjMS clearfix</htmlclass>
	144	+<around>6</around>
	145	+<regexp>^(.*?)\t</regexp>
	146	+</searchlist>
	147	+<searchlist listNo="23">
	148	+<item>BPS</item>
	149	+<htmltag>div</htmltag>
	150	+<htmlclass>lineFi yjMS clearfix</htmlclass>
	151	+<around>7</around>
	152	+<regexp>^(.*?)\t</regexp>
	153	+</searchlist>
	154	+<searchlist listNo="24">
	155	+<item>最低購入代金</item>
	156	+<htmltag>div</htmltag>
	157	+<htmlclass>lineFi yjMS clearfix</htmlclass>
	158	+<around>8</around>
	159	+<regexp>^(.*?)\t</regexp>
	160	+</searchlist>
	161	+<searchlist listNo="25">
	162	+<item>単元株数</item>
	163	+<htmltag>div</htmltag>
	164	+<htmlclass>lineFi yjMS clearfix</htmlclass>
	165	+<around>9</around>
	166	+<regexp>^(.*?)\t</regexp>
	167	+</searchlist>
	168	+<searchlist listNo="26">
	169	+<item>年初来高値</item>
	170	+<htmltag>div</htmltag>
	171	+<htmlclass>lineFi yjMS clearfix</htmlclass>
	172	+<around>10</around>
	173	+<regexp>^(.*?)\t</regexp>
	174	+</searchlist>
	175	+<searchlist listNo="27">
	176	+<item>年初来安値</item>
	177	+<htmltag>div</htmltag>
	178	+<htmlclass>lineFi yjMS clearfix</htmlclass>
	179	+<around>11</around>
	180	+<regexp>^(.*?)\t</regexp>
	181	+</searchlist>
	182	+<searchlist listNo="28">
	183	+<item>信用買残</item>
	184	+<htmltag>div</htmltag>
	185	+<htmlclass>lineFi yjMS clearfix</htmlclass>
	186	+<around>12</around>
	187	+<regexp>^(.*?)\t</regexp>
	188	+</searchlist>
	189	+<searchlist listNo="29">
	190	+<item>信用買残前週比</item>
	191	+<htmltag>div</htmltag>
	192	+<htmlclass>lineFi yjMS clearfix</htmlclass>
	193	+<around>13</around>
	194	+<regexp>^(.*?)\t</regexp>
	195	+</searchlist>
	196	+<searchlist listNo="30">
	197	+<item>信用売残</item>
	198	+<htmltag>div</htmltag>
	199	+<htmlclass>lineFi yjMS clearfix</htmlclass>
	200	+<around>14</around>
	201	+<regexp>^(.*?)\t</regexp>
	202	+</searchlist>
	203	+<searchlist listNo="31">
	204	+<item>信用売残前週比</item>
	205	+<htmltag>div</htmltag>
	206	+<htmlclass>lineFi yjMS clearfix</htmlclass>
	207	+<around>15</around>
	208	+<regexp>^(.*?)\t</regexp>
	209	+</searchlist>
	210	+<searchlist listNo="32">
	211	+<item>貸借倍率</item>
	212	+<htmltag>div</htmltag>
	213	+<htmlclass>yjMS clearfix</htmlclass>
	214	+<regexp>^(.*?)\t</regexp>
	215	+</searchlist>
	216	+</webscraping>
	217	+</xmlcontainer>

--- trunk/HtmlTest2/test/utility/test1/SearchDataRW.java (nonexistent)

+++ trunk/HtmlTest2/test/utility/test1/SearchDataRW.java (revision 132)

		@@ -0,0 +1,314 @@
	1	+/*
	2	+ * Copyright (C) 2014 kgto.
	3	+ *
	4	+ * This library is free software; you can redistribute it and/or
	5	+ * modify it under the terms of the GNU Lesser General Public
	6	+ * License as published by the Free Software Foundation; either
	7	+ * version 2.1 of the License, or (at your option) any later version.
	8	+ *
	9	+ * This library is distributed in the hope that it will be useful,
	10	+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	12	+ * Lesser General Public License for more details.
	13	+ *
	14	+ * You should have received a copy of the GNU Lesser General Public
	15	+ * License along with this library; if not, write to the Free Software
	16	+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
	17	+ * MA 02110-1301 USA
	18	+ */
	19	+/*
	20	+ * $Id$
	21	+ */
	22	+
	23	+package utility.test1;
	24	+
	25	+import webScraping.core.SearchData;
	26	+import java.io.File;
	27	+import java.io.FileNotFoundException;
	28	+import java.io.FileOutputStream;
	29	+import java.io.IOException;
	30	+import java.util.logging.Level;
	31	+import java.util.logging.Logger;
	32	+import javax.xml.parsers.DocumentBuilder;
	33	+import javax.xml.parsers.DocumentBuilderFactory;
	34	+import javax.xml.parsers.ParserConfigurationException;
	35	+import javax.xml.transform.Transformer;
	36	+import javax.xml.transform.TransformerConfigurationException;
	37	+import javax.xml.transform.TransformerException;
	38	+import javax.xml.transform.TransformerFactory;
	39	+import javax.xml.transform.dom.DOMSource;
	40	+import javax.xml.transform.stream.StreamResult;
	41	+import org.w3c.dom.DOMImplementation;
	42	+import org.w3c.dom.Document;
	43	+import org.w3c.dom.Element;
	44	+import org.w3c.dom.Node;
	45	+import org.w3c.dom.NodeList;
	46	+import org.xml.sax.SAXException;
	47	+
	48	+/**
	49	+ * 検索データ読込・保存.
	50	+ * @author kgto
	51	+ */
	52	+public class SearchDataRW {
	53	+ /* ---------------------------------------------------------------------- *
	54	+ * フィールド
	55	+ * ---------------------------------------------------------------------- */
	56	+ private String UrlAdress;
	57	+
	58	+ DocumentBuilder builder;
	59	+ public Document document;
	60	+ Element root;
	61	+
	62	+ /* ---------------------------------------------------------------------- *
	63	+ * コンストラクタ
	64	+ * ---------------------------------------------------------------------- */
	65	+ public SearchDataRW() {
	66	+ try {
	67	+ DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
	68	+ builder = factory.newDocumentBuilder();
	69	+
	70	+ } catch (ParserConfigurationException ex) {
	71	+ Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
	72	+ }
	73	+ }
	74	+
	75	+ /* ---------------------------------------------------------------------- *
	76	+ * Setter
	77	+ * ---------------------------------------------------------------------- */
	78	+ public void seturl(String UrlAdress) {
	79	+ this.UrlAdress = UrlAdress;
	80	+ }
	81	+
	82	+ /* ---------------------------------------------------------------------- *
	83	+ * Getter
	84	+ * ---------------------------------------------------------------------- */
	85	+ public String geturl() {
	86	+ return UrlAdress;
	87	+ }
	88	+
	89	+ /* ---------------------------------------------------------------------- *
	90	+ * メソッド
	91	+ * ---------------------------------------------------------------------- */
	92	+ /**
	93	+ * 保存.
	94	+ * @param file
	95	+ */
	96	+ public void save(File file) {
	97	+ saveUrl(UrlAdress);
	98	+ saveSearchList();
	99	+ write(file);
	100	+ }
	101	+
	102	+ /**
	103	+ * 読込.
	104	+ * @param file
	105	+ */
	106	+ public void load(File file) {
	107	+ read(file);
	108	+ loadUrl();
	109	+ loadSearchList();
	110	+ }
	111	+
	112	+ /* ---------------------------------------------------------------------- */
	113	+
	114	+ void loadUrl() {
	115	+ NodeList nodelist = root.getElementsByTagName("url");
	116	+ Node node = nodelist.item(0);
	117	+ UrlAdress = node.getFirstChild().getNodeValue();
	118	+ }
	119	+
	120	+ public void loadSearchList() {
	121	+ SearchData.clear();
	122	+
	123	+ NodeList nodelist = root.getElementsByTagName("searchlist");
	124	+ for(int i = 0; i < nodelist.getLength(); i++) {
	125	+ Node childnode = nodelist.item(i);
	126	+
	127	+ boolean sdatflg = false;
	128	+ SearchData sdat = new SearchData();
	129	+ for (Node child = childnode.getFirstChild(); child != null; child = child.getNextSibling()) {
	130	+ if(child.getNodeType() == Node.ELEMENT_NODE) {
	131	+ String tag = child.getNodeName();
	132	+ String rtn = "";
	133	+ if(child.getFirstChild() != null) {
	134	+ rtn = child.getFirstChild().getNodeValue();
	135	+ }
	136	+ switch (tag) {
	137	+ case "item" :
	138	+ sdat.setitem(rtn);
	139	+ sdatflg = true;
	140	+ break;
	141	+ case "htmltag" :
	142	+ sdat.setHtmltag(rtn);
	143	+ sdatflg = true;
	144	+ break;
	145	+ case "htmlid" :
	146	+ sdat.setHtmlid(rtn);
	147	+ sdatflg = true;
	148	+ break;
	149	+ case "htmlclass" :
	150	+ sdat.setHtmlclass(rtn);
	151	+ sdatflg = true;
	152	+ break;
	153	+ case "around" :
	154	+ sdat.setaround(rtn);
	155	+ sdatflg = true;
	156	+ break;
	157	+ case "regexp" :
	158	+ sdat.setregexp(rtn);
	159	+ sdatflg = true;
	160	+ break;
	161	+ }
	162	+ }
	163	+ }
	164	+ if(sdatflg) SearchData.add(sdat);
	165	+ }
	166	+ }
	167	+
	168	+ public String loadMsg404() {
	169	+ StringBuilder strbuf = new StringBuilder();
	170	+ NodeList nodelist = root.getElementsByTagName("msg404");
	171	+ for(int i = 0; i < nodelist.getLength(); i++) {
	172	+ Node childnode = nodelist.item(i);
	173	+ String str = childnode.getFirstChild().getNodeValue();
	174	+ if(strbuf.length() > 0) {
	175	+ strbuf.append("\n");
	176	+ }
	177	+ strbuf.append(str);
	178	+ }
	179	+ return strbuf.toString();
	180	+ }
	181	+
	182	+ public Element loadElement(String elementTagName) {
	183	+ NodeList nodelist = root.getElementsByTagName(elementTagName);
	184	+ Element element = (Element)nodelist.item(0);
	185	+
	186	+ return element;
	187	+ }
	188	+
	189	+ /* ---------------------------------------------------------------------- */
	190	+
	191	+ void saveUrl(String urladdress) {
	192	+ checkdoc();
	193	+ removeElement("url"); // 既にElementが存在してた場合、一度削除
	194	+
	195	+ Element url = document.createElement("url");
	196	+ url.appendChild(document.createTextNode(urladdress));
	197	+ root.appendChild(url);
	198	+ }
	199	+
	200	+ void saveSearchList() {
	201	+ checkdoc();
	202	+ removeElement("searchlist"); // 既にElementが存在してた場合、一度削除
	203	+
	204	+ int count = 0;
	205	+ for(int i = 0; i < SearchData.size(); i++) {
	206	+ SearchData sdat = SearchData.get(i);
	207	+
	208	+ Element cslist = document.createElement("searchlist");
	209	+ cslist.setAttribute("listNo", String.valueOf(++count));
	210	+
	211	+ addChild(cslist, "item", sdat.getitem());
	212	+ addChild(cslist, "htmltag", sdat.getHtmltag());
	213	+ addChild(cslist, "htmlid", sdat.getHtmlid());
	214	+ addChild(cslist, "htmlclass", sdat.getHtmlclass());
	215	+ addChild(cslist, "around", sdat.getaround());
	216	+ addChild(cslist, "regexp", sdat.getregexp());
	217	+
	218	+ root.appendChild(cslist);
	219	+ }
	220	+ }
	221	+
	222	+ void saveMsg404(String msg) {
	223	+ checkdoc();
	224	+ removeElement("msg404"); // 既にElementが存在してた場合、一度削除
	225	+
	226	+ String[] msgs = msg.split("\n");
	227	+ int count = 0;
	228	+ for(String msgOne : msgs) {
	229	+ Element msgElement = document.createElement("msg404");
	230	+ msgElement.setAttribute("No", String.valueOf(++count));
	231	+ msgElement.appendChild(document.createTextNode(msgOne));
	232	+
	233	+ root.appendChild(msgElement);
	234	+ }
	235	+ }
	236	+
	237	+ public void saveElement(Element element) {
	238	+ checkdoc();
	239	+ removeElement(element.getTagName()); // 既にElementが存在してた場合、一度削除
	240	+
	241	+ root.appendChild(element);
	242	+ }
	243	+
	244	+ /* ---------------------------------------------------------------------- */
	245	+
	246	+ private void addChild(Element cslist, String keyword, String data) {
	247	+ if(!data.isEmpty()) {
	248	+ Element element = document.createElement(keyword);
	249	+ element.appendChild(document.createTextNode(data));
	250	+ cslist.appendChild(element);
	251	+ }
	252	+ }
	253	+
	254	+ private void removeElement(String elementTagName) {
	255	+ int nodeSize;
	256	+ do {
	257	+ NodeList nodelist = document.getElementsByTagName(elementTagName);
	258	+ nodeSize = nodelist.getLength();
	259	+ for(int i = 0; i < nodelist.getLength(); i++) {
	260	+ Node node = nodelist.item(i);
	261	+ root.removeChild(node);
	262	+ }
	263	+ } while(nodeSize > 0);
	264	+ }
	265	+
	266	+ /**
	267	+ * ドキュメントチェック.
	268	+ * 新規の場合やＸＭＬファイルの読込みが行われていない状態時、新たにルートエレメントを作成する。
	269	+ * 既読の場合、ルートエレメントの取得を行う。
	270	+ */
	271	+ public void checkdoc() {
	272	+ if(document == null) {
	273	+ DOMImplementation domImpl = builder.getDOMImplementation();
	274	+ document = domImpl.createDocument("","searchdata",null);
	275	+ }
	276	+ root = document.getDocumentElement();
	277	+ }
	278	+
	279	+ /**
	280	+ * XML読込み.
	281	+ * @param file
	282	+ */
	283	+ public void read(File file) {
	284	+ try {
	285	+ document = builder.parse(file);
	286	+ root = document.getDocumentElement();
	287	+
	288	+ } catch (SAXException \| IOException ex) {
	289	+ Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
	290	+ }
	291	+ }
	292	+
	293	+ /**
	294	+ * XML書込み.
	295	+ * @param file
	296	+ */
	297	+ public void write(File file) {
	298	+ try {
	299	+ TransformerFactory transFactory = TransformerFactory.newInstance();
	300	+ Transformer transformer = transFactory.newTransformer();
	301	+
	302	+ DOMSource source = new DOMSource(document);
	303	+ FileOutputStream os = new FileOutputStream(file);
	304	+ StreamResult result = new StreamResult(os);
	305	+ transformer.transform(source, result);
	306	+
	307	+ } catch (TransformerConfigurationException ex) {
	308	+ Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
	309	+ } catch (FileNotFoundException \| TransformerException ex) {
	310	+ Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
	311	+ }
	312	+ }
	313	+
	314	+}

Added: svn:keywords

## -0,0 +1 ##

+Id

\ No newline at end of property

--- trunk/HtmlTest2/test/utility/test1/SearchDataRWT01.java (revision 131)

+++ trunk/HtmlTest2/test/utility/test1/SearchDataRWT01.java (revision 132)

		@@ -1,7 +1,6 @@
1	1
2	2	package utility.test1;
3	3
4		-import webScraping.utility.SearchDataRW;
5	4	import java.io.File;
6	5	import java.lang.reflect.InvocationTargetException;
7	6	import java.lang.reflect.Method;

--- trunk/HtmlTest2/test/utility/test1/SearchDataRWT02.java (revision 131)

+++ trunk/HtmlTest2/test/utility/test1/SearchDataRWT02.java (revision 132)

		@@ -1,7 +1,6 @@
1	1
2	2	package utility.test1;
3	3
4		-import webScraping.utility.SearchDataRW;
5	4	import java.io.File;
6	5	import webScraping.core.SearchData;
7	6

--- trunk/HtmlTest2/test/utility/test1/ConvertXml01.java (nonexistent)

+++ trunk/HtmlTest2/test/utility/test1/ConvertXml01.java (revision 132)

		@@ -0,0 +1,42 @@
	1	+
	2	+package utility.test1;
	3	+
	4	+import java.io.File;
	5	+import webScraping.utility.ScrapingXml;
	6	+
	7	+/**
	8	+ * XMLコンバータ
	9	+ * 旧:SearchDataRW.java → 新:ScrapingXml.java
	10	+ * @author kgto
	11	+ */
	12	+public class ConvertXml01 {
	13	+
	14	+ private String UrlAdress;
	15	+ File file = new File("test1.xml");
	16	+
	17	+ /**
	18	+ * @param args the command line arguments
	19	+ */
	20	+ public static void main(String[] args) {
	21	+ ConvertXml01 conv = new ConvertXml01();
	22	+
	23	+ conv.readold();
	24	+ conv.writenew();
	25	+
	26	+ System.exit(0);
	27	+ }
	28	+
	29	+ void readold() {
	30	+ SearchDataRW sdatrw = new SearchDataRW();
	31	+ sdatrw.load(file);
	32	+ UrlAdress = sdatrw.geturl();
	33	+ }
	34	+
	35	+ void writenew() {
	36	+ ScrapingXml xmlwriter = new ScrapingXml();
	37	+ xmlwriter.setTestUrl(UrlAdress);
	38	+ xmlwriter.setSdata();
	39	+ xmlwriter.save(file);
	40	+ }
	41	+
	42	+}

--- trunk/HtmlTest2/test1.xml (revision 131)

+++ trunk/HtmlTest2/test1.xml (revision 132)

		@@ -1,16 +1,71 @@
1		-<?xml version="1.0" encoding="UTF-8" standalone="no"?><searchdata>
2		-
3		-
4		-
5		-
6		-
7		-
8		-
9		-
10		-
11		-
12		-
13		-
14		-
15		-
16		-<url>http://weather.yahoo.co.jp/weather/</url><searchlist listNo="1"><item>天気０１</item><htmltag>li</htmltag><htmlclass>point pt1400</htmlclass></searchlist><searchlist listNo="2"><item>天気０２</item><htmltag>li</htmltag><htmlclass>point pt1900</htmlclass></searchlist><searchlist listNo="3"><item>天気０３</item><htmltag>li</htmltag><htmlclass>point pt3410</htmlclass></searchlist><searchlist listNo="4"><item>天気０４</item><htmltag>li</htmltag><htmlclass>point pt4410</htmlclass></searchlist><searchlist listNo="5"><item>天気０５</item><htmltag>li</htmltag><htmlclass>point pt5110</htmlclass></searchlist><searchlist listNo="6"><item>天気０６</item><htmltag>li</htmltag><htmlclass>point pt5410</htmlclass></searchlist><searchlist listNo="7"><item>天気０７</item><htmltag>li</htmltag><htmlclass>point pt5610</htmlclass></searchlist><searchlist listNo="8"><item>天気０８</item><htmltag>li</htmltag><htmlclass>point pt6200</htmlclass></searchlist><searchlist listNo="9"><item>天気０９</item><htmltag>li</htmltag><htmlclass>point pt6710</htmlclass></searchlist><searchlist listNo="10"><item>天気１０</item><htmltag>li</htmltag><htmlclass>point pt7410</htmlclass></searchlist><searchlist listNo="11"><item>天気１１</item><htmltag>li</htmltag><htmlclass>point pt8210</htmlclass></searchlist><searchlist listNo="12"><item>天気１２</item><htmltag>li</htmltag><htmlclass>point pt8810</htmlclass></searchlist><searchlist listNo="13"><item>天気１３</item><htmltag>li</htmltag><htmlclass>point pt9110</htmlclass></searchlist></searchdata>
		\ No newline at end of file
	1	+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
	2	+<xmlcontainer>
	3	+<webscraping>
	4	+<url>http://weather.yahoo.co.jp/weather/</url>
	5	+<searchlist listNo="1">
	6	+<item>天気０１</item>
	7	+<htmltag>li</htmltag>
	8	+<htmlclass>point pt1400</htmlclass>
	9	+</searchlist>
	10	+<searchlist listNo="2">
	11	+<item>天気０２</item>
	12	+<htmltag>li</htmltag>
	13	+<htmlclass>point pt1900</htmlclass>
	14	+</searchlist>
	15	+<searchlist listNo="3">
	16	+<item>天気０３</item>
	17	+<htmltag>li</htmltag>
	18	+<htmlclass>point pt3410</htmlclass>
	19	+</searchlist>
	20	+<searchlist listNo="4">
	21	+<item>天気０４</item>
	22	+<htmltag>li</htmltag>
	23	+<htmlclass>point pt4410</htmlclass>
	24	+</searchlist>
	25	+<searchlist listNo="5">
	26	+<item>天気０５</item>
	27	+<htmltag>li</htmltag>
	28	+<htmlclass>point pt5110</htmlclass>
	29	+</searchlist>
	30	+<searchlist listNo="6">
	31	+<item>天気０６</item>
	32	+<htmltag>li</htmltag>
	33	+<htmlclass>point pt5410</htmlclass>
	34	+</searchlist>
	35	+<searchlist listNo="7">
	36	+<item>天気０７</item>
	37	+<htmltag>li</htmltag>
	38	+<htmlclass>point pt5610</htmlclass>
	39	+</searchlist>
	40	+<searchlist listNo="8">
	41	+<item>天気０８</item>
	42	+<htmltag>li</htmltag>
	43	+<htmlclass>point pt6200</htmlclass>
	44	+</searchlist>
	45	+<searchlist listNo="9">
	46	+<item>天気０９</item>
	47	+<htmltag>li</htmltag>
	48	+<htmlclass>point pt6710</htmlclass>
	49	+</searchlist>
	50	+<searchlist listNo="10">
	51	+<item>天気１０</item>
	52	+<htmltag>li</htmltag>
	53	+<htmlclass>point pt7410</htmlclass>
	54	+</searchlist>
	55	+<searchlist listNo="11">
	56	+<item>天気１１</item>
	57	+<htmltag>li</htmltag>
	58	+<htmlclass>point pt8210</htmlclass>
	59	+</searchlist>
	60	+<searchlist listNo="12">
	61	+<item>天気１２</item>
	62	+<htmltag>li</htmltag>
	63	+<htmlclass>point pt8810</htmlclass>
	64	+</searchlist>
	65	+<searchlist listNo="13">
	66	+<item>天気１３</item>
	67	+<htmltag>li</htmltag>
	68	+<htmlclass>point pt9110</htmlclass>
	69	+</searchlist>
	70	+</webscraping>
	71	+</xmlcontainer>

--- trunk/HtmlTest2/src/WebScraping/utility/SearchDataRW.java (revision 131)

+++ trunk/HtmlTest2/src/WebScraping/utility/SearchDataRW.java (nonexistent)

		@@ -1,547 +0,0 @@
1		-/*
2		- * Copyright (C) 2014 kgto.
3		- *
4		- * This library is free software; you can redistribute it and/or
5		- * modify it under the terms of the GNU Lesser General Public
6		- * License as published by the Free Software Foundation; either
7		- * version 2.1 of the License, or (at your option) any later version.
8		- *
9		- * This library is distributed in the hope that it will be useful,
10		- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11		- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12		- * Lesser General Public License for more details.
13		- *
14		- * You should have received a copy of the GNU Lesser General Public
15		- * License along with this library; if not, write to the Free Software
16		- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17		- * MA 02110-1301 USA
18		- */
19		-/*
20		- * $Id$
21		- */
22		-
23		-package webScraping.utility;
24		-
25		-import webScraping.core.SearchData;
26		-import java.io.BufferedReader;
27		-import java.io.BufferedWriter;
28		-import java.io.File;
29		-import java.io.FileInputStream;
30		-import java.io.FileNotFoundException;
31		-import java.io.FileOutputStream;
32		-import java.io.IOException;
33		-import java.io.InputStreamReader;
34		-import java.io.OutputStreamWriter;
35		-import java.util.ArrayList;
36		-import java.util.logging.Level;
37		-import java.util.logging.Logger;
38		-import javax.xml.parsers.DocumentBuilder;
39		-import javax.xml.parsers.DocumentBuilderFactory;
40		-import javax.xml.parsers.ParserConfigurationException;
41		-import javax.xml.transform.Transformer;
42		-import javax.xml.transform.TransformerConfigurationException;
43		-import javax.xml.transform.TransformerException;
44		-import javax.xml.transform.TransformerFactory;
45		-import javax.xml.transform.dom.DOMSource;
46		-import javax.xml.transform.stream.StreamResult;
47		-import org.w3c.dom.DOMImplementation;
48		-import org.w3c.dom.Document;
49		-import org.w3c.dom.Element;
50		-import org.w3c.dom.Node;
51		-import org.w3c.dom.NodeList;
52		-import org.xml.sax.SAXException;
53		-
54		-/**
55		- *
56		- * @author kgto
57		- */
58		-public class SearchDataRW {
59		-
60		- DocumentBuilder builder;
61		- public Document document;
62		- Element root;
63		-
64		- private final String splitchar = "\t";
65		-
66		- private String UrlAdress;
67		- private ArrayList<SearchData> slist = new ArrayList<>();
68		-
69		- public SearchDataRW() {
70		- try {
71		- DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
72		- builder = factory.newDocumentBuilder();
73		-
74		- } catch (ParserConfigurationException ex) {
75		- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
76		- }
77		- }
78		-
79		- public void seturl(String UrlAdress) {
80		- this.UrlAdress = UrlAdress;
81		- }
82		-
83		- public void setslist(ArrayList slist) {
84		- this.slist = slist;
85		- }
86		-
87		- public String geturl() {
88		- return UrlAdress;
89		- }
90		-
91		- public ArrayList getslist() {
92		- return slist;
93		- }
94		-
95		- /**
96		- * 保存.
97		- * @param file
98		- */
99		- public void save(File file) {
100		- //saveCsv(file);
101		- //saveXml(file);
102		-
103		- saveUrl(UrlAdress);
104		- saveSearchList(slist);
105		- write(file);
106		- }
107		-
108		- /**
109		- * 読込.
110		- * @param file
111		- */
112		- public void load(File file) {
113		- //loadCsv(file);
114		- //loadXml(file);
115		-
116		- read(file);
117		- loadUrl();
118		- loadSearchList();
119		- }
120		-
121		- /* ---------------------------------------------------------------------- */
122		- /**
123		- * 保存(CSV形式).
124		- * @param file
125		- */
126		- public void saveCsv(File file) {
127		- BufferedWriter bufferedwriter = null;
128		- try {
129		- //空のファイルを作成
130		- file.createNewFile();
131		- FileOutputStream fileoutputstream = new FileOutputStream(file);
132		- OutputStreamWriter outputstreamwriter = new OutputStreamWriter(fileoutputstream, "UTF-8");
133		- bufferedwriter = new BufferedWriter(outputstreamwriter);
134		-
135		- // ＵＲＬ
136		- bufferedwriter.write(UrlAdress);
137		- bufferedwriter.write("\n");
138		- // 検索情報
139		- for(Object slist1 : slist) {
140		- SearchData sdat = (SearchData)slist1;
141		- //
142		- StringBuilder str = new StringBuilder();
143		- str.append(sdat.getitem()).append(splitchar);
144		- str.append(sdat.getHtmltag()).append(splitchar);
145		- str.append(sdat.getHtmlid()).append(splitchar);
146		- str.append(sdat.getHtmlclass()).append(splitchar);
147		- str.append(sdat.getaround()).append(splitchar);
148		- str.append(sdat.getregexp()).append("\n");
149		- // 書込み
150		- bufferedwriter.write(str.toString());
151		- }
152		-
153		- } catch (IOException ex) {
154		- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
155		- } finally {
156		- try {
157		- if(bufferedwriter != null) {
158		- bufferedwriter.close();
159		- }
160		-
161		- } catch (IOException ex) {
162		- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
163		- }
164		- }
165		- }
166		-
167		- /**
168		- * 読込(CSV形式).
169		- * @param file
170		- */
171		- public void loadCsv(File file) {
172		- slist = new ArrayList();
173		-
174		- BufferedReader bufferedreader = null;
175		- try {
176		- FileInputStream fileinputstream = new FileInputStream(file);
177		- InputStreamReader inputstreamreader = new InputStreamReader(fileinputstream, "UTF-8");
178		- bufferedreader = new BufferedReader(inputstreamreader);
179		-
180		- // ＵＲＬ
181		- UrlAdress = bufferedreader.readLine();
182		- // 検索情報
183		- String rec;
184		- while((rec = bufferedreader.readLine()) != null) {
185		- String[] recary = rec.split(splitchar, -1);
186		- SearchData sdat = new SearchData();
187		- sdat.setitem(recary[0]);
188		- sdat.setHtmltag(recary[1]);
189		- sdat.setHtmlid(recary[2]);
190		- sdat.setHtmlclass(recary[3]);
191		- sdat.setaround(recary[4]);
192		- sdat.setregexp(recary[5]);
193		-
194		- slist.add(sdat);
195		- }
196		-
197		- } catch(IOException ex) {
198		- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
199		-
200		- } finally {
201		- try {
202		- if(bufferedreader != null) {
203		- bufferedreader.close();
204		- }
205		-
206		- } catch (IOException ex) {
207		- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
208		- }
209		- }
210		- }
211		-
212		- /* ---------------------------------------------------------------------- */
213		- /**
214		- * 保存(XML形式).
215		- * @param file
216		- */
217		- public void saveXml(File file) {
218		- try {
219		- DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
220		- DocumentBuilder wkBuilder = factory.newDocumentBuilder();
221		- DOMImplementation domImpl = wkBuilder.getDOMImplementation();
222		-
223		- Document doc = domImpl.createDocument("","searchdata",null);
224		- Element wkRoot = doc.getDocumentElement();
225		-
226		- // ＵＲＬ
227		- Element url = doc.createElement("url");
228		- url.appendChild(doc.createTextNode(UrlAdress));
229		- wkRoot.appendChild(url);
230		-
231		- // 検索情報
232		- for (Object slist1 : slist) {
233		- SearchData sdat = (SearchData) slist1;
234		-
235		- Element cslist = doc.createElement("searchlist");
236		- Element item = doc.createElement("item");
237		- Element htmltag = doc.createElement("htmltag");
238		- Element htmlid = doc.createElement("htmlid");
239		- Element htmlclass = doc.createElement("htmlclass");
240		- Element around = doc.createElement("around");
241		- Element regexp = doc.createElement("regexp");
242		-
243		- item.appendChild(doc.createTextNode(sdat.getitem()));
244		- htmltag.appendChild(doc.createTextNode(sdat.getHtmltag()));
245		- htmlid.appendChild(doc.createTextNode(sdat.getHtmlid()));
246		- htmlclass.appendChild(doc.createTextNode(sdat.getHtmlclass()));
247		- around.appendChild(doc.createTextNode(sdat.getaround()));
248		- regexp.appendChild(doc.createTextNode(sdat.getregexp()));
249		-
250		- cslist.appendChild(item);
251		- cslist.appendChild(htmltag);
252		- cslist.appendChild(htmlid);
253		- cslist.appendChild(htmlclass);
254		- cslist.appendChild(around);
255		- cslist.appendChild(regexp);
256		-
257		- wkRoot.appendChild(cslist);
258		- }
259		- // 出力
260		- TransformerFactory transFactory = TransformerFactory.newInstance();
261		- Transformer transformer = transFactory.newTransformer();
262		-
263		- DOMSource source = new DOMSource(doc);
264		- FileOutputStream os = new FileOutputStream(file);
265		- StreamResult result = new StreamResult(os);
266		- transformer.transform(source, result);
267		-
268		- } catch (ParserConfigurationException | FileNotFoundException ex) {
269		- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
270		- } catch (TransformerConfigurationException ex) {
271		- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
272		- } catch (TransformerException ex) {
273		- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
274		- }
275		- }
276		-
277		- /**
278		- * 読込(XML形式).
279		- * @param file
280		- */
281		- public void loadXml(File file) {
282		- slist = new ArrayList();
283		-
284		- try {
285		- DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
286		- DocumentBuilder wkBuilder = factory.newDocumentBuilder();
287		- Document doc = wkBuilder.parse(file);
288		-
289		- // ルート要素の取得
290		- Element wkRoot = doc.getDocumentElement();
291		-
292		- // ＵＲＬ
293		- NodeList url = wkRoot.getElementsByTagName("url");
294		- Node urlnode = url.item(0);
295		- UrlAdress = urlnode.getFirstChild().getNodeValue();
296		-
297		- // 検索情報
298		- NodeList cslist = wkRoot.getElementsByTagName("searchlist");
299		- for(int i = 0; i < cslist.getLength(); i++) {
300		- SearchData sdat = new SearchData();
301		-
302		- Node slistnode = cslist.item(i);
303		- Node child;
304		- for (child = slistnode.getFirstChild(); child != null; child = child.getNextSibling()) {
305		- if(child.getNodeType() == Node.ELEMENT_NODE) {
306		-
307		- String tag = child.getNodeName();
308		- String rtn = "";
309		- if(child.getFirstChild() != null) {
310		- rtn = child.getFirstChild().getNodeValue();
311		- }
312		-
313		- switch (tag) {
314		- case "item" :
315		- sdat.setitem(rtn);
316		- break;
317		- case "htmltag" :
318		- sdat.setHtmltag(rtn);
319		- break;
320		- case "htmlid" :
321		- sdat.setHtmlid(rtn);
322		- break;
323		- case "htmlclass" :
324		- sdat.setHtmlclass(rtn);
325		- break;
326		- case "around" :
327		- sdat.setaround(rtn);
328		- break;
329		- case "regexp" :
330		- sdat.setregexp(rtn);
331		- break;
332		- }
333		- }
334		- }
335		- slist.add(sdat);
336		- }
337		-
338		- } catch (ParserConfigurationException | SAXException | IOException ex) {
339		- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
340		- }
341		- }
342		-
343		- /* ---------------------------------------------------------------------- */
344		-
345		- void loadUrl() {
346		- NodeList nodelist = root.getElementsByTagName("url");
347		- Node node = nodelist.item(0);
348		- UrlAdress = node.getFirstChild().getNodeValue();
349		- }
350		-
351		- public void loadSearchList() {
352		- slist.clear();
353		- SearchData.clear();
354		-
355		- NodeList nodelist = root.getElementsByTagName("searchlist");
356		- for(int i = 0; i < nodelist.getLength(); i++) {
357		- Node childnode = nodelist.item(i);
358		-
359		- boolean sdatflg = false;
360		- SearchData sdat = new SearchData();
361		- for (Node child = childnode.getFirstChild(); child != null; child = child.getNextSibling()) {
362		- if(child.getNodeType() == Node.ELEMENT_NODE) {
363		- String tag = child.getNodeName();
364		- String rtn = "";
365		- if(child.getFirstChild() != null) {
366		- rtn = child.getFirstChild().getNodeValue();
367		- }
368		- switch (tag) {
369		- case "item" :
370		- sdat.setitem(rtn);
371		- sdatflg = true;
372		- break;
373		- case "htmltag" :
374		- sdat.setHtmltag(rtn);
375		- sdatflg = true;
376		- break;
377		- case "htmlid" :
378		- sdat.setHtmlid(rtn);
379		- sdatflg = true;
380		- break;
381		- case "htmlclass" :
382		- sdat.setHtmlclass(rtn);
383		- sdatflg = true;
384		- break;
385		- case "around" :
386		- sdat.setaround(rtn);
387		- sdatflg = true;
388		- break;
389		- case "regexp" :
390		- sdat.setregexp(rtn);
391		- sdatflg = true;
392		- break;
393		- }
394		- }
395		- }
396		- if(sdatflg) slist.add(sdat);
397		- if(sdatflg) SearchData.add(sdat);
398		- }
399		- }
400		-
401		- public String loadMsg404() {
402		- StringBuilder strbuf = new StringBuilder();
403		- NodeList nodelist = root.getElementsByTagName("msg404");
404		- for(int i = 0; i < nodelist.getLength(); i++) {
405		- Node childnode = nodelist.item(i);
406		- String str = childnode.getFirstChild().getNodeValue();
407		- if(strbuf.length() > 0) {
408		- strbuf.append("\n");
409		- }
410		- strbuf.append(str);
411		- }
412		- return strbuf.toString();
413		- }
414		-
415		- public Element loadElement(String elementTagName) {
416		- NodeList nodelist = root.getElementsByTagName(elementTagName);
417		- Element element = (Element)nodelist.item(0);
418		-
419		- return element;
420		- }
421		-
422		- /* ---------------------------------------------------------------------- */
423		-
424		- void saveUrl(String urladdress) {
425		- checkdoc();
426		- removeElement("url"); // 既にElementが存在してた場合、一度削除
427		-
428		- Element url = document.createElement("url");
429		- url.appendChild(document.createTextNode(urladdress));
430		- root.appendChild(url);
431		- }
432		-
433		- void saveSearchList(ArrayList slist) {
434		- checkdoc();
435		- removeElement("searchlist"); // 既にElementが存在してた場合、一度削除
436		-
437		- int count = 0;
438		- for (Object slist1 : slist) {
439		- SearchData sdat = (SearchData) slist1;
440		-
441		- Element cslist = document.createElement("searchlist");
442		- cslist.setAttribute("listNo", String.valueOf(++count));
443		-
444		- addChild(cslist, "item", sdat.getitem());
445		- addChild(cslist, "htmltag", sdat.getHtmltag());
446		- addChild(cslist, "htmlid", sdat.getHtmlid());
447		- addChild(cslist, "htmlclass", sdat.getHtmlclass());
448		- addChild(cslist, "around", sdat.getaround());
449		- addChild(cslist, "regexp", sdat.getregexp());
450		-
451		- root.appendChild(cslist);
452		- }
453		- }
454		-
455		- void saveMsg404(String msg) {
456		- checkdoc();
457		- removeElement("msg404"); // 既にElementが存在してた場合、一度削除
458		-
459		- String[] msgs = msg.split("\n");
460		- int count = 0;
461		- for(String msgOne : msgs) {
462		- Element msgElement = document.createElement("msg404");
463		- msgElement.setAttribute("No", String.valueOf(++count));
464		- msgElement.appendChild(document.createTextNode(msgOne));
465		-
466		- root.appendChild(msgElement);
467		- }
468		- }
469		-
470		- public void saveElement(Element element) {
471		- checkdoc();
472		- removeElement(element.getTagName()); // 既にElementが存在してた場合、一度削除
473		-
474		- root.appendChild(element);
475		- }
476		-
477		- /* ---------------------------------------------------------------------- */
478		-
479		- private void addChild(Element cslist, String keyword, String data) {
480		- if(!data.isEmpty()) {
481		- Element element = document.createElement(keyword);
482		- element.appendChild(document.createTextNode(data));
483		- cslist.appendChild(element);
484		- }
485		- }
486		-
487		- private void removeElement(String elementTagName) {
488		- int nodeSize;
489		- do {
490		- NodeList nodelist = document.getElementsByTagName(elementTagName);
491		- nodeSize = nodelist.getLength();
492		- for(int i = 0; i < nodelist.getLength(); i++) {
493		- Node node = nodelist.item(i);
494		- root.removeChild(node);
495		- }
496		- } while(nodeSize > 0);
497		- }
498		-
499		- /**
500		- * ドキュメントチェック.
501		- * 新規の場合やＸＭＬファイルの読込みが行われていない状態時、新たにルートエレメントを作成する。
502		- * 既読の場合、ルートエレメントの取得を行う。
503		- */
504		- public void checkdoc() {
505		- if(document == null) {
506		- DOMImplementation domImpl = builder.getDOMImplementation();
507		- document = domImpl.createDocument("","searchdata",null);
508		- }
509		- root = document.getDocumentElement();
510		- }
511		-
512		- /**
513		- * XML読込み.
514		- * @param file
515		- */
516		- public void read(File file) {
517		- try {
518		- document = builder.parse(file);
519		- root = document.getDocumentElement();
520		-
521		- } catch (SAXException | IOException ex) {
522		- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
523		- }
524		- }
525		-
526		- /**
527		- * XML書込み.
528		- * @param file
529		- */
530		- public void write(File file) {
531		- try {
532		- TransformerFactory transFactory = TransformerFactory.newInstance();
533		- Transformer transformer = transFactory.newTransformer();
534		-
535		- DOMSource source = new DOMSource(document);
536		- FileOutputStream os = new FileOutputStream(file);
537		- StreamResult result = new StreamResult(os);
538		- transformer.transform(source, result);
539		-
540		- } catch (TransformerConfigurationException ex) {
541		- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
542		- } catch (FileNotFoundException | TransformerException ex) {
543		- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
544		- }
545		- }
546		-
547		-}

Deleted: svn:keywords

## -1 +0,0 ##

-Id

\ No newline at end of property

--- trunk/HtmlTest2/src/WebScraping/utility/LibraryXml.java (nonexistent)

+++ trunk/HtmlTest2/src/WebScraping/utility/LibraryXml.java (revision 132)

		@@ -0,0 +1,142 @@
	1	+/*
	2	+ * Copyright (C) 2014-2015 kgto.
	3	+ *
	4	+ * This library is free software; you can redistribute it and/or
	5	+ * modify it under the terms of the GNU Lesser General Public
	6	+ * License as published by the Free Software Foundation; either
	7	+ * version 2.1 of the License, or (at your option) any later version.
	8	+ *
	9	+ * This library is distributed in the hope that it will be useful,
	10	+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	12	+ * Lesser General Public License for more details.
	13	+ *
	14	+ * You should have received a copy of the GNU Lesser General Public
	15	+ * License along with this library; if not, write to the Free Software
	16	+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
	17	+ * MA 02110-1301 USA
	18	+ */
	19	+/*
	20	+ * $Id$
	21	+ */
	22	+
	23	+package webScraping.utility;
	24	+
	25	+import java.io.File;
	26	+import java.io.FileNotFoundException;
	27	+import java.io.FileOutputStream;
	28	+import java.io.IOException;
	29	+import java.util.logging.Level;
	30	+import java.util.logging.Logger;
	31	+
	32	+import javax.xml.parsers.DocumentBuilder;
	33	+import javax.xml.parsers.DocumentBuilderFactory;
	34	+import javax.xml.parsers.ParserConfigurationException;
	35	+import javax.xml.transform.Transformer;
	36	+import javax.xml.transform.TransformerConfigurationException;
	37	+import javax.xml.transform.TransformerException;
	38	+import javax.xml.transform.TransformerFactory;
	39	+import javax.xml.transform.dom.DOMSource;
	40	+import javax.xml.transform.stream.StreamResult;
	41	+
	42	+import org.w3c.dom.DOMImplementation;
	43	+import org.w3c.dom.Document;
	44	+import org.w3c.dom.Element;
	45	+import org.w3c.dom.Node;
	46	+import org.w3c.dom.NodeList;
	47	+import org.xml.sax.SAXException;
	48	+
	49	+public class LibraryXml {
	50	+
	51	+ String xmlrootname = "xmlcontainer";
	52	+
	53	+ DocumentBuilder builder;
	54	+ public Document readdoc, writedoc;
	55	+ Element xmlroot;
	56	+
	57	+ /* ---------------------------------------------------------------------- *
	58	+ * コンストラクタ
	59	+ * ---------------------------------------------------------------------- */
	60	+ public LibraryXml() {
	61	+ try {
	62	+ DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
	63	+ builder = factory.newDocumentBuilder();
	64	+
	65	+ } catch (ParserConfigurationException ex) {
	66	+ Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
	67	+ }
	68	+ }
	69	+
	70	+ /* ---------------------------------------------------------------------- *
	71	+ * メソッド
	72	+ * ---------------------------------------------------------------------- */
	73	+ /* 読込み処理 */
	74	+ public Element getwriteRoot(String elementName) {
	75	+ mainElement();
	76	+ Element element = writedoc.createElement(elementName);
	77	+ xmlroot.appendChild(element);
	78	+ return element;
	79	+ }
	80	+
	81	+ private void mainElement() {
	82	+ if(writedoc == null) {
	83	+ DOMImplementation domImpl = builder.getDOMImplementation();
	84	+ writedoc = domImpl.createDocument("", xmlrootname, null);
	85	+ xmlroot = writedoc.getDocumentElement();
	86	+ }
	87	+ }
	88	+
	89	+ /**
	90	+ * XML書込み.
	91	+ * @param file
	92	+ */
	93	+ public void write(File file) {
	94	+ try (FileOutputStream os = new FileOutputStream(file)) {
	95	+ TransformerFactory transFactory = TransformerFactory.newInstance();
	96	+ Transformer transformer = transFactory.newTransformer();
	97	+
	98	+ transformer.setOutputProperty("indent", "yes"); // 改行指定
	99	+ transformer.setOutputProperty("method", "xml");
	100	+
	101	+ DOMSource source = new DOMSource(writedoc);
	102	+ StreamResult result = new StreamResult(os);
	103	+ transformer.transform(source, result);
	104	+
	105	+ // 作成したＸＭＬをクリア
	106	+ writedoc = null;
	107	+
	108	+ } catch (TransformerConfigurationException ex) {
	109	+ Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
	110	+ } catch (FileNotFoundException | TransformerException ex) {
	111	+ Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
	112	+ } catch (IOException ex) {
	113	+ Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
	114	+ }
	115	+ }
	116	+
	117	+ /* ---------------------------------------------------------------------- */
	118	+ /* 書込み処理 */
	119	+
	120	+ public Element getreadRoot(String elementName) {
	121	+ NodeList nodelist = xmlroot.getElementsByTagName(elementName);
	122	+ Node node = nodelist.item(0);
	123	+ return (node.getNodeType() == Node.ELEMENT_NODE ? (Element)node : null);
	124	+ }
	125	+
	126	+ /**
	127	+ * XML読込み.
	128	+ * @param file
	129	+ */
	130	+ public void read(File file) {
	131	+ try {
	132	+ readdoc = builder.parse(file);
	133	+ xmlroot = readdoc.getDocumentElement();
	134	+
	135	+ } catch (SAXException | IOException ex) {
	136	+ Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
	137	+ }
	138	+ }
	139	+
	140	+ /* ---------------------------------------------------------------------- */
	141	+
	142	+}

Added: svn:keywords

## -0,0 +1 ##

+Id

\ No newline at end of property

--- trunk/HtmlTest2/src/WebScraping/utility/ScrapingXml.java (nonexistent)

+++ trunk/HtmlTest2/src/WebScraping/utility/ScrapingXml.java (revision 132)

		@@ -0,0 +1,198 @@
	1	+/*
	2	+ * Copyright (C) 2014-2015 kgto.
	3	+ *
	4	+ * This library is free software; you can redistribute it and/or
	5	+ * modify it under the terms of the GNU Lesser General Public
	6	+ * License as published by the Free Software Foundation; either
	7	+ * version 2.1 of the License, or (at your option) any later version.
	8	+ *
	9	+ * This library is distributed in the hope that it will be useful,
	10	+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	12	+ * Lesser General Public License for more details.
	13	+ *
	14	+ * You should have received a copy of the GNU Lesser General Public
	15	+ * License along with this library; if not, write to the Free Software
	16	+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
	17	+ * MA 02110-1301 USA
	18	+ */
	19	+/*
	20	+ * $Id$
	21	+ */
	22	+
	23	+package webScraping.utility;
	24	+
	25	+import webScraping.core.SearchData;
	26	+import java.io.File;
	27	+import java.util.ArrayList;
	28	+import org.w3c.dom.Element;
	29	+import org.w3c.dom.Node;
	30	+import org.w3c.dom.NodeList;
	31	+
	32	+public class ScrapingXml {
	33	+ /* ---------------------------------------------------------------------- *
	34	+ * フィールド
	35	+ * ---------------------------------------------------------------------- */
	36	+ String rootnameScraping = "webscraping";
	37	+
	38	+ private String testUrl;
	39	+ private SearchData[] sdata;
	40	+
	41	+ public LibraryXml xlib = new LibraryXml();
	42	+ public Element root;
	43	+
	44	+ /* ---------------------------------------------------------------------- *
	45	+ * コンストラクタ
	46	+ * ---------------------------------------------------------------------- */
	47	+ public ScrapingXml() {
	48	+ }
	49	+
	50	+ /* ---------------------------------------------------------------------- *
	51	+ * Setter
	52	+ * ---------------------------------------------------------------------- */
	53	+ public void setTestUrl(String testUrl) {
	54	+ this.testUrl = testUrl;
	55	+ }
	56	+
	57	+ public void setSdata() {
	58	+ this.sdata = new SearchData[SearchData.size()];
	59	+ for(int i = 0; i < SearchData.size(); i++) {
	60	+ this.sdata[i] = SearchData.get(i);
	61	+ }
	62	+ }
	63	+
	64	+ /* ---------------------------------------------------------------------- *
	65	+ * Getter
	66	+ * ---------------------------------------------------------------------- */
	67	+ public String getTestUrl() {
	68	+ return testUrl;
	69	+ }
	70	+
	71	+ public void getSdata() {
	72	+ SearchData.clear();
	73	+ for(SearchData sdata1 : sdata) {
	74	+ SearchData.add(sdata1);
	75	+ }
	76	+ }
	77	+
	78	+ /* ---------------------------------------------------------------------- *
	79	+ * メソッド
	80	+ * ---------------------------------------------------------------------- */
	81	+ public void save(File file) {
	82	+
	83	+ elementset();
	84	+
	85	+ xlib.write(file);
	86	+ }
	87	+
	88	+ public void elementset() {
	89	+ root = xlib.getwriteRoot(rootnameScraping);
	90	+ elementsetUrl();
	91	+ elementsetSearchdata();
	92	+ System.out.println("elementset XmlScraping");
	93	+ }
	94	+
	95	+ private void elementsetUrl() {
	96	+ Element url = xlib.writedoc.createElement("url");
	97	+ url.appendChild(xlib.writedoc.createTextNode(testUrl));
	98	+ root.appendChild(url);
	99	+ }
	100	+
	101	+ private void elementsetSearchdata() {
	102	+ int count = 0;
	103	+ for(SearchData sdat : sdata) {
	104	+ Element cslist = xlib.writedoc.createElement("searchlist");
	105	+ cslist.setAttribute("listNo", String.valueOf(++count));
	106	+
	107	+ addChild(cslist, "item" , sdat.getitem());
	108	+ addChild(cslist, "htmltag" , sdat.getHtmltag());
	109	+ addChild(cslist, "htmlid" , sdat.getHtmlid());
	110	+ addChild(cslist, "htmlclass", sdat.getHtmlclass());
	111	+ addChild(cslist, "around" , sdat.getaround());
	112	+ addChild(cslist, "regexp" , sdat.getregexp());
	113	+
	114	+ root.appendChild(cslist);
	115	+ }
	116	+ }
	117	+
	118	+ private void addChild(Element cslist, String keyword, String data) {
	119	+ if(!data.isEmpty()) {
	120	+ Element element = xlib.writedoc.createElement(keyword);
	121	+ element.appendChild(xlib.writedoc.createTextNode(data));
	122	+ cslist.appendChild(element);
	123	+ }
	124	+ }
	125	+
	126	+ /* ---------------------------------------------------------------------- */
	127	+
	128	+ void load(File file) {
	129	+ xlib.read(file);
	130	+ elementget();
	131	+ }
	132	+
	133	+ public void elementget() {
	134	+ root = xlib.getreadRoot(rootnameScraping);
	135	+ elementgetUrl();
	136	+ elementgetSearchdata();
	137	+ }
	138	+
	139	+ private void elementgetUrl() {
	140	+ NodeList nodelist = root.getElementsByTagName("url");
	141	+ Node node = nodelist.item(0);
	142	+ testUrl = node.getFirstChild().getNodeValue();
	143	+ }
	144	+
	145	+ private void elementgetSearchdata() {
	146	+ ArrayList<SearchData> slist = new ArrayList<>();
	147	+
	148	+ NodeList nodelist = root.getElementsByTagName("searchlist");
	149	+ for(int i = 0; i < nodelist.getLength(); i++) {
	150	+ Node childnode = nodelist.item(i);
	151	+
	152	+ boolean sdatflg = false;
	153	+ SearchData sdat = new SearchData();
	154	+ for (Node child = childnode.getFirstChild(); child != null; child = child.getNextSibling()) {
	155	+ if(child.getNodeType() == Node.ELEMENT_NODE) {
	156	+ String tag = child.getNodeName();
	157	+ String rtn = "";
	158	+ if(child.getFirstChild() != null) {
	159	+ rtn = child.getFirstChild().getNodeValue();
	160	+ }
	161	+ switch (tag) {
	162	+ case "item" :
	163	+ sdat.setitem(rtn);
	164	+ sdatflg = true;
	165	+ break;
	166	+ case "htmltag" :
	167	+ sdat.setHtmltag(rtn);
	168	+ sdatflg = true;
	169	+ break;
	170	+ case "htmlid" :
	171	+ sdat.setHtmlid(rtn);
	172	+ sdatflg = true;
	173	+ break;
	174	+ case "htmlclass" :
	175	+ sdat.setHtmlclass(rtn);
	176	+ sdatflg = true;
	177	+ break;
	178	+ case "around" :
	179	+ sdat.setaround(rtn);
	180	+ sdatflg = true;
	181	+ break;
	182	+ case "regexp" :
	183	+ sdat.setregexp(rtn);
	184	+ sdatflg = true;
	185	+ break;
	186	+ }
	187	+ }
	188	+ }
	189	+ if(sdatflg) slist.add(sdat);
	190	+ }
	191	+ // 配列化
	192	+ sdata = new SearchData[slist.size()];
	193	+ for(int i = 0; i < slist.size(); i++) {
	194	+ sdata[i] = slist.get(i);
	195	+ }
	196	+ }
	197	+
	198	+}

Added: svn:keywords

## -0,0 +1 ##

+Id

\ No newline at end of property

--- trunk/HtmlTest2/src/WebScraping/utility/HtmlSearch.java (revision 131)

+++ trunk/HtmlTest2/src/WebScraping/utility/HtmlSearch.java (revision 132)

		@@ -40,7 +40,7 @@
40	40	* @author kgto
41	41	*/
42	42	public class HtmlSearch extends javax.swing.JFrame {
43		- private final SearchDataRW sio = new SearchDataRW();
	43	+ private final ScrapingXml xmlwriter = new ScrapingXml();
44	44
45	45	SearchDataTableModel sdatatblmodel;
46	46

		@@ -332,8 +332,9 @@
332	332	int selected = jFileChooser1.showOpenDialog(this);
333	333	if (selected == JFileChooser.APPROVE_OPTION) {
334	334	File file = jFileChooser1.getSelectedFile();
335		- sio.load(file);
336		- jTxtUrl.setText(sio.geturl());
	335	+ xmlwriter.load(file);
	336	+ jTxtUrl.setText(xmlwriter.getTestUrl());
	337	+ xmlwriter.getSdata();
337	338	sdatatblmodel.setRowCount(0);
338	339	for(int i = 0; i < SearchData.size(); i++) {
339	340	SearchData sdata = SearchData.get(i);

		@@ -347,7 +348,7 @@
347	348	int selected = jFileChooser1.showSaveDialog(this);
348	349	if (selected == JFileChooser.APPROVE_OPTION) {
349	350	File file = jFileChooser1.getSelectedFile();
350		- sio.seturl(jTxtUrl.getText());
	351	+ xmlwriter.setTestUrl(jTxtUrl.getText());
351	352
352	353	SearchData.clear();
353	354	for(int row = 0; row < sdatatblmodel.getRowCount(); row++) {

		@@ -354,7 +355,8 @@
354	355	SearchData sdata = sdatatblmodel.getSearchData(row);
355	356	SearchData.add(sdata);
356	357	}
357		- sio.save(file);
	358	+ xmlwriter.setSdata();
	359	+ xmlwriter.save(file);
358	360	}
359	361	}//GEN-LAST:event_jMenuSaveActionPerformed
360	362

--- trunk/HtmlTest2/src/WebScraping/core/AttributeData.java (revision 131)

+++ trunk/HtmlTest2/src/WebScraping/core/AttributeData.java (revision 132)

		@@ -76,7 +76,8 @@
76	76	for (Object AttrList1 : AttrList) {
77	77	AttrData a = (AttrData)AttrList1;
78	78	if(a.tag == tag) {
79		- if(a.attrname.equals(attrname) && a.attrvalue.equals(attrvalue)) {
	79	+ //if(a.attrname.equals(attrname) && a.attrvalue.equals(attrvalue)) {
	80	+ if(a.attrname.equals(attrname) && a.attrvalue.startsWith(attrvalue)) {
80	81	ret = true;
81	82	}
82	83	}

--- trunk/HtmlTest2/src/WebScraping/core/HtmlParserCallback.java (revision 131)

+++ trunk/HtmlTest2/src/WebScraping/core/HtmlParserCallback.java (revision 132)

		@@ -33,7 +33,9 @@
33	33	* @author kgto
34	34	*/
35	35	class HtmlParserCallback extends HTMLEditorKit.ParserCallback {
36		-
	36	+ /* ---------------------------------------------------------------------- *
	37	+ * フィールド
	38	+ * ---------------------------------------------------------------------- */
37	39	// Tag毎の階層
38	40	HashMap<HTML.Tag,Integer> tagMap = new HashMap<>();
39	41

		@@ -54,6 +56,9 @@
54	56	// 属性データ
55	57	AttributeData attrdata;
56	58
	59	+ /* ---------------------------------------------------------------------- *
	60	+ * コンストラクタ
	61	+ * ---------------------------------------------------------------------- */
57	62	protected HtmlParserCallback(SearchData skey) {
58	63
59	64	// キー情報展開

		@@ -64,10 +69,16 @@
64	69	sData = new ArrayList();
65	70	}
66	71
	72	+ /* ---------------------------------------------------------------------- *
	73	+ * Getter
	74	+ * ---------------------------------------------------------------------- */
67	75	ArrayList getrtnData() {
68	76	return this.sData;
69	77	}
70	78
	79	+ /* ---------------------------------------------------------------------- *
	80	+ * メソッド
	81	+ * ---------------------------------------------------------------------- */
71	82	@Override
72	83	public void handleStartTag(HTML.Tag tag, MutableAttributeSet attr, int pos){
73	84	// Tag毎の階層を保持

--- trunk/HtmlTest2/src/WebScraping/core/SearchData.java (revision 131)

+++ trunk/HtmlTest2/src/WebScraping/core/SearchData.java (revision 132)

		@@ -42,6 +42,28 @@
42	42	/* ---------------------------------------------------------------------- *
43	43	* static 処理
44	44	* ---------------------------------------------------------------------- */
	45	+ public static class Context {
	46	+ public Class columnClass;
	47	+ public String columnName;
	48	+ public String columnNameJp;
	49	+
	50	+ public Context(Class columnClass, String columnName, String columnNameJp) {
	51	+ this.columnClass = columnClass;
	52	+ this.columnName = columnName;
	53	+ this.columnNameJp = columnNameJp;
	54	+ }
	55	+ }
	56	+
	57	+ public static final Context[] context = {
	58	+ /* 0 */ new Context(String.class , "item" , "項目名"),
	59	+ /* 1 */ new Context(String.class , "htmltag" , "タグ"),
	60	+ /* 2 */ new Context(String.class , "htmlid" , "ＩＤ"),
	61	+ /* 3 */ new Context(String.class , "htmlclass" , "クラス"),
	62	+ /* 4 */ new Context(String.class , "around" , "位置"),
	63	+ /* 5 */ new Context(String.class , "regexp" , "抽出条件")
	64	+ };
	65	+
	66	+ /* ---------------------------------------------------------------------- */
45	67	private static ArrayList<SearchData> slist = new ArrayList<>();
46	68
47	69	public static void addSearchData(

		@@ -162,5 +184,17 @@
162	184	this.around = "";
163	185	this.regexp = "";
164	186	}
165		-
	187	+
	188	+ public Object[] getObjData() {
	189	+ Object[] obj = {
	190	+ /* 0 */ getitem(), // 項目名
	191	+ /* 1 */ getHtmltag(), // タグ
	192	+ /* 2 */ getHtmlid(), // ＩＤ
	193	+ /* 3 */ getHtmlclass(), // クラス
	194	+ /* 4 */ getaround(), // 位置
	195	+ /* 5 */ getregexp() // 抽出条件
	196	+ };
	197	+ return obj;
	198	+ }
	199	+
166	200	}

--- trunk/HtmlTest2/src/WebScraping/core/HtmlParser.java (revision 131)

+++ trunk/HtmlTest2/src/WebScraping/core/HtmlParser.java (revision 132)

		@@ -32,20 +32,25 @@
32	32	import javax.swing.text.html.parser.ParserDelegator;
33	33
34	34	/**
35		- *
	35	+ * ＨＴＭＬパーサ.
36	36	* @author kgto
37	37	*/
38	38	public class HtmlParser {
39		-
	39	+ /* ---------------------------------------------------------------------- *
	40	+ * フィールド
	41	+ * ---------------------------------------------------------------------- */
40	42	URL url;
41	43	String pageData;
42	44	ArrayList sData;
43	45
44	46	// 作業ワーク
45		- String htmltag;
46		- String htmlid;
47		- String htmlclass;
	47	+ private String htmltag;
	48	+ private String htmlid;
	49	+ private String htmlclass;
48	50
	51	+ /* ---------------------------------------------------------------------- *
	52	+ * コンストラクタ
	53	+ * ---------------------------------------------------------------------- */
49	54	public HtmlParser(URL UrlAdress) {
50	55	DebugProcess.debuglog_set();
51	56	this.url = UrlAdress;

		@@ -68,15 +73,24 @@
68	73	url = null;
69	74	}
70	75
	76	+ /* ---------------------------------------------------------------------- *
	77	+ * Getter
	78	+ * ---------------------------------------------------------------------- */
71	79	public String getStringPageData() {
72	80	return pageData;
73	81	}
74	82
	83	+ /* ---------------------------------------------------------------------- *
	84	+ * Setter
	85	+ * ---------------------------------------------------------------------- */
75	86	public void seturl(URL UrlAdress) {
76	87	this.url = UrlAdress;
77	88	getPageData();
78	89	}
79	90
	91	+ /* ---------------------------------------------------------------------- *
	92	+ * メソッド
	93	+ * ---------------------------------------------------------------------- */
80	94	public void seturl(String UrlAdress) {
81	95	try {
82	96	url = new URL(UrlAdress);

お試し作業部屋

提交

标签

Frequently used words (click to add to your profile)

Commit MetaInfo

Log Message

更改概述

差异