• R/O
  • SSH
  • HTTPS

提交

标签

Frequently used words (click to add to your profile)

java, android, c++, linux, c#, objective-c, cocoa, 誰得, qt, ruby, python, game, windows, bathyscaphe, php, gui, c, 翻訳, omegat, twitter, framework, test, btron, arduino, vb.net, 計画中(planning stage), directx, previewer, ゲームエンジン, dom

作業部屋の使い方を試しています。


Commit MetaInfo

修订版: 145 (tree)
时间: 2016-07-12 14:17:52
作者: tuna_p

Log Message

(empty log message)

更改概述

差异

--- trunk/webScraping/nbproject/build-impl.xml (revision 144)
+++ trunk/webScraping/nbproject/build-impl.xml (revision 145)
@@ -191,7 +191,12 @@
191191 </not>
192192 </and>
193193 </condition>
194- <property name="javac.fork" value="${jdkBug6558476}"/>
194+ <condition else="false" property="javac.fork">
195+ <or>
196+ <istrue value="${jdkBug6558476}"/>
197+ <istrue value="${javac.external.vm}"/>
198+ </or>
199+ </condition>
195200 <property name="jar.index" value="false"/>
196201 <property name="jar.index.metainf" value="${jar.index}"/>
197202 <property name="copylibs.rebase" value="true"/>
@@ -217,6 +222,7 @@
217222 <condition else="" property="testng.debug.mode" value="-mixed">
218223 <istrue value="${junit+testng.available}"/>
219224 </condition>
225+ <property name="java.failonerror" value="true"/>
220226 </target>
221227 <target name="-post-init">
222228 <!-- Empty placeholder for easier customization. -->
@@ -693,7 +699,7 @@
693699 <sequential>
694700 <property environment="env"/>
695701 <resolve name="profiler.current.path" value="${profiler.info.pathvar}"/>
696- <java classname="@{classname}" dir="${profiler.info.dir}" fork="true" jvm="${profiler.info.jvm}">
702+ <java classname="@{classname}" dir="${profiler.info.dir}" failonerror="${java.failonerror}" fork="true" jvm="${profiler.info.jvm}">
697703 <jvmarg line="${endorsed.classpath.cmd.line.arg}"/>
698704 <jvmarg value="${profiler.info.jvmargs.agent}"/>
699705 <jvmarg line="${profiler.info.jvmargs}"/>
@@ -768,7 +774,7 @@
768774 <attribute default="${debug.classpath}" name="classpath"/>
769775 <element name="customize" optional="true"/>
770776 <sequential>
771- <java classname="@{classname}" dir="${work.dir}" fork="true">
777+ <java classname="@{classname}" dir="${work.dir}" failonerror="${java.failonerror}" fork="true">
772778 <jvmarg line="${endorsed.classpath.cmd.line.arg}"/>
773779 <jvmarg line="${debug-args-line}"/>
774780 <jvmarg value="-Xrunjdwp:transport=${debug-transport},address=${jpda.address}"/>
@@ -795,7 +801,7 @@
795801 <attribute default="jvm" name="jvm"/>
796802 <element name="customize" optional="true"/>
797803 <sequential>
798- <java classname="@{classname}" dir="${work.dir}" fork="true">
804+ <java classname="@{classname}" dir="${work.dir}" failonerror="${java.failonerror}" fork="true">
799805 <jvmarg line="${endorsed.classpath.cmd.line.arg}"/>
800806 <jvmarg value="-Dfile.encoding=${runtime.encoding}"/>
801807 <redirector errorencoding="${runtime.encoding}" inputencoding="${runtime.encoding}" outputencoding="${runtime.encoding}"/>
--- trunk/webScraping/src/WebScraping/utility/LibraryXml.java (revision 144)
+++ trunk/webScraping/src/WebScraping/utility/LibraryXml.java (nonexistent)
@@ -1,142 +0,0 @@
1-/*
2- * Copyright (C) 2014-2015 kgto.
3- *
4- * This library is free software; you can redistribute it and/or
5- * modify it under the terms of the GNU Lesser General Public
6- * License as published by the Free Software Foundation; either
7- * version 2.1 of the License, or (at your option) any later version.
8- *
9- * This library is distributed in the hope that it will be useful,
10- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12- * Lesser General Public License for more details.
13- *
14- * You should have received a copy of the GNU Lesser General Public
15- * License along with this library; if not, write to the Free Software
16- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17- * MA 02110-1301 USA
18- */
19-/*
20- * $Id$
21- */
22-
23-package webScraping.utility;
24-
25-import java.io.File;
26-import java.io.FileNotFoundException;
27-import java.io.FileOutputStream;
28-import java.io.IOException;
29-import java.util.logging.Level;
30-import java.util.logging.Logger;
31-
32-import javax.xml.parsers.DocumentBuilder;
33-import javax.xml.parsers.DocumentBuilderFactory;
34-import javax.xml.parsers.ParserConfigurationException;
35-import javax.xml.transform.Transformer;
36-import javax.xml.transform.TransformerConfigurationException;
37-import javax.xml.transform.TransformerException;
38-import javax.xml.transform.TransformerFactory;
39-import javax.xml.transform.dom.DOMSource;
40-import javax.xml.transform.stream.StreamResult;
41-
42-import org.w3c.dom.DOMImplementation;
43-import org.w3c.dom.Document;
44-import org.w3c.dom.Element;
45-import org.w3c.dom.Node;
46-import org.w3c.dom.NodeList;
47-import org.xml.sax.SAXException;
48-
49-public class LibraryXml {
50-
51- String xmlrootname = "xmlcontainer";
52-
53- DocumentBuilder builder;
54- public Document readdoc, writedoc;
55- Element xmlroot;
56-
57- /* ---------------------------------------------------------------------- *
58- * コンストラクタ
59- * ---------------------------------------------------------------------- */
60- public LibraryXml() {
61- try {
62- DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
63- builder = factory.newDocumentBuilder();
64-
65- } catch (ParserConfigurationException ex) {
66- Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
67- }
68- }
69-
70- /* ---------------------------------------------------------------------- *
71- * メソッド
72- * ---------------------------------------------------------------------- */
73- /* 読込み処理 */
74- public Element getwriteRoot(String elementName) {
75- mainElement();
76- Element element = writedoc.createElement(elementName);
77- xmlroot.appendChild(element);
78- return element;
79- }
80-
81- private void mainElement() {
82- if(writedoc == null) {
83- DOMImplementation domImpl = builder.getDOMImplementation();
84- writedoc = domImpl.createDocument("", xmlrootname, null);
85- xmlroot = writedoc.getDocumentElement();
86- }
87- }
88-
89- /**
90- * XML書込み.
91- * @param file
92- */
93- public void write(File file) {
94- try (FileOutputStream os = new FileOutputStream(file)) {
95- TransformerFactory transFactory = TransformerFactory.newInstance();
96- Transformer transformer = transFactory.newTransformer();
97-
98- transformer.setOutputProperty("indent", "yes"); // 改行指定
99- transformer.setOutputProperty("method", "xml");
100-
101- DOMSource source = new DOMSource(writedoc);
102- StreamResult result = new StreamResult(os);
103- transformer.transform(source, result);
104-
105- // 作成したXMLをクリア
106- writedoc = null;
107-
108- } catch (TransformerConfigurationException ex) {
109- Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
110- } catch (FileNotFoundException | TransformerException ex) {
111- Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
112- } catch (IOException ex) {
113- Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
114- }
115- }
116-
117- /* ---------------------------------------------------------------------- */
118- /* 書込み処理 */
119-
120- public Element getreadRoot(String elementName) {
121- NodeList nodelist = xmlroot.getElementsByTagName(elementName);
122- Node node = nodelist.item(0);
123- return (node.getNodeType() == Node.ELEMENT_NODE ? (Element)node : null);
124- }
125-
126- /**
127- * XML読込み.
128- * @param file
129- */
130- public void read(File file) {
131- try {
132- readdoc = builder.parse(file);
133- xmlroot = readdoc.getDocumentElement();
134-
135- } catch (SAXException | IOException ex) {
136- Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
137- }
138- }
139-
140- /* ---------------------------------------------------------------------- */
141-
142-}
Deleted: svn:keywords
## -1 +0,0 ##
-Id
\ No newline at end of property
--- trunk/webScraping/src/WebScraping/utility/ScrapingXml.java (revision 144)
+++ trunk/webScraping/src/WebScraping/utility/ScrapingXml.java (nonexistent)
@@ -1,198 +0,0 @@
1-/*
2- * Copyright (C) 2014-2015 kgto.
3- *
4- * This library is free software; you can redistribute it and/or
5- * modify it under the terms of the GNU Lesser General Public
6- * License as published by the Free Software Foundation; either
7- * version 2.1 of the License, or (at your option) any later version.
8- *
9- * This library is distributed in the hope that it will be useful,
10- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12- * Lesser General Public License for more details.
13- *
14- * You should have received a copy of the GNU Lesser General Public
15- * License along with this library; if not, write to the Free Software
16- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17- * MA 02110-1301 USA
18- */
19-/*
20- * $Id$
21- */
22-
23-package webScraping.utility;
24-
25-import webScraping.core.SearchData;
26-import java.io.File;
27-import java.util.ArrayList;
28-import org.w3c.dom.Element;
29-import org.w3c.dom.Node;
30-import org.w3c.dom.NodeList;
31-
32-public class ScrapingXml {
33- /* ---------------------------------------------------------------------- *
34- * フィールド
35- * ---------------------------------------------------------------------- */
36- String rootnameScraping = "webscraping";
37-
38- private String testUrl;
39- private SearchData[] sdata;
40-
41- public LibraryXml xlib = new LibraryXml();
42- public Element root;
43-
44- /* ---------------------------------------------------------------------- *
45- * コンストラクタ
46- * ---------------------------------------------------------------------- */
47- public ScrapingXml() {
48- }
49-
50- /* ---------------------------------------------------------------------- *
51- * Setter
52- * ---------------------------------------------------------------------- */
53- public void setTestUrl(String testUrl) {
54- this.testUrl = testUrl;
55- }
56-
57- public void setSdata() {
58- this.sdata = new SearchData[SearchData.size()];
59- for(int i = 0; i < SearchData.size(); i++) {
60- this.sdata[i] = SearchData.get(i);
61- }
62- }
63-
64- /* ---------------------------------------------------------------------- *
65- * Getter
66- * ---------------------------------------------------------------------- */
67- public String getTestUrl() {
68- return testUrl;
69- }
70-
71- public void getSdata() {
72- SearchData.clear();
73- for(SearchData sdata1 : sdata) {
74- SearchData.add(sdata1);
75- }
76- }
77-
78- /* ---------------------------------------------------------------------- *
79- * メソッド
80- * ---------------------------------------------------------------------- */
81- public void save(File file) {
82-
83- elementset();
84-
85- xlib.write(file);
86- }
87-
88- public void elementset() {
89- root = xlib.getwriteRoot(rootnameScraping);
90- elementsetUrl();
91- elementsetSearchdata();
92- System.out.println("elementset XmlScraping");
93- }
94-
95- private void elementsetUrl() {
96- Element url = xlib.writedoc.createElement("url");
97- url.appendChild(xlib.writedoc.createTextNode(testUrl));
98- root.appendChild(url);
99- }
100-
101- private void elementsetSearchdata() {
102- int count = 0;
103- for(SearchData sdat : sdata) {
104- Element cslist = xlib.writedoc.createElement("searchlist");
105- cslist.setAttribute("listNo", String.valueOf(++count));
106-
107- addChild(cslist, "item" , sdat.getitem());
108- addChild(cslist, "htmltag" , sdat.getHtmltag());
109- addChild(cslist, "htmlid" , sdat.getHtmlid());
110- addChild(cslist, "htmlclass", sdat.getHtmlclass());
111- addChild(cslist, "around" , sdat.getaround());
112- addChild(cslist, "regexp" , sdat.getregexp());
113-
114- root.appendChild(cslist);
115- }
116- }
117-
118- private void addChild(Element cslist, String keyword, String data) {
119- if(!data.isEmpty()) {
120- Element element = xlib.writedoc.createElement(keyword);
121- element.appendChild(xlib.writedoc.createTextNode(data));
122- cslist.appendChild(element);
123- }
124- }
125-
126- /* ---------------------------------------------------------------------- */
127-
128- void load(File file) {
129- xlib.read(file);
130- elementget();
131- }
132-
133- public void elementget() {
134- root = xlib.getreadRoot(rootnameScraping);
135- elementgetUrl();
136- elementgetSearchdata();
137- }
138-
139- private void elementgetUrl() {
140- NodeList nodelist = root.getElementsByTagName("url");
141- Node node = nodelist.item(0);
142- testUrl = node.getFirstChild().getNodeValue();
143- }
144-
145- private void elementgetSearchdata() {
146- ArrayList<SearchData> slist = new ArrayList<>();
147-
148- NodeList nodelist = root.getElementsByTagName("searchlist");
149- for(int i = 0; i < nodelist.getLength(); i++) {
150- Node childnode = nodelist.item(i);
151-
152- boolean sdatflg = false;
153- SearchData sdat = new SearchData();
154- for (Node child = childnode.getFirstChild(); child != null; child = child.getNextSibling()) {
155- if(child.getNodeType() == Node.ELEMENT_NODE) {
156- String tag = child.getNodeName();
157- String rtn = "";
158- if(child.getFirstChild() != null) {
159- rtn = child.getFirstChild().getNodeValue();
160- }
161- switch (tag) {
162- case "item" :
163- sdat.setitem(rtn);
164- sdatflg = true;
165- break;
166- case "htmltag" :
167- sdat.setHtmltag(rtn);
168- sdatflg = true;
169- break;
170- case "htmlid" :
171- sdat.setHtmlid(rtn);
172- sdatflg = true;
173- break;
174- case "htmlclass" :
175- sdat.setHtmlclass(rtn);
176- sdatflg = true;
177- break;
178- case "around" :
179- sdat.setaround(rtn);
180- sdatflg = true;
181- break;
182- case "regexp" :
183- sdat.setregexp(rtn);
184- sdatflg = true;
185- break;
186- }
187- }
188- }
189- if(sdatflg) slist.add(sdat);
190- }
191- // 配列化
192- sdata = new SearchData[slist.size()];
193- for(int i = 0; i < slist.size(); i++) {
194- sdata[i] = slist.get(i);
195- }
196- }
197-
198-}
Deleted: svn:keywords
## -1 +0,0 ##
-Id
\ No newline at end of property
--- trunk/webScraping/src/WebScraping/utility/HtmlSearch.java (revision 144)
+++ trunk/webScraping/src/WebScraping/utility/HtmlSearch.java (nonexistent)
@@ -1,568 +0,0 @@
1-/*
2- * Copyright (C) 2014 kgto.
3- *
4- * This library is free software; you can redistribute it and/or
5- * modify it under the terms of the GNU Lesser General Public
6- * License as published by the Free Software Foundation; either
7- * version 2.1 of the License, or (at your option) any later version.
8- *
9- * This library is distributed in the hope that it will be useful,
10- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12- * Lesser General Public License for more details.
13- *
14- * You should have received a copy of the GNU Lesser General Public
15- * License along with this library; if not, write to the Free Software
16- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17- * MA 02110-1301 USA
18- */
19-/*
20- * $Id$
21- */
22-package webScraping.utility;
23-
24-import webScraping.core.HtmlParser;
25-import webScraping.core.SearchData;
26-import java.awt.Desktop;
27-import java.io.File;
28-import java.io.IOException;
29-import java.net.URI;
30-import java.net.URISyntaxException;
31-import java.util.logging.Level;
32-import java.util.logging.Logger;
33-import javax.swing.JFileChooser;
34-import javax.swing.filechooser.FileFilter;
35-import javax.swing.filechooser.FileNameExtensionFilter;
36-import javax.swing.table.DefaultTableModel;
37-
38-/**
39- * HTMLページ上の特定の項目を検索し、その項目内容の値を取得する.
40- * @author kgto
41- */
42-public class HtmlSearch extends javax.swing.JFrame {
43- private final ScrapingXml xmlwriter = new ScrapingXml();
44-
45- SearchDataTableModel sdatatblmodel;
46-
47- /**
48- * Creates new form Frame1
49- */
50- public HtmlSearch() {
51- sdatatblmodel = new SearchDataTableModel();
52-
53- initComponents();
54-
55- // カレントディレクトリ取得
56- String dir = System.getProperty("user.dir");
57- File file = new java.io.File(dir + "\\data");
58- jFileChooser1.setCurrentDirectory(file);
59-
60- FileFilter filter1 = new FileNameExtensionFilter("XMLファイル", "xml");
61- FileFilter filter2 = new FileNameExtensionFilter("TEXTファイル", "txt");
62- jFileChooser1.addChoosableFileFilter(filter1);
63- jFileChooser1.addChoosableFileFilter(filter2);
64- jFileChooser1.setFileFilter(filter1);
65-
66- }
67-
68- /**
69- * This method is called from within the constructor to initialize the form.
70- * WARNING: Do NOT modify this code. The content of this method is always
71- * regenerated by the Form Editor.
72- */
73- @SuppressWarnings("unchecked")
74- // <editor-fold defaultstate="collapsed" desc="Generated Code">//GEN-BEGIN:initComponents
75- private void initComponents() {
76-
77- jFileChooser1 = new javax.swing.JFileChooser();
78- jRadioButton1 = new javax.swing.JRadioButton();
79- jLabel1 = new javax.swing.JLabel();
80- jTxtUrl = new javax.swing.JTextField();
81- jBtnSearch = new javax.swing.JButton();
82- jTabbedPane1 = new javax.swing.JTabbedPane();
83- jPanelTab1 = new javax.swing.JPanel();
84- jScrollPane1 = new javax.swing.JScrollPane();
85- jTable1 = new javax.swing.JTable();
86- jBtnRowIns = new javax.swing.JButton();
87- jBtnRowDel = new javax.swing.JButton();
88- jBtnRowCpy = new javax.swing.JButton();
89- jPanelTab2 = new javax.swing.JPanel();
90- jScrollPaneLabel = new javax.swing.JScrollPane();
91- jTxtLabel = new javax.swing.JTextArea();
92- jScrollPane404msg = new javax.swing.JScrollPane();
93- jTxt404msg = new javax.swing.JTextArea();
94- jPanelRtn = new javax.swing.JPanel();
95- jScrollPaneRtn = new javax.swing.JScrollPane();
96- jTxtRtn = new javax.swing.JTextArea();
97- jMenuBar1 = new javax.swing.JMenuBar();
98- jMenu1 = new javax.swing.JMenu();
99- jMenuLoad = new javax.swing.JMenuItem();
100- jMenuSave = new javax.swing.JMenuItem();
101- jMenu3 = new javax.swing.JMenu();
102- jMenuItem1 = new javax.swing.JMenuItem();
103- jMenu2 = new javax.swing.JMenu();
104-
105- jFileChooser1.setCurrentDirectory(null);
106- jFileChooser1.setDialogTitle("");
107-
108- jRadioButton1.setText("jRadioButton1");
109-
110- setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE);
111- setTitle("タグ検索");
112-
113- jLabel1.setText(" URL:");
114-
115- jBtnSearch.setText("検索");
116- jBtnSearch.addActionListener(new java.awt.event.ActionListener() {
117- public void actionPerformed(java.awt.event.ActionEvent evt) {
118- jBtnSearchActionPerformed(evt);
119- }
120- });
121-
122- jPanelTab1.setBorder(javax.swing.BorderFactory.createTitledBorder("検索情報"));
123-
124- jTable1.setModel(sdatatblmodel);
125- jTable1.setSelectionMode(javax.swing.ListSelectionModel.SINGLE_SELECTION);
126- jTable1.getTableHeader().setReorderingAllowed(false);
127- jScrollPane1.setViewportView(jTable1);
128-
129- jBtnRowIns.setText("行挿入");
130- jBtnRowIns.addActionListener(new java.awt.event.ActionListener() {
131- public void actionPerformed(java.awt.event.ActionEvent evt) {
132- jBtnRowInsActionPerformed(evt);
133- }
134- });
135-
136- jBtnRowDel.setText("行削除");
137- jBtnRowDel.addActionListener(new java.awt.event.ActionListener() {
138- public void actionPerformed(java.awt.event.ActionEvent evt) {
139- jBtnRowDelActionPerformed(evt);
140- }
141- });
142-
143- jBtnRowCpy.setText("行コピー");
144- jBtnRowCpy.addActionListener(new java.awt.event.ActionListener() {
145- public void actionPerformed(java.awt.event.ActionEvent evt) {
146- jBtnRowCpyActionPerformed(evt);
147- }
148- });
149-
150- javax.swing.GroupLayout jPanelTab1Layout = new javax.swing.GroupLayout(jPanelTab1);
151- jPanelTab1.setLayout(jPanelTab1Layout);
152- jPanelTab1Layout.setHorizontalGroup(
153- jPanelTab1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
154- .addGroup(jPanelTab1Layout.createSequentialGroup()
155- .addContainerGap(javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
156- .addComponent(jBtnRowCpy)
157- .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
158- .addComponent(jBtnRowDel)
159- .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
160- .addComponent(jBtnRowIns))
161- .addComponent(jScrollPane1, javax.swing.GroupLayout.PREFERRED_SIZE, 0, Short.MAX_VALUE)
162- );
163- jPanelTab1Layout.setVerticalGroup(
164- jPanelTab1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
165- .addGroup(jPanelTab1Layout.createSequentialGroup()
166- .addComponent(jScrollPane1, javax.swing.GroupLayout.DEFAULT_SIZE, 173, Short.MAX_VALUE)
167- .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
168- .addGroup(jPanelTab1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE)
169- .addComponent(jBtnRowDel)
170- .addComponent(jBtnRowIns)
171- .addComponent(jBtnRowCpy)))
172- );
173-
174- jTabbedPane1.addTab("キー設定", jPanelTab1);
175-
176- jPanelTab2.setBorder(javax.swing.BorderFactory.createTitledBorder("メッセージ"));
177-
178- jScrollPaneLabel.setHorizontalScrollBarPolicy(javax.swing.ScrollPaneConstants.HORIZONTAL_SCROLLBAR_NEVER);
179- jScrollPaneLabel.setVerticalScrollBarPolicy(javax.swing.ScrollPaneConstants.VERTICAL_SCROLLBAR_NEVER);
180-
181- jTxtLabel.setEditable(false);
182- jTxtLabel.setBackground(java.awt.Color.lightGray);
183- jTxtLabel.setColumns(20);
184- jTxtLabel.setFont(new java.awt.Font("MS UI Gothic", 0, 12)); // NOI18N
185- jTxtLabel.setLineWrap(true);
186- jTxtLabel.setRows(2);
187- jTxtLabel.setText("取得ページに以下のメッセージが含まれていた場合、対象データが取得出来なかったと通知します。");
188- jTxtLabel.setAutoscrolls(false);
189- jTxtLabel.setBorder(null);
190- jTxtLabel.setCursor(new java.awt.Cursor(java.awt.Cursor.DEFAULT_CURSOR));
191- jTxtLabel.setFocusable(false);
192- jTxtLabel.setHighlighter(null);
193- jTxtLabel.setKeymap(null);
194- jTxtLabel.setOpaque(false);
195- jTxtLabel.setRequestFocusEnabled(false);
196- jTxtLabel.setVerifyInputWhenFocusTarget(false);
197- jScrollPaneLabel.setViewportView(jTxtLabel);
198-
199- jTxt404msg.setColumns(20);
200- jTxt404msg.setRows(3);
201- jTxt404msg.setText("一致する銘柄は見つかりませんでした\n");
202- jScrollPane404msg.setViewportView(jTxt404msg);
203-
204- javax.swing.GroupLayout jPanelTab2Layout = new javax.swing.GroupLayout(jPanelTab2);
205- jPanelTab2.setLayout(jPanelTab2Layout);
206- jPanelTab2Layout.setHorizontalGroup(
207- jPanelTab2Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
208- .addComponent(jScrollPane404msg)
209- .addGroup(javax.swing.GroupLayout.Alignment.TRAILING, jPanelTab2Layout.createSequentialGroup()
210- .addContainerGap()
211- .addComponent(jScrollPaneLabel, javax.swing.GroupLayout.DEFAULT_SIZE, 359, Short.MAX_VALUE)
212- .addContainerGap())
213- );
214- jPanelTab2Layout.setVerticalGroup(
215- jPanelTab2Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
216- .addGroup(jPanelTab2Layout.createSequentialGroup()
217- .addComponent(jScrollPaneLabel, javax.swing.GroupLayout.PREFERRED_SIZE, 38, javax.swing.GroupLayout.PREFERRED_SIZE)
218- .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED)
219- .addComponent(jScrollPane404msg))
220- );
221-
222- jTabbedPane1.addTab("結果無し判定", jPanelTab2);
223-
224- jPanelRtn.setBorder(javax.swing.BorderFactory.createTitledBorder("検索結果"));
225-
226- jTxtRtn.setColumns(20);
227- jTxtRtn.setRows(5);
228- jScrollPaneRtn.setViewportView(jTxtRtn);
229-
230- javax.swing.GroupLayout jPanelRtnLayout = new javax.swing.GroupLayout(jPanelRtn);
231- jPanelRtn.setLayout(jPanelRtnLayout);
232- jPanelRtnLayout.setHorizontalGroup(
233- jPanelRtnLayout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
234- .addComponent(jScrollPaneRtn)
235- );
236- jPanelRtnLayout.setVerticalGroup(
237- jPanelRtnLayout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
238- .addComponent(jScrollPaneRtn, javax.swing.GroupLayout.DEFAULT_SIZE, 163, Short.MAX_VALUE)
239- );
240-
241- jMenu1.setText("ファイル");
242-
243- jMenuLoad.setText("LOAD");
244- jMenuLoad.addActionListener(new java.awt.event.ActionListener() {
245- public void actionPerformed(java.awt.event.ActionEvent evt) {
246- jMenuLoadActionPerformed(evt);
247- }
248- });
249- jMenu1.add(jMenuLoad);
250-
251- jMenuSave.setText("SAVE");
252- jMenuSave.addActionListener(new java.awt.event.ActionListener() {
253- public void actionPerformed(java.awt.event.ActionEvent evt) {
254- jMenuSaveActionPerformed(evt);
255- }
256- });
257- jMenu1.add(jMenuSave);
258-
259- jMenuBar1.add(jMenu1);
260-
261- jMenu3.setText("ツール");
262-
263- jMenuItem1.setText("ブラウザで表示");
264- jMenuItem1.addActionListener(new java.awt.event.ActionListener() {
265- public void actionPerformed(java.awt.event.ActionEvent evt) {
266- jMenuItem1ActionPerformed(evt);
267- }
268- });
269- jMenu3.add(jMenuItem1);
270-
271- jMenuBar1.add(jMenu3);
272-
273- jMenu2.setText("検索");
274- jMenu2.addMouseListener(new java.awt.event.MouseAdapter() {
275- public void mouseClicked(java.awt.event.MouseEvent evt) {
276- jMenu2MouseClicked(evt);
277- }
278- });
279- jMenuBar1.add(jMenu2);
280-
281- setJMenuBar(jMenuBar1);
282-
283- javax.swing.GroupLayout layout = new javax.swing.GroupLayout(getContentPane());
284- getContentPane().setLayout(layout);
285- layout.setHorizontalGroup(
286- layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
287- .addComponent(jPanelRtn, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
288- .addGroup(layout.createSequentialGroup()
289- .addComponent(jLabel1)
290- .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
291- .addComponent(jTxtUrl)
292- .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
293- .addComponent(jBtnSearch))
294- .addComponent(jTabbedPane1)
295- );
296- layout.setVerticalGroup(
297- layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
298- .addGroup(layout.createSequentialGroup()
299- .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE)
300- .addComponent(jLabel1)
301- .addComponent(jTxtUrl, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE)
302- .addComponent(jBtnSearch))
303- .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
304- .addComponent(jTabbedPane1, javax.swing.GroupLayout.PREFERRED_SIZE, 250, javax.swing.GroupLayout.PREFERRED_SIZE)
305- .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
306- .addComponent(jPanelRtn, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
307- .addContainerGap())
308- );
309-
310- pack();
311- }// </editor-fold>//GEN-END:initComponents
312-
313- private void jBtnRowInsActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowInsActionPerformed
314- int SelectedRow = jTable1.getSelectedRow();
315- SearchData sdata = new SearchData();
316- if(SelectedRow >= 0) {
317- sdatatblmodel.insertRow(SelectedRow, sdata);
318- } else {
319- sdatatblmodel.addRow(sdata);
320- }
321- }//GEN-LAST:event_jBtnRowInsActionPerformed
322-
323- private void jBtnRowDelActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowDelActionPerformed
324- int SelectedRow = jTable1.getSelectedRow();
325- if(!(SelectedRow < 0)) {
326- sdatatblmodel.removeRow(SelectedRow);
327- }
328- }//GEN-LAST:event_jBtnRowDelActionPerformed
329-
330- private void jMenuLoadActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuLoadActionPerformed
331- jFileChooser1.setDialogTitle("読込");
332- int selected = jFileChooser1.showOpenDialog(this);
333- if (selected == JFileChooser.APPROVE_OPTION) {
334- File file = jFileChooser1.getSelectedFile();
335- xmlwriter.load(file);
336- jTxtUrl.setText(xmlwriter.getTestUrl());
337- xmlwriter.getSdata();
338- sdatatblmodel.setRowCount(0);
339- for(int i = 0; i < SearchData.size(); i++) {
340- SearchData sdata = SearchData.get(i);
341- sdatatblmodel.addRow(sdata);
342- }
343- }
344- }//GEN-LAST:event_jMenuLoadActionPerformed
345-
346- private void jMenuSaveActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuSaveActionPerformed
347- jFileChooser1.setDialogTitle("保存");
348- int selected = jFileChooser1.showSaveDialog(this);
349- if (selected == JFileChooser.APPROVE_OPTION) {
350- File file = jFileChooser1.getSelectedFile();
351- xmlwriter.setTestUrl(jTxtUrl.getText());
352-
353- SearchData.clear();
354- for(int row = 0; row < sdatatblmodel.getRowCount(); row++) {
355- SearchData sdata = sdatatblmodel.getSearchData(row);
356- SearchData.add(sdata);
357- }
358- xmlwriter.setSdata();
359- xmlwriter.save(file);
360- }
361- }//GEN-LAST:event_jMenuSaveActionPerformed
362-
363- private void jBtnRowCpyActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowCpyActionPerformed
364- int SelectedRow = jTable1.getSelectedRow();
365- if(SelectedRow >= 0) {
366- SearchData sdata = sdatatblmodel.getSearchData(SelectedRow);
367- sdatatblmodel.insertRow(SelectedRow, sdata);
368- }
369- }//GEN-LAST:event_jBtnRowCpyActionPerformed
370-
371- private void jMenuItem1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuItem1ActionPerformed
372- Desktop desktop = Desktop.getDesktop();
373- String uriString = jTxtUrl.getText();
374- try {
375- URI uri = new URI(uriString);
376- desktop.browse(uri);
377-
378- } catch (URISyntaxException | IOException ex) {
379- Logger.getLogger(HtmlSearch.class.getName()).log(Level.SEVERE, null, ex);
380- }
381- }//GEN-LAST:event_jMenuItem1ActionPerformed
382-
383- private void jMenu2MouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_jMenu2MouseClicked
384- Search_execution();
385- }//GEN-LAST:event_jMenu2MouseClicked
386-
387- private void jBtnSearchActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnSearchActionPerformed
388- Search_execution();
389- }//GEN-LAST:event_jBtnSearchActionPerformed
390-
391- /**
392- * 検索実行.
393- */
394- void Search_execution() {
395- jTxtRtn.setText(null);
396- HtmlParser par = new HtmlParser(jTxtUrl.getText());
397-
398- // データ無し(404)判定
399- String strdata = par.getStringPageData();
400- if(strdata == null) {
401- jTxtRtn.append("読込みページがありません");
402- return;
403- }
404- String text = jTxt404msg.getText();
405- String[] strsearch = text.split("\n");
406- for(String strsearch1 : strsearch) {
407- if(strdata.contains(strsearch1)) {
408- jTxtRtn.append(strsearch1);
409- return;
410- }
411- }
412-
413- // 検索結果
414- for(int row = 0; row < sdatatblmodel.getRowCount(); row++) {
415- SearchData sdata = sdatatblmodel.getSearchData(row);
416- String ans = sdata.getitem();
417- String rtn = par.search(sdata);
418- jTxtRtn.append(ans + "\t" + rtn + "\n");
419- }
420-
421- jTxtRtn.setCaretPosition(0);
422- }
423-
424- /**
425- * @param args the command line arguments
426- */
427- public static void main(String args[]) {
428- /* Set the Nimbus look and feel */
429- //<editor-fold defaultstate="collapsed" desc=" Look and feel setting code (optional) ">
430- /* If Nimbus (introduced in Java SE 6) is not available, stay with the default look and feel.
431- * For details see http://download.oracle.com/javase/tutorial/uiswing/lookandfeel/plaf.html
432- */
433- try {
434- for (javax.swing.UIManager.LookAndFeelInfo info : javax.swing.UIManager.getInstalledLookAndFeels()) {
435- if ("Nimbus".equals(info.getName())) {
436- javax.swing.UIManager.setLookAndFeel(info.getClassName());
437- break;
438- }
439- }
440- } catch (ClassNotFoundException
441- | InstantiationException
442- | IllegalAccessException
443- | javax.swing.UnsupportedLookAndFeelException ex) {
444- java.util.logging.Logger.getLogger(HtmlSearch.class.getName()).log(java.util.logging.Level.SEVERE, null, ex);
445- }
446- //</editor-fold>
447-
448- /* Create and display the form */
449- java.awt.EventQueue.invokeLater(new Runnable() {
450- @Override
451- public void run() {
452- new HtmlSearch().setVisible(true);
453- }
454- });
455- }
456-
457- // Variables declaration - do not modify//GEN-BEGIN:variables
458- private javax.swing.JButton jBtnRowCpy;
459- private javax.swing.JButton jBtnRowDel;
460- private javax.swing.JButton jBtnRowIns;
461- private javax.swing.JButton jBtnSearch;
462- private javax.swing.JFileChooser jFileChooser1;
463- private javax.swing.JLabel jLabel1;
464- private javax.swing.JMenu jMenu1;
465- private javax.swing.JMenu jMenu2;
466- private javax.swing.JMenu jMenu3;
467- private javax.swing.JMenuBar jMenuBar1;
468- private javax.swing.JMenuItem jMenuItem1;
469- private javax.swing.JMenuItem jMenuLoad;
470- private javax.swing.JMenuItem jMenuSave;
471- private javax.swing.JPanel jPanelRtn;
472- private javax.swing.JPanel jPanelTab1;
473- private javax.swing.JPanel jPanelTab2;
474- private javax.swing.JRadioButton jRadioButton1;
475- private javax.swing.JScrollPane jScrollPane1;
476- private javax.swing.JScrollPane jScrollPane404msg;
477- private javax.swing.JScrollPane jScrollPaneLabel;
478- private javax.swing.JScrollPane jScrollPaneRtn;
479- private javax.swing.JTabbedPane jTabbedPane1;
480- private javax.swing.JTable jTable1;
481- private javax.swing.JTextArea jTxt404msg;
482- private javax.swing.JTextArea jTxtLabel;
483- private javax.swing.JTextArea jTxtRtn;
484- private javax.swing.JTextField jTxtUrl;
485- // End of variables declaration//GEN-END:variables
486-}
487-
488-class SearchDataTableModel extends DefaultTableModel {
489- /* ---------------------------------------------------------------------- *
490- * データ属性
491- * ---------------------------------------------------------------------- */
492- public String[] columnName = {
493- /* 0 */ "項目名",
494- /* 1 */ "タグ",
495- /* 2 */ "ID",
496- /* 3 */ "クラス",
497- /* 4 */ "位置",
498- /* 5 */ "抽出条件"
499- };
500-
501- public Class[] columnClass = {
502- /* 0 */ String.class,
503- /* 1 */ String.class,
504- /* 2 */ String.class,
505- /* 3 */ String.class,
506- /* 4 */ String.class,
507- /* 5 */ String.class
508- };
509-
510- int column_item = 0;
511- int column_htmltag = 1;
512- int column_htmlid = 2;
513- int column_htmlclass = 3;
514- int column_around = 4;
515- int column_regexp = 5;
516-
517- /* ---------------------------------------------------------------------- *
518- * 処理
519- * ---------------------------------------------------------------------- */
520- @Override
521- public String getColumnName(int modelIndex) {
522- return columnName[modelIndex];
523- }
524-
525- @Override
526- public Class<?> getColumnClass(int modelIndex) {
527- return columnClass[modelIndex];
528- }
529-
530- @Override
531- public int getColumnCount() {
532- return columnName.length;
533- }
534-
535- /* ---------------------------------------------------------------------- */
536-
537- public SearchData getSearchData(int row) {
538- SearchData sdata = new SearchData();
539- sdata.setitem(String.valueOf(getValueAt(row, column_item)));
540- sdata.setHtmltag(String.valueOf(getValueAt(row, column_htmltag)));
541- sdata.setHtmlid(String.valueOf(getValueAt(row, column_htmlid)));
542- sdata.setHtmlclass(String.valueOf(getValueAt(row, column_htmlclass)));
543- sdata.setaround(String.valueOf(getValueAt(row, column_around)));
544- sdata.setregexp(String.valueOf(getValueAt(row, column_regexp)));
545- return sdata;
546- }
547-
548- public void addRow(SearchData sdata) {
549- addRow(getObjdata(sdata));
550- }
551-
552- public void insertRow(int row, SearchData sdata) {
553- insertRow(row, getObjdata(sdata));
554- }
555-
556- private Object[] getObjdata(SearchData sdata) {
557- Object[] obj = new Object[] {
558- sdata.getitem(),
559- sdata.getHtmltag(),
560- sdata.getHtmlid(),
561- sdata.getHtmlclass(),
562- sdata.getaround(),
563- sdata.getregexp()
564- };
565- return obj;
566- }
567-
568-}
\ No newline at end of file
Deleted: svn:keywords
## -1 +0,0 ##
-Id
\ No newline at end of property
--- trunk/webScraping/src/WebScraping/core/AttributeData.java (revision 144)
+++ trunk/webScraping/src/WebScraping/core/AttributeData.java (nonexistent)
@@ -1,164 +0,0 @@
1-/*
2- * Copyright (C) 2014 kgto.
3- *
4- * This library is free software; you can redistribute it and/or
5- * modify it under the terms of the GNU Lesser General Public
6- * License as published by the Free Software Foundation; either
7- * version 2.1 of the License, or (at your option) any later version.
8- *
9- * This library is distributed in the hope that it will be useful,
10- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12- * Lesser General Public License for more details.
13- *
14- * You should have received a copy of the GNU Lesser General Public
15- * License along with this library; if not, write to the Free Software
16- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17- * MA 02110-1301 USA
18- */
19-/*
20- * $Id$
21- */
22-
23-package webScraping.core;
24-
25-import java.util.ArrayList;
26-import java.util.Enumeration;
27-import javax.swing.text.MutableAttributeSet;
28-import javax.swing.text.html.HTML;
29-
30-/**
31- * HTMLタグの属性情報を保持する.
32- * @author kgto
33- */
34-public class AttributeData {
35-
36- public AttributeData() {
37- AttrList = new ArrayList();
38- size = 0;
39- }
40-
41- /**
42- * 属性情報追加.
43- * @param tag
44- * @param attr
45- */
46- public void add(HTML.Tag tag, MutableAttributeSet attr) {
47-
48- int tagcount = tagcnt(tag);
49- ++tagcount;
50-
51- Enumeration e = attr.getAttributeNames();
52- while(e.hasMoreElements()) {
53- Object obj = e.nextElement();
54-
55- AttrData a = new AttrData();
56- a.tag = tag;
57- a.count = tagcount;
58- a.attrname = obj.toString();
59- a.attrvalue = attr.getAttribute(obj).toString();
60-
61- AttrList.add(a);
62- size = AttrList.size();
63- }
64-
65- }
66-
67- /**
68- * 属性情報検索.
69- * @param tag
70- * @param attrname
71- * @param attrvalue
72- * @return
73- */
74- public boolean search(HTML.Tag tag, String attrname, String attrvalue) {
75- boolean ret = false;
76- for (Object AttrList1 : AttrList) {
77- AttrData a = (AttrData)AttrList1;
78- if(a.tag == tag) {
79- //if(a.attrname.equals(attrname) && a.attrvalue.equals(attrvalue)) {
80- if(a.attrname.equals(attrname) && a.attrvalue.startsWith(attrvalue)) {
81- ret = true;
82- }
83- }
84- }
85- return ret;
86- }
87-
88- public boolean searchId(HTML.Tag tag, String attrvalue) {
89- return search(tag, "id", attrvalue);
90- }
91-
92- public boolean searchClass(HTML.Tag tag, String attrvalue) {
93- return search(tag, "class", attrvalue);
94- }
95-
96- /**
97- * 属性の値を取得する.
98- * @param tag
99- * @param attrname
100- * @return
101- */
102- public ArrayList getvale(HTML.Tag tag, String attrname) {
103- ArrayList ret = new ArrayList();
104- for (Object AttrList1 : AttrList) {
105- AttrData a = (AttrData)AttrList1;
106- if(a.tag == tag) {
107- if(a.attrname.equals(attrname)) {
108- ret.add(a.attrvalue);
109- }
110- }
111- }
112- return ret;
113- }
114-
115- /**
116- * 引数で渡されたTAGの最新カウント数を返す.
117- * @param tag
118- * @return
119- */
120- private int tagcnt(HTML.Tag tag) {
121- int wkcnt = 0;
122- for (Object AttrList1 : AttrList) {
123- AttrData a = (AttrData)AttrList1;
124- if(a.tag == tag) {
125- if(wkcnt < a.count) {
126- wkcnt = a.count;
127- }
128- }
129- }
130- return wkcnt;
131- }
132-
133- // AttrList の内容を返すメソッド
134- public HTML.Tag gettag(int i) {
135- AttrData a = (AttrData)AttrList.get(i);
136- return a.tag;
137- }
138-
139- public int getcount(int i) {
140- AttrData a = (AttrData)AttrList.get(i);
141- return a.count;
142- }
143-
144- public String getattrname(int i) {
145- AttrData a = (AttrData)AttrList.get(i);
146- return a.attrname;
147- }
148-
149- public String getattrvalue(int i) {
150- AttrData a = (AttrData)AttrList.get(i);
151- return a.attrvalue;
152- }
153-
154- // フィールド変数
155- public class AttrData {
156- public HTML.Tag tag;
157- public int count;
158- public String attrname;
159- public String attrvalue;
160- }
161- public ArrayList AttrList;
162- public int size; // AttrListのサイズ
163-
164-}
Deleted: svn:keywords
## -1 +0,0 ##
-Id
\ No newline at end of property
--- trunk/webScraping/src/WebScraping/core/HtmlParserCallback.java (revision 144)
+++ trunk/webScraping/src/WebScraping/core/HtmlParserCallback.java (nonexistent)
@@ -1,222 +0,0 @@
1-/*
2- * Copyright (C) 2014 kgto.
3- *
4- * This library is free software; you can redistribute it and/or
5- * modify it under the terms of the GNU Lesser General Public
6- * License as published by the Free Software Foundation; either
7- * version 2.1 of the License, or (at your option) any later version.
8- *
9- * This library is distributed in the hope that it will be useful,
10- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12- * Lesser General Public License for more details.
13- *
14- * You should have received a copy of the GNU Lesser General Public
15- * License along with this library; if not, write to the Free Software
16- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17- * MA 02110-1301 USA
18- */
19-/*
20- * $Id$
21- */
22-
23-package webScraping.core;
24-
25-import java.util.ArrayList;
26-import java.util.HashMap;
27-import javax.swing.text.MutableAttributeSet;
28-import javax.swing.text.html.HTML;
29-import javax.swing.text.html.HTMLEditorKit;
30-
31-/**
32- * HTMLパーサ部品.
33- * @author kgto
34- */
35-class HtmlParserCallback extends HTMLEditorKit.ParserCallback {
36- /* ---------------------------------------------------------------------- *
37- * フィールド
38- * ---------------------------------------------------------------------- */
39- // Tag毎の階層
40- HashMap<HTML.Tag,Integer> tagMap = new HashMap<>();
41-
42- // serach key 情報
43- String keytag;
44- String keyid;
45- String keyclass;
46-
47- // serach key と一致時の情報退避
48- int bufCount = 0;
49- HTML.Tag bufTag = null;
50- // serach key と一致時の情報格納ワーク
51- StringBuilder bufText;
52-
53- // serach key と一致時のデータ一覧
54- ArrayList sData;
55-
56- // 属性データ
57- AttributeData attrdata;
58-
59- /* ---------------------------------------------------------------------- *
60- * コンストラクタ
61- * ---------------------------------------------------------------------- */
62- protected HtmlParserCallback(SearchData skey) {
63-
64- // キー情報展開
65- keytag = skey.getHtmltag();
66- keyid = skey.getHtmlid();
67- keyclass = skey.getHtmlclass();
68-
69- sData = new ArrayList();
70- }
71-
72- /* ---------------------------------------------------------------------- *
73- * Getter
74- * ---------------------------------------------------------------------- */
75- ArrayList getrtnData() {
76- return this.sData;
77- }
78-
79- /* ---------------------------------------------------------------------- *
80- * メソッド
81- * ---------------------------------------------------------------------- */
82- @Override
83- public void handleStartTag(HTML.Tag tag, MutableAttributeSet attr, int pos){
84- // Tag毎の階層を保持
85- int count = 1;
86- if(tagMap.containsKey(tag)) {
87- count = tagMap.get(tag);
88- count++;
89- }
90- tagMap.put(tag, count);
91-
92- // 属性解析
93- AttributeData handleStartattrdata = new AttributeData();
94- handleStartattrdata.add(tag, attr);
95-
96- DebugProcess.htmlinfo(tag, attr, "handleStartTag", count);
97-
98- if(bufCount == 0) {
99- if(tag.toString().equals(keytag)) {
100- //if(serachAttribute(attr)) {
101- if(serachAttribute(tag, handleStartattrdata)) {
102- bufCount = count;
103- bufTag = tag;
104- attrdata = new AttributeData();
105- bufText = new StringBuilder();
106- }
107- }
108- }
109- if(bufCount > 0) {
110- attrdata.add(tag, attr);
111- }
112- }
113-
114- @Override
115- public void handleEndTag(HTML.Tag tag, int pos){
116- // Tag毎の階層を取得
117- int count = 0;
118- if(tagMap.containsKey(tag)) {
119- count = tagMap.get(tag);
120- }
121-
122- DebugProcess.htmlinfo(tag, null, "handleEndTag", count);
123-
124- if(tag.equals(bufTag) && count <= bufCount) {
125-
126- // 溜め込んだ一致情報をリストへ格納
127- sData.add(bufText.toString());
128-
129- // 退避したserach keyとの一致情報クリア
130- bufCount = 0;
131- bufTag = null;
132- bufText = null;
133- }
134-
135- // Tag毎の階層減算
136- tagMap.put(tag, --count);
137- }
138-
139- @Override
140- public void handleText(char[] data, int pos){
141-
142- DebugProcess.htmlinfo(data, "handleText");
143-
144- String splitchar = "\t";
145- //制御文字の削除
146- // &nbsp; 0xa0
147- StringBuilder buf = new StringBuilder();
148- for(int i = 0; i < data.length; i++) {
149- if(data[i] > 0x1f && data[i] != 0x7f && data[i] != 0xa0) {
150- buf.append(data[i]);
151- }
152- }
153- if(bufCount > 0) {
154- if(bufText.length() > 0) {
155- bufText.append(splitchar);
156- }
157- bufText.append(buf.toString());
158- }
159- }
160-
161- @Override
162- public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attr, int pos){
163- if(bufCount > 0) {
164- attrdata.add(tag, attr);
165- }
166- DebugProcess.htmlinfo(tag, attr, "handleSimpleTag", 0);
167- }
168-
169- /**
170- * ページ内のID/CLASS値と検索キーを比較する.
171- * @param attr ページのMutableAttributeSet
172- * @return boolean 検索キーと一致の時、true
173- */
174- boolean serachAttribute(MutableAttributeSet attr) {
175- String currentID = (String)attr.getAttribute(HTML.Attribute.ID);
176- String currentClass = (String)attr.getAttribute(HTML.Attribute.CLASS);
177-
178- if(keyid.isEmpty() == false && keyclass.isEmpty() == false) {
179- if(keyid.equals(currentID) && keyclass.equals(currentClass)) {
180- return true;
181- }
182- }
183-
184- if(keyid.isEmpty() == false) {
185- if(keyid.equals(currentID)) {
186- return true;
187- }
188- }
189-
190- if(keyclass.isEmpty() == false) {
191- if(keyclass.equals(currentClass)) {
192- return true;
193- }
194- }
195-
196- return false;
197- }
198-
199- /**
200- * ページ内のID/CLASS値と検索キーを比較する.
201- * @param tag
202- * @param attrdata
203- * @return boolean 検索キーと一致の時、true
204- */
205- boolean serachAttribute(HTML.Tag tag, AttributeData attrdata) {
206- // ID と CLASS の両方にキー入力有りの場合
207- if(keyid.isEmpty() == false && keyclass.isEmpty() == false) {
208- if(attrdata.searchId(tag, keyid) && attrdata.searchClass(tag, keyclass)) {
209- return true;
210- }
211- }
212- // ID のキーチェック
213- if(keyid.isEmpty() == false) {
214- return attrdata.searchId(tag, keyid);
215- }
216- // CLASS のキーチェック
217- if(keyclass.isEmpty() == false) {
218- return attrdata.searchClass(tag, keyclass);
219- }
220- return false;
221- }
222-}
Deleted: svn:keywords
## -1 +0,0 ##
-Id
\ No newline at end of property
--- trunk/webScraping/src/WebScraping/core/SearchData.java (revision 144)
+++ trunk/webScraping/src/WebScraping/core/SearchData.java (nonexistent)
@@ -1,200 +0,0 @@
1-/*
2- * Copyright (C) 2014 kgto.
3- *
4- * This library is free software; you can redistribute it and/or
5- * modify it under the terms of the GNU Lesser General Public
6- * License as published by the Free Software Foundation; either
7- * version 2.1 of the License, or (at your option) any later version.
8- *
9- * This library is distributed in the hope that it will be useful,
10- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12- * Lesser General Public License for more details.
13- *
14- * You should have received a copy of the GNU Lesser General Public
15- * License along with this library; if not, write to the Free Software
16- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17- * MA 02110-1301 USA
18- */
19-/*
20- * $Id$
21- */
22-
23-package webScraping.core;
24-
25-import java.util.ArrayList;
26-
/**
 * Tag search data: one search condition made of an item name, an HTML tag,
 * id and class, a position and an extraction pattern.
 *
 * <p>All properties are kept non-null; setting {@code null} stores an empty
 * string. The class also keeps a static, process-wide list of search data.
 * @author kgto
 */
public class SearchData {
    /* ---------------------------------------------------------------------- *
     * Fields
     * ---------------------------------------------------------------------- */
    private String item;
    private String htmltag;
    private String htmlid;
    private String htmlclass;
    private String around;
    private String regexp;

    /* ---------------------------------------------------------------------- *
     * Static part
     * ---------------------------------------------------------------------- */
    /** Description of one property: value type, internal name, display name. */
    public static class Context {
        public Class<?> columnClass;
        public String columnName;
        public String columnNameJp;

        public Context(Class<?> columnClass, String columnName, String columnNameJp) {
            this.columnClass  = columnClass;
            this.columnName   = columnName;
            this.columnNameJp = columnNameJp;
        }
    }

    /** Property descriptions, in the same order as {@link #getObjData()}. */
    public static final Context[] context = {
        /* 0 */ new Context(String.class ,   "item"         , "項目名"),
        /* 1 */ new Context(String.class ,   "htmltag"      , "タグ"),
        /* 2 */ new Context(String.class ,   "htmlid"       , "ID"),
        /* 3 */ new Context(String.class ,   "htmlclass"    , "クラス"),
        /* 4 */ new Context(String.class ,   "around"       , "位置"),
        /* 5 */ new Context(String.class ,   "regexp"       , "抽出条件")
    };

    /* ---------------------------------------------------------------------- */
    // Shared list of search data
    private static final ArrayList<SearchData> slist = new ArrayList<>();

    /** Creates a search data object from the given values and appends it to the shared list. */
    public static void addSearchData(
            String item, String htmltag, String htmlid,
            String htmlclass, String around, String regexp) {
        SearchData sdat = new SearchData();
        sdat.setitem(item);
        sdat.setHtmltag(htmltag);
        sdat.setHtmlid(htmlid);
        sdat.setHtmlclass(htmlclass);
        sdat.setaround(around);
        sdat.setregexp(regexp);

        slist.add(sdat);
    }

    /** Appends the given object to the shared list. */
    public static void add(SearchData sdat) {
        slist.add(sdat);
    }

    /** @return the i-th element of the shared list */
    public static SearchData get(int i) {
        return slist.get(i);
    }

    /** @return number of elements in the shared list */
    public static int size() {
        return slist.size();
    }

    /** Removes and returns the element at {@code index} of the shared list. */
    public static SearchData remove(int index) {
        return slist.remove(index);
    }

    /** Empties the shared list. */
    public static void clear() {
        slist.clear();
    }

    /* ---------------------------------------------------------------------- *
     * Constructors
     * ---------------------------------------------------------------------- */
    /** Creates search data with every property set to the empty string. */
    public SearchData() {
        initialize();
    }

    /**
     * Copy constructor.
     * @param dat object whose property values are copied
     */
    public SearchData(SearchData dat) {
        this.item      = nullToEmpty(dat.getitem());
        this.htmltag   = nullToEmpty(dat.getHtmltag());
        this.htmlid    = nullToEmpty(dat.getHtmlid());
        this.htmlclass = nullToEmpty(dat.getHtmlclass());
        this.around    = nullToEmpty(dat.getaround());
        this.regexp    = nullToEmpty(dat.getregexp());
    }

    /* ---------------------------------------------------------------------- *
     * Setters (null is stored as "")
     * ---------------------------------------------------------------------- */
    public void setitem(String item) {
        this.item = nullToEmpty(item);
    }

    public void setHtmltag(String htmltag) {
        this.htmltag = nullToEmpty(htmltag);
    }

    public void setHtmlid(String htmlid) {
        this.htmlid = nullToEmpty(htmlid);
    }

    public void setHtmlclass(String htmlclass) {
        this.htmlclass = nullToEmpty(htmlclass);
    }

    public void setaround(String around) {
        this.around = nullToEmpty(around);
    }

    public void setregexp(String regexp) {
        this.regexp = nullToEmpty(regexp);
    }

    /* ---------------------------------------------------------------------- *
     * Getters
     * ---------------------------------------------------------------------- */
    public String getitem() {
        return item;
    }

    public String getHtmltag() {
        return htmltag;
    }

    public String getHtmlid() {
        return htmlid;
    }

    public String getHtmlclass() {
        return htmlclass;
    }

    public String getaround() {
        return around;
    }

    public String getregexp() {
        return regexp;
    }

    /* ---------------------------------------------------------------------- *
     * Methods
     * ---------------------------------------------------------------------- */
    /**
     * Resets every property to the empty string.
     */
    public final void initialize() {
        this.item = "";
        this.htmltag = "";
        this.htmlid = "";
        this.htmlclass = "";
        this.around = "";
        this.regexp = "";
    }

    /**
     * @return the property values as an array, in the order of {@link #context}
     */
    public Object[] getObjData() {
        return new Object[] {
            /* 0 */ getitem(),      // item name
            /* 1 */ getHtmltag(),   // tag
            /* 2 */ getHtmlid(),    // id
            /* 3 */ getHtmlclass(), // class
            /* 4 */ getaround(),    // position
            /* 5 */ getregexp()     // extraction pattern
        };
    }

    /** Keeps the "properties are never null" invariant set up by initialize(). */
    private static String nullToEmpty(String value) {
        return (value == null) ? "" : value;
    }

}
Deleted: svn:keywords
## -1 +0,0 ##
-Id
\ No newline at end of property
--- trunk/webScraping/src/WebScraping/core/HtmlParser.java (revision 144)
+++ trunk/webScraping/src/WebScraping/core/HtmlParser.java (nonexistent)
@@ -1,273 +0,0 @@
1-/*
2- * Copyright (C) 2014 kgto.
3- *
4- * This library is free software; you can redistribute it and/or
5- * modify it under the terms of the GNU Lesser General Public
6- * License as published by the Free Software Foundation; either
7- * version 2.1 of the License, or (at your option) any later version.
8- *
9- * This library is distributed in the hope that it will be useful,
10- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12- * Lesser General Public License for more details.
13- *
14- * You should have received a copy of the GNU Lesser General Public
15- * License along with this library; if not, write to the Free Software
16- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17- * MA 02110-1301 USA
18- */
19-/*
20- * $Id$
21- */
22-
23-package webScraping.core;
24-
25-import java.io.*;
26-import java.net.*;
27-import java.util.ArrayList;
28-import java.util.logging.Level;
29-import java.util.logging.Logger;
30-import java.util.regex.Matcher;
31-import java.util.regex.Pattern;
32-import javax.swing.text.html.parser.ParserDelegator;
33-
34-/**
35- * HTMLパーサ.
36- * @author kgto
37- */
38-public class HtmlParser {
39- /* ---------------------------------------------------------------------- *
40- * フィールド
41- * ---------------------------------------------------------------------- */
42- URL url;
43- String pageData;
44- ArrayList sData;
45-
46- // 作業ワーク
47- private String htmltag;
48- private String htmlid;
49- private String htmlclass;
50-
51- /* ---------------------------------------------------------------------- *
52- * コンストラクタ
53- * ---------------------------------------------------------------------- */
54- public HtmlParser(URL UrlAdress) {
55- DebugProcess.debuglog_set();
56- this.url = UrlAdress;
57- getPageData();
58- }
59-
60- public HtmlParser(String UrlAdress) {
61- DebugProcess.debuglog_set();
62- try {
63- url = new URL(UrlAdress);
64- getPageData();
65-
66- } catch (MalformedURLException ex) {
67- Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex);
68- }
69- }
70-
71- public HtmlParser() {
72- DebugProcess.debuglog_set();
73- url = null;
74- }
75-
76- /* ---------------------------------------------------------------------- *
77- * Getter
78- * ---------------------------------------------------------------------- */
79- public String getStringPageData() {
80- return pageData;
81- }
82-
83- /* ---------------------------------------------------------------------- *
84- * Setter
85- * ---------------------------------------------------------------------- */
86- public void seturl(URL UrlAdress) {
87- this.url = UrlAdress;
88- getPageData();
89- }
90-
91- /* ---------------------------------------------------------------------- *
92- * メソッド
93- * ---------------------------------------------------------------------- */
94- public void seturl(String UrlAdress) {
95- try {
96- url = new URL(UrlAdress);
97- getPageData();
98-
99- } catch (MalformedURLException ex) {
100- Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex);
101- }
102- }
103-
104- /**
105- * HTMLページ内検索.
106- * 検索キーとして渡されたタグ,ID,クラスから、対象となるタグを探し出し、
107- * around(タグ位置)として指定された箇所の文字列をregexp(正規表現)で指定された整形を
108- * 行った結果を返す。<br>
109- * aroundの初期値:0 検索キーとして未指定(未入力)の場合、最初(0)の文字列。<br>
110- * regexpが指定(入力)ありの場合、正規表現にて整形を行う。<br>
111- * 渡された検索キーに一致するタグが存在しなかった場合、NULLを返す。
112- * @param skey 検索キーデータ(SearchData)
113- * @return String 検索キーに一致するデータの文字列
114- */
115- public String search(SearchData skey) {
116-
117- // htmlページ内を検索
118- if(isHtmlkeyEq(skey) == false) {
119- searchPageData(skey);
120- }
121- /*
122- around 出現位置指定 入力有り:指定された位置の情報のみ返す。
123- 入力無し:取得した全ての情報を返す。
124- */
125- String regexp = skey.getregexp();
126- if(skey.getaround().length() > 0) {
127- int wkAround = Integer.parseInt(skey.getaround()); // 検索位置を数値変換
128- if(wkAround < sData.size()) {
129- String str = (String)sData.get(wkAround);
130- String rtn = RegularExpression(str, regexp);
131- return rtn;
132- }
133- } else {
134- StringBuilder strbuf = new StringBuilder();
135- for (Object sData1 : sData) {
136- String str = (String)sData1;
137- String rtn = RegularExpression(str, regexp);
138- if(strbuf.length() > 0) {
139- strbuf.append("\t");
140- }
141- strbuf.append(rtn);
142- }
143- return strbuf.toString();
144- }
145- return null;
146- }
147-
148- /**
149- * 直近のHTMLタグ/ID/CLASS値と引数の値を比較する.
150- * @param skey HTMLタグ/ID/CLASSが格納された検索キー
151- * @return boolean HTMLタグ/ID/CLASS値が一致する時、true
152- */
153- boolean isHtmlkeyEq(SearchData skey) {
154-
155- String stag = skey.getHtmltag();
156- String sid = skey.getHtmlid();
157- String sclass = skey.getHtmlclass();
158-
159- boolean rtn = true;
160-
161- // htmltag
162- if(htmltag == null) {
163- rtn = false;
164- } else {
165- if(htmltag.equals(stag) == false) {
166- rtn = false;
167- }
168- }
169-
170- // htmlid
171- if(htmlid == null) {
172- rtn = false;
173- } else {
174- if(htmlid.equals(sid) == false) {
175- rtn = false;
176- }
177- }
178-
179- // htmlclass
180- if(htmlclass == null) {
181- rtn = false;
182- } else {
183- if(htmlclass.equals(sclass) == false) {
184- rtn = false;
185- }
186- }
187-
188- if(!rtn) {
189- htmltag = stag;
190- htmlid = sid;
191- htmlclass = sclass;
192- }
193-
194- return rtn;
195- }
196-
197- /**
198- * 正規表現検索.
199- * @param strdata
200- * @param regexp
201- * @return
202- */
203- String RegularExpression(String strdata, String regexp) {
204- String expdata = null;
205-
206- //regexpのチェック
207- if(regexp.isEmpty()) {
208- expdata = strdata;
209- return expdata;
210- }
211-
212- //正規表現検索
213- Pattern ptn = Pattern.compile(regexp);
214- Matcher matchdata = ptn.matcher(strdata);
215- if (matchdata.find()) {
216- if(matchdata.groupCount() >= 1) {
217- expdata = matchdata.group(1);
218- }
219- }
220- return expdata;
221- }
222-
223- /**
224- * インターネット接続.
225- */
226- private void getPageData() {
227- HttpURLConnection con = null;
228- try {
229- con = (HttpURLConnection)url.openConnection();
230- con.setRequestMethod("GET");
231- BufferedReader reader = new BufferedReader(
232- new InputStreamReader(con.getInputStream(), "utf-8"));
233- String wkline;
234- StringBuilder sb = new StringBuilder();
235- while((wkline = reader.readLine()) != null) {
236- sb.append(wkline).append("\n");
237- }
238- pageData = sb.toString();
239-
240- } catch(FileNotFoundException ex) {
241- pageData = null;
242- } catch (IOException ex) {
243- Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex);
244- } finally {
245- if(con != null) {
246- con.disconnect();
247- }
248- }
249- }
250-
251- /**
252- * HTMLパーサ.
253- * @param skey
254- */
255- private void searchPageData(SearchData skey) {
256-
257- DebugProcess.searchDatainfo(skey);
258-
259- Reader reader;
260- try {
261- reader = new BufferedReader(new StringReader(pageData));
262- HtmlParserCallback cb = new HtmlParserCallback(skey);
263- ParserDelegator pd = new ParserDelegator();
264- pd.parse(reader, cb, true);
265- reader.close();
266-
267- sData = cb.getrtnData();
268-
269- } catch (IOException ex) {
270- Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex);
271- }
272- }
273-}
Deleted: svn:keywords
## -1 +0,0 ##
-Id
\ No newline at end of property
--- trunk/webScraping/src/WebScraping/core/DebugProcess.java (revision 144)
+++ trunk/webScraping/src/WebScraping/core/DebugProcess.java (nonexistent)
@@ -1,264 +0,0 @@
1-/*
2- * Copyright (C) 2014 kgto.
3- *
4- * This library is free software; you can redistribute it and/or
5- * modify it under the terms of the GNU Lesser General Public
6- * License as published by the Free Software Foundation; either
7- * version 2.1 of the License, or (at your option) any later version.
8- *
9- * This library is distributed in the hope that it will be useful,
10- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12- * Lesser General Public License for more details.
13- *
14- * You should have received a copy of the GNU Lesser General Public
15- * License along with this library; if not, write to the Free Software
16- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17- * MA 02110-1301 USA
18- */
19-/*
20- * $Id$
21- */
22-
23-package webScraping.core;
24-
25-import java.io.File;
26-import java.io.FileInputStream;
27-import java.io.FileNotFoundException;
28-import java.io.IOException;
29-import java.util.logging.FileHandler;
30-import java.util.logging.Formatter;
31-import java.util.logging.Handler;
32-import java.util.logging.Level;
33-import java.util.logging.LogManager;
34-import java.util.logging.LogRecord;
35-import java.util.logging.Logger;
36-import javax.swing.text.MutableAttributeSet;
37-import javax.swing.text.html.HTML;
38-
39-/**
40- * デバック情報.
41- * カレントディレクトリに設定ファイル(Debug.prop)を置くことで、デバックログの出力を制御する。
42- * @author kgto
43- */
44-public class DebugProcess {
45- // 設定ファイル名
46- protected static final String configurationFilename = "Debug.prop";
47- // ロガー名
48- protected static final Logger logger = Logger.getLogger("WebScraping");
49- // ログ出力デフォルトレベル
50- protected static final Level loggerlevel = Level.FINEST;
51-
52-
53- /**
54- * ログ出力設定.
55- * ログ設定ファイルの存在をチェック、(最終的な)ログレベルにより、
56- * ファイルハンドラの設定と出力書式の設定を行う。
57- */
58- public static void debuglog_set() {
59- try {
60- initLogConfiguration();
61-
62- if(Level.ALL.equals(logger.getLevel())) {
63- //logger.addHandler(new FileHandler("WebScraping%g.log", 100000, 2));
64- logger.addHandler(new FileHandler("WebScraping%g.log", true));
65- }
66- setFomatter();
67-
68- } catch (IOException | SecurityException ex) {
69- Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex);
70- }
71- }
72-
73- /**
74- * ログ出力設定解除.
75- */
76- public static void debuglog_unset() {
77- }
78-
79-
80- /**
81- * デバック出力(HTML解析-タグ&属性).
82- * HTMLのタグと属性の解析状態を出力する。
83- * 書式: 9 : x : タグ名 [属性名]属性数 = 属性値<br>
84- * 凡例: 9 = 階層レベル(count値), x = F(tagの開始)/E(tagの終了)/S(単独tag)の何れか1文字<br>
85- * @param tag タグ
86- * @param attr 属性
87- * @param methodname このメソッドを呼び出した親メソッド名
88- * @param count HTMLタグの階層レベル
89- */
90- public static void htmlinfo(HTML.Tag tag, MutableAttributeSet attr,
91- String methodname, int count) {
92-
93- // ログ出力レベルチェック
94- if(logger.getLevel() == null) {
95- return;
96- }
97- if(logger.getLevel().intValue() > loggerlevel.intValue()) {
98- return;
99- }
100-
101- // 編集処理
102- char kbn = ' ';
103- if("handleStartTag".equals(methodname)) {
104- kbn = 'F';
105- }
106- if("handleEndTag".equals(methodname)) {
107- kbn = 'E';
108- }
109- if("handleSimpleTag".equals(methodname)) {
110- kbn = 'S';
111- }
112-
113- StringBuilder strBuf = new StringBuilder(80);
114- strBuf.append(count).append(" : ");
115- strBuf.append(kbn).append(" : ");
116- strBuf.append(tag.toString());
117- // 属性情報
118- if(attr != null) {
119- if(attr.getAttributeCount() != 0) {
120- AttributeData handleAttrData = new AttributeData();
121- handleAttrData.add(tag, attr);
122- for(int i = 0; i < handleAttrData.size; i++) {
123- strBuf.append(" [");
124- strBuf.append(handleAttrData.getattrname(i));
125- strBuf.append("]");
126- strBuf.append(handleAttrData.getcount(i));
127- strBuf.append(" = ");
128- strBuf.append(handleAttrData.getattrvalue(i));
129- }
130- }
131- }
132-
133- logger.log(loggerlevel, strBuf.toString());
134- }
135-
136- /**
137- * デバック出力(メッセージ).
138- * 引数に渡された任意のメッセージを出力する。
139- * @param str メッセージ
140- * @param methodname このメソッドを呼び出した親メソッド名
141- */
142- public static void htmlinfo(String str, String methodname) {
143- logger.log(loggerlevel, str);
144- }
145-
146- public static void htmlinfo(String str) {
147- logger.log(loggerlevel, str);
148- }
149-
150- /**
151- * デバック出力(HTML解析-本文).
152- * 本文の内容を出力する。
153- * @param data 本文(HTML内の文字列)
154- * @param methodname このメソッドを呼び出した親メソッド名
155- */
156- public static void htmlinfo(char[] data, String methodname) {
157- String dat = new String(data);
158- logger.log(loggerlevel, dat);
159- }
160-
161- public static void htmlinfo(char[] data) {
162- String dat = new String(data);
163- logger.log(loggerlevel, dat);
164- }
165-
166- /**
167- * デバック出力(検索キー).
168- * 検索キー(SearchData)の内容を出力する。
169- * @param skey
170- */
171- public static void searchDatainfo(SearchData skey) {
172-
173- StringBuilder strBuf = new StringBuilder(30);
174- strBuf.append("SearchData KEY tag[");
175- strBuf.append(skey.getHtmltag());
176- strBuf.append("] ID[");
177- strBuf.append(skey.getHtmlid());
178- strBuf.append("] CLASS[");
179- strBuf.append(skey.getHtmlclass());
180- strBuf.append("]\n");
181-
182- logger.log(loggerlevel, strBuf.toString());
183- }
184-
185- /**
186- * ログ出力設定ファイルチェック.
187- * 設定ファイルの存在をチェックし存在する場合、設定ファイルの内容を設定する。
188- */
189- private static void initLogConfiguration() {
190-
191- File file = new File(configurationFilename);
192- try {
193- if(file.exists()) {
194- FileInputStream inputStream = new FileInputStream(file);
195- // 設定ファイルの読み込み
196- LogManager.getLogManager().readConfiguration(inputStream);
197- }
198-
199- } catch (FileNotFoundException ex) {
200- Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex);
201- } catch (IOException ex) {
202- Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex);
203- }
204- }
205-
206- /**
207- * ログ出力フォーマッター設定.
208- * ファイルへログ出力時の書式を設定する。
209- */
210- private static void setFomatter() {
211- Handler[] handlers = logger.getHandlers();
212- for(int i = 0 ; i < handlers.length ; i++) {
213- if(handlers[i] instanceof java.util.logging.FileHandler) {
214- handlers[i].setFormatter(new HtmlFormatter());
215- }
216- }
217- }
218-
219-}
220-
/**
 * Log output formatter.
 * @author kgto
 */
class HtmlFormatter extends Formatter {
    /**
     * Builds the text written for one log record.
     * Format:<br>
     * YYYY-MM-DD HH:MM:SS LEVEL&lt;method name&gt;message<br>
     * "N/A" is written when the record has no source method name. If the
     * record carries a throwable, its {@code toString()} and its stack trace
     * (one tab-indented line per frame) follow on separate lines.
     *
     * @param aRecord record to format
     * @return formatted text, terminated by a newline
     */
    @Override
    public synchronized String format(final LogRecord aRecord) {

        // The method is already synchronized, so the unsynchronized
        // StringBuilder is sufficient here.
        final StringBuilder message = new StringBuilder(100);

        // %tF = ISO date, %<tT = 24-hour time of the same argument
        message.append(String.format("%tF %<tT", aRecord.getMillis()));
        message.append(' ');

        message.append(aRecord.getLevel());
        message.append('<');
        String methodName = aRecord.getSourceMethodName();
        message.append(methodName != null ? methodName : "N/A");
        message.append('>');

        message.append(formatMessage(aRecord));
        message.append('\n');

        // For exceptions, add the error and its stack trace
        Throwable throwable = aRecord.getThrown();
        if (throwable != null) {
            message.append(throwable.toString());
            message.append('\n');
            for (StackTraceElement trace : throwable.getStackTrace()) {
                message.append('\t');
                message.append(trace.toString());
                message.append('\n');
            }
        }
        return message.toString();
    }
}
Deleted: svn:keywords
## -1 +0,0 ##
-Id
\ No newline at end of property
--- trunk/webScraping/src/webScraping/core/DebugProcess.java (nonexistent)
+++ trunk/webScraping/src/webScraping/core/DebugProcess.java (revision 145)
@@ -0,0 +1,264 @@
1+/*
2+ * Copyright (C) 2014 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id: DebugProcess.java 106 2014-12-10 13:45:01Z tuna_p $
21+ */
22+
23+package webScraping.core;
24+
25+import java.io.File;
26+import java.io.FileInputStream;
27+import java.io.FileNotFoundException;
28+import java.io.IOException;
29+import java.util.logging.FileHandler;
30+import java.util.logging.Formatter;
31+import java.util.logging.Handler;
32+import java.util.logging.Level;
33+import java.util.logging.LogManager;
34+import java.util.logging.LogRecord;
35+import java.util.logging.Logger;
36+import javax.swing.text.MutableAttributeSet;
37+import javax.swing.text.html.HTML;
38+
39+/**
40+ * デバッグ情報.
41+ * カレントディレクトリに設定ファイル(Debug.prop)を置くことで、デバッグログの出力を制御する。
42+ * @author kgto
43+ */
44+public class DebugProcess {
45+ // 設定ファイル名
46+ protected static final String configurationFilename = "Debug.prop";
47+ // ロガー名
48+ protected static final Logger logger = Logger.getLogger("WebScraping");
49+ // ログ出力デフォルトレベル
50+ protected static final Level loggerlevel = Level.FINEST;
51+
52+
53+ /**
54+ * ログ出力設定.
55+ * ログ設定ファイルの存在をチェック、(最終的な)ログレベルにより、
56+ * ファイルハンドラの設定と出力書式の設定を行う。
57+ */
58+ public static void debuglog_set() {
59+ try {
60+ initLogConfiguration();
61+
62+ if(Level.ALL.equals(logger.getLevel())) {
63+ //logger.addHandler(new FileHandler("WebScraping%g.log", 100000, 2));
64+ logger.addHandler(new FileHandler("WebScraping%g.log", true));
65+ }
66+ setFomatter();
67+
68+ } catch (IOException | SecurityException ex) {
69+ Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex);
70+ }
71+ }
72+
73+ /**
74+ * ログ出力設定解除.
75+ */
76+ public static void debuglog_unset() {
77+ }
78+
79+
80+ /**
81+ * デバッグ出力(HTML解析-タグ&属性).
82+ * HTMLのタグと属性の解析状態を出力する。
83+ * 書式: 9 : x : タグ名 [属性名]属性数 = 属性値<br>
84+ * 凡例: 9 = 階層レベル(count値), x = F(tagの開始)/E(tagの終了)/S(単独tag)の何れか1文字<br>
85+ * @param tag タグ
86+ * @param attr 属性
87+ * @param methodname このメソッドを呼び出した親メソッド名
88+ * @param count HTMLタグの階層レベル
89+ */
90+ public static void htmlinfo(HTML.Tag tag, MutableAttributeSet attr,
91+ String methodname, int count) {
92+
93+ // ログ出力レベルチェック
94+ if(logger.getLevel() == null) {
95+ return;
96+ }
97+ if(logger.getLevel().intValue() > loggerlevel.intValue()) {
98+ return;
99+ }
100+
101+ // 編集処理
102+ char kbn = ' ';
103+ if("handleStartTag".equals(methodname)) {
104+ kbn = 'F';
105+ }
106+ if("handleEndTag".equals(methodname)) {
107+ kbn = 'E';
108+ }
109+ if("handleSimpleTag".equals(methodname)) {
110+ kbn = 'S';
111+ }
112+
113+ StringBuilder strBuf = new StringBuilder(80);
114+ strBuf.append(count).append(" : ");
115+ strBuf.append(kbn).append(" : ");
116+ strBuf.append(tag.toString());
117+ // 属性情報
118+ if(attr != null) {
119+ if(attr.getAttributeCount() != 0) {
120+ AttributeData handleAttrData = new AttributeData();
121+ handleAttrData.add(tag, attr);
122+ for(int i = 0; i < handleAttrData.size; i++) {
123+ strBuf.append(" [");
124+ strBuf.append(handleAttrData.getattrname(i));
125+ strBuf.append("]");
126+ strBuf.append(handleAttrData.getcount(i));
127+ strBuf.append(" = ");
128+ strBuf.append(handleAttrData.getattrvalue(i));
129+ }
130+ }
131+ }
132+
133+ logger.log(loggerlevel, strBuf.toString());
134+ }
135+
136+ /**
137+ * デバッグ出力(メッセージ).
138+ * 引数に渡された任意のメッセージを出力する。
139+ * @param str メッセージ
140+ * @param methodname このメソッドを呼び出した親メソッド名
141+ */
142+ public static void htmlinfo(String str, String methodname) {
143+ logger.log(loggerlevel, str);
144+ }
145+
146+ public static void htmlinfo(String str) {
147+ logger.log(loggerlevel, str);
148+ }
149+
150+ /**
151+ * デバッグ出力(HTML解析-本文).
152+ * 本文の内容を出力する。
153+ * @param data 本文(HTML内の文字列)
154+ * @param methodname このメソッドを呼び出した親メソッド名
155+ */
156+ public static void htmlinfo(char[] data, String methodname) {
157+ String dat = new String(data);
158+ logger.log(loggerlevel, dat);
159+ }
160+
161+ public static void htmlinfo(char[] data) {
162+ String dat = new String(data);
163+ logger.log(loggerlevel, dat);
164+ }
165+
166+ /**
167+ * デバッグ出力(検索キー).
168+ * 検索キー(SearchData)の内容を出力する。
169+ * @param skey
170+ */
171+ public static void searchDatainfo(SearchData skey) {
172+
173+ StringBuilder strBuf = new StringBuilder(30);
174+ strBuf.append("SearchData KEY tag[");
175+ strBuf.append(skey.getHtmltag());
176+ strBuf.append("] ID[");
177+ strBuf.append(skey.getHtmlid());
178+ strBuf.append("] CLASS[");
179+ strBuf.append(skey.getHtmlclass());
180+ strBuf.append("]\n");
181+
182+ logger.log(loggerlevel, strBuf.toString());
183+ }
184+
185+ /**
186+ * ログ出力設定ファイルチェック.
187+ * 設定ファイルの存在をチェックし存在する場合、設定ファイルの内容を設定する。
188+ */
189+ private static void initLogConfiguration() {
190+
191+ File file = new File(configurationFilename);
192+ try {
193+ if(file.exists()) {
194+ FileInputStream inputStream = new FileInputStream(file);
195+ // 設定ファイルの読み込み
196+ LogManager.getLogManager().readConfiguration(inputStream);
197+ }
198+
199+ } catch (FileNotFoundException ex) {
200+ Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex);
201+ } catch (IOException ex) {
202+ Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex);
203+ }
204+ }
205+
206+ /**
207+ * ログ出力フォーマッター設定.
208+ * ファイルへログ出力時の書式を設定する。
209+ */
210+ private static void setFomatter() {
211+ Handler[] handlers = logger.getHandlers();
212+ for(int i = 0 ; i < handlers.length ; i++) {
213+ if(handlers[i] instanceof java.util.logging.FileHandler) {
214+ handlers[i].setFormatter(new HtmlFormatter());
215+ }
216+ }
217+ }
218+
219+}
220+
221+/**
222+ * ログ出力フォーマッター.
223+ * @author kgto
224+ */
225+class HtmlFormatter extends Formatter {
226+ /**
227+ * Logの出力文字列を生成する。
228+ * 出力書式:<br>
229+ * YYYY-MM-DD HH:MM:SS ログレベル<メソッド名>メッセージ
230+ */
231+ @Override
232+ public synchronized String format(final LogRecord aRecord) {
233+
234+ final StringBuffer message = new StringBuffer(100);
235+
236+ long millis = aRecord.getMillis();
237+ String time = String.format("%tF %<tT", millis);
238+
239+ message.append(time);
240+ message.append(' ');
241+
242+ message.append(aRecord.getLevel());
243+ message.append('<');
244+ String methodName = aRecord.getSourceMethodName();
245+ message.append(methodName != null ? methodName : "N/A");
246+ message.append('>');
247+
248+ message.append(formatMessage(aRecord));
249+ message.append('\n');
250+
251+ // 例外エラーの場合、エラー内容とスタックトレース出力
252+ Throwable throwable = aRecord.getThrown();
253+ if (throwable != null) {
254+ message.append(throwable.toString());
255+ message.append('\n');
256+ for (StackTraceElement trace : throwable.getStackTrace()) {
257+ message.append('\t');
258+ message.append(trace.toString());
259+ message.append('\n');
260+ }
261+ }
262+ return message.toString();
263+ }
264+}
--- trunk/webScraping/src/webScraping/core/AttributeData.java (nonexistent)
+++ trunk/webScraping/src/webScraping/core/AttributeData.java (revision 145)
@@ -0,0 +1,164 @@
1+/*
2+ * Copyright (C) 2014 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id: AttributeData.java 132 2015-03-31 02:44:57Z tuna_p $
21+ */
22+
23+package webScraping.core;
24+
25+import java.util.ArrayList;
26+import java.util.Enumeration;
27+import javax.swing.text.MutableAttributeSet;
28+import javax.swing.text.html.HTML;
29+
30+/**
31+ * HTMLタグの属性情報を保持する.
32+ * @author kgto
33+ */
34+public class AttributeData {
35+
36+ public AttributeData() {
37+ AttrList = new ArrayList();
38+ size = 0;
39+ }
40+
41+ /**
42+ * 属性情報追加.
43+ * @param tag
44+ * @param attr
45+ */
46+ public void add(HTML.Tag tag, MutableAttributeSet attr) {
47+
48+ int tagcount = tagcnt(tag);
49+ ++tagcount;
50+
51+ Enumeration e = attr.getAttributeNames();
52+ while(e.hasMoreElements()) {
53+ Object obj = e.nextElement();
54+
55+ AttrData a = new AttrData();
56+ a.tag = tag;
57+ a.count = tagcount;
58+ a.attrname = obj.toString();
59+ a.attrvalue = attr.getAttribute(obj).toString();
60+
61+ AttrList.add(a);
62+ size = AttrList.size();
63+ }
64+
65+ }
66+
67+ /**
68+ * 属性情報検索.
69+ * @param tag
70+ * @param attrname
71+ * @param attrvalue
72+ * @return
73+ */
74+ public boolean search(HTML.Tag tag, String attrname, String attrvalue) {
75+ boolean ret = false;
76+ for (Object AttrList1 : AttrList) {
77+ AttrData a = (AttrData)AttrList1;
78+ if(a.tag == tag) {
79+ //if(a.attrname.equals(attrname) && a.attrvalue.equals(attrvalue)) {
80+ if(a.attrname.equals(attrname) && a.attrvalue.startsWith(attrvalue)) {
81+ ret = true;
82+ }
83+ }
84+ }
85+ return ret;
86+ }
87+
88+ public boolean searchId(HTML.Tag tag, String attrvalue) {
89+ return search(tag, "id", attrvalue);
90+ }
91+
92+ public boolean searchClass(HTML.Tag tag, String attrvalue) {
93+ return search(tag, "class", attrvalue);
94+ }
95+
96+ /**
97+ * 属性の値を取得する.
98+ * @param tag
99+ * @param attrname
100+ * @return
101+ */
102+ public ArrayList getvale(HTML.Tag tag, String attrname) {
103+ ArrayList ret = new ArrayList();
104+ for (Object AttrList1 : AttrList) {
105+ AttrData a = (AttrData)AttrList1;
106+ if(a.tag == tag) {
107+ if(a.attrname.equals(attrname)) {
108+ ret.add(a.attrvalue);
109+ }
110+ }
111+ }
112+ return ret;
113+ }
114+
115+ /**
116+ * 引数で渡されたTAGの最新カウント数を返す.
117+ * @param tag
118+ * @return
119+ */
120+ private int tagcnt(HTML.Tag tag) {
121+ int wkcnt = 0;
122+ for (Object AttrList1 : AttrList) {
123+ AttrData a = (AttrData)AttrList1;
124+ if(a.tag == tag) {
125+ if(wkcnt < a.count) {
126+ wkcnt = a.count;
127+ }
128+ }
129+ }
130+ return wkcnt;
131+ }
132+
133+ // AttrList の内容を返すメソッド
134+ public HTML.Tag gettag(int i) {
135+ AttrData a = (AttrData)AttrList.get(i);
136+ return a.tag;
137+ }
138+
139+ public int getcount(int i) {
140+ AttrData a = (AttrData)AttrList.get(i);
141+ return a.count;
142+ }
143+
144+ public String getattrname(int i) {
145+ AttrData a = (AttrData)AttrList.get(i);
146+ return a.attrname;
147+ }
148+
149+ public String getattrvalue(int i) {
150+ AttrData a = (AttrData)AttrList.get(i);
151+ return a.attrvalue;
152+ }
153+
154+ // フィールド変数
155+ public class AttrData {
156+ public HTML.Tag tag;
157+ public int count;
158+ public String attrname;
159+ public String attrvalue;
160+ }
161+ public ArrayList AttrList;
162+ public int size; // AttrListのサイズ
163+
164+}
--- trunk/webScraping/src/webScraping/core/HtmlParserCallback.java (nonexistent)
+++ trunk/webScraping/src/webScraping/core/HtmlParserCallback.java (revision 145)
@@ -0,0 +1,222 @@
1+/*
2+ * Copyright (C) 2014 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id: HtmlParserCallback.java 132 2015-03-31 02:44:57Z tuna_p $
21+ */
22+
23+package webScraping.core;
24+
25+import java.util.ArrayList;
26+import java.util.HashMap;
27+import javax.swing.text.MutableAttributeSet;
28+import javax.swing.text.html.HTML;
29+import javax.swing.text.html.HTMLEditorKit;
30+
31+/**
32+ * HTMLパーサ部品.
33+ * @author kgto
34+ */
35+class HtmlParserCallback extends HTMLEditorKit.ParserCallback {
36+ /* ---------------------------------------------------------------------- *
37+ * フィールド
38+ * ---------------------------------------------------------------------- */
39+ // Tag毎の階層
40+ HashMap<HTML.Tag,Integer> tagMap = new HashMap<>();
41+
42+ // search key 情報
43+ String keytag;
44+ String keyid;
45+ String keyclass;
46+
47+ // search key と一致時の情報退避
48+ int bufCount = 0;
49+ HTML.Tag bufTag = null;
50+ // search key と一致時の情報格納ワーク
51+ StringBuilder bufText;
52+
53+ // search key と一致時のデータ一覧
54+ ArrayList sData;
55+
56+ // 属性データ
57+ AttributeData attrdata;
58+
59+ /* ---------------------------------------------------------------------- *
60+ * コンストラクタ
61+ * ---------------------------------------------------------------------- */
62+ protected HtmlParserCallback(SearchData skey) {
63+
64+ // キー情報展開
65+ keytag = skey.getHtmltag();
66+ keyid = skey.getHtmlid();
67+ keyclass = skey.getHtmlclass();
68+
69+ sData = new ArrayList();
70+ }
71+
72+ /* ---------------------------------------------------------------------- *
73+ * Getter
74+ * ---------------------------------------------------------------------- */
75+ ArrayList getrtnData() {
76+ return this.sData;
77+ }
78+
79+ /* ---------------------------------------------------------------------- *
80+ * メソッド
81+ * ---------------------------------------------------------------------- */
82+ @Override
83+ public void handleStartTag(HTML.Tag tag, MutableAttributeSet attr, int pos){
84+ // Tag毎の階層を保持
85+ int count = 1;
86+ if(tagMap.containsKey(tag)) {
87+ count = tagMap.get(tag);
88+ count++;
89+ }
90+ tagMap.put(tag, count);
91+
92+ // 属性解析
93+ AttributeData handleStartattrdata = new AttributeData();
94+ handleStartattrdata.add(tag, attr);
95+
96+ DebugProcess.htmlinfo(tag, attr, "handleStartTag", count);
97+
98+ if(bufCount == 0) {
99+ if(tag.toString().equals(keytag)) {
100+ //if(serachAttribute(attr)) {
101+ if(serachAttribute(tag, handleStartattrdata)) {
102+ bufCount = count;
103+ bufTag = tag;
104+ attrdata = new AttributeData();
105+ bufText = new StringBuilder();
106+ }
107+ }
108+ }
109+ if(bufCount > 0) {
110+ attrdata.add(tag, attr);
111+ }
112+ }
113+
114+ @Override
115+ public void handleEndTag(HTML.Tag tag, int pos){
116+ // Tag毎の階層を取得
117+ int count = 0;
118+ if(tagMap.containsKey(tag)) {
119+ count = tagMap.get(tag);
120+ }
121+
122+ DebugProcess.htmlinfo(tag, null, "handleEndTag", count);
123+
124+ if(tag.equals(bufTag) && count <= bufCount) {
125+
126+ // 溜め込んだ一致情報をリストへ格納
127+ sData.add(bufText.toString());
128+
129+ // 退避したsearch keyとの一致情報クリア
130+ bufCount = 0;
131+ bufTag = null;
132+ bufText = null;
133+ }
134+
135+ // Tag毎の階層減算
136+ tagMap.put(tag, --count);
137+ }
138+
139+ @Override
140+ public void handleText(char[] data, int pos){
141+
142+ DebugProcess.htmlinfo(data, "handleText");
143+
144+ String splitchar = "\t";
145+ //制御文字の削除
146+ // &nbsp; 0xa0
147+ StringBuilder buf = new StringBuilder();
148+ for(int i = 0; i < data.length; i++) {
149+ if(data[i] > 0x1f && data[i] != 0x7f && data[i] != 0xa0) {
150+ buf.append(data[i]);
151+ }
152+ }
153+ if(bufCount > 0) {
154+ if(bufText.length() > 0) {
155+ bufText.append(splitchar);
156+ }
157+ bufText.append(buf.toString());
158+ }
159+ }
160+
161+ @Override
162+ public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attr, int pos){
163+ if(bufCount > 0) {
164+ attrdata.add(tag, attr);
165+ }
166+ DebugProcess.htmlinfo(tag, attr, "handleSimpleTag", 0);
167+ }
168+
169+ /**
170+ * ページ内のID/CLASS値と検索キーを比較する.
171+ * @param attr ページのMutableAttributeSet
172+ * @return boolean 検索キーと一致の時、true
173+ */
174+ boolean serachAttribute(MutableAttributeSet attr) {
175+ String currentID = (String)attr.getAttribute(HTML.Attribute.ID);
176+ String currentClass = (String)attr.getAttribute(HTML.Attribute.CLASS);
177+
178+ if(keyid.isEmpty() == false && keyclass.isEmpty() == false) {
179+ if(keyid.equals(currentID) && keyclass.equals(currentClass)) {
180+ return true;
181+ }
182+ }
183+
184+ if(keyid.isEmpty() == false) {
185+ if(keyid.equals(currentID)) {
186+ return true;
187+ }
188+ }
189+
190+ if(keyclass.isEmpty() == false) {
191+ if(keyclass.equals(currentClass)) {
192+ return true;
193+ }
194+ }
195+
196+ return false;
197+ }
198+
199+ /**
200+ * ページ内のID/CLASS値と検索キーを比較する.
201+ * @param tag
202+ * @param attrdata
203+ * @return boolean 検索キーと一致の時、true
204+ */
205+ boolean serachAttribute(HTML.Tag tag, AttributeData attrdata) {
206+ // ID と CLASS の両方にキー入力有りの場合
207+ if(keyid.isEmpty() == false && keyclass.isEmpty() == false) {
208+ if(attrdata.searchId(tag, keyid) && attrdata.searchClass(tag, keyclass)) {
209+ return true;
210+ }
211+ }
212+ // ID のキーチェック
213+ if(keyid.isEmpty() == false) {
214+ return attrdata.searchId(tag, keyid);
215+ }
216+ // CLASS のキーチェック
217+ if(keyclass.isEmpty() == false) {
218+ return attrdata.searchClass(tag, keyclass);
219+ }
220+ return false;
221+ }
222+}
--- trunk/webScraping/src/webScraping/core/SearchData.java (nonexistent)
+++ trunk/webScraping/src/webScraping/core/SearchData.java (revision 145)
@@ -0,0 +1,200 @@
1+/*
2+ * Copyright (C) 2014 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id: SearchData.java 132 2015-03-31 02:44:57Z tuna_p $
21+ */
22+
23+package webScraping.core;
24+
25+import java.util.ArrayList;
26+
27+/**
28+ * タグ検索データ.
29+ * @author kgto
30+ */
31+public class SearchData {
32+ /* ---------------------------------------------------------------------- *
33+ * フィールド
34+ * ---------------------------------------------------------------------- */
35+ private String item;
36+ private String htmltag;
37+ private String htmlid;
38+ private String htmlclass;
39+ private String around;
40+ private String regexp;
41+
42+ /* ---------------------------------------------------------------------- *
43+ * static 処理
44+ * ---------------------------------------------------------------------- */
45+ public static class Context {
46+ public Class columnClass;
47+ public String columnName;
48+ public String columnNameJp;
49+
50+ public Context(Class columnClass, String columnName, String columnNameJp) {
51+ this.columnClass = columnClass;
52+ this.columnName = columnName;
53+ this.columnNameJp = columnNameJp;
54+ }
55+ }
56+
57+ public static final Context[] context = {
58+ /* 0 */ new Context(String.class , "item" , "項目名"),
59+ /* 1 */ new Context(String.class , "htmltag" , "タグ"),
60+ /* 2 */ new Context(String.class , "htmlid" , "ID"),
61+ /* 3 */ new Context(String.class , "htmlclass" , "クラス"),
62+ /* 4 */ new Context(String.class , "around" , "位置"),
63+ /* 5 */ new Context(String.class , "regexp" , "抽出条件")
64+ };
65+
66+ /* ---------------------------------------------------------------------- */
67+ private static ArrayList<SearchData> slist = new ArrayList<>();
68+
69+ public static void addSearchData(
70+ String item, String htmltag, String htmlid,
71+ String htmlclass, String around, String regexp) {
72+ SearchData sdat = new SearchData();
73+ sdat.setitem(item);
74+ sdat.setHtmltag(htmltag);
75+ sdat.setHtmlid(htmlid);
76+ sdat.setHtmlclass(htmlclass);
77+ sdat.setaround(around);
78+ sdat.setregexp(regexp);
79+
80+ slist.add(sdat);
81+ }
82+
83+ public static void add(SearchData sdat) {
84+ slist.add(sdat);
85+ }
86+
87+ public static SearchData get(int i) {
88+ return slist.get(i);
89+ }
90+
91+ public static int size() {
92+ return slist.size();
93+ }
94+
95+ public static SearchData remove(int index) {
96+ return slist.remove(index);
97+ }
98+
99+ public static void clear() {
100+ slist.clear();
101+ }
102+
103+ /* ---------------------------------------------------------------------- *
104+ * コンストラクタ
105+ * ---------------------------------------------------------------------- */
106+ public SearchData() {
107+ initialize();
108+ }
109+
110+ public SearchData(SearchData dat) {
111+ this.item = dat.getitem();
112+ this.htmltag = dat.getHtmltag();
113+ this.htmlid = dat.getHtmlid();
114+ this.htmlclass = dat.getHtmlclass();
115+ this.around = dat.getaround();
116+ this.regexp = dat.getregexp();
117+ }
118+
119+ /* ---------------------------------------------------------------------- *
120+ * Setter
121+ * ---------------------------------------------------------------------- */
122+ public void setitem(String item) {
123+ this.item = item;
124+ }
125+
126+ public void setHtmltag(String htmltag) {
127+ this.htmltag = htmltag;
128+ }
129+
130+ public void setHtmlid(String htmlid) {
131+ this.htmlid = htmlid;
132+ }
133+
134+ public void setHtmlclass(String htmlclass) {
135+ this.htmlclass = htmlclass;
136+ }
137+
138+ public void setaround(String around) {
139+ this.around = around;
140+ }
141+
142+ public void setregexp(String regexp) {
143+ this.regexp = regexp;
144+ }
145+
146+ /* ---------------------------------------------------------------------- *
147+ * Getter
148+ * ---------------------------------------------------------------------- */
149+ public String getitem() {
150+ return item;
151+ }
152+
153+ public String getHtmltag() {
154+ return htmltag;
155+ }
156+
157+ public String getHtmlid() {
158+ return htmlid;
159+ }
160+
161+ public String getHtmlclass() {
162+ return htmlclass;
163+ }
164+
165+ public String getaround() {
166+ return around;
167+ }
168+
169+ public String getregexp() {
170+ return regexp;
171+ }
172+
173+ /* ---------------------------------------------------------------------- *
174+ * メソッド
175+ * ---------------------------------------------------------------------- */
176+ /**
177+ * データ初期化.
178+ */
179+ public final void initialize() {
180+ this.item = "";
181+ this.htmltag = "";
182+ this.htmlid = "";
183+ this.htmlclass = "";
184+ this.around = "";
185+ this.regexp = "";
186+ }
187+
188+ public Object[] getObjData() {
189+ Object[] obj = {
190+ /* 0 */ getitem(), // 項目名
191+ /* 1 */ getHtmltag(), // タグ
192+ /* 2 */ getHtmlid(), // ID
193+ /* 3 */ getHtmlclass(), // クラス
194+ /* 4 */ getaround(), // 位置
195+ /* 5 */ getregexp() // 抽出条件
196+ };
197+ return obj;
198+ }
199+
200+}
--- trunk/webScraping/src/webScraping/core/HtmlParser.java (nonexistent)
+++ trunk/webScraping/src/webScraping/core/HtmlParser.java (revision 145)
@@ -0,0 +1,273 @@
1+/*
2+ * Copyright (C) 2014 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id: HtmlParser.java 132 2015-03-31 02:44:57Z tuna_p $
21+ */
22+
23+package webScraping.core;
24+
25+import java.io.*;
26+import java.net.*;
27+import java.util.ArrayList;
28+import java.util.logging.Level;
29+import java.util.logging.Logger;
30+import java.util.regex.Matcher;
31+import java.util.regex.Pattern;
32+import javax.swing.text.html.parser.ParserDelegator;
33+
34+/**
35+ * HTMLパーサ.
36+ * @author kgto
37+ */
38+public class HtmlParser {
39+ /* ---------------------------------------------------------------------- *
40+ * フィールド
41+ * ---------------------------------------------------------------------- */
42+ URL url;
43+ String pageData;
44+ ArrayList sData;
45+
46+ // 作業ワーク
47+ private String htmltag;
48+ private String htmlid;
49+ private String htmlclass;
50+
51+ /* ---------------------------------------------------------------------- *
52+ * コンストラクタ
53+ * ---------------------------------------------------------------------- */
54+ public HtmlParser(URL UrlAdress) {
55+ DebugProcess.debuglog_set();
56+ this.url = UrlAdress;
57+ getPageData();
58+ }
59+
60+ public HtmlParser(String UrlAdress) {
61+ DebugProcess.debuglog_set();
62+ try {
63+ url = new URL(UrlAdress);
64+ getPageData();
65+
66+ } catch (MalformedURLException ex) {
67+ Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex);
68+ }
69+ }
70+
71+ public HtmlParser() {
72+ DebugProcess.debuglog_set();
73+ url = null;
74+ }
75+
76+ /* ---------------------------------------------------------------------- *
77+ * Getter
78+ * ---------------------------------------------------------------------- */
79+ public String getStringPageData() {
80+ return pageData;
81+ }
82+
83+ /* ---------------------------------------------------------------------- *
84+ * Setter
85+ * ---------------------------------------------------------------------- */
86+ public void seturl(URL UrlAdress) {
87+ this.url = UrlAdress;
88+ getPageData();
89+ }
90+
91+ /* ---------------------------------------------------------------------- *
92+ * メソッド
93+ * ---------------------------------------------------------------------- */
94+ public void seturl(String UrlAdress) {
95+ try {
96+ url = new URL(UrlAdress);
97+ getPageData();
98+
99+ } catch (MalformedURLException ex) {
100+ Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex);
101+ }
102+ }
103+
104+ /**
105+ * HTMLページ内検索.
106+ * 検索キーとして渡されたタグ,ID,クラスから、対象となるタグを探し出し、
107+ * around(タグ位置)として指定された箇所の文字列をregexp(正規表現)で指定された整形を
108+ * 行った結果を返す。<br>
109+ * aroundが未指定(未入力)の場合、一致した全ての文字列をタブ区切りで連結して返す。<br>
110+ * regexpが指定(入力)ありの場合、正規表現にて整形を行う。<br>
111+ * aroundで指定された位置に一致するデータが存在しなかった場合、NULLを返す。
112+ * @param skey 検索キーデータ(SearchData)
113+ * @return String 検索キーに一致するデータの文字列
114+ */
115+ public String search(SearchData skey) {
116+
117+ // htmlページ内を検索
118+ if(isHtmlkeyEq(skey) == false) {
119+ searchPageData(skey);
120+ }
121+ /*
122+ around 出現位置指定 入力有り:指定された位置の情報のみ返す。
123+ 入力無し:取得した全ての情報を返す。
124+ */
125+ String regexp = skey.getregexp();
126+ if(skey.getaround().length() > 0) {
127+ int wkAround = Integer.parseInt(skey.getaround()); // 検索位置を数値変換
128+ if(wkAround < sData.size()) {
129+ String str = (String)sData.get(wkAround);
130+ String rtn = RegularExpression(str, regexp);
131+ return rtn;
132+ }
133+ } else {
134+ StringBuilder strbuf = new StringBuilder();
135+ for (Object sData1 : sData) {
136+ String str = (String)sData1;
137+ String rtn = RegularExpression(str, regexp);
138+ if(strbuf.length() > 0) {
139+ strbuf.append("\t");
140+ }
141+ strbuf.append(rtn);
142+ }
143+ return strbuf.toString();
144+ }
145+ return null;
146+ }
147+
148+ /**
149+ * 直近のHTMLタグ/ID/CLASS値と引数の値を比較する.
150+ * @param skey HTMLタグ/ID/CLASSが格納された検索キー
151+ * @return boolean HTMLタグ/ID/CLASS値が一致する時、true
152+ */
153+ boolean isHtmlkeyEq(SearchData skey) {
154+
155+ String stag = skey.getHtmltag();
156+ String sid = skey.getHtmlid();
157+ String sclass = skey.getHtmlclass();
158+
159+ boolean rtn = true;
160+
161+ // htmltag
162+ if(htmltag == null) {
163+ rtn = false;
164+ } else {
165+ if(htmltag.equals(stag) == false) {
166+ rtn = false;
167+ }
168+ }
169+
170+ // htmlid
171+ if(htmlid == null) {
172+ rtn = false;
173+ } else {
174+ if(htmlid.equals(sid) == false) {
175+ rtn = false;
176+ }
177+ }
178+
179+ // htmlclass
180+ if(htmlclass == null) {
181+ rtn = false;
182+ } else {
183+ if(htmlclass.equals(sclass) == false) {
184+ rtn = false;
185+ }
186+ }
187+
188+ if(!rtn) {
189+ htmltag = stag;
190+ htmlid = sid;
191+ htmlclass = sclass;
192+ }
193+
194+ return rtn;
195+ }
196+
197+ /**
198+ * 正規表現検索.
199+ * @param strdata
200+ * @param regexp
201+ * @return
202+ */
203+ String RegularExpression(String strdata, String regexp) {
204+ String expdata = null;
205+
206+ //regexpのチェック
207+ if(regexp.isEmpty()) {
208+ expdata = strdata;
209+ return expdata;
210+ }
211+
212+ //正規表現検索
213+ Pattern ptn = Pattern.compile(regexp);
214+ Matcher matchdata = ptn.matcher(strdata);
215+ if (matchdata.find()) {
216+ if(matchdata.groupCount() >= 1) {
217+ expdata = matchdata.group(1);
218+ }
219+ }
220+ return expdata;
221+ }
222+
223+ /**
224+ * インターネット接続.
225+ */
226+ private void getPageData() {
227+ HttpURLConnection con = null;
228+ try {
229+ con = (HttpURLConnection)url.openConnection();
230+ con.setRequestMethod("GET");
231+ BufferedReader reader = new BufferedReader(
232+ new InputStreamReader(con.getInputStream(), "utf-8"));
233+ String wkline;
234+ StringBuilder sb = new StringBuilder();
235+ while((wkline = reader.readLine()) != null) {
236+ sb.append(wkline).append("\n");
237+ }
238+ pageData = sb.toString();
239+
240+ } catch(FileNotFoundException ex) {
241+ pageData = null;
242+ } catch (IOException ex) {
243+ Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex);
244+ } finally {
245+ if(con != null) {
246+ con.disconnect();
247+ }
248+ }
249+ }
250+
251+ /**
252+ * HTMLパーサ.
253+ * @param skey
254+ */
255+ private void searchPageData(SearchData skey) {
256+
257+ DebugProcess.searchDatainfo(skey);
258+
259+ Reader reader;
260+ try {
261+ reader = new BufferedReader(new StringReader(pageData));
262+ HtmlParserCallback cb = new HtmlParserCallback(skey);
263+ ParserDelegator pd = new ParserDelegator();
264+ pd.parse(reader, cb, true);
265+ reader.close();
266+
267+ sData = cb.getrtnData();
268+
269+ } catch (IOException ex) {
270+ Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex);
271+ }
272+ }
273+}
--- trunk/webScraping/src/webScraping/core/Scraping.java (nonexistent)
+++ trunk/webScraping/src/webScraping/core/Scraping.java (revision 145)
@@ -0,0 +1,71 @@
1+/*
2+ * Copyright (C) 2016 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id: Scraping.java 139 2016-05-17 09:16:40Z tuna_p $
21+ */
22+package webScraping.core;
23+
24+import java.net.URL;
25+import webScraping.core.HtmlParser;
26+import webScraping.core.SearchData;
27+
28+/**
29+ *
30+ * @author kgto
31+ */
32+public class Scraping {
33+
34+ public Scraping() {
35+ }
36+
37+ /**
38+ * HTML解析.
39+ * @param url
40+ * @return
41+ */
42+ public String[] getResult(URL url) {
43+
44+ HtmlParser par = new HtmlParser(url);
45+
46+ String[] result = new String[SearchData.size()];
47+ for(int i = 0; i < SearchData.size(); i++) {
48+ result[i] = par.search(SearchData.get(i));
49+ }
50+
51+ if(!resultCheck(result)) {
52+ return null;
53+ }
54+ return result;
55+ }
56+
57+ /**
58+ * 結果文字列チェック.
59+ * @param result
60+ * @return 文字列配列に1文字でも入力有り(null/SPACE以外)の時、true
61+ */
62+ boolean resultCheck(String[] result) {
63+ for (String result1 : result) {
64+ if (result1 != null && result1.trim().length() > 0) {
65+ return true;
66+ }
67+ }
68+ return false;
69+ }
70+
71+}
--- trunk/webScraping/src/webScraping/utility/LibraryXml.java (nonexistent)
+++ trunk/webScraping/src/webScraping/utility/LibraryXml.java (revision 145)
@@ -0,0 +1,153 @@
1+/*
2+ * Copyright (C) 2014-2015 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id: LibraryXml.java 138 2016-05-17 06:40:29Z tuna_p $
21+ */
22+
23+package webScraping.utility;
24+
25+import java.io.File;
26+import java.io.FileNotFoundException;
27+import java.io.FileOutputStream;
28+import java.io.IOException;
29+import java.io.InputStream;
30+import java.util.logging.Level;
31+import java.util.logging.Logger;
32+
33+import javax.xml.parsers.DocumentBuilder;
34+import javax.xml.parsers.DocumentBuilderFactory;
35+import javax.xml.parsers.ParserConfigurationException;
36+import javax.xml.transform.Transformer;
37+import javax.xml.transform.TransformerConfigurationException;
38+import javax.xml.transform.TransformerException;
39+import javax.xml.transform.TransformerFactory;
40+import javax.xml.transform.dom.DOMSource;
41+import javax.xml.transform.stream.StreamResult;
42+
43+import org.w3c.dom.DOMImplementation;
44+import org.w3c.dom.Document;
45+import org.w3c.dom.Element;
46+import org.w3c.dom.Node;
47+import org.w3c.dom.NodeList;
48+import org.xml.sax.SAXException;
49+
/**
 * Small DOM helper that builds an XML document under a fixed container
 * element and writes it to a file, or parses an existing document and
 * exposes its child elements.
 *
 * Failures are logged through {@code java.util.logging} rather than thrown.
 */
public class LibraryXml {

    /** Tag name of the document element created for output documents. */
    String xmlrootname = "xmlcontainer";

    DocumentBuilder builder;
    public Document readdoc, writedoc;
    /**
     * Document element of whichever document was set up last, either by
     * {@link #getwriteRoot(String)} or by one of the {@code read} methods.
     */
    Element xmlroot;

    /* ---------------------------------------------------------------------- *
     * Constructor
     * ---------------------------------------------------------------------- */
    public LibraryXml() {
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            builder = factory.newDocumentBuilder();

        } catch (ParserConfigurationException ex) {
            Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /* ---------------------------------------------------------------------- *
     * Methods
     * ---------------------------------------------------------------------- */
    /* Writing */

    /**
     * Appends a new element to the output document's container element,
     * creating the output document first if none is pending.
     *
     * @param elementName tag name of the element to create
     * @return the newly appended element
     */
    public Element getwriteRoot(String elementName) {
        mainElement();
        Element element = writedoc.createElement(elementName);
        xmlroot.appendChild(element);
        return element;
    }

    /** Creates the output document and its container element on first use. */
    private void mainElement() {
        if (writedoc == null) {
            DOMImplementation domImpl = builder.getDOMImplementation();
            writedoc = domImpl.createDocument("", xmlrootname, null);
            xmlroot = writedoc.getDocumentElement();
        }
    }

    /**
     * Serialises the pending output document to {@code file} as indented
     * XML and then discards it, so the next {@link #getwriteRoot(String)}
     * starts a fresh document.
     *
     * @param file destination file
     */
    public void write(File file) {
        try (FileOutputStream os = new FileOutputStream(file)) {
            Transformer transformer = TransformerFactory.newInstance().newTransformer();

            transformer.setOutputProperty("indent", "yes"); // line breaks / indentation
            transformer.setOutputProperty("method", "xml");

            transformer.transform(new DOMSource(writedoc), new StreamResult(os));

            // Drop the document that was just written.
            writedoc = null;

        } catch (TransformerException | IOException ex) {
            // TransformerConfigurationException and FileNotFoundException are
            // subclasses of these two and were logged identically before.
            Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /* ---------------------------------------------------------------------- */
    /* Reading */

    /**
     * Looks up the first descendant of the current root with the given tag.
     *
     * @param elementName tag name to search for
     * @return the first matching element, or {@code null} when there is no
     *         current root or no element with that name
     */
    public Element getreadRoot(String elementName) {
        if (xmlroot == null) {
            return null;
        }
        // NodeList.item(0) is null when nothing matched.
        Node node = xmlroot.getElementsByTagName(elementName).item(0);
        return (node instanceof Element) ? (Element) node : null;
    }

    /**
     * Parses {@code file} and makes its document element the current root.
     *
     * @param file XML file to read
     */
    public void read(File file) {
        try {
            readdoc = builder.parse(file);
            xmlroot = readdoc.getDocumentElement();

        } catch (SAXException | IOException ex) {
            Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Parses the XML supplied by {@code is} and makes its document element
     * the current root.
     *
     * @param is stream to read; it is not closed here
     */
    public void read(InputStream is) {
        try {
            readdoc = builder.parse(is);
            xmlroot = readdoc.getDocumentElement();

        } catch (SAXException | IOException ex) {
            Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /* ---------------------------------------------------------------------- */

}
--- trunk/webScraping/src/webScraping/utility/ScrapingXml.java (nonexistent)
+++ trunk/webScraping/src/webScraping/utility/ScrapingXml.java (revision 145)
@@ -0,0 +1,209 @@
1+/*
2+ * Copyright (C) 2014-2015 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id: ScrapingXml.java 138 2016-05-17 06:40:29Z tuna_p $
21+ */
22+
23+package webScraping.utility;
24+
25+import webScraping.core.SearchData;
26+import java.io.File;
27+import java.io.InputStream;
28+import java.util.ArrayList;
29+import org.w3c.dom.Element;
30+import org.w3c.dom.Node;
31+import org.w3c.dom.NodeList;
32+
33+public class ScrapingXml {
34+ /* ---------------------------------------------------------------------- *
35+ * フィールド
36+ * ---------------------------------------------------------------------- */
37+ String rootnameScraping = "webscraping";
38+
39+ private String testUrl;
40+ private SearchData[] sdata;
41+
42+ public LibraryXml xlib = new LibraryXml();
43+ public Element root;
44+
45+ /* ---------------------------------------------------------------------- *
46+ * コンストラクタ
47+ * ---------------------------------------------------------------------- */
48+ public ScrapingXml() {
49+ }
50+
51+ /* ---------------------------------------------------------------------- *
52+ * Setter
53+ * ---------------------------------------------------------------------- */
54+ public void setTestUrl(String testUrl) {
55+ this.testUrl = testUrl;
56+ }
57+
58+ public void setSdata() {
59+ this.sdata = new SearchData[SearchData.size()];
60+ for(int i = 0; i < SearchData.size(); i++) {
61+ this.sdata[i] = SearchData.get(i);
62+ }
63+ }
64+
65+ /* ---------------------------------------------------------------------- *
66+ * Getter
67+ * ---------------------------------------------------------------------- */
68+ public String getTestUrl() {
69+ return testUrl;
70+ }
71+
72+ public void getSdata() {
73+ SearchData.clear();
74+ for(SearchData sdata1 : sdata) {
75+ SearchData.add(sdata1);
76+ }
77+ }
78+
79+ /* ---------------------------------------------------------------------- *
80+ * メソッド
81+ * ---------------------------------------------------------------------- */
82+ public void save(File file) {
83+
84+ elementset();
85+
86+ xlib.write(file);
87+ }
88+
89+ public void elementset() {
90+ root = xlib.getwriteRoot(rootnameScraping);
91+ elementsetUrl();
92+ elementsetSearchdata();
93+ System.out.println("elementset XmlScraping");
94+ }
95+
96+ private void elementsetUrl() {
97+ if(testUrl == null) return;
98+ if(testUrl.isEmpty()) return;
99+
100+ Element url = xlib.writedoc.createElement("url");
101+ url.appendChild(xlib.writedoc.createTextNode(testUrl));
102+ root.appendChild(url);
103+ }
104+
105+ private void elementsetSearchdata() {
106+ int count = 0;
107+ for(SearchData sdat : sdata) {
108+ Element cslist = xlib.writedoc.createElement("searchlist");
109+ cslist.setAttribute("listNo", String.valueOf(++count));
110+
111+ addChild(cslist, "item" , sdat.getitem());
112+ addChild(cslist, "htmltag" , sdat.getHtmltag());
113+ addChild(cslist, "htmlid" , sdat.getHtmlid());
114+ addChild(cslist, "htmlclass", sdat.getHtmlclass());
115+ addChild(cslist, "around" , sdat.getaround());
116+ addChild(cslist, "regexp" , sdat.getregexp());
117+
118+ root.appendChild(cslist);
119+ }
120+ }
121+
122+ private void addChild(Element cslist, String keyword, String data) {
123+ if(!data.isEmpty()) {
124+ Element element = xlib.writedoc.createElement(keyword);
125+ element.appendChild(xlib.writedoc.createTextNode(data));
126+ cslist.appendChild(element);
127+ }
128+ }
129+
130+ /* ---------------------------------------------------------------------- */
131+
132+ public void load(File file) {
133+ xlib.read(file);
134+ elementget();
135+ }
136+
137+ public void load(InputStream is) {
138+ xlib.read(is);
139+ elementget();
140+ }
141+
142+ public void elementget() {
143+ root = xlib.getreadRoot(rootnameScraping);
144+ elementgetUrl();
145+ elementgetSearchdata();
146+ }
147+
148+ private void elementgetUrl() {
149+ NodeList nodelist = root.getElementsByTagName("url");
150+ if(nodelist.getLength() > 0) {
151+ Node node = nodelist.item(0);
152+ testUrl = node.getFirstChild().getNodeValue();
153+ }
154+ }
155+
156+ private void elementgetSearchdata() {
157+ ArrayList<SearchData> slist = new ArrayList<>();
158+
159+ NodeList nodelist = root.getElementsByTagName("searchlist");
160+ for(int i = 0; i < nodelist.getLength(); i++) {
161+ Node childnode = nodelist.item(i);
162+
163+ boolean sdatflg = false;
164+ SearchData sdat = new SearchData();
165+ for (Node child = childnode.getFirstChild(); child != null; child = child.getNextSibling()) {
166+ if(child.getNodeType() == Node.ELEMENT_NODE) {
167+ String tag = child.getNodeName();
168+ String rtn = "";
169+ if(child.getFirstChild() != null) {
170+ rtn = child.getFirstChild().getNodeValue();
171+ }
172+ switch (tag) {
173+ case "item" :
174+ sdat.setitem(rtn);
175+ sdatflg = true;
176+ break;
177+ case "htmltag" :
178+ sdat.setHtmltag(rtn);
179+ sdatflg = true;
180+ break;
181+ case "htmlid" :
182+ sdat.setHtmlid(rtn);
183+ sdatflg = true;
184+ break;
185+ case "htmlclass" :
186+ sdat.setHtmlclass(rtn);
187+ sdatflg = true;
188+ break;
189+ case "around" :
190+ sdat.setaround(rtn);
191+ sdatflg = true;
192+ break;
193+ case "regexp" :
194+ sdat.setregexp(rtn);
195+ sdatflg = true;
196+ break;
197+ }
198+ }
199+ }
200+ if(sdatflg) slist.add(sdat);
201+ }
202+ // 配列化
203+ sdata = new SearchData[slist.size()];
204+ for(int i = 0; i < slist.size(); i++) {
205+ sdata[i] = slist.get(i);
206+ }
207+ }
208+
209+}
--- trunk/webScraping/src/webScraping/utility/HtmlSearch.java (nonexistent)
+++ trunk/webScraping/src/webScraping/utility/HtmlSearch.java (revision 145)
@@ -0,0 +1,611 @@
1+/*
2+ * Copyright (C) 2014 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id: HtmlSearch.java 139 2016-05-17 09:16:40Z tuna_p $
21+ */
22+package webScraping.utility;
23+
24+import webScraping.core.Scraping;
25+import webScraping.core.HtmlParser;
26+import java.awt.Desktop;
27+import java.io.File;
28+import java.io.IOException;
29+import java.net.MalformedURLException;
30+import java.net.URI;
31+import java.net.URISyntaxException;
32+import java.net.URL;
33+import java.util.logging.Level;
34+import java.util.logging.Logger;
35+import javax.swing.JFileChooser;
36+import javax.swing.filechooser.FileFilter;
37+import javax.swing.filechooser.FileNameExtensionFilter;
38+import javax.swing.table.DefaultTableModel;
39+import webScraping.core.SearchData;
40+
41+/**
42+ * HTMLページ上の特定の項目を検索し、その項目内容の値を取得する.
43+ * @author kgto
44+ */
45+public class HtmlSearch extends javax.swing.JFrame {
46+ private final ScrapingXml xmlwriter = new ScrapingXml();
47+
48+ SearchDataTableModel sdatatblmodel;
49+
50+ /**
51+ * Creates new form Frame1
52+ */
53+ public HtmlSearch() {
54+ sdatatblmodel = new SearchDataTableModel();
55+
56+ initComponents();
57+
58+ // カレントディレクトリ取得
59+ String dir = System.getProperty("user.dir");
60+ File file = new java.io.File(dir + "\\data");
61+ jFileChooser1.setCurrentDirectory(file);
62+
63+ FileFilter filter1 = new FileNameExtensionFilter("XMLファイル", "xml");
64+ FileFilter filter2 = new FileNameExtensionFilter("TEXTファイル", "txt");
65+ jFileChooser1.addChoosableFileFilter(filter1);
66+ jFileChooser1.addChoosableFileFilter(filter2);
67+ jFileChooser1.setFileFilter(filter1);
68+
69+ }
70+
71+ /**
72+ * This method is called from within the constructor to initialize the form.
73+ * WARNING: Do NOT modify this code. The content of this method is always
74+ * regenerated by the Form Editor.
75+ */
76+ @SuppressWarnings("unchecked")
77+ // <editor-fold defaultstate="collapsed" desc="Generated Code">//GEN-BEGIN:initComponents
78+ private void initComponents() {
79+
80+ jFileChooser1 = new javax.swing.JFileChooser();
81+ jRadioButton1 = new javax.swing.JRadioButton();
82+ jLabel1 = new javax.swing.JLabel();
83+ jTxtUrl = new javax.swing.JTextField();
84+ jBtnSearch = new javax.swing.JButton();
85+ jTabbedPane1 = new javax.swing.JTabbedPane();
86+ jPanelTab1 = new javax.swing.JPanel();
87+ jScrollPane1 = new javax.swing.JScrollPane();
88+ jTable1 = new javax.swing.JTable();
89+ jBtnRowIns = new javax.swing.JButton();
90+ jBtnRowDel = new javax.swing.JButton();
91+ jBtnRowCpy = new javax.swing.JButton();
92+ jPanelTab2 = new javax.swing.JPanel();
93+ jScrollPaneLabel = new javax.swing.JScrollPane();
94+ jTxtLabel = new javax.swing.JTextArea();
95+ jScrollPane404msg = new javax.swing.JScrollPane();
96+ jTxt404msg = new javax.swing.JTextArea();
97+ jPanelRtn = new javax.swing.JPanel();
98+ jScrollPaneRtn = new javax.swing.JScrollPane();
99+ jTxtRtn = new javax.swing.JTextArea();
100+ jMenuBar1 = new javax.swing.JMenuBar();
101+ jMenu1 = new javax.swing.JMenu();
102+ jMenuLoad = new javax.swing.JMenuItem();
103+ jMenuSave = new javax.swing.JMenuItem();
104+ jMenu3 = new javax.swing.JMenu();
105+ jMenuItem1 = new javax.swing.JMenuItem();
106+ jMenu2 = new javax.swing.JMenu();
107+
108+ jFileChooser1.setCurrentDirectory(null);
109+ jFileChooser1.setDialogTitle("");
110+
111+ jRadioButton1.setText("jRadioButton1");
112+
113+ setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE);
114+ setTitle("タグ検索");
115+
116+ jLabel1.setText(" URL:");
117+
118+ jBtnSearch.setText("検索");
119+ jBtnSearch.addActionListener(new java.awt.event.ActionListener() {
120+ public void actionPerformed(java.awt.event.ActionEvent evt) {
121+ jBtnSearchActionPerformed(evt);
122+ }
123+ });
124+
125+ jPanelTab1.setBorder(javax.swing.BorderFactory.createTitledBorder("検索情報"));
126+
127+ jTable1.setModel(sdatatblmodel);
128+ jTable1.setSelectionMode(javax.swing.ListSelectionModel.SINGLE_SELECTION);
129+ jTable1.getTableHeader().setReorderingAllowed(false);
130+ jScrollPane1.setViewportView(jTable1);
131+
132+ jBtnRowIns.setText("行挿入");
133+ jBtnRowIns.addActionListener(new java.awt.event.ActionListener() {
134+ public void actionPerformed(java.awt.event.ActionEvent evt) {
135+ jBtnRowInsActionPerformed(evt);
136+ }
137+ });
138+
139+ jBtnRowDel.setText("行削除");
140+ jBtnRowDel.addActionListener(new java.awt.event.ActionListener() {
141+ public void actionPerformed(java.awt.event.ActionEvent evt) {
142+ jBtnRowDelActionPerformed(evt);
143+ }
144+ });
145+
146+ jBtnRowCpy.setText("行コピー");
147+ jBtnRowCpy.addActionListener(new java.awt.event.ActionListener() {
148+ public void actionPerformed(java.awt.event.ActionEvent evt) {
149+ jBtnRowCpyActionPerformed(evt);
150+ }
151+ });
152+
153+ javax.swing.GroupLayout jPanelTab1Layout = new javax.swing.GroupLayout(jPanelTab1);
154+ jPanelTab1.setLayout(jPanelTab1Layout);
155+ jPanelTab1Layout.setHorizontalGroup(
156+ jPanelTab1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
157+ .addGroup(jPanelTab1Layout.createSequentialGroup()
158+ .addContainerGap(javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
159+ .addComponent(jBtnRowCpy)
160+ .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
161+ .addComponent(jBtnRowDel)
162+ .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
163+ .addComponent(jBtnRowIns))
164+ .addComponent(jScrollPane1, javax.swing.GroupLayout.PREFERRED_SIZE, 0, Short.MAX_VALUE)
165+ );
166+ jPanelTab1Layout.setVerticalGroup(
167+ jPanelTab1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
168+ .addGroup(jPanelTab1Layout.createSequentialGroup()
169+ .addComponent(jScrollPane1, javax.swing.GroupLayout.DEFAULT_SIZE, 173, Short.MAX_VALUE)
170+ .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
171+ .addGroup(jPanelTab1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE)
172+ .addComponent(jBtnRowDel)
173+ .addComponent(jBtnRowIns)
174+ .addComponent(jBtnRowCpy)))
175+ );
176+
177+ jTabbedPane1.addTab("キー設定", jPanelTab1);
178+
179+ jPanelTab2.setBorder(javax.swing.BorderFactory.createTitledBorder("メッセージ"));
180+
181+ jScrollPaneLabel.setHorizontalScrollBarPolicy(javax.swing.ScrollPaneConstants.HORIZONTAL_SCROLLBAR_NEVER);
182+ jScrollPaneLabel.setVerticalScrollBarPolicy(javax.swing.ScrollPaneConstants.VERTICAL_SCROLLBAR_NEVER);
183+
184+ jTxtLabel.setEditable(false);
185+ jTxtLabel.setBackground(java.awt.Color.lightGray);
186+ jTxtLabel.setColumns(20);
187+ jTxtLabel.setFont(new java.awt.Font("MS UI Gothic", 0, 12)); // NOI18N
188+ jTxtLabel.setLineWrap(true);
189+ jTxtLabel.setRows(2);
190+ jTxtLabel.setText("取得ページに以下のメッセージが含まれていた場合、対象データが取得出来なかったと通知します。");
191+ jTxtLabel.setAutoscrolls(false);
192+ jTxtLabel.setBorder(null);
193+ jTxtLabel.setCursor(new java.awt.Cursor(java.awt.Cursor.DEFAULT_CURSOR));
194+ jTxtLabel.setFocusable(false);
195+ jTxtLabel.setHighlighter(null);
196+ jTxtLabel.setKeymap(null);
197+ jTxtLabel.setOpaque(false);
198+ jTxtLabel.setRequestFocusEnabled(false);
199+ jTxtLabel.setVerifyInputWhenFocusTarget(false);
200+ jScrollPaneLabel.setViewportView(jTxtLabel);
201+
202+ jTxt404msg.setColumns(20);
203+ jTxt404msg.setRows(3);
204+ jTxt404msg.setText("一致する銘柄は見つかりませんでした\n");
205+ jScrollPane404msg.setViewportView(jTxt404msg);
206+
207+ javax.swing.GroupLayout jPanelTab2Layout = new javax.swing.GroupLayout(jPanelTab2);
208+ jPanelTab2.setLayout(jPanelTab2Layout);
209+ jPanelTab2Layout.setHorizontalGroup(
210+ jPanelTab2Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
211+ .addComponent(jScrollPane404msg)
212+ .addGroup(javax.swing.GroupLayout.Alignment.TRAILING, jPanelTab2Layout.createSequentialGroup()
213+ .addContainerGap()
214+ .addComponent(jScrollPaneLabel, javax.swing.GroupLayout.DEFAULT_SIZE, 359, Short.MAX_VALUE)
215+ .addContainerGap())
216+ );
217+ jPanelTab2Layout.setVerticalGroup(
218+ jPanelTab2Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
219+ .addGroup(jPanelTab2Layout.createSequentialGroup()
220+ .addComponent(jScrollPaneLabel, javax.swing.GroupLayout.PREFERRED_SIZE, 38, javax.swing.GroupLayout.PREFERRED_SIZE)
221+ .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED)
222+ .addComponent(jScrollPane404msg))
223+ );
224+
225+ jTabbedPane1.addTab("結果無し判定", jPanelTab2);
226+
227+ jPanelRtn.setBorder(javax.swing.BorderFactory.createTitledBorder("検索結果"));
228+
229+ jTxtRtn.setColumns(20);
230+ jTxtRtn.setRows(5);
231+ jScrollPaneRtn.setViewportView(jTxtRtn);
232+
233+ javax.swing.GroupLayout jPanelRtnLayout = new javax.swing.GroupLayout(jPanelRtn);
234+ jPanelRtn.setLayout(jPanelRtnLayout);
235+ jPanelRtnLayout.setHorizontalGroup(
236+ jPanelRtnLayout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
237+ .addComponent(jScrollPaneRtn)
238+ );
239+ jPanelRtnLayout.setVerticalGroup(
240+ jPanelRtnLayout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
241+ .addComponent(jScrollPaneRtn, javax.swing.GroupLayout.DEFAULT_SIZE, 163, Short.MAX_VALUE)
242+ );
243+
244+ jMenu1.setText("ファイル");
245+
246+ jMenuLoad.setText("LOAD");
247+ jMenuLoad.addActionListener(new java.awt.event.ActionListener() {
248+ public void actionPerformed(java.awt.event.ActionEvent evt) {
249+ jMenuLoadActionPerformed(evt);
250+ }
251+ });
252+ jMenu1.add(jMenuLoad);
253+
254+ jMenuSave.setText("SAVE");
255+ jMenuSave.addActionListener(new java.awt.event.ActionListener() {
256+ public void actionPerformed(java.awt.event.ActionEvent evt) {
257+ jMenuSaveActionPerformed(evt);
258+ }
259+ });
260+ jMenu1.add(jMenuSave);
261+
262+ jMenuBar1.add(jMenu1);
263+
264+ jMenu3.setText("ツール");
265+
266+ jMenuItem1.setText("ブラウザで表示");
267+ jMenuItem1.addActionListener(new java.awt.event.ActionListener() {
268+ public void actionPerformed(java.awt.event.ActionEvent evt) {
269+ jMenuItem1ActionPerformed(evt);
270+ }
271+ });
272+ jMenu3.add(jMenuItem1);
273+
274+ jMenuBar1.add(jMenu3);
275+
276+ jMenu2.setText("検索");
277+ jMenu2.addMouseListener(new java.awt.event.MouseAdapter() {
278+ public void mouseClicked(java.awt.event.MouseEvent evt) {
279+ jMenu2MouseClicked(evt);
280+ }
281+ });
282+ jMenuBar1.add(jMenu2);
283+
284+ setJMenuBar(jMenuBar1);
285+
286+ javax.swing.GroupLayout layout = new javax.swing.GroupLayout(getContentPane());
287+ getContentPane().setLayout(layout);
288+ layout.setHorizontalGroup(
289+ layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
290+ .addComponent(jPanelRtn, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
291+ .addGroup(layout.createSequentialGroup()
292+ .addComponent(jLabel1)
293+ .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
294+ .addComponent(jTxtUrl)
295+ .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
296+ .addComponent(jBtnSearch))
297+ .addComponent(jTabbedPane1)
298+ );
299+ layout.setVerticalGroup(
300+ layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
301+ .addGroup(layout.createSequentialGroup()
302+ .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE)
303+ .addComponent(jLabel1)
304+ .addComponent(jTxtUrl, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE)
305+ .addComponent(jBtnSearch))
306+ .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
307+ .addComponent(jTabbedPane1, javax.swing.GroupLayout.PREFERRED_SIZE, 250, javax.swing.GroupLayout.PREFERRED_SIZE)
308+ .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
309+ .addComponent(jPanelRtn, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
310+ .addContainerGap())
311+ );
312+
313+ pack();
314+ }// </editor-fold>//GEN-END:initComponents
315+
316+ private void jBtnRowInsActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowInsActionPerformed
317+ int SelectedRow = jTable1.getSelectedRow();
318+ SearchData sdata = new SearchData();
319+ if(SelectedRow >= 0) {
320+ sdatatblmodel.insertRow(SelectedRow, sdata);
321+ } else {
322+ sdatatblmodel.addRow(sdata);
323+ }
324+ }//GEN-LAST:event_jBtnRowInsActionPerformed
325+
326+ private void jBtnRowDelActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowDelActionPerformed
327+ int SelectedRow = jTable1.getSelectedRow();
328+ if(!(SelectedRow < 0)) {
329+ sdatatblmodel.removeRow(SelectedRow);
330+ }
331+ }//GEN-LAST:event_jBtnRowDelActionPerformed
332+
333+ private void jMenuLoadActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuLoadActionPerformed
334+ jFileChooser1.setDialogTitle("読込");
335+ int selected = jFileChooser1.showOpenDialog(this);
336+ if (selected == JFileChooser.APPROVE_OPTION) {
337+ File file = jFileChooser1.getSelectedFile();
338+ xmlwriter.load(file);
339+ jTxtUrl.setText(xmlwriter.getTestUrl());
340+ xmlwriter.getSdata();
341+ sdatatblmodel.setRowCount(0);
342+ for(int i = 0; i < SearchData.size(); i++) {
343+ SearchData sdata = SearchData.get(i);
344+ sdatatblmodel.addRow(sdata);
345+ }
346+ }
347+ }//GEN-LAST:event_jMenuLoadActionPerformed
348+
349+ private void jMenuSaveActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuSaveActionPerformed
350+ jFileChooser1.setDialogTitle("保存");
351+ int selected = jFileChooser1.showSaveDialog(this);
352+ if (selected == JFileChooser.APPROVE_OPTION) {
353+ File file = jFileChooser1.getSelectedFile();
354+ xmlwriter.setTestUrl(jTxtUrl.getText());
355+
356+ SearchData.clear();
357+ for(int row = 0; row < sdatatblmodel.getRowCount(); row++) {
358+ SearchData sdata = sdatatblmodel.getSearchData(row);
359+ SearchData.add(sdata);
360+ }
361+ xmlwriter.setSdata();
362+ xmlwriter.save(file);
363+ }
364+ }//GEN-LAST:event_jMenuSaveActionPerformed
365+
366+ private void jBtnRowCpyActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowCpyActionPerformed
367+ int SelectedRow = jTable1.getSelectedRow();
368+ if(SelectedRow >= 0) {
369+ SearchData sdata = sdatatblmodel.getSearchData(SelectedRow);
370+ sdatatblmodel.insertRow(SelectedRow, sdata);
371+ }
372+ }//GEN-LAST:event_jBtnRowCpyActionPerformed
373+
374+ private void jMenuItem1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuItem1ActionPerformed
375+ Desktop desktop = Desktop.getDesktop();
376+ String uriString = jTxtUrl.getText();
377+ try {
378+ URI uri = new URI(uriString);
379+ desktop.browse(uri);
380+
381+ } catch (URISyntaxException | IOException ex) {
382+ Logger.getLogger(HtmlSearch.class.getName()).log(Level.SEVERE, null, ex);
383+ }
384+ }//GEN-LAST:event_jMenuItem1ActionPerformed
385+
386+ private void jMenu2MouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_jMenu2MouseClicked
387+ Search_execution();
388+ }//GEN-LAST:event_jMenu2MouseClicked
389+
390+ private void jBtnSearchActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnSearchActionPerformed
391+ Search_execution();
392+ }//GEN-LAST:event_jBtnSearchActionPerformed
393+
394+ /**
395+ * 検索実行.
396+ */
397+ void Search_execution_old() {
398+ jTxtRtn.setText(null);
399+ HtmlParser par = new HtmlParser(jTxtUrl.getText());
400+
401+ // データ無し(404)判定
402+ String strdata = par.getStringPageData();
403+ if(strdata == null) {
404+ jTxtRtn.append("読込みページがありません");
405+ return;
406+ }
407+ String text = jTxt404msg.getText();
408+ String[] strsearch = text.split("\n");
409+ for(String strsearch1 : strsearch) {
410+ if(strdata.contains(strsearch1)) {
411+ jTxtRtn.append(strsearch1);
412+ return;
413+ }
414+ }
415+
416+ // 検索結果
417+ for(int row = 0; row < sdatatblmodel.getRowCount(); row++) {
418+ SearchData sdata = sdatatblmodel.getSearchData(row);
419+ String ans = sdata.getitem();
420+ String rtn = par.search(sdata);
421+ jTxtRtn.append(ans + "\t" + rtn + "\n");
422+ }
423+
424+ jTxtRtn.setCaretPosition(0);
425+ }
426+
427+ /**
428+ * 検索実行.
429+ */
430+ void Search_execution() {
431+ jTxtRtn.setText(null);
432+ Scraping scrap = new Scraping();
433+
434+ // URL生成
435+ URL url = null;
436+ try {
437+ url = new URL(jTxtUrl.getText());
438+ } catch (MalformedURLException ex) {
439+ Logger.getLogger(HtmlSearch.class.getName()).log(Level.SEVERE, null, ex);
440+ }
441+
442+ // SearchData生成
443+ SearchData.clear();
444+ for(int row = 0; row < sdatatblmodel.getRowCount(); row++) {
445+ SearchData.add(sdatatblmodel.getSearchData(row));
446+ }
447+
448+ // HTML検索
449+ String[] result = scrap.getResult(url);
450+
451+ // 検索結果
452+ if(result == null) {
453+ jTxtRtn.append("Data not find");
454+ return;
455+ }
456+
457+ // 結果表示
458+ for(int i = 0; i < SearchData.size(); i++) {
459+ String ans = SearchData.get(i).getitem();
460+ String rtn = result[i];
461+ jTxtRtn.append(ans + "\t" + rtn + "\n");
462+ }
463+
464+ jTxtRtn.setCaretPosition(0);
465+ }
466+
467+ /** Entry point: tries to select the Nimbus look and feel, then queues creation and display of a HtmlSearch window via EventQueue.invokeLater.
468+ * @param args the command line arguments (not read by this method)
469+ */
470+ public static void main(String args[]) {
471+ /* Set the Nimbus look and feel */
472+ //<editor-fold defaultstate="collapsed" desc=" Look and feel setting code (optional) ">
473+ /* If Nimbus (introduced in Java SE 6) is not available, stay with the default look and feel.
474+ * For details see http://download.oracle.com/javase/tutorial/uiswing/lookandfeel/plaf.html
475+ */
476+ try {
477+ for (javax.swing.UIManager.LookAndFeelInfo info : javax.swing.UIManager.getInstalledLookAndFeels()) {
478+ if ("Nimbus".equals(info.getName())) {
479+ javax.swing.UIManager.setLookAndFeel(info.getClassName());
480+ break; // stop scanning once Nimbus has been applied
481+ }
482+ }
483+ } catch (ClassNotFoundException
484+ | InstantiationException
485+ | IllegalAccessException
486+ | javax.swing.UnsupportedLookAndFeelException ex) {
487+ java.util.logging.Logger.getLogger(HtmlSearch.class.getName()).log(java.util.logging.Level.SEVERE, null, ex); // failure is logged only; execution continues to window creation
488+ }
489+ //</editor-fold>
490+
491+ /* Create and display the form */
492+ java.awt.EventQueue.invokeLater(new Runnable() {
493+ @Override
494+ public void run() {
495+ new HtmlSearch().setVisible(true);
496+ }
497+ });
498+ }
499+
500+ // Variables declaration - do not modify//GEN-BEGIN:variables
501+ private javax.swing.JButton jBtnRowCpy;
502+ private javax.swing.JButton jBtnRowDel;
503+ private javax.swing.JButton jBtnRowIns;
504+ private javax.swing.JButton jBtnSearch;
505+ private javax.swing.JFileChooser jFileChooser1;
506+ private javax.swing.JLabel jLabel1;
507+ private javax.swing.JMenu jMenu1;
508+ private javax.swing.JMenu jMenu2;
509+ private javax.swing.JMenu jMenu3;
510+ private javax.swing.JMenuBar jMenuBar1;
511+ private javax.swing.JMenuItem jMenuItem1;
512+ private javax.swing.JMenuItem jMenuLoad;
513+ private javax.swing.JMenuItem jMenuSave;
514+ private javax.swing.JPanel jPanelRtn;
515+ private javax.swing.JPanel jPanelTab1;
516+ private javax.swing.JPanel jPanelTab2;
517+ private javax.swing.JRadioButton jRadioButton1;
518+ private javax.swing.JScrollPane jScrollPane1;
519+ private javax.swing.JScrollPane jScrollPane404msg;
520+ private javax.swing.JScrollPane jScrollPaneLabel;
521+ private javax.swing.JScrollPane jScrollPaneRtn;
522+ private javax.swing.JTabbedPane jTabbedPane1;
523+ private javax.swing.JTable jTable1;
524+ private javax.swing.JTextArea jTxt404msg;
525+ private javax.swing.JTextArea jTxtLabel;
526+ private javax.swing.JTextArea jTxtRtn;
527+ private javax.swing.JTextField jTxtUrl;
528+ // End of variables declaration//GEN-END:variables
529+}
530+
531+ class SearchDataTableModel extends DefaultTableModel { // table model with six String columns whose rows are read and written as SearchData objects
532+ /* ---------------------------------------------------------------------- *
533+ * Data attributes: column headers, column classes and column indexes
534+ * ---------------------------------------------------------------------- */
535+ public String[] columnName = {
536+ /* 0: item name */ "項目名",
537+ /* 1: tag */ "タグ",
538+ /* 2: ID */ "ID",
539+ /* 3: class */ "クラス",
540+ /* 4: position */ "位置",
541+ /* 5: extraction condition */ "抽出条件"
542+ };
543+
544+ public Class[] columnClass = {
545+ /* 0 */ String.class,
546+ /* 1 */ String.class,
547+ /* 2 */ String.class,
548+ /* 3 */ String.class,
549+ /* 4 */ String.class,
550+ /* 5 */ String.class
551+ };
552+ // Column indexes used by getSearchData; they follow the order of columnName above.
553+ int column_item = 0;
554+ int column_htmltag = 1;
555+ int column_htmlid = 2;
556+ int column_htmlclass = 3;
557+ int column_around = 4;
558+ int column_regexp = 5;
559+
560+ /* ---------------------------------------------------------------------- *
561+ * Operations
562+ * ---------------------------------------------------------------------- */
563+ @Override
564+ public String getColumnName(int modelIndex) {
565+ return columnName[modelIndex];
566+ }
567+
568+ @Override
569+ public Class<?> getColumnClass(int modelIndex) {
570+ return columnClass[modelIndex];
571+ }
572+
573+ @Override
574+ public int getColumnCount() {
575+ return columnName.length; // the column count comes from the columnName array
576+ }
577+
578+ /* ---------------------------------------------------------------------- */
579+ // Builds a new SearchData from the cells of the given row; String.valueOf turns a null cell into the text "null".
580+ public SearchData getSearchData(int row) {
581+ SearchData sdata = new SearchData();
582+ sdata.setitem(String.valueOf(getValueAt(row, column_item)));
583+ sdata.setHtmltag(String.valueOf(getValueAt(row, column_htmltag)));
584+ sdata.setHtmlid(String.valueOf(getValueAt(row, column_htmlid)));
585+ sdata.setHtmlclass(String.valueOf(getValueAt(row, column_htmlclass)));
586+ sdata.setaround(String.valueOf(getValueAt(row, column_around)));
587+ sdata.setregexp(String.valueOf(getValueAt(row, column_regexp)));
588+ return sdata;
589+ }
590+ // Adds sdata as a row by delegating to addRow(Object[]).
591+ public void addRow(SearchData sdata) {
592+ addRow(getObjdata(sdata));
593+ }
594+ // Inserts sdata as a row at index row by delegating to insertRow(int, Object[]).
595+ public void insertRow(int row, SearchData sdata) {
596+ insertRow(row, getObjdata(sdata));
597+ }
598+ // Packs the six SearchData values into an Object[] in column order (item, tag, id, class, around, regexp).
599+ private Object[] getObjdata(SearchData sdata) {
600+ Object[] obj = new Object[] {
601+ sdata.getitem(),
602+ sdata.getHtmltag(),
603+ sdata.getHtmlid(),
604+ sdata.getHtmlclass(),
605+ sdata.getaround(),
606+ sdata.getregexp()
607+ };
608+ return obj;
609+ }
610+
611+}
\ No newline at end of file