• R/O
  • SSH
  • HTTPS

提交

标签

Frequently used words (click to add to your profile)

javaandroidc++linuxc#objective-ccocoa誰得qtrubypythongamewindowsbathyscaphephpguic翻訳omegattwitterframeworktestbtronarduinovb.net計画中(planning stage)directxpreviewerゲームエンジンdom

作業部屋の使い方を試しています。


Commit MetaInfo

修订版140 (tree)
时间2016-05-17 20:24:42
作者tuna_p

Log Message

(empty log message)

更改概述

差异

--- branches/b4/webScraping/src/webScraping/utility/LibraryXml.java (nonexistent)
+++ branches/b4/webScraping/src/webScraping/utility/LibraryXml.java (revision 140)
@@ -0,0 +1,153 @@
1+/*
2+ * Copyright (C) 2014-2015 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id: LibraryXml.java 138 2016-05-17 06:40:29Z tuna_p $
21+ */
22+
23+package webScraping.utility;
24+
25+import java.io.File;
26+import java.io.FileNotFoundException;
27+import java.io.FileOutputStream;
28+import java.io.IOException;
29+import java.io.InputStream;
30+import java.util.logging.Level;
31+import java.util.logging.Logger;
32+
33+import javax.xml.parsers.DocumentBuilder;
34+import javax.xml.parsers.DocumentBuilderFactory;
35+import javax.xml.parsers.ParserConfigurationException;
36+import javax.xml.transform.Transformer;
37+import javax.xml.transform.TransformerConfigurationException;
38+import javax.xml.transform.TransformerException;
39+import javax.xml.transform.TransformerFactory;
40+import javax.xml.transform.dom.DOMSource;
41+import javax.xml.transform.stream.StreamResult;
42+
43+import org.w3c.dom.DOMImplementation;
44+import org.w3c.dom.Document;
45+import org.w3c.dom.Element;
46+import org.w3c.dom.Node;
47+import org.w3c.dom.NodeList;
48+import org.xml.sax.SAXException;
49+
50+public class LibraryXml {
51+
52+ String xmlrootname = "xmlcontainer";
53+
54+ DocumentBuilder builder;
55+ public Document readdoc, writedoc;
56+ Element xmlroot;
57+
58+ /* ---------------------------------------------------------------------- *
59+ * コンストラクタ
60+ * ---------------------------------------------------------------------- */
61+ public LibraryXml() {
62+ try {
63+ DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
64+ builder = factory.newDocumentBuilder();
65+
66+ } catch (ParserConfigurationException ex) {
67+ Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
68+ }
69+ }
70+
71+ /* ---------------------------------------------------------------------- *
72+ * メソッド
73+ * ---------------------------------------------------------------------- */
74+ /* Write processing (builds and outputs the XML document) */
75+ public Element getwriteRoot(String elementName) { // creates <elementName> under the output root and returns it
76+ mainElement(); // lazily creates writedoc (and its root element) on first use
77+ Element element = writedoc.createElement(elementName);
78+ writedoc.getDocumentElement().appendChild(element); // not xmlroot: read() repoints that field at the parsed document
79+ return element;
80+ }
81+
82+ private void mainElement() {
83+ if(writedoc == null) {
84+ DOMImplementation domImpl = builder.getDOMImplementation();
85+ writedoc = domImpl.createDocument("", xmlrootname, null);
86+ xmlroot = writedoc.getDocumentElement();
87+ }
88+ }
89+
90+ /**
91+ * XML書込み.
92+ * @param file
93+ */
94+ public void write(File file) {
95+ try (FileOutputStream os = new FileOutputStream(file)) {
96+ TransformerFactory transFactory = TransformerFactory.newInstance();
97+ Transformer transformer = transFactory.newTransformer();
98+
99+ transformer.setOutputProperty("indent", "yes"); // 改行指定
100+ transformer.setOutputProperty("method", "xml");
101+
102+ DOMSource source = new DOMSource(writedoc);
103+ StreamResult result = new StreamResult(os);
104+ transformer.transform(source, result);
105+
106+ // 作成したXMLをクリア
107+ writedoc = null;
108+
109+ } catch (TransformerConfigurationException ex) {
110+ Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
111+ } catch (FileNotFoundException | TransformerException ex) {
112+ Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
113+ } catch (IOException ex) {
114+ Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
115+ }
116+ }
117+
118+ /* ---------------------------------------------------------------------- */
119+ /* Read processing (parses an XML document and looks up elements) */
120+
121+ public Element getreadRoot(String elementName) { // first descendant of the current root named elementName, or null
122+ NodeList nodelist = xmlroot.getElementsByTagName(elementName);
123+ Node node = nodelist.item(0); // item() yields null when no element with that tag exists
124+ return (node != null && node.getNodeType() == Node.ELEMENT_NODE ? (Element)node : null);
125+ }
126+
127+ /**
128+ * XML読込み.
129+ * @param file
130+ */
131+ public void read(File file) {
132+ try {
133+ readdoc = builder.parse(file);
134+ xmlroot = readdoc.getDocumentElement();
135+
136+ } catch (SAXException | IOException ex) {
137+ Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
138+ }
139+ }
140+
141+ public void read(InputStream is) {
142+ try {
143+ readdoc = builder.parse(is);
144+ xmlroot = readdoc.getDocumentElement();
145+
146+ } catch (SAXException | IOException ex) {
147+ Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
148+ }
149+ }
150+
151+ /* ---------------------------------------------------------------------- */
152+
153+}
--- branches/b4/webScraping/src/webScraping/utility/ScrapingXml.java (nonexistent)
+++ branches/b4/webScraping/src/webScraping/utility/ScrapingXml.java (revision 140)
@@ -0,0 +1,209 @@
1+/*
2+ * Copyright (C) 2014-2015 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id: ScrapingXml.java 138 2016-05-17 06:40:29Z tuna_p $
21+ */
22+
23+package webScraping.utility;
24+
25+import webScraping.core.SearchData;
26+import java.io.File;
27+import java.io.InputStream;
28+import java.util.ArrayList;
29+import org.w3c.dom.Element;
30+import org.w3c.dom.Node;
31+import org.w3c.dom.NodeList;
32+
33+public class ScrapingXml {
34+ /* ---------------------------------------------------------------------- *
35+ * フィールド
36+ * ---------------------------------------------------------------------- */
37+ String rootnameScraping = "webscraping";
38+
39+ private String testUrl;
40+ private SearchData[] sdata;
41+
42+ public LibraryXml xlib = new LibraryXml();
43+ public Element root;
44+
45+ /* ---------------------------------------------------------------------- *
46+ * コンストラクタ
47+ * ---------------------------------------------------------------------- */
48+ public ScrapingXml() {
49+ }
50+
51+ /* ---------------------------------------------------------------------- *
52+ * Setter
53+ * ---------------------------------------------------------------------- */
54+ public void setTestUrl(String testUrl) {
55+ this.testUrl = testUrl;
56+ }
57+
58+ public void setSdata() {
59+ this.sdata = new SearchData[SearchData.size()];
60+ for(int i = 0; i < SearchData.size(); i++) {
61+ this.sdata[i] = SearchData.get(i);
62+ }
63+ }
64+
65+ /* ---------------------------------------------------------------------- *
66+ * Getter
67+ * ---------------------------------------------------------------------- */
68+ public String getTestUrl() {
69+ return testUrl;
70+ }
71+
72+ public void getSdata() {
73+ SearchData.clear();
74+ for(SearchData sdata1 : sdata) {
75+ SearchData.add(sdata1);
76+ }
77+ }
78+
79+ /* ---------------------------------------------------------------------- *
80+ * メソッド
81+ * ---------------------------------------------------------------------- */
82+ public void save(File file) {
83+
84+ elementset();
85+
86+ xlib.write(file);
87+ }
88+
89+ public void elementset() {
90+ root = xlib.getwriteRoot(rootnameScraping);
91+ elementsetUrl();
92+ elementsetSearchdata();
93+ System.out.println("elementset XmlScraping");
94+ }
95+
96+ private void elementsetUrl() {
97+ if(testUrl == null) return;
98+ if(testUrl.isEmpty()) return;
99+
100+ Element url = xlib.writedoc.createElement("url");
101+ url.appendChild(xlib.writedoc.createTextNode(testUrl));
102+ root.appendChild(url);
103+ }
104+
105+ private void elementsetSearchdata() {
106+ int count = 0;
107+ for(SearchData sdat : sdata) {
108+ Element cslist = xlib.writedoc.createElement("searchlist");
109+ cslist.setAttribute("listNo", String.valueOf(++count));
110+
111+ addChild(cslist, "item" , sdat.getitem());
112+ addChild(cslist, "htmltag" , sdat.getHtmltag());
113+ addChild(cslist, "htmlid" , sdat.getHtmlid());
114+ addChild(cslist, "htmlclass", sdat.getHtmlclass());
115+ addChild(cslist, "around" , sdat.getaround());
116+ addChild(cslist, "regexp" , sdat.getregexp());
117+
118+ root.appendChild(cslist);
119+ }
120+ }
121+
122+ private void addChild(Element cslist, String keyword, String data) { // appends <keyword>data</keyword> to cslist
123+ if(data != null && !data.isEmpty()) { // null or empty values are not written, mirroring the null/empty checks in elementsetUrl
124+ Element element = xlib.writedoc.createElement(keyword);
125+ element.appendChild(xlib.writedoc.createTextNode(data));
126+ cslist.appendChild(element);
127+ }
128+ }
129+
130+ /* ---------------------------------------------------------------------- */
131+
132+ public void load(File file) {
133+ xlib.read(file);
134+ elementget();
135+ }
136+
137+ public void load(InputStream is) {
138+ xlib.read(is);
139+ elementget();
140+ }
141+
142+ public void elementget() {
143+ root = xlib.getreadRoot(rootnameScraping);
144+ elementgetUrl();
145+ elementgetSearchdata();
146+ }
147+
148+ private void elementgetUrl() { // copies the text of the first <url> element under root into testUrl
149+ NodeList nodelist = root.getElementsByTagName("url");
150+ if(nodelist.getLength() > 0) { // testUrl is left untouched when the document has no <url>
151+ Node text = nodelist.item(0).getFirstChild(); // null for an empty <url/> element
152+ testUrl = (text != null ? text.getNodeValue() : ""); // same "" default elementgetSearchdata uses for empty elements
153+ }
154+ }
155+
156+ private void elementgetSearchdata() {
157+ ArrayList<SearchData> slist = new ArrayList<>();
158+
159+ NodeList nodelist = root.getElementsByTagName("searchlist");
160+ for(int i = 0; i < nodelist.getLength(); i++) {
161+ Node childnode = nodelist.item(i);
162+
163+ boolean sdatflg = false;
164+ SearchData sdat = new SearchData();
165+ for (Node child = childnode.getFirstChild(); child != null; child = child.getNextSibling()) {
166+ if(child.getNodeType() == Node.ELEMENT_NODE) {
167+ String tag = child.getNodeName();
168+ String rtn = "";
169+ if(child.getFirstChild() != null) {
170+ rtn = child.getFirstChild().getNodeValue();
171+ }
172+ switch (tag) {
173+ case "item" :
174+ sdat.setitem(rtn);
175+ sdatflg = true;
176+ break;
177+ case "htmltag" :
178+ sdat.setHtmltag(rtn);
179+ sdatflg = true;
180+ break;
181+ case "htmlid" :
182+ sdat.setHtmlid(rtn);
183+ sdatflg = true;
184+ break;
185+ case "htmlclass" :
186+ sdat.setHtmlclass(rtn);
187+ sdatflg = true;
188+ break;
189+ case "around" :
190+ sdat.setaround(rtn);
191+ sdatflg = true;
192+ break;
193+ case "regexp" :
194+ sdat.setregexp(rtn);
195+ sdatflg = true;
196+ break;
197+ }
198+ }
199+ }
200+ if(sdatflg) slist.add(sdat);
201+ }
202+ // 配列化
203+ sdata = new SearchData[slist.size()];
204+ for(int i = 0; i < slist.size(); i++) {
205+ sdata[i] = slist.get(i);
206+ }
207+ }
208+
209+}
--- branches/b4/webScraping/src/webScraping/utility/HtmlSearch.java (nonexistent)
+++ branches/b4/webScraping/src/webScraping/utility/HtmlSearch.java (revision 140)
@@ -0,0 +1,611 @@
1+/*
2+ * Copyright (C) 2014 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id: HtmlSearch.java 139 2016-05-17 09:16:40Z tuna_p $
21+ */
22+package webScraping.utility;
23+
24+import webScraping.core.Scraping;
25+import webScraping.core.HtmlParser;
26+import java.awt.Desktop;
27+import java.io.File;
28+import java.io.IOException;
29+import java.net.MalformedURLException;
30+import java.net.URI;
31+import java.net.URISyntaxException;
32+import java.net.URL;
33+import java.util.logging.Level;
34+import java.util.logging.Logger;
35+import javax.swing.JFileChooser;
36+import javax.swing.filechooser.FileFilter;
37+import javax.swing.filechooser.FileNameExtensionFilter;
38+import javax.swing.table.DefaultTableModel;
39+import webScraping.core.SearchData;
40+
41+/**
42+ * HTMLページ上の特定の項目を検索し、その項目内容の値を取得する.
43+ * @author kgto
44+ */
45+public class HtmlSearch extends javax.swing.JFrame {
46+ private final ScrapingXml xmlwriter = new ScrapingXml();
47+
48+ SearchDataTableModel sdatatblmodel;
49+
50+ /**
51+ * Creates the HtmlSearch form, then configures the file chooser's start directory and filters.
52+ */
53+ public HtmlSearch() {
54+ sdatatblmodel = new SearchDataTableModel();
55+
56+ initComponents();
57+
58+ // Start the chooser in <working directory>/data; File(parent, child) applies the platform's separator
59+ String dir = System.getProperty("user.dir");
60+ File file = new File(dir, "data");
61+ jFileChooser1.setCurrentDirectory(file);
62+
63+ FileFilter filter1 = new FileNameExtensionFilter("XMLファイル", "xml");
64+ FileFilter filter2 = new FileNameExtensionFilter("TEXTファイル", "txt");
65+ jFileChooser1.addChoosableFileFilter(filter1);
66+ jFileChooser1.addChoosableFileFilter(filter2);
67+ jFileChooser1.setFileFilter(filter1); // XML filter is the one selected by default
68+
69+ }
70+
71+ /**
72+ * This method is called from within the constructor to initialize the form.
73+ * WARNING: Do NOT modify this code. The content of this method is always
74+ * regenerated by the Form Editor.
75+ */
76+ @SuppressWarnings("unchecked")
77+ // <editor-fold defaultstate="collapsed" desc="Generated Code">//GEN-BEGIN:initComponents
78+ private void initComponents() {
79+
80+ jFileChooser1 = new javax.swing.JFileChooser();
81+ jRadioButton1 = new javax.swing.JRadioButton();
82+ jLabel1 = new javax.swing.JLabel();
83+ jTxtUrl = new javax.swing.JTextField();
84+ jBtnSearch = new javax.swing.JButton();
85+ jTabbedPane1 = new javax.swing.JTabbedPane();
86+ jPanelTab1 = new javax.swing.JPanel();
87+ jScrollPane1 = new javax.swing.JScrollPane();
88+ jTable1 = new javax.swing.JTable();
89+ jBtnRowIns = new javax.swing.JButton();
90+ jBtnRowDel = new javax.swing.JButton();
91+ jBtnRowCpy = new javax.swing.JButton();
92+ jPanelTab2 = new javax.swing.JPanel();
93+ jScrollPaneLabel = new javax.swing.JScrollPane();
94+ jTxtLabel = new javax.swing.JTextArea();
95+ jScrollPane404msg = new javax.swing.JScrollPane();
96+ jTxt404msg = new javax.swing.JTextArea();
97+ jPanelRtn = new javax.swing.JPanel();
98+ jScrollPaneRtn = new javax.swing.JScrollPane();
99+ jTxtRtn = new javax.swing.JTextArea();
100+ jMenuBar1 = new javax.swing.JMenuBar();
101+ jMenu1 = new javax.swing.JMenu();
102+ jMenuLoad = new javax.swing.JMenuItem();
103+ jMenuSave = new javax.swing.JMenuItem();
104+ jMenu3 = new javax.swing.JMenu();
105+ jMenuItem1 = new javax.swing.JMenuItem();
106+ jMenu2 = new javax.swing.JMenu();
107+
108+ jFileChooser1.setCurrentDirectory(null);
109+ jFileChooser1.setDialogTitle("");
110+
111+ jRadioButton1.setText("jRadioButton1");
112+
113+ setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE);
114+ setTitle("タグ検索");
115+
116+ jLabel1.setText(" URL:");
117+
118+ jBtnSearch.setText("検索");
119+ jBtnSearch.addActionListener(new java.awt.event.ActionListener() {
120+ public void actionPerformed(java.awt.event.ActionEvent evt) {
121+ jBtnSearchActionPerformed(evt);
122+ }
123+ });
124+
125+ jPanelTab1.setBorder(javax.swing.BorderFactory.createTitledBorder("検索情報"));
126+
127+ jTable1.setModel(sdatatblmodel);
128+ jTable1.setSelectionMode(javax.swing.ListSelectionModel.SINGLE_SELECTION);
129+ jTable1.getTableHeader().setReorderingAllowed(false);
130+ jScrollPane1.setViewportView(jTable1);
131+
132+ jBtnRowIns.setText("行挿入");
133+ jBtnRowIns.addActionListener(new java.awt.event.ActionListener() {
134+ public void actionPerformed(java.awt.event.ActionEvent evt) {
135+ jBtnRowInsActionPerformed(evt);
136+ }
137+ });
138+
139+ jBtnRowDel.setText("行削除");
140+ jBtnRowDel.addActionListener(new java.awt.event.ActionListener() {
141+ public void actionPerformed(java.awt.event.ActionEvent evt) {
142+ jBtnRowDelActionPerformed(evt);
143+ }
144+ });
145+
146+ jBtnRowCpy.setText("行コピー");
147+ jBtnRowCpy.addActionListener(new java.awt.event.ActionListener() {
148+ public void actionPerformed(java.awt.event.ActionEvent evt) {
149+ jBtnRowCpyActionPerformed(evt);
150+ }
151+ });
152+
153+ javax.swing.GroupLayout jPanelTab1Layout = new javax.swing.GroupLayout(jPanelTab1);
154+ jPanelTab1.setLayout(jPanelTab1Layout);
155+ jPanelTab1Layout.setHorizontalGroup(
156+ jPanelTab1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
157+ .addGroup(jPanelTab1Layout.createSequentialGroup()
158+ .addContainerGap(javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
159+ .addComponent(jBtnRowCpy)
160+ .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
161+ .addComponent(jBtnRowDel)
162+ .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
163+ .addComponent(jBtnRowIns))
164+ .addComponent(jScrollPane1, javax.swing.GroupLayout.PREFERRED_SIZE, 0, Short.MAX_VALUE)
165+ );
166+ jPanelTab1Layout.setVerticalGroup(
167+ jPanelTab1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
168+ .addGroup(jPanelTab1Layout.createSequentialGroup()
169+ .addComponent(jScrollPane1, javax.swing.GroupLayout.DEFAULT_SIZE, 173, Short.MAX_VALUE)
170+ .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
171+ .addGroup(jPanelTab1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE)
172+ .addComponent(jBtnRowDel)
173+ .addComponent(jBtnRowIns)
174+ .addComponent(jBtnRowCpy)))
175+ );
176+
177+ jTabbedPane1.addTab("キー設定", jPanelTab1);
178+
179+ jPanelTab2.setBorder(javax.swing.BorderFactory.createTitledBorder("メッセージ"));
180+
181+ jScrollPaneLabel.setHorizontalScrollBarPolicy(javax.swing.ScrollPaneConstants.HORIZONTAL_SCROLLBAR_NEVER);
182+ jScrollPaneLabel.setVerticalScrollBarPolicy(javax.swing.ScrollPaneConstants.VERTICAL_SCROLLBAR_NEVER);
183+
184+ jTxtLabel.setEditable(false);
185+ jTxtLabel.setBackground(java.awt.Color.lightGray);
186+ jTxtLabel.setColumns(20);
187+ jTxtLabel.setFont(new java.awt.Font("MS UI Gothic", 0, 12)); // NOI18N
188+ jTxtLabel.setLineWrap(true);
189+ jTxtLabel.setRows(2);
190+ jTxtLabel.setText("取得ページに以下のメッセージが含まれていた場合、対象データが取得出来なかったと通知します。");
191+ jTxtLabel.setAutoscrolls(false);
192+ jTxtLabel.setBorder(null);
193+ jTxtLabel.setCursor(new java.awt.Cursor(java.awt.Cursor.DEFAULT_CURSOR));
194+ jTxtLabel.setFocusable(false);
195+ jTxtLabel.setHighlighter(null);
196+ jTxtLabel.setKeymap(null);
197+ jTxtLabel.setOpaque(false);
198+ jTxtLabel.setRequestFocusEnabled(false);
199+ jTxtLabel.setVerifyInputWhenFocusTarget(false);
200+ jScrollPaneLabel.setViewportView(jTxtLabel);
201+
202+ jTxt404msg.setColumns(20);
203+ jTxt404msg.setRows(3);
204+ jTxt404msg.setText("一致する銘柄は見つかりませんでした\n");
205+ jScrollPane404msg.setViewportView(jTxt404msg);
206+
207+ javax.swing.GroupLayout jPanelTab2Layout = new javax.swing.GroupLayout(jPanelTab2);
208+ jPanelTab2.setLayout(jPanelTab2Layout);
209+ jPanelTab2Layout.setHorizontalGroup(
210+ jPanelTab2Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
211+ .addComponent(jScrollPane404msg)
212+ .addGroup(javax.swing.GroupLayout.Alignment.TRAILING, jPanelTab2Layout.createSequentialGroup()
213+ .addContainerGap()
214+ .addComponent(jScrollPaneLabel, javax.swing.GroupLayout.DEFAULT_SIZE, 359, Short.MAX_VALUE)
215+ .addContainerGap())
216+ );
217+ jPanelTab2Layout.setVerticalGroup(
218+ jPanelTab2Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
219+ .addGroup(jPanelTab2Layout.createSequentialGroup()
220+ .addComponent(jScrollPaneLabel, javax.swing.GroupLayout.PREFERRED_SIZE, 38, javax.swing.GroupLayout.PREFERRED_SIZE)
221+ .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED)
222+ .addComponent(jScrollPane404msg))
223+ );
224+
225+ jTabbedPane1.addTab("結果無し判定", jPanelTab2);
226+
227+ jPanelRtn.setBorder(javax.swing.BorderFactory.createTitledBorder("検索結果"));
228+
229+ jTxtRtn.setColumns(20);
230+ jTxtRtn.setRows(5);
231+ jScrollPaneRtn.setViewportView(jTxtRtn);
232+
233+ javax.swing.GroupLayout jPanelRtnLayout = new javax.swing.GroupLayout(jPanelRtn);
234+ jPanelRtn.setLayout(jPanelRtnLayout);
235+ jPanelRtnLayout.setHorizontalGroup(
236+ jPanelRtnLayout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
237+ .addComponent(jScrollPaneRtn)
238+ );
239+ jPanelRtnLayout.setVerticalGroup(
240+ jPanelRtnLayout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
241+ .addComponent(jScrollPaneRtn, javax.swing.GroupLayout.DEFAULT_SIZE, 163, Short.MAX_VALUE)
242+ );
243+
244+ jMenu1.setText("ファイル");
245+
246+ jMenuLoad.setText("LOAD");
247+ jMenuLoad.addActionListener(new java.awt.event.ActionListener() {
248+ public void actionPerformed(java.awt.event.ActionEvent evt) {
249+ jMenuLoadActionPerformed(evt);
250+ }
251+ });
252+ jMenu1.add(jMenuLoad);
253+
254+ jMenuSave.setText("SAVE");
255+ jMenuSave.addActionListener(new java.awt.event.ActionListener() {
256+ public void actionPerformed(java.awt.event.ActionEvent evt) {
257+ jMenuSaveActionPerformed(evt);
258+ }
259+ });
260+ jMenu1.add(jMenuSave);
261+
262+ jMenuBar1.add(jMenu1);
263+
264+ jMenu3.setText("ツール");
265+
266+ jMenuItem1.setText("ブラウザで表示");
267+ jMenuItem1.addActionListener(new java.awt.event.ActionListener() {
268+ public void actionPerformed(java.awt.event.ActionEvent evt) {
269+ jMenuItem1ActionPerformed(evt);
270+ }
271+ });
272+ jMenu3.add(jMenuItem1);
273+
274+ jMenuBar1.add(jMenu3);
275+
276+ jMenu2.setText("検索");
277+ jMenu2.addMouseListener(new java.awt.event.MouseAdapter() {
278+ public void mouseClicked(java.awt.event.MouseEvent evt) {
279+ jMenu2MouseClicked(evt);
280+ }
281+ });
282+ jMenuBar1.add(jMenu2);
283+
284+ setJMenuBar(jMenuBar1);
285+
286+ javax.swing.GroupLayout layout = new javax.swing.GroupLayout(getContentPane());
287+ getContentPane().setLayout(layout);
288+ layout.setHorizontalGroup(
289+ layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
290+ .addComponent(jPanelRtn, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
291+ .addGroup(layout.createSequentialGroup()
292+ .addComponent(jLabel1)
293+ .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
294+ .addComponent(jTxtUrl)
295+ .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
296+ .addComponent(jBtnSearch))
297+ .addComponent(jTabbedPane1)
298+ );
299+ layout.setVerticalGroup(
300+ layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
301+ .addGroup(layout.createSequentialGroup()
302+ .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE)
303+ .addComponent(jLabel1)
304+ .addComponent(jTxtUrl, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE)
305+ .addComponent(jBtnSearch))
306+ .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
307+ .addComponent(jTabbedPane1, javax.swing.GroupLayout.PREFERRED_SIZE, 250, javax.swing.GroupLayout.PREFERRED_SIZE)
308+ .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
309+ .addComponent(jPanelRtn, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
310+ .addContainerGap())
311+ );
312+
313+ pack();
314+ }// </editor-fold>//GEN-END:initComponents
315+
316+ private void jBtnRowInsActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowInsActionPerformed
317+ int SelectedRow = jTable1.getSelectedRow();
318+ SearchData sdata = new SearchData();
319+ if(SelectedRow >= 0) {
320+ sdatatblmodel.insertRow(SelectedRow, sdata);
321+ } else {
322+ sdatatblmodel.addRow(sdata);
323+ }
324+ }//GEN-LAST:event_jBtnRowInsActionPerformed
325+
326+ private void jBtnRowDelActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowDelActionPerformed
327+ int SelectedRow = jTable1.getSelectedRow();
328+ if(!(SelectedRow < 0)) {
329+ sdatatblmodel.removeRow(SelectedRow);
330+ }
331+ }//GEN-LAST:event_jBtnRowDelActionPerformed
332+
333+ private void jMenuLoadActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuLoadActionPerformed
334+ jFileChooser1.setDialogTitle("読込");
335+ int selected = jFileChooser1.showOpenDialog(this);
336+ if (selected == JFileChooser.APPROVE_OPTION) {
337+ File file = jFileChooser1.getSelectedFile();
338+ xmlwriter.load(file);
339+ jTxtUrl.setText(xmlwriter.getTestUrl());
340+ xmlwriter.getSdata();
341+ sdatatblmodel.setRowCount(0);
342+ for(int i = 0; i < SearchData.size(); i++) {
343+ SearchData sdata = SearchData.get(i);
344+ sdatatblmodel.addRow(sdata);
345+ }
346+ }
347+ }//GEN-LAST:event_jMenuLoadActionPerformed
348+
349+ private void jMenuSaveActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuSaveActionPerformed
350+ jFileChooser1.setDialogTitle("保存");
351+ int selected = jFileChooser1.showSaveDialog(this);
352+ if (selected == JFileChooser.APPROVE_OPTION) {
353+ File file = jFileChooser1.getSelectedFile();
354+ xmlwriter.setTestUrl(jTxtUrl.getText());
355+
356+ SearchData.clear();
357+ for(int row = 0; row < sdatatblmodel.getRowCount(); row++) {
358+ SearchData sdata = sdatatblmodel.getSearchData(row);
359+ SearchData.add(sdata);
360+ }
361+ xmlwriter.setSdata();
362+ xmlwriter.save(file);
363+ }
364+ }//GEN-LAST:event_jMenuSaveActionPerformed
365+
366+ private void jBtnRowCpyActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowCpyActionPerformed
367+ int SelectedRow = jTable1.getSelectedRow();
368+ if(SelectedRow >= 0) {
369+ SearchData sdata = sdatatblmodel.getSearchData(SelectedRow);
370+ sdatatblmodel.insertRow(SelectedRow, sdata);
371+ }
372+ }//GEN-LAST:event_jBtnRowCpyActionPerformed
373+
374+ private void jMenuItem1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuItem1ActionPerformed
375+ Desktop desktop = Desktop.getDesktop();
376+ String uriString = jTxtUrl.getText();
377+ try {
378+ URI uri = new URI(uriString);
379+ desktop.browse(uri);
380+
381+ } catch (URISyntaxException | IOException ex) {
382+ Logger.getLogger(HtmlSearch.class.getName()).log(Level.SEVERE, null, ex);
383+ }
384+ }//GEN-LAST:event_jMenuItem1ActionPerformed
385+
386+ private void jMenu2MouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_jMenu2MouseClicked
387+ Search_execution();
388+ }//GEN-LAST:event_jMenu2MouseClicked
389+
390+ private void jBtnSearchActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnSearchActionPerformed
391+ Search_execution();
392+ }//GEN-LAST:event_jBtnSearchActionPerformed
393+
394+ /**
395+ * 検索実行.
396+ */
397+ void Search_execution_old() {
398+ jTxtRtn.setText(null);
399+ HtmlParser par = new HtmlParser(jTxtUrl.getText());
400+
401+ // データ無し(404)判定
402+ String strdata = par.getStringPageData();
403+ if(strdata == null) {
404+ jTxtRtn.append("読込みページがありません");
405+ return;
406+ }
407+ String text = jTxt404msg.getText();
408+ String[] strsearch = text.split("\n");
409+ for(String strsearch1 : strsearch) {
410+ if(strdata.contains(strsearch1)) {
411+ jTxtRtn.append(strsearch1);
412+ return;
413+ }
414+ }
415+
416+ // 検索結果
417+ for(int row = 0; row < sdatatblmodel.getRowCount(); row++) {
418+ SearchData sdata = sdatatblmodel.getSearchData(row);
419+ String ans = sdata.getitem();
420+ String rtn = par.search(sdata);
421+ jTxtRtn.append(ans + "\t" + rtn + "\n");
422+ }
423+
424+ jTxtRtn.setCaretPosition(0);
425+ }
426+
427+ /**
428+ * 検索実行.
429+ */
430+ void Search_execution() {
431+ jTxtRtn.setText(null);
432+ Scraping scrap = new Scraping();
433+
434+ // URL生成
435+ URL url = null;
436+ try {
437+ url = new URL(jTxtUrl.getText());
438+ } catch (MalformedURLException ex) {
439+ Logger.getLogger(HtmlSearch.class.getName()).log(Level.SEVERE, null, ex);
440+ }
441+
442+ // SearchData生成
443+ SearchData.clear();
444+ for(int row = 0; row < sdatatblmodel.getRowCount(); row++) {
445+ SearchData.add(sdatatblmodel.getSearchData(row));
446+ }
447+
448+ // HTML検索
449+ String[] result = scrap.getResult(url);
450+
451+ // 検索結果
452+ if(result == null) {
453+ jTxtRtn.append("Data not find");
454+ return;
455+ }
456+
457+ // 結果表示
458+ for(int i = 0; i < SearchData.size(); i++) {
459+ String ans = SearchData.get(i).getitem();
460+ String rtn = result[i];
461+ jTxtRtn.append(ans + "\t" + rtn + "\n");
462+ }
463+
464+ jTxtRtn.setCaretPosition(0);
465+ }
466+
467+ /**
468+ * @param args the command line arguments
469+ */
470+ public static void main(String args[]) {
471+ /* Set the Nimbus look and feel */
472+ //<editor-fold defaultstate="collapsed" desc=" Look and feel setting code (optional) ">
473+ /* If Nimbus (introduced in Java SE 6) is not available, stay with the default look and feel.
474+ * For details see http://download.oracle.com/javase/tutorial/uiswing/lookandfeel/plaf.html
475+ */
476+ try {
477+ for (javax.swing.UIManager.LookAndFeelInfo info : javax.swing.UIManager.getInstalledLookAndFeels()) {
478+ if ("Nimbus".equals(info.getName())) {
479+ javax.swing.UIManager.setLookAndFeel(info.getClassName());
480+ break;
481+ }
482+ }
483+ } catch (ClassNotFoundException
484+ | InstantiationException
485+ | IllegalAccessException
486+ | javax.swing.UnsupportedLookAndFeelException ex) {
487+ java.util.logging.Logger.getLogger(HtmlSearch.class.getName()).log(java.util.logging.Level.SEVERE, null, ex);
488+ }
489+ //</editor-fold>
490+
491+ /* Create and display the form */
492+ java.awt.EventQueue.invokeLater(new Runnable() {
493+ @Override
494+ public void run() {
495+ new HtmlSearch().setVisible(true);
496+ }
497+ });
498+ }
499+
500+ // Variables declaration - do not modify//GEN-BEGIN:variables
501+ private javax.swing.JButton jBtnRowCpy;
502+ private javax.swing.JButton jBtnRowDel;
503+ private javax.swing.JButton jBtnRowIns;
504+ private javax.swing.JButton jBtnSearch;
505+ private javax.swing.JFileChooser jFileChooser1;
506+ private javax.swing.JLabel jLabel1;
507+ private javax.swing.JMenu jMenu1;
508+ private javax.swing.JMenu jMenu2;
509+ private javax.swing.JMenu jMenu3;
510+ private javax.swing.JMenuBar jMenuBar1;
511+ private javax.swing.JMenuItem jMenuItem1;
512+ private javax.swing.JMenuItem jMenuLoad;
513+ private javax.swing.JMenuItem jMenuSave;
514+ private javax.swing.JPanel jPanelRtn;
515+ private javax.swing.JPanel jPanelTab1;
516+ private javax.swing.JPanel jPanelTab2;
517+ private javax.swing.JRadioButton jRadioButton1;
518+ private javax.swing.JScrollPane jScrollPane1;
519+ private javax.swing.JScrollPane jScrollPane404msg;
520+ private javax.swing.JScrollPane jScrollPaneLabel;
521+ private javax.swing.JScrollPane jScrollPaneRtn;
522+ private javax.swing.JTabbedPane jTabbedPane1;
523+ private javax.swing.JTable jTable1;
524+ private javax.swing.JTextArea jTxt404msg;
525+ private javax.swing.JTextArea jTxtLabel;
526+ private javax.swing.JTextArea jTxtRtn;
527+ private javax.swing.JTextField jTxtUrl;
528+ // End of variables declaration//GEN-END:variables
529+}
530+
531+class SearchDataTableModel extends DefaultTableModel {
532+ /* ---------------------------------------------------------------------- *
533+ * データ属性
534+ * ---------------------------------------------------------------------- */
535+ public String[] columnName = {
536+ /* 0 */ "項目名",
537+ /* 1 */ "タグ",
538+ /* 2 */ "ID",
539+ /* 3 */ "クラス",
540+ /* 4 */ "位置",
541+ /* 5 */ "抽出条件"
542+ };
543+
544+ public Class[] columnClass = {
545+ /* 0 */ String.class,
546+ /* 1 */ String.class,
547+ /* 2 */ String.class,
548+ /* 3 */ String.class,
549+ /* 4 */ String.class,
550+ /* 5 */ String.class
551+ };
552+
553+ int column_item = 0;
554+ int column_htmltag = 1;
555+ int column_htmlid = 2;
556+ int column_htmlclass = 3;
557+ int column_around = 4;
558+ int column_regexp = 5;
559+
560+ /* ---------------------------------------------------------------------- *
561+ * 処理
562+ * ---------------------------------------------------------------------- */
563+ @Override
564+ public String getColumnName(int modelIndex) {
565+ return columnName[modelIndex];
566+ }
567+
568+ @Override
569+ public Class<?> getColumnClass(int modelIndex) {
570+ return columnClass[modelIndex];
571+ }
572+
573+ @Override
574+ public int getColumnCount() {
575+ return columnName.length;
576+ }
577+
578+ /* ---------------------------------------------------------------------- */
579+
580+ public SearchData getSearchData(int row) {
581+ SearchData sdata = new SearchData();
582+ sdata.setitem(String.valueOf(getValueAt(row, column_item)));
583+ sdata.setHtmltag(String.valueOf(getValueAt(row, column_htmltag)));
584+ sdata.setHtmlid(String.valueOf(getValueAt(row, column_htmlid)));
585+ sdata.setHtmlclass(String.valueOf(getValueAt(row, column_htmlclass)));
586+ sdata.setaround(String.valueOf(getValueAt(row, column_around)));
587+ sdata.setregexp(String.valueOf(getValueAt(row, column_regexp)));
588+ return sdata;
589+ }
590+
591+ public void addRow(SearchData sdata) {
592+ addRow(getObjdata(sdata));
593+ }
594+
595+ public void insertRow(int row, SearchData sdata) {
596+ insertRow(row, getObjdata(sdata));
597+ }
598+
599+ private Object[] getObjdata(SearchData sdata) {
600+ Object[] obj = new Object[] {
601+ sdata.getitem(),
602+ sdata.getHtmltag(),
603+ sdata.getHtmlid(),
604+ sdata.getHtmlclass(),
605+ sdata.getaround(),
606+ sdata.getregexp()
607+ };
608+ return obj;
609+ }
610+
611+}
\ No newline at end of file
--- branches/b4/webScraping/src/webScraping/core/AttributeData.java (nonexistent)
+++ branches/b4/webScraping/src/webScraping/core/AttributeData.java (revision 140)
@@ -0,0 +1,164 @@
1+/*
2+ * Copyright (C) 2014 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id: AttributeData.java 132 2015-03-31 02:44:57Z tuna_p $
21+ */
22+
23+package webScraping.core;
24+
25+import java.util.ArrayList;
26+import java.util.Enumeration;
27+import javax.swing.text.MutableAttributeSet;
28+import javax.swing.text.html.HTML;
29+
/**
 * Holds the attribute information of HTML tags.
 * <p>
 * Every attribute of every tag passed to {@link #add} is stored as one
 * {@link AttrData} entry, in insertion order.
 *
 * @author kgto
 */
public class AttributeData {

    /** One attribute (name/value pair) of one tag occurrence. */
    public class AttrData {
        /** Tag that carries the attribute. */
        public HTML.Tag tag;
        /**
         * Occurrence number of the tag: 1 for the first {@code add} call of
         * this tag that stored an attribute, 2 for the next one, and so on.
         */
        public int count;
        /** Attribute name as text. */
        public String attrname;
        /** Attribute value as text. */
        public String attrvalue;
    }

    /** Stored attribute entries, in insertion order. */
    public ArrayList<AttrData> AttrList;

    /** Number of entries in {@link #AttrList}; refreshed by {@link #add}. */
    public int size;

    public AttributeData() {
        AttrList = new ArrayList<>();
        size = 0;
    }

    /**
     * Adds all attributes of one tag occurrence.
     *
     * @param tag  tag the attributes belong to
     * @param attr attributes of the tag; {@code null} adds nothing
     */
    public void add(HTML.Tag tag, MutableAttributeSet attr) {
        if (attr == null) {
            return;
        }

        // All attributes of this occurrence share the same occurrence number.
        int tagcount = tagcnt(tag) + 1;

        Enumeration<?> names = attr.getAttributeNames();
        while (names.hasMoreElements()) {
            Object name = names.nextElement();

            AttrData a = new AttrData();
            a.tag = tag;
            a.count = tagcount;
            a.attrname = name.toString();
            a.attrvalue = attr.getAttribute(name).toString();

            AttrList.add(a);
        }
        size = AttrList.size();
    }

    /**
     * Searches for an attribute of a tag.
     * <p>
     * The name must match exactly; the value matches when the stored value
     * <em>starts with</em> {@code attrvalue}.
     *
     * @param tag       tag to look for
     * @param attrname  attribute name
     * @param attrvalue leading part of the attribute value
     * @return {@code true} when at least one stored entry matches;
     *         {@code false} when nothing matches or an argument is {@code null}
     */
    public boolean search(HTML.Tag tag, String attrname, String attrvalue) {
        if (attrname == null || attrvalue == null) {
            return false;
        }
        for (AttrData a : AttrList) {
            if (a.tag == tag
                    && a.attrname.equals(attrname)
                    && a.attrvalue.startsWith(attrvalue)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Searches for an {@code id} attribute whose value starts with the given text.
     *
     * @param tag       tag to look for
     * @param attrvalue leading part of the id value
     * @return {@code true} when a matching entry exists
     */
    public boolean searchId(HTML.Tag tag, String attrvalue) {
        return search(tag, "id", attrvalue);
    }

    /**
     * Searches for a {@code class} attribute whose value starts with the given text.
     *
     * @param tag       tag to look for
     * @param attrvalue leading part of the class value
     * @return {@code true} when a matching entry exists
     */
    public boolean searchClass(HTML.Tag tag, String attrvalue) {
        return search(tag, "class", attrvalue);
    }

    /**
     * Returns the values of an attribute.
     *
     * @param tag      tag to look for
     * @param attrname attribute name
     * @return values of all stored entries with that tag and name, in
     *         insertion order; empty when there is none
     */
    public ArrayList<String> getvale(HTML.Tag tag, String attrname) {
        ArrayList<String> ret = new ArrayList<>();
        for (AttrData a : AttrList) {
            if (a.tag == tag && a.attrname.equals(attrname)) {
                ret.add(a.attrvalue);
            }
        }
        return ret;
    }

    /**
     * Returns the highest occurrence number stored for the given tag.
     *
     * @param tag tag to look for
     * @return highest {@code count} of the tag, or 0 when the tag is not stored
     */
    private int tagcnt(HTML.Tag tag) {
        int wkcnt = 0;
        for (AttrData a : AttrList) {
            if (a.tag == tag && wkcnt < a.count) {
                wkcnt = a.count;
            }
        }
        return wkcnt;
    }

    // Accessors for the entry at index i of AttrList.

    /** Returns the tag of entry {@code i}. */
    public HTML.Tag gettag(int i) {
        return AttrList.get(i).tag;
    }

    /** Returns the occurrence number of entry {@code i}. */
    public int getcount(int i) {
        return AttrList.get(i).count;
    }

    /** Returns the attribute name of entry {@code i}. */
    public String getattrname(int i) {
        return AttrList.get(i).attrname;
    }

    /** Returns the attribute value of entry {@code i}. */
    public String getattrvalue(int i) {
        return AttrList.get(i).attrvalue;
    }

}
--- branches/b4/webScraping/src/webScraping/core/HtmlParserCallback.java (nonexistent)
+++ branches/b4/webScraping/src/webScraping/core/HtmlParserCallback.java (revision 140)
@@ -0,0 +1,222 @@
1+/*
2+ * Copyright (C) 2014 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id: HtmlParserCallback.java 132 2015-03-31 02:44:57Z tuna_p $
21+ */
22+
23+package webScraping.core;
24+
25+import java.util.ArrayList;
26+import java.util.HashMap;
27+import javax.swing.text.MutableAttributeSet;
28+import javax.swing.text.html.HTML;
29+import javax.swing.text.html.HTMLEditorKit;
30+
31+/**
32+ * HTMLパーサ部品.
33+ * @author kgto
34+ */
35+class HtmlParserCallback extends HTMLEditorKit.ParserCallback {
36+ /* ---------------------------------------------------------------------- *
37+ * フィールド
38+ * ---------------------------------------------------------------------- */
39+ // Tag毎の階層
40+ HashMap<HTML.Tag,Integer> tagMap = new HashMap<>();
41+
42+ // serach key 情報
43+ String keytag;
44+ String keyid;
45+ String keyclass;
46+
47+ // serach key と一致時の情報退避
48+ int bufCount = 0;
49+ HTML.Tag bufTag = null;
50+ // serach key と一致時の情報格納ワーク
51+ StringBuilder bufText;
52+
53+ // serach key と一致時のデータ一覧
54+ ArrayList sData;
55+
56+ // 属性データ
57+ AttributeData attrdata;
58+
59+ /* ---------------------------------------------------------------------- *
60+ * コンストラクタ
61+ * ---------------------------------------------------------------------- */
62+ protected HtmlParserCallback(SearchData skey) {
63+
64+ // キー情報展開
65+ keytag = skey.getHtmltag();
66+ keyid = skey.getHtmlid();
67+ keyclass = skey.getHtmlclass();
68+
69+ sData = new ArrayList();
70+ }
71+
72+ /* ---------------------------------------------------------------------- *
73+ * Getter
74+ * ---------------------------------------------------------------------- */
75+ ArrayList getrtnData() {
76+ return this.sData;
77+ }
78+
79+ /* ---------------------------------------------------------------------- *
80+ * メソッド
81+ * ---------------------------------------------------------------------- */
82+ @Override
83+ public void handleStartTag(HTML.Tag tag, MutableAttributeSet attr, int pos){
84+ // Tag毎の階層を保持
85+ int count = 1;
86+ if(tagMap.containsKey(tag)) {
87+ count = tagMap.get(tag);
88+ count++;
89+ }
90+ tagMap.put(tag, count);
91+
92+ // 属性解析
93+ AttributeData handleStartattrdata = new AttributeData();
94+ handleStartattrdata.add(tag, attr);
95+
96+ DebugProcess.htmlinfo(tag, attr, "handleStartTag", count);
97+
98+ if(bufCount == 0) {
99+ if(tag.toString().equals(keytag)) {
100+ //if(serachAttribute(attr)) {
101+ if(serachAttribute(tag, handleStartattrdata)) {
102+ bufCount = count;
103+ bufTag = tag;
104+ attrdata = new AttributeData();
105+ bufText = new StringBuilder();
106+ }
107+ }
108+ }
109+ if(bufCount > 0) {
110+ attrdata.add(tag, attr);
111+ }
112+ }
113+
114+ @Override
115+ public void handleEndTag(HTML.Tag tag, int pos){
116+ // Tag毎の階層を取得
117+ int count = 0;
118+ if(tagMap.containsKey(tag)) {
119+ count = tagMap.get(tag);
120+ }
121+
122+ DebugProcess.htmlinfo(tag, null, "handleEndTag", count);
123+
124+ if(tag.equals(bufTag) && count <= bufCount) {
125+
126+ // 溜め込んだ一致情報をリストへ格納
127+ sData.add(bufText.toString());
128+
129+ // 退避したserach keyとの一致情報クリア
130+ bufCount = 0;
131+ bufTag = null;
132+ bufText = null;
133+ }
134+
135+ // Tag毎の階層減算
136+ tagMap.put(tag, --count);
137+ }
138+
139+ @Override
140+ public void handleText(char[] data, int pos){
141+
142+ DebugProcess.htmlinfo(data, "handleText");
143+
144+ String splitchar = "\t";
145+ //制御文字の削除
146+ // &nbsp; 0xa0
147+ StringBuilder buf = new StringBuilder();
148+ for(int i = 0; i < data.length; i++) {
149+ if(data[i] > 0x1f && data[i] != 0x7f && data[i] != 0xa0) {
150+ buf.append(data[i]);
151+ }
152+ }
153+ if(bufCount > 0) {
154+ if(bufText.length() > 0) {
155+ bufText.append(splitchar);
156+ }
157+ bufText.append(buf.toString());
158+ }
159+ }
160+
161+ @Override
162+ public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attr, int pos){
163+ if(bufCount > 0) {
164+ attrdata.add(tag, attr);
165+ }
166+ DebugProcess.htmlinfo(tag, attr, "handleSimpleTag", 0);
167+ }
168+
169+ /**
170+ * ページ内のID/CLASS値と検索キーを比較する.
171+ * @param attr ページのMutableAttributeSet
172+ * @return boolean 検索キーと一致の時、true
173+ */
174+ boolean serachAttribute(MutableAttributeSet attr) {
175+ String currentID = (String)attr.getAttribute(HTML.Attribute.ID);
176+ String currentClass = (String)attr.getAttribute(HTML.Attribute.CLASS);
177+
178+ if(keyid.isEmpty() == false && keyclass.isEmpty() == false) {
179+ if(keyid.equals(currentID) && keyclass.equals(currentClass)) {
180+ return true;
181+ }
182+ }
183+
184+ if(keyid.isEmpty() == false) {
185+ if(keyid.equals(currentID)) {
186+ return true;
187+ }
188+ }
189+
190+ if(keyclass.isEmpty() == false) {
191+ if(keyclass.equals(currentClass)) {
192+ return true;
193+ }
194+ }
195+
196+ return false;
197+ }
198+
199+ /**
200+ * ページ内のID/CLASS値と検索キーを比較する.
201+ * @param tag
202+ * @param attrdata
203+ * @return boolean 検索キーと一致の時、true
204+ */
205+ boolean serachAttribute(HTML.Tag tag, AttributeData attrdata) {
206+ // ID と CLASS の両方にキー入力有りの場合
207+ if(keyid.isEmpty() == false && keyclass.isEmpty() == false) {
208+ if(attrdata.searchId(tag, keyid) && attrdata.searchClass(tag, keyclass)) {
209+ return true;
210+ }
211+ }
212+ // ID のキーチェック
213+ if(keyid.isEmpty() == false) {
214+ return attrdata.searchId(tag, keyid);
215+ }
216+ // CLASS のキーチェック
217+ if(keyclass.isEmpty() == false) {
218+ return attrdata.searchClass(tag, keyclass);
219+ }
220+ return false;
221+ }
222+}
--- branches/b4/webScraping/src/webScraping/core/SearchData.java (nonexistent)
+++ branches/b4/webScraping/src/webScraping/core/SearchData.java (revision 140)
@@ -0,0 +1,200 @@
1+/*
2+ * Copyright (C) 2014 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id: SearchData.java 132 2015-03-31 02:44:57Z tuna_p $
21+ */
22+
23+package webScraping.core;
24+
25+import java.util.ArrayList;
26+
/**
 * Tag search data: one search key (item name, tag, id, class, position,
 * extraction pattern), plus a static list of such keys shared by the
 * whole program.
 *
 * @author kgto
 */
public class SearchData {
    /* ---------------------------------------------------------------------- *
     * Fields
     * ---------------------------------------------------------------------- */
    private String item;
    private String htmltag;
    private String htmlid;
    private String htmlclass;
    private String around;
    private String regexp;

    /* ---------------------------------------------------------------------- *
     * Static part
     * ---------------------------------------------------------------------- */
    /** Description of one property: value type, name and Japanese label. */
    public static class Context {
        public Class<?> columnClass;
        public String columnName;
        public String columnNameJp;

        public Context(Class<?> columnClass, String columnName, String columnNameJp) {
            this.columnClass = columnClass;
            this.columnName = columnName;
            this.columnNameJp = columnNameJp;
        }
    }

    /** Property descriptions, in the order used by {@link #getObjData()}. */
    public static final Context[] context = {
        /* 0 */ new Context(String.class , "item"      , "項目名"),
        /* 1 */ new Context(String.class , "htmltag"   , "タグ"),
        /* 2 */ new Context(String.class , "htmlid"    , "ID"),
        /* 3 */ new Context(String.class , "htmlclass" , "クラス"),
        /* 4 */ new Context(String.class , "around"    , "位置"),
        /* 5 */ new Context(String.class , "regexp"    , "抽出条件")
    };

    /* ---------------------------------------------------------------------- */
    /** Shared list of search keys. */
    private static final ArrayList<SearchData> slist = new ArrayList<>();

    /**
     * Creates a search key from the given values and appends it to the
     * shared list.
     */
    public static void addSearchData(
            String item, String htmltag, String htmlid,
            String htmlclass, String around, String regexp) {
        SearchData sdat = new SearchData();
        sdat.setitem(item);
        sdat.setHtmltag(htmltag);
        sdat.setHtmlid(htmlid);
        sdat.setHtmlclass(htmlclass);
        sdat.setaround(around);
        sdat.setregexp(regexp);

        slist.add(sdat);
    }

    /** Appends a search key to the shared list. */
    public static void add(SearchData sdat) {
        slist.add(sdat);
    }

    /** Returns the search key at index {@code i} of the shared list. */
    public static SearchData get(int i) {
        return slist.get(i);
    }

    /** Returns the number of search keys in the shared list. */
    public static int size() {
        return slist.size();
    }

    /** Removes and returns the search key at {@code index} of the shared list. */
    public static SearchData remove(int index) {
        return slist.remove(index);
    }

    /** Removes all search keys from the shared list. */
    public static void clear() {
        slist.clear();
    }

    /* ---------------------------------------------------------------------- *
     * Constructors
     * ---------------------------------------------------------------------- */
    /** Creates a search key whose properties are all empty strings. */
    public SearchData() {
        initialize();
    }

    /**
     * Creates a copy of another search key.
     *
     * @param dat search key to copy
     */
    public SearchData(SearchData dat) {
        this.item = dat.getitem();
        this.htmltag = dat.getHtmltag();
        this.htmlid = dat.getHtmlid();
        this.htmlclass = dat.getHtmlclass();
        this.around = dat.getaround();
        this.regexp = dat.getregexp();
    }

    /* ---------------------------------------------------------------------- *
     * Setters
     * ---------------------------------------------------------------------- */
    public void setitem(String item) {
        this.item = item;
    }

    public void setHtmltag(String htmltag) {
        this.htmltag = htmltag;
    }

    public void setHtmlid(String htmlid) {
        this.htmlid = htmlid;
    }

    public void setHtmlclass(String htmlclass) {
        this.htmlclass = htmlclass;
    }

    public void setaround(String around) {
        this.around = around;
    }

    public void setregexp(String regexp) {
        this.regexp = regexp;
    }

    /* ---------------------------------------------------------------------- *
     * Getters
     * ---------------------------------------------------------------------- */
    public String getitem() {
        return item;
    }

    public String getHtmltag() {
        return htmltag;
    }

    public String getHtmlid() {
        return htmlid;
    }

    public String getHtmlclass() {
        return htmlclass;
    }

    public String getaround() {
        return around;
    }

    public String getregexp() {
        return regexp;
    }

    /* ---------------------------------------------------------------------- *
     * Methods
     * ---------------------------------------------------------------------- */
    /**
     * Resets all properties to the empty string.
     */
    public final void initialize() {
        this.item = "";
        this.htmltag = "";
        this.htmlid = "";
        this.htmlclass = "";
        this.around = "";
        this.regexp = "";
    }

    /**
     * Returns the properties as an array, in the order of {@link #context}.
     *
     * @return item, tag, id, class, position, extraction pattern
     */
    public Object[] getObjData() {
        Object[] obj = {
            /* 0 */ getitem(),      // item name
            /* 1 */ getHtmltag(),   // tag
            /* 2 */ getHtmlid(),    // id
            /* 3 */ getHtmlclass(), // class
            /* 4 */ getaround(),    // position
            /* 5 */ getregexp()     // extraction pattern
        };
        return obj;
    }

}
--- branches/b4/webScraping/src/webScraping/core/HtmlParser.java (nonexistent)
+++ branches/b4/webScraping/src/webScraping/core/HtmlParser.java (revision 140)
@@ -0,0 +1,273 @@
1+/*
2+ * Copyright (C) 2014 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id: HtmlParser.java 132 2015-03-31 02:44:57Z tuna_p $
21+ */
22+
23+package webScraping.core;
24+
25+import java.io.*;
26+import java.net.*;
27+import java.util.ArrayList;
28+import java.util.logging.Level;
29+import java.util.logging.Logger;
30+import java.util.regex.Matcher;
31+import java.util.regex.Pattern;
32+import javax.swing.text.html.parser.ParserDelegator;
33+
34+/**
35+ * HTMLパーサ.
36+ * @author kgto
37+ */
38+public class HtmlParser {
39+ /* ---------------------------------------------------------------------- *
40+ * フィールド
41+ * ---------------------------------------------------------------------- */
42+ URL url;
43+ String pageData;
44+ ArrayList sData;
45+
46+ // 作業ワーク
47+ private String htmltag;
48+ private String htmlid;
49+ private String htmlclass;
50+
51+ /* ---------------------------------------------------------------------- *
52+ * コンストラクタ
53+ * ---------------------------------------------------------------------- */
54+ public HtmlParser(URL UrlAdress) {
55+ DebugProcess.debuglog_set();
56+ this.url = UrlAdress;
57+ getPageData();
58+ }
59+
60+ public HtmlParser(String UrlAdress) {
61+ DebugProcess.debuglog_set();
62+ try {
63+ url = new URL(UrlAdress);
64+ getPageData();
65+
66+ } catch (MalformedURLException ex) {
67+ Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex);
68+ }
69+ }
70+
71+ public HtmlParser() {
72+ DebugProcess.debuglog_set();
73+ url = null;
74+ }
75+
76+ /* ---------------------------------------------------------------------- *
77+ * Getter
78+ * ---------------------------------------------------------------------- */
79+ public String getStringPageData() {
80+ return pageData;
81+ }
82+
83+ /* ---------------------------------------------------------------------- *
84+ * Setter
85+ * ---------------------------------------------------------------------- */
86+ public void seturl(URL UrlAdress) {
87+ this.url = UrlAdress;
88+ getPageData();
89+ }
90+
91+ /* ---------------------------------------------------------------------- *
92+ * メソッド
93+ * ---------------------------------------------------------------------- */
94+ public void seturl(String UrlAdress) {
95+ try {
96+ url = new URL(UrlAdress);
97+ getPageData();
98+
99+ } catch (MalformedURLException ex) {
100+ Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex);
101+ }
102+ }
103+
104+ /**
105+ * HTMLページ内検索.
106+ * 検索キーとして渡されたタグ,ID,クラスから、対象となるタグを探し出し、
107+ * around(タグ位置)として指定された箇所の文字列をregexp(正規表現)で指定された整形を
108+ * 行った結果を返す。<br>
109+ * aroundの初期値:0 検索キーとして未指定(未入力)の場合、最初(0)の文字列。<br>
110+ * regexpが指定(入力)ありの場合、正規表現にて整形を行う。<br>
111+ * 渡された検索キーに一致するタグが存在しなかった場合、NULLを返す。
112+ * @param skey 検索キーデータ(SearchData)
113+ * @return String 検索キーに一致するデータの文字列
114+ */
115+ public String search(SearchData skey) {
116+
117+ // htmlページ内を検索
118+ if(isHtmlkeyEq(skey) == false) {
119+ searchPageData(skey);
120+ }
121+ /*
122+ around 出現位置指定 入力有り:指定された位置の情報のみ返す。
123+ 入力無し:取得した全ての情報を返す。
124+ */
125+ String regexp = skey.getregexp();
126+ if(skey.getaround().length() > 0) {
127+ int wkAround = Integer.parseInt(skey.getaround()); // 検索位置を数値変換
128+ if(wkAround < sData.size()) {
129+ String str = (String)sData.get(wkAround);
130+ String rtn = RegularExpression(str, regexp);
131+ return rtn;
132+ }
133+ } else {
134+ StringBuilder strbuf = new StringBuilder();
135+ for (Object sData1 : sData) {
136+ String str = (String)sData1;
137+ String rtn = RegularExpression(str, regexp);
138+ if(strbuf.length() > 0) {
139+ strbuf.append("\t");
140+ }
141+ strbuf.append(rtn);
142+ }
143+ return strbuf.toString();
144+ }
145+ return null;
146+ }
147+
148+ /**
149+ * 直近のHTMLタグ/ID/CLASS値と引数の値を比較する.
150+ * @param skey HTMLタグ/ID/CLASSが格納された検索キー
151+ * @return boolean HTMLタグ/ID/CLASS値が一致する時、true
152+ */
153+ boolean isHtmlkeyEq(SearchData skey) {
154+
155+ String stag = skey.getHtmltag();
156+ String sid = skey.getHtmlid();
157+ String sclass = skey.getHtmlclass();
158+
159+ boolean rtn = true;
160+
161+ // htmltag
162+ if(htmltag == null) {
163+ rtn = false;
164+ } else {
165+ if(htmltag.equals(stag) == false) {
166+ rtn = false;
167+ }
168+ }
169+
170+ // htmlid
171+ if(htmlid == null) {
172+ rtn = false;
173+ } else {
174+ if(htmlid.equals(sid) == false) {
175+ rtn = false;
176+ }
177+ }
178+
179+ // htmlclass
180+ if(htmlclass == null) {
181+ rtn = false;
182+ } else {
183+ if(htmlclass.equals(sclass) == false) {
184+ rtn = false;
185+ }
186+ }
187+
188+ if(!rtn) {
189+ htmltag = stag;
190+ htmlid = sid;
191+ htmlclass = sclass;
192+ }
193+
194+ return rtn;
195+ }
196+
197+ /**
198+ * 正規表現検索.
199+ * @param strdata
200+ * @param regexp
201+ * @return
202+ */
203+ String RegularExpression(String strdata, String regexp) {
204+ String expdata = null;
205+
206+ //regexpのチェック
207+ if(regexp.isEmpty()) {
208+ expdata = strdata;
209+ return expdata;
210+ }
211+
212+ //正規表現検索
213+ Pattern ptn = Pattern.compile(regexp);
214+ Matcher matchdata = ptn.matcher(strdata);
215+ if (matchdata.find()) {
216+ if(matchdata.groupCount() >= 1) {
217+ expdata = matchdata.group(1);
218+ }
219+ }
220+ return expdata;
221+ }
222+
223+ /**
224+ * インターネット接続.
225+ */
226+ private void getPageData() {
227+ HttpURLConnection con = null;
228+ try {
229+ con = (HttpURLConnection)url.openConnection();
230+ con.setRequestMethod("GET");
231+ BufferedReader reader = new BufferedReader(
232+ new InputStreamReader(con.getInputStream(), "utf-8"));
233+ String wkline;
234+ StringBuilder sb = new StringBuilder();
235+ while((wkline = reader.readLine()) != null) {
236+ sb.append(wkline).append("\n");
237+ }
238+ pageData = sb.toString();
239+
240+ } catch(FileNotFoundException ex) {
241+ pageData = null;
242+ } catch (IOException ex) {
243+ Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex);
244+ } finally {
245+ if(con != null) {
246+ con.disconnect();
247+ }
248+ }
249+ }
250+
251+ /**
252+ * HTMLパーサ.
253+ * @param skey
254+ */
255+ private void searchPageData(SearchData skey) {
256+
257+ DebugProcess.searchDatainfo(skey);
258+
259+ Reader reader;
260+ try {
261+ reader = new BufferedReader(new StringReader(pageData));
262+ HtmlParserCallback cb = new HtmlParserCallback(skey);
263+ ParserDelegator pd = new ParserDelegator();
264+ pd.parse(reader, cb, true);
265+ reader.close();
266+
267+ sData = cb.getrtnData();
268+
269+ } catch (IOException ex) {
270+ Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex);
271+ }
272+ }
273+}
--- branches/b4/webScraping/src/webScraping/core/DebugProcess.java (nonexistent)
+++ branches/b4/webScraping/src/webScraping/core/DebugProcess.java (revision 140)
@@ -0,0 +1,264 @@
1+/*
2+ * Copyright (C) 2014 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id: DebugProcess.java 106 2014-12-10 13:45:01Z tuna_p $
21+ */
22+
23+package webScraping.core;
24+
25+import java.io.File;
26+import java.io.FileInputStream;
27+import java.io.FileNotFoundException;
28+import java.io.IOException;
29+import java.util.logging.FileHandler;
30+import java.util.logging.Formatter;
31+import java.util.logging.Handler;
32+import java.util.logging.Level;
33+import java.util.logging.LogManager;
34+import java.util.logging.LogRecord;
35+import java.util.logging.Logger;
36+import javax.swing.text.MutableAttributeSet;
37+import javax.swing.text.html.HTML;
38+
39+/**
40+ * デバック情報.
41+ * カレントディレクトリに設定ファイル(Debug.prop)を置くことで、デバックログの出力を制御する。
42+ * @author kgto
43+ */
44+public class DebugProcess {
45+ // 設定ファイル名
46+ protected static final String configurationFilename = "Debug.prop";
47+ // ロガー名
48+ protected static final Logger logger = Logger.getLogger("WebScraping");
49+ // ログ出力デフォルトレベル
50+ protected static final Level loggerlevel = Level.FINEST;
51+
52+
53+ /**
54+ * ログ出力設定.
55+ * ログ設定ファイルの存在をチェック、(最終的な)ログレベルにより、
56+ * ファイルハンドラの設定と出力書式の設定を行う。
57+ */
58+ public static void debuglog_set() {
59+ try {
60+ initLogConfiguration();
61+
62+ if(Level.ALL.equals(logger.getLevel())) {
63+ //logger.addHandler(new FileHandler("WebScraping%g.log", 100000, 2));
64+ logger.addHandler(new FileHandler("WebScraping%g.log", true));
65+ }
66+ setFomatter();
67+
68+ } catch (IOException | SecurityException ex) {
69+ Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex);
70+ }
71+ }
72+
73+ /**
74+ * ログ出力設定解除.
75+ */
76+ public static void debuglog_unset() {
77+ }
78+
79+
80+ /**
81+ * デバック出力(HTML解析-タグ&属性).
82+ * HTMLのタグと属性の解析状態を出力する。
83+ * 書式: 9 : x : タグ名 [属性名]属性数 = 属性値<br>
84+ * 凡例: 9 = 階層レベル(count値), x = F(tagの開始)/E(tagの終了)/S(単独tag)の何れか1文字<br>
85+ * @param tag タグ
86+ * @param attr 属性
87+ * @param methodname このメソッドを呼び出した親メソッド名
88+ * @param count HTMLタグの階層レベル
89+ */
90+ public static void htmlinfo(HTML.Tag tag, MutableAttributeSet attr,
91+ String methodname, int count) {
92+
93+ // ログ出力レベルチェック
94+ if(logger.getLevel() == null) {
95+ return;
96+ }
97+ if(logger.getLevel().intValue() > loggerlevel.intValue()) {
98+ return;
99+ }
100+
101+ // 編集処理
102+ char kbn = ' ';
103+ if("handleStartTag".equals(methodname)) {
104+ kbn = 'F';
105+ }
106+ if("handleEndTag".equals(methodname)) {
107+ kbn = 'E';
108+ }
109+ if("handleSimpleTag".equals(methodname)) {
110+ kbn = 'S';
111+ }
112+
113+ StringBuilder strBuf = new StringBuilder(80);
114+ strBuf.append(count).append(" : ");
115+ strBuf.append(kbn).append(" : ");
116+ strBuf.append(tag.toString());
117+ // 属性情報
118+ if(attr != null) {
119+ if(attr.getAttributeCount() != 0) {
120+ AttributeData handleAttrData = new AttributeData();
121+ handleAttrData.add(tag, attr);
122+ for(int i = 0; i < handleAttrData.size; i++) {
123+ strBuf.append(" [");
124+ strBuf.append(handleAttrData.getattrname(i));
125+ strBuf.append("]");
126+ strBuf.append(handleAttrData.getcount(i));
127+ strBuf.append(" = ");
128+ strBuf.append(handleAttrData.getattrvalue(i));
129+ }
130+ }
131+ }
132+
133+ logger.log(loggerlevel, strBuf.toString());
134+ }
135+
136+ /**
137+ * デバック出力(メッセージ).
138+ * 引数に渡された任意のメッセージを出力する。
139+ * @param str メッセージ
140+ * @param methodname このメソッドを呼び出した親メソッド名
141+ */
142+ public static void htmlinfo(String str, String methodname) {
143+ logger.log(loggerlevel, str);
144+ }
145+
146+ public static void htmlinfo(String str) {
147+ logger.log(loggerlevel, str);
148+ }
149+
150+ /**
151+ * デバック出力(HTML解析-本文).
152+ * 本文の内容を出力する。
153+ * @param data 本文(HTML内の文字列)
154+ * @param methodname このメソッドを呼び出した親メソッド名
155+ */
156+ public static void htmlinfo(char[] data, String methodname) {
157+ String dat = new String(data);
158+ logger.log(loggerlevel, dat);
159+ }
160+
161+ public static void htmlinfo(char[] data) {
162+ String dat = new String(data);
163+ logger.log(loggerlevel, dat);
164+ }
165+
166+ /**
167+ * デバック出力(検索キー).
168+ * 検索キー(SearchData)の内容を出力する。
169+ * @param skey
170+ */
171+ public static void searchDatainfo(SearchData skey) {
172+
173+ StringBuilder strBuf = new StringBuilder(30);
174+ strBuf.append("SearchData KEY tag[");
175+ strBuf.append(skey.getHtmltag());
176+ strBuf.append("] ID[");
177+ strBuf.append(skey.getHtmlid());
178+ strBuf.append("] CLASS[");
179+ strBuf.append(skey.getHtmlclass());
180+ strBuf.append("]\n");
181+
182+ logger.log(loggerlevel, strBuf.toString());
183+ }
184+
185+ /**
186+ * ログ出力設定ファイルチェック.
187+ * 設定ファイルの存在をチェックし存在する場合、設定ファイルの内容を設定する。
188+ */
189+ private static void initLogConfiguration() {
190+
191+ File file = new File(configurationFilename);
192+ try {
193+ if(file.exists()) {
194+ FileInputStream inputStream = new FileInputStream(file);
195+ // 設定ファイルの読み込み
196+ LogManager.getLogManager().readConfiguration(inputStream);
197+ }
198+
199+ } catch (FileNotFoundException ex) {
200+ Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex);
201+ } catch (IOException ex) {
202+ Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex);
203+ }
204+ }
205+
206+ /**
207+ * ログ出力フォーマッター設定.
208+ * ファイルへログ出力時の書式を設定する。
209+ */
210+ private static void setFomatter() {
211+ Handler[] handlers = logger.getHandlers();
212+ for(int i = 0 ; i < handlers.length ; i++) {
213+ if(handlers[i] instanceof java.util.logging.FileHandler) {
214+ handlers[i].setFormatter(new HtmlFormatter());
215+ }
216+ }
217+ }
218+
219+}
220+
/**
 * Log output formatter.
 *
 * @author kgto
 */
class HtmlFormatter extends Formatter {
    /**
     * Builds the output text of one log record.
     * Format:<br>
     * YYYY-MM-DD HH:MM:SS LEVEL&lt;method name&gt;message<br>
     * "N/A" is written when the record has no method name. When the record
     * carries a throwable, its description and stack trace follow on
     * separate lines.
     *
     * @param aRecord log record to format
     * @return formatted text, ending with a line feed
     */
    @Override
    public synchronized String format(final LogRecord aRecord) {

        // Local buffer: no synchronized StringBuffer needed.
        final StringBuilder message = new StringBuilder(100);

        // Date and time of the record.
        message.append(String.format("%tF %<tT", aRecord.getMillis()));
        message.append(' ');

        // Level and method name.
        message.append(aRecord.getLevel());
        message.append('<');
        final String methodName = aRecord.getSourceMethodName();
        message.append(methodName != null ? methodName : "N/A");
        message.append('>');

        message.append(formatMessage(aRecord));
        message.append('\n');

        // For an exception, write its description and stack trace.
        final Throwable throwable = aRecord.getThrown();
        if (throwable != null) {
            message.append(throwable.toString());
            message.append('\n');
            for (StackTraceElement trace : throwable.getStackTrace()) {
                message.append('\t');
                message.append(trace.toString());
                message.append('\n');
            }
        }
        return message.toString();
    }
}
--- branches/b4/webScraping/src/webScraping/core/Scraping.java (nonexistent)
+++ branches/b4/webScraping/src/webScraping/core/Scraping.java (revision 140)
@@ -0,0 +1,71 @@
1+/*
2+ * Copyright (C) 2016 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id: Scraping.java 139 2016-05-17 09:16:40Z tuna_p $
21+ */
22+package webScraping.core;
23+
24+import java.net.URL;
25+import webScraping.core.HtmlParser;
26+import webScraping.core.SearchData;
27+
28+/**
29+ *
30+ * @author kgto
31+ */
32+public class Scraping {
33+
34+ public Scraping() {
35+ }
36+
37+ /**
38+ * HTML解析.
39+ * @param url
40+ * @return
41+ */
42+ public String[] getResult(URL url) {
43+
44+ HtmlParser par = new HtmlParser(url);
45+
46+ String[] result = new String[SearchData.size()];
47+ for(int i = 0; i < SearchData.size(); i++) {
48+ result[i] = par.search(SearchData.get(i));
49+ }
50+
51+ if(!resultCheck(result)) {
52+ return null;
53+ }
54+ return result;
55+ }
56+
57+ /**
58+ * 結果文字列チェック.
59+ * @param result
60+ * @return 文字列配列に1文字でも入力有り(null/SPACE以外)の時、true
61+ */
62+ boolean resultCheck(String[] result) {
63+ for (String result1 : result) {
64+ if (result1 != null && result1.trim().length() > 0) {
65+ return true;
66+ }
67+ }
68+ return false;
69+ }
70+
71+}