• R/O
  • SSH
  • HTTPS

nls: 提交


Commit MetaInfo

修订版: 8 (tree)
时间: 2011-04-01 20:10:49
作者: linuxchecker

Log Message

made API

更改概述

差异

--- test/searcherTest.java (revision 7)
+++ test/searcherTest.java (revision 8)
@@ -1,13 +1,28 @@
11 import org.mathsci.distance.NldSearcher;
22
33 import org.junit.Test;
4+import java.io.IOException;
5+public class searcherTest extends NldSearcher
6+{
7+ public searcherTest(){ };
48
5-class NldSearcherTest
6-{
7-
8- @Test public void test1()
9+ @Test
10+ public void test1(String[] args)
911 {
10- NldSearcher s = new NldSearcher();
12+ searcherTest s = new searcherTest();
13+ double x,y,xy,lx,ly,lxy;
14+ double m = 1000000;
15+ try{
16+ x = s.searcher(args[0]);
17+ y = s.searcher(args[1]);
18+ xy = s.searcher(args[0]+" AND "+args[1]);
19+ lx = Math.log(x);
20+ ly = Math.log(y);
21+ lxy = Math.log(xy);
22+ System.out.println(args[0]+" and "+args[1]+"="+(Math.max(lx,ly)-lxy)/(Math.log(m)-Math.min(lx,ly)));
23+ }catch(Exception e){
24+ e.printStackTrace();
25+ }
1126 }
1227
1328 }
\ No newline at end of file
--- src/org/mathsci/distance/NldSearcher.java (revision 7)
+++ src/org/mathsci/distance/NldSearcher.java (revision 8)
@@ -38,6 +38,7 @@
3838 public class NldSearcher {
3939
4040 static final Properties properties = new Properties();
41+ static public String index;
4142 static File INDEX_DIR;
4243
4344 public NldSearcher()
@@ -46,42 +47,43 @@
4647 {
4748 InputStreamReader inFile = new InputStreamReader(NldSearcher.class.getClassLoader().getResourceAsStream("nld.properties"));
4849 properties.load(inFile);
49- INDEX_DIR = new File(properties.getProperty("lucene.index.path"));
50+ index = properties.getProperty("lucene.index.path");
51+ INDEX_DIR = new File(index);
5052 }catch(IOException e)
5153 {
5254 e.printStackTrace();
5355 }
54-System.out.println("index="+properties.getProperty("lucene.index.path"));
56+//System.out.println("index="+properties.getProperty("lucene.index.path"));
5557 }
56- public void searcher(String arg) throws Exception {
58+ public double searcher(String arg) throws Exception {
5759 NldSearcher search = new NldSearcher();
5860
59- String index = "index";
6061 String field = "contents";
6162 String queries = arg;
6263 boolean paging = true;
6364
64-System.out.println("くえりーqueries="+queries);
65+//System.out.println("queries="+queries);
6566 IndexReader reader = IndexReader.open(FSDirectory.open(new File(index)), true);
66-System.out.println("reader="+reader);
67+//System.out.println("reader="+reader);
6768 Searcher searcher = new IndexSearcher(reader);
68-System.out.println("searcher="+searcher);
69+//System.out.println("searcher="+searcher);
6970 // Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
7071 Analyzer analyzer = new IpadicAnalyzer(new Tagger("/home/tetsato/ipadic"));
71-System.out.println("analyzer="+analyzer);
72+//System.out.println("analyzer="+analyzer);
7273 BufferedReader in = null;
7374 if (queries != null) {
7475 in = new BufferedReader(new StringReader(queries));
7576 }
76-System.out.println("in="+in);
77+//System.out.println("in="+in);
7778 QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, field, analyzer);
7879 String line = in.readLine();
7980 line = line.trim();
80-System.out.println("line="+line);
81+//System.out.println("line="+line);
8182 Query query = parser.parse(line);
82- System.out.println("Searching for: " + query.toString(field));
83- doStreamingSearch(searcher, query);
83+// System.out.println("Searching for: " + query.toString(field));
84+ double numHitsTotal = doStreamingSearch(searcher, query);
8485 reader.close();
86+ return(numHitsTotal);
8587 }
8688
8789
@@ -94,7 +96,6 @@
9496 System.exit(0);
9597 }
9698
97- String index = "index";
9899 String field = "contents";
99100 String queries = null;
100101 boolean paging = true;
@@ -107,11 +108,11 @@
107108 field = args[i+1];
108109 i++;
109110 } else if ("-queries".equals(args[i])) {
110-System.out.println("queries="+args[i+1]+" length="+args[i+1].length());
111- System.out.println("Default encoding: " +
112- new InputStreamReader(System.in).getEncoding());
113- System.out.println("file.encoding: " +
114- java.lang.System.getProperty("file.encoding"));
111+//System.out.println("queries="+args[i+1]+" length="+args[i+1].length());
112+// System.out.println("Default encoding: " +
113+// new InputStreamReader(System.in).getEncoding());
114+// System.out.println("file.encoding: " +
115+// java.lang.System.getProperty("file.encoding"));
115116 if(queries == null)
116117 {
117118 queries = args[i+1];
@@ -122,30 +123,30 @@
122123 i++;
123124 }
124125 }
125-System.out.println("くえりーqueries="+queries);
126+System.out.println("くえりー="+queries);
126127 IndexReader reader = IndexReader.open(FSDirectory.open(new File(index)), true);
127-System.out.println("reader="+reader);
128+//System.out.println("reader="+reader);
128129 Searcher searcher = new IndexSearcher(reader);
129-System.out.println("searcher="+searcher);
130+//System.out.println("searcher="+searcher);
130131 // Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
131132 Analyzer analyzer = new IpadicAnalyzer(new Tagger("/home/tetsato/ipadic"));
132-System.out.println("analyzer="+analyzer);
133+//System.out.println("analyzer="+analyzer);
133134 BufferedReader in = null;
134135 if (queries != null) {
135136 in = new BufferedReader(new StringReader(queries));
136137 }
137-System.out.println("in="+in);
138+//System.out.println("in="+in);
138139 QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, field, analyzer);
139140 String line = in.readLine();
140141 line = line.trim();
141-System.out.println("line="+line);
142+//System.out.println("line="+line);
142143 Query query = parser.parse(line);
143144 System.out.println("Searching for: " + query.toString(field));
144- doStreamingSearch(searcher, query);
145+ double numHitsTotal = doStreamingSearch(searcher, query);
145146 reader.close();
146147 }
147148
148- public static void doStreamingSearch(final Searcher searcher, Query query) throws IOException
149+ public static double doStreamingSearch(final Searcher searcher, Query query) throws IOException
149150 {
150151 int hitsPerPage = searcher.maxDoc();
151152 TopScoreDocCollector streamingHitCollector = TopScoreDocCollector.create(
@@ -153,9 +154,10 @@
153154 searcher.search(query, streamingHitCollector);
154155 ScoreDoc[] hits = streamingHitCollector.topDocs().scoreDocs;
155156 int numTotalHits = streamingHitCollector.getTotalHits();
156- System.out.println(numTotalHits + " total matching documents");
157+// System.out.println(numTotalHits + " total matching documents");
157158 int start = 0;
158159 int end = Math.min(numTotalHits, hitsPerPage);
160+/*
159161 for (int i = start; i < end; i++)
160162 {
161163 Document doc = searcher.doc(hits[i].doc);
@@ -171,6 +173,8 @@
171173 System.out.println((i+1) + ". " + "No path for this document");
172174 }
173175 }
176+*/
177+ return(numTotalHits);
174178 }
175179
176180 }
--- src/org/mathsci/distance/NldCommand.java (revision 7)
+++ src/org/mathsci/distance/NldCommand.java (revision 8)
@@ -10,8 +10,8 @@
1010 import java.io.FileNotFoundException;
1111 import java.io.IOException;
1212 import java.util.Date;
13+import java.util.Properties;
1314
14-
1515 import java.io.FileReader;
1616 import java.io.StringReader;
1717
@@ -36,19 +36,36 @@
3636 import org.apache.lucene.search.Searcher;
3737 import org.apache.lucene.search.TopScoreDocCollector;
3838
39+import java.lang.Integer;
40+
3941 public class NldCommand {
40-
41- private NldCommand() {}
42+ static final Properties properties = new Properties();
43+ static public String index;
44+ static File INDEX_DIR;
45+ static int max_pages;
46+ static private void initNldCommand()
47+ {
48+ try
49+ {
50+ InputStreamReader inFile = new InputStreamReader(NldSearcher.class.getClassLoader().getResourceAsStream("nld.properties"));
51+ properties.load(inFile);
52+ index = properties.getProperty("lucene.index.path");
53+ INDEX_DIR = new File(index);
54+ max_pages = new Integer(properties.getProperty("lucene.index.pages"));
55+ }catch(IOException e)
56+ {
57+ e.printStackTrace();
58+ }
59+ }
4260
43- static final File INDEX_DIR = new File("index");
44-
4561 public static void main(String[] args) {
62+ initNldCommand();
4663 String usage = "java org.apache.lucene.demo.IndexFiles <root_directory>";
4764 if (args.length == 0) {
4865 System.err.println("Usage: " + usage);
4966 System.exit(1);
5067 }
51-
68+//System.out.println("args="+args[0]);
5269 if("indexer".equals(args[0]))
5370 {
5471 NldIndexer indexer = new NldIndexer();
@@ -60,8 +77,34 @@
6077 searcher.searcher(args);
6178 }catch(Exception e)
6279 {
80+ e.printStackTrace();
6381 }
82+ }else if("calculator".equals(args[0]))
83+ {
84+ computeScore(args);
6485 }
6586 }
87+ public static void computeScore(String[] args)
88+ {
89+ double x,y,xy,lx,ly,lxy;
90+ double m = max_pages;
91+ try{
92+ NldSearcher s = new NldSearcher();
93+
94+ System.out.println("arg1="+(new String(args[1].getBytes("EUC-JP"),"UTF-8")));
95+ x = s.searcher(args[1]);
96+ y = s.searcher(args[2]);
97+ xy = s.searcher(args[1]+" AND "+args[2]);
98+ lx = Math.log(x);
99+ ly = Math.log(y);
100+ lxy = Math.log(xy);
66101
102+ System.out.println((new String(args[1].getBytes(),"UTF-8"))+" <> "+args[2]+"="+(Math.max(lx,ly)-lxy)/(Math.log(m)-Math.min(lx,ly)));
103+ System.out.println((Math.max(lx,ly)-lxy)/(Math.log(m)-Math.min(lx,ly)));
104+ }catch(Exception e){
105+ e.printStackTrace();
106+ }
107+
108+ }
109+
67110 }
--- bin/nld.sh (revision 7)
+++ bin/nld.sh (revision 8)
@@ -2,7 +2,7 @@
22
33 JAVA=/usr/bin/java
44 NLD_JAR=../bin/nld.jar
5-LIBS=../lib:../lib/lucene-core-3.0.3.jar:../lib/igo-analyzer-0.0.1.jar:../lib/igo-0.4.2.jar
5+LIBS=../lib:../lib/lucene-core-3.0.3.jar:../lib/igo-analyzer-0.0.1.jar:../lib/igo-0.4.2.jar:../conf
66 OPTIONS=-Dfile.encoding=UTF-8
77 cd nls/bin;
88 if [ "X"$1 = "Xindexer" ];
@@ -29,6 +29,20 @@
2929 fi
3030 $JAVA $OPTIONS -cp $NLD_JAR:$LIBS org.mathsci.distance.NldCommand searcher $DATA $DATA2
3131 fi
32+elif [ "X"$1 = "Xcalculator" ];
33+then
34+ if [ "X"$2 = "X" ];
35+ then
36+ echo "usage: " $0 "calculator <word1> <word2>";
37+ exit 1;
38+ else
39+ DATA="$2"
40+ if [ "X"$3 != "X" ];
41+ then
42+ DATA2="$3";
43+ fi
44+ $JAVA $OPTIONS -cp $NLD_JAR:$LIBS org.mathsci.distance.NldCommand calculator $DATA $DATA2
45+ fi
3246 else
33- echo "usage: " $0 "[indexer|searcher]"
47+ echo "usage: " $0 "[indexer|searcher|calculator]"
3448 fi
Show on old repository browser