• R/O
  • SSH
  • HTTPS

nls: 提交


Commit MetaInfo

修订版: 19 (tree)
时间: 2011-05-20 21:29:35
作者: linuxchecker

Log Message

homework to tullio

更改概述

差异

--- test/searcherTest.java (revision 18)
+++ test/searcherTest.java (revision 19)
@@ -7,7 +7,7 @@
77 public searcherTest(){ };
88
99 @Test
10- public void test1(String[] args)
10+ public void test1()
1111 {
1212 searcherTest s = new searcherTest();
1313 double x,y,xy,lx,ly,lxy;
--- src/org/mathsci/distance/NldCore.java (revision 18)
+++ src/org/mathsci/distance/NldCore.java (revision 19)
@@ -7,6 +7,7 @@
77 static final Properties properties = new Properties();
88 static public String index;
99 static File INDEX_DIR;
10+ static public String IPADIC;
1011 static int max_pages;
1112 static int debug_level;
1213 static public boolean timewatch = false;
@@ -20,8 +21,14 @@
2021 InputStreamReader inFile = new InputStreamReader(rs);
2122 properties.load(inFile);
2223 index = properties.getProperty("lucene.index.path");
24+ if(index==null) throw new IOException("index path should be indicated in property file");
25+
26+ INDEX_DIR = new File(index);
27+
2328 debug_level = Integer.parseInt(properties.getProperty("lucene.debug.level","0"));
24- INDEX_DIR = new File(index);
29+ IPADIC = properties.getProperty("ipadic.path");
30+ if(IPADIC==null) throw new IOException("ipadic path should be indicated in property file");
31+
2532 max_pages = new Integer(properties.getProperty("lucene.index.pages"));
2633 if("yes".equals(properties.getProperty("lucene.timewatch")) ||
2734 "true".equals(properties.getProperty("lucene.timewatch")) )
--- src/org/mathsci/distance/NldSearcher.java (revision 18)
+++ src/org/mathsci/distance/NldSearcher.java (revision 19)
@@ -1,6 +1,7 @@
11 package org.mathsci.distance;
22
33 import org.apache.lucene.store.FSDirectory;
4+import org.apache.lucene.store.NIOFSDirectory;
45 import org.apache.lucene.util.Version;
56
67 import java.io.File;
@@ -52,8 +53,8 @@
5253 super();
5354 try
5455 {
55- IPADIC = properties.getProperty("ipadic.path");
56- dir = FSDirectory.open(new File(index));
56+// dir = FSDirectory.open(new File(index));
57+ dir = NIOFSDirectory.open(new File(index));
5758
5859 //RAM_INDEX_DIR = new RAMDirectory();
5960
--- src/org/mathsci/distance/NldIndexer.java (revision 18)
+++ src/org/mathsci/distance/NldIndexer.java (revision 19)
@@ -23,19 +23,17 @@
2323 import org.apache.lucene.index.IndexWriter;
2424 import org.apache.lucene.index.IndexWriterConfig;
2525 import org.apache.lucene.store.FSDirectory;
26+import org.apache.lucene.store.NIOFSDirectory;
2627 import org.apache.lucene.util.Version;
2728
28-public class NldIndexer
29+public class NldIndexer extends NldCore
2930 {
30- static final Properties properties = new Properties();
31- static File INDEX_DIR;
32- static public String index;
33- static public String IPADIC;
3431 static public IndexWriter writer;
3532 static public IndexWriterConfig INDEX_CONFIG;
3633
3734 public NldIndexer()
3835 {
36+ super();
3937 try
4038 {
4139 final Tagger tagger = new Tagger(IPADIC);
@@ -43,16 +41,11 @@
4341 INDEX_CONFIG = new IndexWriterConfig(Version.LUCENE_31, analyzer);
4442 IndexWriter writer = new IndexWriter(FSDirectory.open(INDEX_DIR), INDEX_CONFIG);
4543 // new IpadicAnalyzer(tagger), IndexWriter.MaxFieldLength.LIMITED);
46- InputStreamReader inFile = new InputStreamReader(NldSearcher.class.getClassLoader().getResourceAsStream("nld.properties"));
47- properties.load(inFile);
48- index = properties.getProperty("lucene.index.path");
49- INDEX_DIR = new File(index);
50- IPADIC = properties.getProperty("ipadic.path");
5144 }catch(IOException e)
5245 {
5346 e.printStackTrace();
5447 }
55-//System.out.println("index="+properties.getProperty("lucene.index.path"));
48+System.out.println("index="+properties.getProperty("lucene.index.path"));
5649 }
5750
5851 static void indexer(String[] args)
@@ -115,11 +108,12 @@
115108 {
116109 Document doc = new Document();
117110
118- doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
111+// doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
112+ doc.add(new Field("path", f.getPath(), Field.Store.NO, Field.Index.NOT_ANALYZED));
119113
120114 doc.add(new Field("modified",
121115 DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE),
122- Field.Store.YES, Field.Index.NOT_ANALYZED));
116+ Field.Store.NO, Field.Index.NOT_ANALYZED));
123117
124118 doc.add(new Field("contents", new FileReader(f)));
125119
--- README.txt (revision 18)
+++ README.txt (revision 19)
@@ -83,6 +83,9 @@
8383 777.349u 18.024s 14:00.50 94.6% 36+1096k 9667+42845io 1pf+0w
8484 time ./nls/bin/wikipedia_parse.rb jawiki-latest-pages-articles.xml
8585 1455.073u 25529.807s 7:33:16.24 99.2% 5+-469k 31835+44883io 20pf+0w
86+
87+
88+doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
8689 > time ./nls/bin/nld.sh indexer data
8790 adding ../../data/1571104.txt
8891 Optimizing...
@@ -89,3 +92,17 @@
8992 7255930 total milliseconds
9093 6495.171u 381.982s 2:00:56.62 94.7% 88+1041k 1112532+85991io 7pf+0w
9194
95+$ ls -l nls/bin/index/
96+-rw-r--r-- 1 tetsato tetsato 40262986 2011-04-25 20:11 _76.fdt
97+-rw-r--r-- 1 tetsato tetsato 8000004 2011-04-25 20:11 _76.fdx
98+-rw-r--r-- 1 tetsato tetsato 32 2011-04-25 20:11 _76.fnm
99+-rw-r--r-- 1 tetsato tetsato 431667786 2011-04-25 20:15 _76.frq
100+-rw-r--r-- 1 tetsato tetsato 3000004 2011-04-25 20:15 _76.nrm
101+-rw-r--r-- 1 tetsato tetsato 1235412936 2011-04-25 20:15 _76.prx
102+-rw-r--r-- 1 tetsato tetsato 794095 2011-04-25 20:15 _76.tii
103+-rw-r--r-- 1 tetsato tetsato 60288601 2011-04-25 20:15 _76.tis
104+-rw-r--r-- 1 tetsato tetsato 20 2011-04-25 20:15 segments.gen
105+-rw-r--r-- 1 tetsato tetsato 273 2011-04-25 20:15 segments_1
106+
107+doc.add(new Field("path", f.getPath(), Field.Store.NO, Field.Index.NOT_ANALYZED));
108+
Show on old repository browser