• R/O
  • SSH
  • HTTPS

nls: 提交


Commit MetaInfo

修订版: 23 (tree)
时间: 2011-06-16 20:25:54
作者: linuxchecker

Log Message

improved startup shell

更改概述

差异

--- src/org/mathsci/distance/NldCalc.java (revision 22)
+++ src/org/mathsci/distance/NldCalc.java (revision 23)
@@ -1,6 +1,8 @@
11 package org.mathsci.distance;
22
33 import java.util.Date;
4+import java.io.PrintStream;
5+import java.io.UnsupportedEncodingException;
46
57 class NldCalc extends NldCore
68 {
@@ -8,7 +10,7 @@
810 {
911 super();
1012 }
11- public double computeScore(String[] args)
13+ public double computeScore(String[] args) throws UnsupportedEncodingException
1214 {
1315 int x, y, xy;
1416 double lx,ly,lxy;
@@ -15,15 +17,16 @@
1517 double m = max_pages;
1618 double val = 0d;
1719 Date start_time = new Date();
20+ PrintStream out = new PrintStream(System.out, true, "UTF-8");
1821 try{
1922
20- if(timewatch) System.out.println("time="+(new Date().getTime()-start_time.getTime()));
23+ if(timewatch) System.out.println("time="+(new Date().getTime()-start_time.getTime())+"<BR />");
2124 NldSearcher s = new NldSearcher();
22- if(timewatch) System.out.println("time="+(new Date().getTime()-start_time.getTime()));
25+ if(timewatch) System.out.println("time="+(new Date().getTime()-start_time.getTime())+"<BR />");
2326 x = s.searcher(args[1]);
24- if(timewatch) System.out.println("time="+(new Date().getTime()-start_time.getTime()));
27+ if(timewatch) System.out.println("time="+(new Date().getTime()-start_time.getTime())+"<BR />");
2528 y = s.searcher(args[2]);
26- if(timewatch) System.out.println("time="+(new Date().getTime()-start_time.getTime()));
29+ if(timewatch) System.out.println("time="+(new Date().getTime()-start_time.getTime())+"<BR />");
2730
2831 if(y==0)
2932 {
@@ -44,7 +47,7 @@
4447 }else
4548 {
4649 xy = s.searcher(args[1]+" AND "+args[2]);
47- if(timewatch) System.out.println("time="+(new Date().getTime()-start_time.getTime()));
50+ if(timewatch) out.println("time="+(new Date().getTime()-start_time.getTime())+"<BR />");
4851 lx = Math.log(x);
4952 ly = Math.log(y);
5053 lxy = Math.log(xy);
@@ -59,6 +62,6 @@
5962 }catch(Exception e){
6063 e.printStackTrace();
6164 }
62- return(1d-Math.exp(-0.5*(val)));
65+ return(1d-Math.exp(-0.2*(val)));
6366 }
6467 }
--- src/org/mathsci/distance/NldSearcher.java (revision 22)
+++ src/org/mathsci/distance/NldSearcher.java (revision 23)
@@ -13,8 +13,9 @@
1313
1414 import java.io.FileReader;
1515 import java.io.StringReader;
16+import java.io.PrintStream;
17+import java.io.UnsupportedEncodingException;
1618
17-
1819 import org.apache.lucene.document.Field;
1920
2021 import java.io.BufferedReader;
@@ -71,13 +72,12 @@
7172 {
7273 reader.close();
7374 }
74- public int searcher(String arg) throws Exception {
75+ public int searcher(String arg) throws Exception,UnsupportedEncodingException {
7576
7677 String field = "contents";
7778 String queries = arg;
7879 boolean paging = false;
79-
80-
80+ PrintStream out = new PrintStream(System.out, true, "UTF-8");
8181 // for (String file : dir.listAll())
8282 // {
8383 // dir.copy(RAM_INDEX_DIR, file, file); // newFile can be either file, or a new name
@@ -92,7 +92,7 @@
9292 String line = in.readLine();
9393 line = line.trim();
9494 Query query = parser.parse(line);
95- System.out.println("Searching for: " + query.toString(field));
95+ out.println("Searching for: " + query.toString(field)+"</ BR>");
9696 int numHitsTotal = doStreamingSearch(searcher, query);
9797 return(numHitsTotal);
9898 }
--- src/org/mathsci/distance/NldCommand.java (revision 22)
+++ src/org/mathsci/distance/NldCommand.java (revision 23)
@@ -4,8 +4,11 @@
44 import org.mathsci.distance.NldIndexer;
55 import org.mathsci.distance.NldSearcher;
66 import java.util.*;
7+import java.io.*;
78 import java.io.FileReader;
89 import java.io.BufferedReader;
10+import java.io.UnsupportedEncodingException;
11+import java.util.Properties;
912
1013 public class NldCommand extends NldCore {
1114
@@ -12,6 +15,7 @@
1215 public NldCommand()
1316 {
1417 super();
18+
1519 }
1620
1721 public static void main(String[] args) {
@@ -37,8 +41,15 @@
3741 }else if("calculator".equals(args[0]))
3842 {
3943 NldCalc ncalc = new NldCalc();
40- double val = ncalc.computeScore(args);
41- System.out.println("similarity="+val);
44+ try{
45+ args[1]=System.getenv("arg1");
46+ args[2]=System.getenv("arg2");
47+ double val = ncalc.computeScore(args);
48+ System.out.println("Distance="+val);
49+ }catch(UnsupportedEncodingException e)
50+ {
51+ e.printStackTrace();
52+ }
4253 }else if("loader".equals(args[0]))
4354 {
4455 NldCalc ncalc = new NldCalc();
@@ -62,8 +73,13 @@
6273 for(int j=i+1; j<len;++j)
6374 {
6475 word[2] = stringBuffer.get(j);
65- double val = ncalc.computeScore(word);
66- System.out.println(word[1]+","+word[2]+","+val);
76+ try{
77+ double val = ncalc.computeScore(word);
78+ System.out.println(word[1]+","+word[2]+","+val);
79+ }catch(UnsupportedEncodingException e)
80+ {
81+ e.printStackTrace();
82+ }
6783 }
6884 }
6985 }
--- src/Makefile (revision 22)
+++ src/Makefile (revision 23)
@@ -1,5 +1,5 @@
11 SRCDIR = org/mathsci/distance
2-OPTIONS = -Xlint:deprecation -cp $(DEST):$(LIBS):$(CONF) -encoding UTF-8
2+OPTIONS = -Xlint:deprecation -cp $(DEST):$(LIBS):$(CONF) -encoding UTF8 -J-Dfile.encoding=UTF8
33 all: $(SRCDIR)/NldIndexer.java $(SRCDIR)/NldSearcher.java $(SRCDIR)/NldCommand.java
44 javac $(OPTIONS) org/mathsci/distance/NldIndexer.java
55 javac $(OPTIONS) org/mathsci/distance/NldSearcher.java
--- bin/nld.sh (revision 22)
+++ bin/nld.sh (revision 23)
@@ -3,7 +3,7 @@
33 JAVA=`which java`
44 NLD_JAR=../bin/nld.jar
55 LIBS=../lib:../lib/lucene-core-3.1.0.jar:../lib/igo-analyzer-0.0.1.jar:../lib/igo-0.4.2.jar:../conf
6-OPTIONS="-Dfile.encoding=UTF-8 -Xmx1048m"
6+OPTIONS="-Dfile.encoding=UTF8 -Xmx1048m"
77 cd nls/bin;
88 if [ "X"$1 = "Xindexer" ];
99 then
@@ -36,12 +36,14 @@
3636 echo "usage: " $0 "calculator <word1> <word2>";
3737 exit 1;
3838 else
39- DATA="$2"
39+ DATA=`echo "$2"|nkf -w`
4040 if [ "X"$3 != "X" ];
4141 then
4242 DATA2="$3";
4343 fi
44- $JAVA $OPTIONS -cp $NLD_JAR:$LIBS org.mathsci.distance.NldCommand calculator $DATA $DATA2
44+ export arg1=$DATA
45+ export arg2=$DATA2
46+ $JAVA $OPTIONS -cp $NLD_JAR:$LIBS org.mathsci.distance.NldCommand calculator $DATA $DATA2
4547 fi
4648 elif [ "X"$1 = "Xloader" ];
4749 then
--- README.txt (revision 22)
+++ README.txt (revision 23)
@@ -42,6 +42,12 @@
4242
4343 $ mv igo-analyzer-0.0.1.jar igo-0.4.2.jar nls/lib/
4444
45+ + MeCab for making dictionary
46+ http://sourceforge.net/projects/mecab/files/mecab/0.98/mecab-0.98.tar.gz/download
47+ $ tar xvf mecab-0.98.tar.gz
48+ $ cd mecab-0.98
49+ $ ./configure; make ; make install
50+
4551 + NAIST Japanese Dictionary
4652 http://iij.dl.sourceforge.jp/naist-jdic/48487/mecab-naist-jdic-0.6.3-20100801.tar.gz
4753 $ tar xvf mecab-naist-jdic-0.6.3-20100801.tar.gz
@@ -84,7 +90,13 @@
8490 time ./nls/bin/wikipedia_parse.rb jawiki-latest-pages-articles.xml
8591 1455.073u 25529.807s 7:33:16.24 99.2% 5+-469k 31835+44883io 20pf+0w
8692
93+$ time ./nls/bin/wikipedia_parse.rb jawiki-latest-pages-articles.xml
8794
95+real 125m23.183s
96+user 51m10.832s
97+sys 3m9.064s
98+
99+
88100 doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
89101 > time ./nls/bin/nld.sh indexer data
90102 adding ../../data/1571104.txt
@@ -116,4 +128,4 @@
116128 sys 7m45.093s
117129
118130 * Loader log to image map
119- grep , tmp1.log |awk 'BEGIN{i=1;j=0;FS=","}{ val[i,j]=$3; val[j,i]=$3; i+=1; if(i==20){j+=1; i=j+1;} }END{for(k=0;k<20;++k){ printf("%d,",k); for(l=0;l<20;++l) {printf("%.3f",val[k,l]); if(l!=19) printf(",");}; printf("\n");}}'
\ No newline at end of file
131+ grep , tmp1.log |awk 'BEGIN{i=1;j=0;FS=","}{ val[i,j]=$3; val[j,i]=$3; i+=1; if(i==20){j+=1; i=j+1;} }END{for(k=0;k<20;++k){ printf("%d,",k); for(l=0;l<20;++l) {printf("%.3f",val[k,l]); if(l!=19) printf(",");}; printf("\n");}}'
Show on old repository browser