• R/O
  • SSH
  • HTTPS

fess: 提交


Commit MetaInfo

修订版: 1795 (tree)
时间: 2013-09-06 14:17:51
作者: shinsuke

Log Message

version 8.2.0

更改概述

差异

--- fess-solr-plugin/tags/fess-solr-plugin-8.2.0/pom.xml (nonexistent)
+++ fess-solr-plugin/tags/fess-solr-plugin-8.2.0/pom.xml (revision 1795)
@@ -0,0 +1,211 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the Fess Solr plugin (tokenizers used for suggest). -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>jp.sf.fess</groupId>
  <artifactId>fess-solr-plugin</artifactId>
  <version>8.2.0</version>
  <packaging>jar</packaging>
  <name>fess-solr-plugin</name>
  <url>http://fess.codelibs.org/</url>
  <inceptionYear>2009</inceptionYear>
  <licenses>
    <license>
      <name>The Apache Software License, Version 2.0</name>
      <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
      <distribution>repo</distribution>
    </license>
  </licenses>
  <organization>
    <name>CodeLibs</name>
    <url>http://www.codelibs.org/</url>
  </organization>
  <developers>
    <developer>
      <id>shinsuke</id>
      <name>Shinsuke Sugaya</name>
      <email>shinsuke_at_yahoo.co.jp</email>
      <url>http://d.hatena.ne.jp/shinsuke_sugaya/</url>
    </developer>
  </developers>
  <issueManagement>
    <url><![CDATA[http://sourceforge.jp/projects/fess/ticket/]]></url>
  </issueManagement>
  <distributionManagement>
    <repository>
      <id>codelibs-repository</id>
      <url>ftp://maven.codelibs.org/home/codelibs/maven/</url>
    </repository>
    <site>
      <id>codelibs-site</id>
      <url>ftp://maven.codelibs.org/home/codelibs/fess/projects/fess-solr-plugin/</url>
    </site>
  </distributionManagement>
  <scm>
    <connection>scm:svn:http://svn.sourceforge.jp/svnroot/fess/fess-solr-plugin/trunk</connection>
    <developerConnection>scm:svn:https://svn.sourceforge.jp/svnroot/fess/fess-solr-plugin/trunk</developerConnection>
    <url>http://sourceforge.jp/projects/fess/svn/view/</url>
  </scm>
  <mailingLists>
    <mailingList>
      <name>User List</name>
      <subscribe>http://lists.sourceforge.jp/mailman/listinfo/fess-user</subscribe>
      <unsubscribe>http://lists.sourceforge.jp/mailman/listinfo/fess-user</unsubscribe>
      <post>fess-user@lists.sourceforge.jp</post>
      <archive>http://sourceforge.jp/projects/fess/lists/archive/user/</archive>
    </mailingList>
  </mailingLists>
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>
  <build>
    <!-- Plugin versions are pinned so that the build does not depend on whatever
         default version the local Maven installation happens to resolve. -->
    <plugins>
      <plugin>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.1</version>
        <configuration>
          <source>1.6</source>
          <target>1.6</target>
          <encoding>UTF-8</encoding>
        </configuration>
      </plugin>
      <plugin>
        <!-- Attaches a sources jar during the package phase. -->
        <artifactId>maven-source-plugin</artifactId>
        <version>2.2.1</version>
        <executions>
          <execution>
            <id>source-jar</id>
            <phase>package</phase>
            <goals>
              <goal>jar</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <artifactId>maven-javadoc-plugin</artifactId>
        <version>2.9.1</version>
        <configuration>
          <encoding>UTF-8</encoding>
          <docencoding>UTF-8</docencoding>
          <charset>UTF-8</charset>
          <links>
            <link>http://docs.oracle.com/javase/6/docs/api/</link>
            <link>http://docs.oracle.com/javaee/6/api/</link>
            <link>http://aopalliance.sourceforge.net/doc/</link>
            <link>http://www.csg.is.titech.ac.jp/~chiba/javassist/html/</link>
            <link>http://www.junit.org/junit/javadoc/4.3/</link>
            <link>http://www.junit.org/junit/javadoc/3.8.1/</link>
            <link>http://s2container.seasar.org/2.4/s2-framework/ja/apidocs/</link>
            <link>http://s2container.seasar.org/2.4/s2-extension/ja/apidocs/</link>
            <link>http://s2container.seasar.org/2.4/s2-tiger/ja/apidocs/</link>
            <link>http://s2robot.sandbox.seasar.org/apidocs/</link>
          </links>
        </configuration>
        <executions>
          <execution>
            <phase>site</phase>
            <goals>
              <goal>javadoc</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <!-- Checks/applies the license header on Java sources. -->
        <groupId>com.mycila.maven-license-plugin</groupId>
        <artifactId>maven-license-plugin</artifactId>
        <version>1.5.0</version>
        <configuration>
          <header>${basedir}/src/etc/header.txt</header>
          <includes>
            <include>src/**/*.java</include>
          </includes>
          <encoding>UTF-8</encoding>
          <headerDefinitions>
            <headerDefinition>${basedir}/src/etc/header-definition.xml</headerDefinition>
          </headerDefinitions>
        </configuration>
      </plugin>
      <plugin>
        <artifactId>maven-site-plugin</artifactId>
        <version>3.3</version>
        <configuration>
          <locales>en,ja</locales>
          <inputEncoding>UTF-8</inputEncoding>
          <outputEncoding>UTF-8</outputEncoding>
        </configuration>
      </plugin>
    </plugins>
    <extensions>
      <!-- Required for the ftp:// URLs used in distributionManagement. -->
      <extension>
        <groupId>org.apache.maven.wagon</groupId>
        <artifactId>wagon-ftp</artifactId>
        <version>1.0-beta-6</version>
      </extension>
    </extensions>
  </build>
  <pluginRepositories>
    <pluginRepository>
      <id>maven.seasar.org</id>
      <name>The Seasar Foundation Maven2 Repository</name>
      <url>http://maven.seasar.org/maven2/</url>
    </pluginRepository>
    <pluginRepository>
      <id>maven-snapshot.seasar.org</id>
      <name>The Seasar Foundation Maven2 Repository</name>
      <url>http://maven.seasar.org/maven2-snapshot/</url>
    </pluginRepository>
    <pluginRepository>
      <id>oss.sonatype.org</id>
      <name>oss.sonatype.org</name>
      <url>http://oss.sonatype.org/content/groups/public/</url>
    </pluginRepository>
  </pluginRepositories>
  <repositories>
    <repository>
      <id>codelibs.org</id>
      <name>CodeLibs Repository</name>
      <url>http://maven.codelibs.org/</url>
    </repository>
    <repository>
      <id>maven.seasar.org</id>
      <name>The Seasar Foundation Maven2 Repository</name>
      <url>http://maven.seasar.org/maven2/</url>
    </repository>
    <repository>
      <id>maven-snapshot.seasar.org</id>
      <name>The Seasar Foundation Maven2 Repository</name>
      <url>http://maven.seasar.org/maven2-snapshot/</url>
    </repository>
    <repository>
      <id>orangesignal.sourceforge.jp</id>
      <name>OrangeSignal Repository</name>
      <url>http://orangesignal.sourceforge.jp/maven2/</url>
    </repository>
  </repositories>
  <dependencies>
    <dependency>
      <groupId>org.apache.solr</groupId>
      <artifactId>solr-core</artifactId>
      <version>4.4.0</version>
      <exclusions>
        <!-- NOTE(review): commons-io is normally published under groupId "commons-io",
             so this exclusion presumably matches nothing. The tokenizer sources import
             org.apache.commons.io.IOUtils, so confirm where that class comes from
             before correcting or removing this exclusion. -->
        <exclusion>
          <groupId>org.apache.commons</groupId>
          <artifactId>commons-io</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>org.noggit</groupId>
      <artifactId>noggit</artifactId>
      <version>0.5</version>
    </dependency>
    <dependency>
      <groupId>jp.sf.fess</groupId>
      <artifactId>fess-suggest</artifactId>
      <version>1.0.2</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
  </dependencies>
</project>
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
--- fess-solr-plugin/tags/fess-solr-plugin-8.2.0/src/main/java/jp/sf/fess/solr/plugin/analysis/SuggestTextTokenizer.java (nonexistent)
+++ fess-solr-plugin/tags/fess-solr-plugin-8.2.0/src/main/java/jp/sf/fess/solr/plugin/analysis/SuggestTextTokenizer.java (revision 1795)
@@ -0,0 +1,381 @@
1+/*
2+ * Copyright 2009-2013 the Fess Project and the Others.
3+ *
4+ * Licensed under the Apache License, Version 2.0 (the "License");
5+ * you may not use this file except in compliance with the License.
6+ * You may obtain a copy of the License at
7+ *
8+ * http://www.apache.org/licenses/LICENSE-2.0
9+ *
10+ * Unless required by applicable law or agreed to in writing, software
11+ * distributed under the License is distributed on an "AS IS" BASIS,
12+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
13+ * either express or implied. See the License for the specific language
14+ * governing permissions and limitations under the License.
15+ */
16+
17+package jp.sf.fess.solr.plugin.analysis;
18+
19+import java.io.IOException;
20+import java.io.Reader;
21+import java.io.StringReader;
22+import java.util.ArrayList;
23+import java.util.HashMap;
24+import java.util.List;
25+import java.util.Map;
26+
27+import jp.sf.fess.suggest.converter.SuggestConverter;
28+
29+import org.apache.commons.io.IOUtils;
30+import org.apache.lucene.analysis.TokenStream;
31+import org.apache.lucene.analysis.Tokenizer;
32+import org.apache.lucene.analysis.ja.JapaneseTokenizer;
33+import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
34+import org.apache.lucene.analysis.ja.dict.UserDictionary;
35+import org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute;
36+import org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute;
37+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
38+import org.slf4j.Logger;
39+import org.slf4j.LoggerFactory;
40+
41+public class SuggestTextTokenizer extends Tokenizer {
42+ private static final Logger logger = LoggerFactory
43+ .getLogger(SuggestTextTokenizer.class);
44+
45+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
46+
47+ private String inputStr = "";
48+
49+ private int offset = 0;
50+
51+ private int readingOffset = 0;
52+
53+ private final List<String> termListByKuromoji = new ArrayList<String>();
54+
55+ private final List<String> readingList = new ArrayList<String>();
56+
57+ private final List<String> partOfSpeechList = new ArrayList<String>();
58+
59+ private final List<String> suggestStringList = new ArrayList<String>();
60+
61+ private final UserDictionary userDictionary;
62+
63+ private final boolean discardPunctuation;
64+
65+ private final Mode tokenizerMode;
66+
67+ private final String wordSeparator;
68+
69+ private final TermChecker termChecker;
70+
71+ private final List<SuggestConverter> preConverterList;
72+
73+ private final List<SuggestConverter> converterList;
74+
75+ private final int maxLength;
76+
77+ public SuggestTextTokenizer(final Reader input, final int bufferSize,
78+ final UserDictionary userDictionaryPara,
79+ final boolean discardPunctuationPara, final Mode modePara,
80+ final TermChecker termChecker,
81+ final List<SuggestConverter> preconverterList,
82+ final List<SuggestConverter> converterList,
83+ final String wordSeparator, final int maxLength) {
84+ super(input);
85+
86+ userDictionary = userDictionaryPara;
87+ discardPunctuation = discardPunctuationPara;
88+ tokenizerMode = modePara;
89+ termAtt.resizeBuffer(bufferSize);
90+ this.wordSeparator = wordSeparator;
91+ this.termChecker = termChecker;
92+ preConverterList = preconverterList;
93+ this.converterList = converterList;
94+ this.maxLength = maxLength;
95+
96+ initialize();
97+ }
98+
99+ public void initialize() {
100+ termListByKuromoji.clear();
101+ partOfSpeechList.clear();
102+ readingList.clear();
103+ suggestStringList.clear();
104+ offset = 0;
105+ readingOffset = 0;
106+ inputStr = "";
107+
108+ try {
109+ String s = IOUtils.toString(input);
110+ if (s != null && s.length() > 0) {
111+ if (maxLength > 0 && s.length() > maxLength) {
112+ s = truncateInput(s);
113+ }
114+ inputStr = s;
115+ for (final SuggestConverter converter : preConverterList) {
116+ inputStr = converter.convert(inputStr);
117+ }
118+ }
119+ } catch (final IOException e) {
120+ }
121+
122+ final Reader rd = new StringReader(inputStr);
123+
124+ TokenStream stream = null;
125+
126+ try {
127+ stream = new JapaneseTokenizer(rd, userDictionary,
128+ discardPunctuation, tokenizerMode);
129+
130+ stream.reset();
131+ while (stream.incrementToken()) {
132+ final CharTermAttribute att = stream
133+ .getAttribute(CharTermAttribute.class);
134+ termListByKuromoji.add(att.toString());
135+
136+ final PartOfSpeechAttribute psAtt = stream
137+ .getAttribute(PartOfSpeechAttribute.class);
138+ final String pos = psAtt.getPartOfSpeech();
139+ partOfSpeechList.add(pos);
140+
141+ final ReadingAttribute rdAttr = stream
142+ .getAttribute(ReadingAttribute.class);
143+
144+ String reading;
145+ if (rdAttr.getReading() != null) {
146+ reading = rdAttr.getReading();
147+ } else {
148+ reading = att.toString();
149+ }
150+
151+ for (final SuggestConverter converter : converterList) {
152+ reading = converter.convert(reading);
153+ }
154+ readingList.add(reading);
155+
156+ }
157+ } catch (final Exception e) {
158+ logger.warn("JapaneseTokenizer stream error", e);
159+ } finally {
160+ try {
161+ input.reset();
162+ } catch (final Exception e) {
163+ }
164+ try {
165+ stream.end();
166+ } catch (final Exception e) {
167+ }
168+ try {
169+ rd.close();
170+ } catch (final Exception e) {
171+ }
172+ }
173+ }
174+
175+ private String truncateInput(final String s) {
176+ int pos = maxLength;
177+ while (pos > 0) {
178+ final int ch = s.codePointAt(pos);
179+ if (!Character.isLetterOrDigit(ch)) {
180+ break;
181+ }
182+ pos--;
183+ }
184+ if (pos == 0) {
185+ pos = maxLength;
186+ }
187+
188+ return s.substring(0, pos);
189+ }
190+
191+ @Override
192+ public boolean incrementToken() throws IOException {
193+
194+ if (offset < termListByKuromoji.size()) {
195+ while (partOfSpeechList.get(offset).indexOf("名詞") == -1) {
196+ offset++;
197+ if (offset >= termListByKuromoji.size()) {
198+ break;
199+ }
200+ }
201+ }
202+
203+ if (offset < termListByKuromoji.size()) {
204+ termAtt.setEmpty();
205+ termAtt.append(termListByKuromoji.get(offset));
206+ suggestStringList.add(convertSuggestString(
207+ termListByKuromoji.get(offset), readingList.get(offset)));
208+ offset++;
209+ } else {
210+
211+ int tmpOffset = offset - termListByKuromoji.size();
212+ boolean readingFlg = false;
213+ if (tmpOffset < termListByKuromoji.size()) {
214+ StringBuilder buffer = null;
215+ StringBuilder readingBuf = null;
216+ int end = 1;
217+
218+ for (; tmpOffset < partOfSpeechList.size(); tmpOffset++) {
219+ buffer = new StringBuilder();
220+ readingBuf = new StringBuilder();
221+ if (termChecker.check(partOfSpeechList.get(tmpOffset),
222+ termListByKuromoji.get(tmpOffset), "start")) {
223+ buffer.append(termListByKuromoji.get(tmpOffset));
224+ readingBuf.append(readingList.get(tmpOffset));
225+
226+ for (int i = 1; tmpOffset + i < partOfSpeechList.size(); i++) {
227+ if (termChecker.check(
228+ partOfSpeechList.get(tmpOffset + i),
229+ termListByKuromoji.get(tmpOffset + i),
230+ "middle")) {
231+ if (inputStr
232+ .indexOf(buffer.toString()
233+ + termListByKuromoji
234+ .get(tmpOffset + i)) != -1) {
235+ buffer.append(termListByKuromoji
236+ .get(tmpOffset + i));
237+ readingBuf.append(readingList.get(tmpOffset
238+ + i));
239+ end++;
240+ } else {
241+ break;
242+ }
243+ } else {
244+ break;
245+ }
246+ }
247+ if (end > 1) {
248+ break;
249+ }
250+ }
251+ }
252+
253+ if (buffer != null
254+ && tmpOffset < partOfSpeechList.size()
255+ && buffer.length() > termListByKuromoji.get(tmpOffset)
256+ .length()) {
257+ termAtt.setEmpty();
258+ termAtt.append(buffer.toString());
259+ suggestStringList.add(convertSuggestString(
260+ buffer.toString(), readingBuf.toString()));
261+
262+ } else {
263+ readingFlg = true;
264+ }
265+ offset = tmpOffset + termListByKuromoji.size() + end;
266+ } else {
267+ readingFlg = true;
268+ }
269+
270+ if (readingFlg) {
271+ if (readingOffset < suggestStringList.size()) {
272+ termAtt.setEmpty();
273+ termAtt.append(suggestStringList.get(readingOffset));
274+ readingOffset++;
275+ } else {
276+ return false;
277+ }
278+ }
279+
280+ }
281+ return true;
282+ }
283+
284+ @Override
285+ public void reset() throws IOException {
286+ super.reset();
287+ initialize();
288+ }
289+
290+ private String convertSuggestString(final String term, final String reading) {
291+ String suggestString;
292+ if (reading != null && reading.length() > 0) {
293+ suggestString = reading + wordSeparator + term;
294+ } else {
295+ suggestString = term;
296+ }
297+
298+ return suggestString;
299+ }
300+
301+ public static class TermChecker {
302+ private final Map<String, Map<String, List<String>>> paramMap = new HashMap<String, Map<String, List<String>>>(
303+ 2);
304+
305+ public TermChecker() {
306+ final Map<String, List<String>> startParamMap = new HashMap<String, List<String>>(
307+ 3);
308+ startParamMap.put("includePartOfSpeech", new ArrayList<String>());
309+ startParamMap.put("excludePartOfSpeech", new ArrayList<String>());
310+ startParamMap.put("includeCharTerm", new ArrayList<String>());
311+ paramMap.put("start", startParamMap);
312+ final Map<String, List<String>> middleParamMap = new HashMap<String, List<String>>(
313+ 3);
314+ middleParamMap.put("includePartOfSpeech", new ArrayList<String>());
315+ middleParamMap.put("excludePartOfSpeech", new ArrayList<String>());
316+ middleParamMap.put("includeCharTerm", new ArrayList<String>());
317+ paramMap.put("middle", middleParamMap);
318+ }
319+
320+ public void includePartOfSpeech(final String mode, final String value) {
321+ updateParam(mode, "includePartOfSpeech", value);
322+ }
323+
324+ public void excludePartOfSpeech(final String mode, final String value) {
325+ updateParam(mode, "excludePartOfSpeech", value);
326+ }
327+
328+ public void includeCharTerm(final String mode, final String value) {
329+ updateParam(mode, "includeCharTerm", value);
330+ }
331+
332+ private void updateParam(final String mode, final String target,
333+ final String value) {
334+ final Map<String, List<String>> modeParamMap = paramMap.get(mode);
335+ if (modeParamMap != null) {
336+ final List<String> list = modeParamMap.get(target);
337+ if (list != null) {
338+ list.add(value);
339+ }
340+ }
341+ }
342+
343+ public boolean check(final String partOfSpeech,
344+ final String termByKuromoji, final String mode) {
345+ final Map<String, List<String>> modeParamMap = paramMap.get(mode);
346+ final List<String> includePartOfSpeechList = modeParamMap
347+ .get("includePartOfSpeech");
348+ final List<String> excludePartOfSpeechList = modeParamMap
349+ .get("excludePartOfSpeech");
350+ final List<String> includeCharTermList = modeParamMap
351+ .get("includeCharTerm");
352+
353+ boolean ret = false;
354+ for (int i = 0; i < includePartOfSpeechList.size(); i++) {
355+ if (partOfSpeech.indexOf(includePartOfSpeechList.get(i)) != -1) {
356+ boolean isNg = false;
357+ for (int j = 0; j < excludePartOfSpeechList.size(); j++) {
358+ if (partOfSpeech
359+ .indexOf(excludePartOfSpeechList.get(j)) != -1) {
360+ isNg = true;
361+ }
362+ }
363+ if (!isNg) {
364+ ret = true;
365+ break;
366+ }
367+ }
368+ }
369+
370+ if (!ret) {
371+ for (int i = 0; i < includeCharTermList.size(); i++) {
372+ if (termByKuromoji.equals(includeCharTermList.get(i))) {
373+ ret = true;
374+ break;
375+ }
376+ }
377+ }
378+ return ret;
379+ }
380+ }
381+}
--- fess-solr-plugin/tags/fess-solr-plugin-8.2.0/src/main/java/jp/sf/fess/solr/plugin/analysis/SuggestTextTokenizerFactory.java (nonexistent)
+++ fess-solr-plugin/tags/fess-solr-plugin-8.2.0/src/main/java/jp/sf/fess/solr/plugin/analysis/SuggestTextTokenizerFactory.java (revision 1795)
@@ -0,0 +1,192 @@
1+/*
2+ * Copyright 2009-2013 the Fess Project and the Others.
3+ *
4+ * Licensed under the Apache License, Version 2.0 (the "License");
5+ * you may not use this file except in compliance with the License.
6+ * You may obtain a copy of the License at
7+ *
8+ * http://www.apache.org/licenses/LICENSE-2.0
9+ *
10+ * Unless required by applicable law or agreed to in writing, software
11+ * distributed under the License is distributed on an "AS IS" BASIS,
12+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
13+ * either express or implied. See the License for the specific language
14+ * governing permissions and limitations under the License.
15+ */
16+
17+package jp.sf.fess.solr.plugin.analysis;
18+
19+import java.io.InputStream;
20+import java.io.InputStreamReader;
21+import java.io.Reader;
22+import java.nio.charset.Charset;
23+import java.nio.charset.CharsetDecoder;
24+import java.nio.charset.CodingErrorAction;
25+import java.util.List;
26+import java.util.Locale;
27+import java.util.Map;
28+
29+import jp.sf.fess.solr.plugin.analysis.SuggestTextTokenizer.TermChecker;
30+import jp.sf.fess.solr.plugin.suggest.SuggestConverterCreator;
31+import jp.sf.fess.suggest.converter.SuggestConverter;
32+
33+import org.apache.lucene.analysis.Tokenizer;
34+import org.apache.lucene.analysis.ja.JapaneseTokenizer;
35+import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
36+import org.apache.lucene.analysis.ja.dict.UserDictionary;
37+import org.apache.lucene.analysis.util.ResourceLoader;
38+import org.apache.lucene.analysis.util.ResourceLoaderAware;
39+import org.apache.lucene.analysis.util.TokenizerFactory;
40+import org.apache.lucene.util.AttributeSource.AttributeFactory;
41+import org.apache.lucene.util.IOUtils;
42+import org.slf4j.Logger;
43+import org.slf4j.LoggerFactory;
44+
45+public class SuggestTextTokenizerFactory extends TokenizerFactory implements
46+ ResourceLoaderAware {
47+
48+ private static final Logger logger = LoggerFactory
49+ .getLogger(SuggestTextTokenizerFactory.class);
50+
51+ private static final String MODE = "mode";
52+
53+ private static final String USER_DICT_PATH = "userDictionary";
54+
55+ private static final String USER_DICT_ENCODING = "userDictionaryEncoding";
56+
57+ private static final String BUFFER_SIZE = "bufferSize";
58+
59+ private static final String WORD_SEPARATOR = "wordSeparator";
60+
61+ private static final String INCLUDE_CHAR_TERM = "includeCharTerm";
62+
63+ private static final String EXCLUDE_PART_OF_SPEECH = "excludePartOfSpeech";
64+
65+ private static final String INCLUDE_PART_OF_SPEECH = "includePartOfSpeech";
66+
67+ private static final String DISCARD_PUNCTUATION = "discardPunctuation"; // Expert option
68+
69+ private static final String MAX_LENGTH = "maxLength";
70+
71+ private UserDictionary userDictionary;
72+
73+ private final Mode mode;
74+
75+ private final String userDictionaryPath;
76+
77+ private final String userDictionaryEncoding;
78+
79+ private final boolean discardPunctuation;
80+
81+ private final int bufferSize;
82+
83+ private final String wordSeparator;
84+
85+ private final TermChecker termChecker;
86+
87+ private final List<SuggestConverter> preConverterList;
88+
89+ private final List<SuggestConverter> converterList;
90+
91+ private final int maxLength;
92+
93+ public SuggestTextTokenizerFactory(final Map<String, String> args) {
94+ super(args);
95+
96+ mode = getMode(args);
97+ userDictionaryPath = args.get(USER_DICT_PATH);
98+ userDictionaryEncoding = args.get(USER_DICT_ENCODING);
99+ bufferSize = getInt(args, BUFFER_SIZE, 256);
100+ wordSeparator = get(args, WORD_SEPARATOR, "_SP_");
101+ discardPunctuation = getBoolean(args, DISCARD_PUNCTUATION, true);
102+ maxLength = getInt(args, MAX_LENGTH, 0);
103+
104+ termChecker = new TermChecker();
105+ // ex. start:名詞,middle:動詞
106+ final String includePartOfSpeech = args.get(INCLUDE_PART_OF_SPEECH);
107+ if (includePartOfSpeech != null) {
108+ for (String text : includePartOfSpeech.split(",")) {
109+ text = text.trim();
110+ if (text.length() > 0) {
111+ final String[] values = text.split(":");
112+ if (values.length == 2) {
113+ termChecker.includePartOfSpeech(values[0].trim(),
114+ values[1].trim());
115+ }
116+ }
117+ }
118+ }
119+ final String excludePartOfSpeech = args.get(EXCLUDE_PART_OF_SPEECH);
120+ if (excludePartOfSpeech != null) {
121+ for (String text : excludePartOfSpeech.split(",")) {
122+ text = text.trim();
123+ if (text.length() > 0) {
124+ final String[] values = text.split(":");
125+ if (values.length == 2) {
126+ termChecker.excludePartOfSpeech(values[0].trim(),
127+ values[1].trim());
128+ }
129+ }
130+ }
131+ }
132+ final String includeCharTerm = args.get(INCLUDE_CHAR_TERM);
133+ if (includeCharTerm != null) {
134+ for (String text : includeCharTerm.split(",")) {
135+ text = text.trim();
136+ if (text.length() > 0) {
137+ final String[] values = text.split(":");
138+ if (values.length == 2) {
139+ termChecker.includeCharTerm(values[0].trim(),
140+ values[1].trim());
141+ }
142+ }
143+ }
144+ }
145+ preConverterList = SuggestConverterCreator.create(args
146+ .get("preConverters"));
147+ converterList = SuggestConverterCreator.create(args.get("converters"));
148+ }
149+
150+ @Override
151+ public Tokenizer create(final AttributeFactory factory, final Reader input) {
152+ return new SuggestTextTokenizer(input, bufferSize, userDictionary,
153+ discardPunctuation, mode, termChecker, preConverterList,
154+ converterList, wordSeparator, maxLength);
155+ }
156+
157+ @Override
158+ public void inform(final ResourceLoader loader) {
159+ try {
160+
161+ if (userDictionaryPath != null) {
162+ final InputStream stream = loader
163+ .openResource(userDictionaryPath);
164+ String encoding = userDictionaryEncoding;
165+ if (encoding == null) {
166+ encoding = IOUtils.UTF_8;
167+ }
168+ final CharsetDecoder decoder = Charset.forName(encoding)
169+ .newDecoder()
170+ .onMalformedInput(CodingErrorAction.REPORT)
171+ .onUnmappableCharacter(CodingErrorAction.REPORT);
172+ final Reader reader = new InputStreamReader(stream, decoder);
173+ userDictionary = new UserDictionary(reader);
174+ } else {
175+ userDictionary = null;
176+ }
177+
178+ } catch (final Exception e) {
179+ logger.warn("Initialization failed.", e);
180+ }
181+ }
182+
183+ private Mode getMode(final Map<String, String> args) {
184+ final String modeArg = args.get(MODE);
185+ if (modeArg != null) {
186+ return Mode.valueOf(modeArg.toUpperCase(Locale.ROOT));
187+ } else {
188+ return JapaneseTokenizer.Mode.NORMAL;
189+ }
190+ }
191+
192+}
--- fess-solr-plugin/tags/fess-solr-plugin-8.2.0/src/main/java/jp/sf/fess/solr/plugin/analysis/SuggestStringTokenizerFactory.java (nonexistent)
+++ fess-solr-plugin/tags/fess-solr-plugin-8.2.0/src/main/java/jp/sf/fess/solr/plugin/analysis/SuggestStringTokenizerFactory.java (revision 1795)
@@ -0,0 +1,139 @@
1+/*
2+ * Copyright 2009-2013 the Fess Project and the Others.
3+ *
4+ * Licensed under the Apache License, Version 2.0 (the "License");
5+ * you may not use this file except in compliance with the License.
6+ * You may obtain a copy of the License at
7+ *
8+ * http://www.apache.org/licenses/LICENSE-2.0
9+ *
10+ * Unless required by applicable law or agreed to in writing, software
11+ * distributed under the License is distributed on an "AS IS" BASIS,
12+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
13+ * either express or implied. See the License for the specific language
14+ * governing permissions and limitations under the License.
15+ */
16+
17+package jp.sf.fess.solr.plugin.analysis;
18+
19+import java.io.InputStream;
20+import java.io.InputStreamReader;
21+import java.io.Reader;
22+import java.nio.charset.Charset;
23+import java.nio.charset.CharsetDecoder;
24+import java.nio.charset.CodingErrorAction;
25+import java.util.List;
26+import java.util.Locale;
27+import java.util.Map;
28+
29+import jp.sf.fess.solr.plugin.suggest.SuggestConverterCreator;
30+import jp.sf.fess.suggest.converter.SuggestConverter;
31+
32+import org.apache.lucene.analysis.ja.JapaneseTokenizer;
33+import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
34+import org.apache.lucene.analysis.ja.dict.UserDictionary;
35+import org.apache.lucene.analysis.util.ResourceLoader;
36+import org.apache.lucene.analysis.util.ResourceLoaderAware;
37+import org.apache.lucene.analysis.util.TokenizerFactory;
38+import org.apache.lucene.util.AttributeSource.AttributeFactory;
39+import org.apache.lucene.util.IOUtils;
40+import org.slf4j.Logger;
41+import org.slf4j.LoggerFactory;
42+
43+public class SuggestStringTokenizerFactory extends TokenizerFactory implements
44+ ResourceLoaderAware {
45+
46+ private static final Logger logger = LoggerFactory
47+ .getLogger(SuggestStringTokenizerFactory.class);
48+
49+ private static final String MODE = "mode";
50+
51+ private static final String USER_DICT_PATH = "userDictionary";
52+
53+ private static final String USER_DICT_ENCODING = "userDictionaryEncoding";
54+
55+ private static final String BUFFER_SIZE = "bufferSize";
56+
57+ private static final String WORD_SEPARATOR = "wordSeparator";
58+
59+ private static final String DISCARD_PUNCTUATION = "discardPunctuation"; // Expert option
60+
61+ private UserDictionary userDictionary;
62+
63+ private final Mode mode;
64+
65+ private final String userDictionaryPath;
66+
67+ private final String userDictionaryEncoding;
68+
69+ private final boolean discardPunctuation;
70+
71+ private final int bufferSize;
72+
73+ private String wordSeparator;
74+
75+ private final List<SuggestConverter> preConverterList;
76+
77+ private final List<SuggestConverter> converterList;
78+
79+ public SuggestStringTokenizerFactory(final Map<String, String> args) {
80+ super(args);
81+
82+ mode = getMode(args);
83+ userDictionaryPath = args.get(USER_DICT_PATH);
84+ userDictionaryEncoding = args.get(USER_DICT_ENCODING);
85+ bufferSize = getInt(args, BUFFER_SIZE, 256);
86+ discardPunctuation = getBoolean(args, DISCARD_PUNCTUATION, true);
87+ wordSeparator = args.get(WORD_SEPARATOR);
88+ if (wordSeparator == null) {
89+ wordSeparator = "_SP_";
90+ }
91+
92+ preConverterList = SuggestConverterCreator.create(args
93+ .get("preConverters"));
94+ converterList = SuggestConverterCreator.create(args.get("converters"));
95+ }
96+
97+ @Override
98+ public void inform(final ResourceLoader loader) {
99+ try {
100+ if (userDictionaryPath != null) {
101+ final InputStream stream = loader
102+ .openResource(userDictionaryPath);
103+ String encoding = userDictionaryEncoding;
104+ if (encoding == null) {
105+ encoding = IOUtils.UTF_8;
106+ }
107+ final CharsetDecoder decoder = Charset.forName(encoding)
108+ .newDecoder()
109+ .onMalformedInput(CodingErrorAction.REPORT)
110+ .onUnmappableCharacter(CodingErrorAction.REPORT);
111+ final Reader reader = new InputStreamReader(stream, decoder);
112+ userDictionary = new UserDictionary(reader);
113+ } else {
114+ userDictionary = null;
115+ }
116+
117+ } catch (final Exception e) {
118+ logger.warn("Initialization failed.", e);
119+ }
120+ }
121+
122+ @Override
123+ public SuggestStringTokenizer create(final AttributeFactory factory,
124+ final Reader input) {
125+ return new SuggestStringTokenizer(input, bufferSize, userDictionary,
126+ discardPunctuation, mode, preConverterList, converterList,
127+ wordSeparator);
128+ }
129+
130+ private Mode getMode(final Map<String, String> args) {
131+ final String modeArg = args.get(MODE);
132+ if (modeArg != null) {
133+ return Mode.valueOf(modeArg.toUpperCase(Locale.ROOT));
134+ } else {
135+ return JapaneseTokenizer.Mode.NORMAL;
136+ }
137+ }
138+
139+}
--- fess-solr-plugin/tags/fess-solr-plugin-8.2.0/src/main/java/jp/sf/fess/solr/plugin/analysis/SuggestStringTokenizer.java (nonexistent)
+++ fess-solr-plugin/tags/fess-solr-plugin-8.2.0/src/main/java/jp/sf/fess/solr/plugin/analysis/SuggestStringTokenizer.java (revision 1795)
@@ -0,0 +1,220 @@
1+/*
2+ * Copyright 2009-2013 the Fess Project and the Others.
3+ *
4+ * Licensed under the Apache License, Version 2.0 (the "License");
5+ * you may not use this file except in compliance with the License.
6+ * You may obtain a copy of the License at
7+ *
8+ * http://www.apache.org/licenses/LICENSE-2.0
9+ *
10+ * Unless required by applicable law or agreed to in writing, software
11+ * distributed under the License is distributed on an "AS IS" BASIS,
12+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
13+ * either express or implied. See the License for the specific language
14+ * governing permissions and limitations under the License.
15+ */
16+
17+package jp.sf.fess.solr.plugin.analysis;
18+
19+import java.io.IOException;
20+import java.io.Reader;
21+import java.io.StringReader;
22+import java.util.ArrayList;
23+import java.util.List;
24+
25+import jp.sf.fess.suggest.converter.SuggestConverter;
26+
27+import org.apache.commons.io.IOUtils;
28+import org.apache.lucene.analysis.TokenStream;
29+import org.apache.lucene.analysis.Tokenizer;
30+import org.apache.lucene.analysis.ja.JapaneseTokenizer;
31+import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
32+import org.apache.lucene.analysis.ja.dict.UserDictionary;
33+import org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute;
34+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
35+import org.slf4j.Logger;
36+import org.slf4j.LoggerFactory;
37+
38+import com.ibm.icu.text.Transliterator;
39+
40+public class SuggestStringTokenizer extends Tokenizer {
41+ private static final Logger logger = LoggerFactory
42+ .getLogger(SuggestStringTokenizer.class);
43+
44+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
45+
46+ private int offset = 0;
47+
48+ private final List<String> termListByKuromoji = new ArrayList<String>();
49+
50+ private final List<String> readingList = new ArrayList<String>();
51+
52+ private String[] titleArray = null;
53+
54+ private final UserDictionary userDictionary;
55+
56+ private final boolean discardPunctuation;
57+
58+ private final Mode tokenizerMode;
59+
60+ private final String wordSeparator;
61+
62+ private final List<SuggestConverter> preConverterList;
63+
64+ private final List<SuggestConverter> converterList;
65+
66+ public SuggestStringTokenizer(final Reader input, final int bufferSize,
67+ final UserDictionary userDictionaryPara,
68+ final boolean discardPunctuationPara, final Mode modePara,
69+ final List<SuggestConverter> preconverterList,
70+ final List<SuggestConverter> converterList,
71+ final String wordSeparator) {
72+ super(input);
73+
74+ userDictionary = userDictionaryPara;
75+ discardPunctuation = discardPunctuationPara;
76+ tokenizerMode = modePara;
77+ termAtt.resizeBuffer(bufferSize);
78+ this.wordSeparator = wordSeparator;
79+ preConverterList = preconverterList;
80+ this.converterList = converterList;
81+
82+ initialize();
83+ }
84+
85+ public void initialize() {
86+ termListByKuromoji.clear();
87+ readingList.clear();
88+ titleArray = null;
89+ offset = 0;
90+ String inputStr = "";
91+
92+ try {
93+ final String s = IOUtils.toString(input);
94+ if (s != null && s.length() > 0) {
95+ inputStr = s;
96+ for (final SuggestConverter converter : preConverterList) {
97+ inputStr = converter.convert(inputStr);
98+ }
99+ titleArray = inputStr.split("\\$\\{and\\}");
100+ inputStr = inputStr.replace("${and}", " ");
101+ }
102+ } catch (final IOException e) {
103+ }
104+
105+ final Reader rd = new StringReader(inputStr);
106+
107+ TokenStream stream = null;
108+
109+ try {
110+ stream = new JapaneseTokenizer(rd, userDictionary,
111+ discardPunctuation, tokenizerMode);
112+
113+ stream.reset();
114+ while (stream.incrementToken()) {
115+ final CharTermAttribute att = stream
116+ .getAttribute(CharTermAttribute.class);
117+ termListByKuromoji.add(att.toString());
118+
119+ final ReadingAttribute rdAttr = stream
120+ .getAttribute(ReadingAttribute.class);
121+
122+ String reading;
123+ if (rdAttr.getReading() != null) {
124+ reading = rdAttr.getReading();
125+ } else {
126+ reading = att.toString();
127+ }
128+
129+ for (final SuggestConverter converter : converterList) {
130+ reading = converter.convert(reading);
131+ }
132+ readingList.add(reading);
133+
134+ }
135+
136+ } catch (final Exception e) {
137+ logger.warn("JapaneseTokenizer stream error", e);
138+ } finally {
139+ try {
140+ input.reset();
141+ } catch (final Exception e) {
142+ }
143+ try {
144+ stream.end();
145+ } catch (final Exception e) {
146+ }
147+ try {
148+ rd.close();
149+ } catch (final Exception e) {
150+ }
151+ }
152+ }
153+
154+ @Override
155+ public boolean incrementToken() throws IOException {
156+ if (titleArray == null || offset >= titleArray.length) {
157+ return false;
158+ }
159+
160+ termAtt.setEmpty();
161+ termAtt.append(convertSuggestString(titleArray[offset],
162+ getReading(titleArray[offset])));
163+ offset++;
164+ return true;
165+ }
166+
167+ @Override
168+ public void reset() throws IOException {
169+ super.reset();
170+ initialize();
171+ }
172+
173+ private String convertSuggestString(final String term, final String reading) {
174+ String suggestString;
175+ if (reading != null && reading.length() > 0) {
176+ suggestString = reading + wordSeparator + term;
177+ } else {
178+ suggestString = term;
179+ }
180+
181+ return suggestString;
182+ }
183+
184+ private String getReading(final String s) {
185+
186+ final StringBuilder buf = new StringBuilder();
187+
188+ for (int i = 0; i < s.length(); i++) {
189+ String term = "";
190+ int length = 0;
191+
192+ for (int j = 0; j < termListByKuromoji.size(); j++) {
193+ final String tmpStr = termListByKuromoji.get(j);
194+ if (s.substring(i).indexOf(tmpStr) == 0
195+ && tmpStr.length() > term.length()) {
196+ term = readingList.get(j);
197+ length = tmpStr.length();
198+ }
199+ }
200+ if (term.length() > 0) {
201+ buf.append(term);
202+ i += length - 1;
203+ } else {
204+ char c = s.charAt(i);
205+
206+ c = Transliterator.getInstance("Hiragana-Katakana")
207+ .transliterate(String.valueOf(c)).charAt(0);
208+
209+ buf.append(c);
210+ }
211+ }
212+
213+ String reading = buf.toString();
214+ for (final SuggestConverter converter : converterList) {
215+ reading = converter.convert(reading);
216+ }
217+
218+ return reading;
219+ }
220+}
--- fess-solr-plugin/tags/fess-solr-plugin-8.2.0/src/main/java/jp/sf/fess/solr/plugin/suggest/SuggestConverterCreator.java (nonexistent)
+++ fess-solr-plugin/tags/fess-solr-plugin-8.2.0/src/main/java/jp/sf/fess/solr/plugin/suggest/SuggestConverterCreator.java (revision 1795)
@@ -0,0 +1,151 @@
1+/*
2+ * Copyright 2009-2013 the Fess Project and the Others.
3+ *
4+ * Licensed under the Apache License, Version 2.0 (the "License");
5+ * you may not use this file except in compliance with the License.
6+ * You may obtain a copy of the License at
7+ *
8+ * http://www.apache.org/licenses/LICENSE-2.0
9+ *
10+ * Unless required by applicable law or agreed to in writing, software
11+ * distributed under the License is distributed on an "AS IS" BASIS,
12+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
13+ * either express or implied. See the License for the specific language
14+ * governing permissions and limitations under the License.
15+ */
16+
17+package jp.sf.fess.solr.plugin.suggest;
18+
19+import java.io.IOException;
20+import java.lang.reflect.Constructor;
21+import java.util.ArrayList;
22+import java.util.Collections;
23+import java.util.List;
24+import java.util.Map;
25+
26+import jp.sf.fess.suggest.converter.SuggestConverter;
27+
28+import org.apache.commons.lang.StringUtils;
29+import org.noggit.ObjectBuilder;
30+import org.slf4j.Logger;
31+import org.slf4j.LoggerFactory;
32+
33+public class SuggestConverterCreator {
34+ private static final Logger logger = LoggerFactory
35+ .getLogger(SuggestConverterCreator.class);
36+
37+ protected SuggestConverterCreator() {
38+ // nothing
39+ }
40+
41+ public static List<SuggestConverter> create(final String val) {
42+ if (StringUtils.isBlank(val)) {
43+ return Collections.emptyList();
44+ }
45+
46+ try {
47+ final Object obj = ObjectBuilder.fromJSON(val);
48+ if (obj instanceof List<?>) {
49+
50+ final List<SuggestConverter> converterList = new ArrayList<SuggestConverter>();
51+ for (final Object map : (List<Object>) obj) {
52+ if (map instanceof Map<?, ?>) {
53+ try {
54+ final Map<Object, Object> dataMap = (Map<Object, Object>) map;
55+ final String className = (String) dataMap
56+ .get("class");
57+ final Class<SuggestConverter> clazz = (Class<SuggestConverter>) Class
58+ .forName(className);
59+ final List<?> constructorArgs = (List<?>) dataMap
60+ .get("args");
61+ SuggestConverter converter;
62+ if (constructorArgs == null
63+ || constructorArgs.isEmpty()) {
64+ converter = clazz.newInstance();
65+ } else {
66+ final List<Class<?>> classList = new ArrayList<Class<?>>(
67+ constructorArgs.size());
68+ for (final Object arg : constructorArgs) {
69+ classList.add(getArgClass(arg));
70+ }
71+ final Constructor<SuggestConverter> constructor = clazz
72+ .getConstructor(classList
73+ .toArray(new Class<?>[constructorArgs
74+ .size()]));
75+ converter = constructor
76+ .newInstance(constructorArgs
77+ .toArray(new Object[constructorArgs
78+ .size()]));
79+ }
80+ updateInstance(dataMap, clazz, converter);
81+ converterList.add(converter);
82+ } catch (final Exception e) {
83+ logger.warn("Could not create a converter.", e);
84+ }
85+ } else {
86+ logger.info("Data for a converter should be an object: "
87+ + map.toString());
88+ }
89+ }
90+ return converterList;
91+ } else {
92+ logger.info("Could not create a converter list from " + val);
93+ }
94+ } catch (final IOException e) {
95+ logger.warn("Failed to parse " + val, e);
96+ }
97+
98+ return Collections.emptyList();
99+ }
100+
101+ private static void updateInstance(final Map<Object, Object> dataMap,
102+ final Class<SuggestConverter> clazz,
103+ final SuggestConverter converter) {
104+ if (clazz == null) {
105+ logger.warn("class is null. data:" + dataMap + ", converter: "
106+ + converter);
107+ return;
108+ }
109+ final List<?> methodList = (List<?>) dataMap.get("method");
110+ if (methodList != null && !methodList.isEmpty()) {
111+ for (final Object obj : methodList) {
112+ try {
113+ if (obj instanceof Map<?, ?>) {
114+ final Map<Object, Object> paramMap = (Map<Object, Object>) obj;
115+ final String methodName = (String) paramMap.get("name");
116+ final List<?> methodArgs = (List<?>) paramMap
117+ .get("args");
118+ final Class<?>[] argClasses;
119+ if (methodArgs == null || methodArgs.isEmpty()) {
120+ argClasses = null;
121+ } else {
122+ final List<Class<?>> classList = new ArrayList<Class<?>>(
123+ methodArgs.size());
124+ for (final Object arg : methodArgs) {
125+ classList.add(getArgClass(arg));
126+ }
127+ argClasses = classList
128+ .toArray(new Class<?>[classList.size()]);
129+ }
130+ clazz.getMethod(methodName, argClasses)
131+ .invoke(converter,
132+ methodArgs
133+ .toArray(new Object[methodArgs
134+ .size()]));
135+ }
136+ } catch (final Exception e) {
137+ logger.warn("Failed to invoke: " + obj.toString(), e);
138+ }
139+ }
140+ }
141+
142+ }
143+
144+ private static Class<? extends Object> getArgClass(final Object arg) {
145+ final Class<? extends Object> clazz = arg.getClass();
146+ if (clazz.equals(ArrayList.class)) {
147+ return List.class;
148+ }
149+ return clazz;
150+ }
151+}
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
--- fess-solr-plugin/tags/fess-solr-plugin-8.2.0/src/main/java/jp/sf/fess/solr/plugin/search/WordFreqValueSourceParser.java (nonexistent)
+++ fess-solr-plugin/tags/fess-solr-plugin-8.2.0/src/main/java/jp/sf/fess/solr/plugin/search/WordFreqValueSourceParser.java (revision 1795)
@@ -0,0 +1,101 @@
1+package jp.sf.fess.solr.plugin.search;
2+
3+import java.io.IOException;
4+import java.util.HashSet;
5+import java.util.Map;
6+import java.util.Set;
7+
8+import org.apache.commons.lang.StringUtils;
9+import org.apache.lucene.document.Document;
10+import org.apache.lucene.index.AtomicReaderContext;
11+import org.apache.lucene.queries.function.FunctionValues;
12+import org.apache.lucene.queries.function.ValueSource;
13+import org.apache.lucene.queries.function.docvalues.IntDocValues;
14+import org.apache.lucene.search.IndexSearcher;
15+import org.apache.solr.search.FunctionQParser;
16+import org.apache.solr.search.SyntaxError;
17+import org.apache.solr.search.ValueSourceParser;
18+
19+public class WordFreqValueSourceParser extends ValueSourceParser {
20+
21+ @Override
22+ public ValueSource parse(final FunctionQParser fp) throws SyntaxError {
23+ final String field = fp.parseArg();
24+ final String word = fp.parseArg();
25+ final boolean normalized = !"false".equals(fp.parseArg());
26+ return new WordFreqValueSource(field, word, normalized);
27+ }
28+
29+ public static class WordFreqValueSource extends ValueSource {
30+ protected final String field;
31+
32+ protected final String word;
33+
34+ protected final boolean normalized;
35+
36+ public WordFreqValueSource(final String field, final String word,
37+ final boolean normalized) {
38+ this.field = field;
39+ this.word = normalized ? normalize(word) : word;
40+ this.normalized = normalized;
41+ }
42+
43+ public String name() {
44+ return "wordfreq";
45+ }
46+
47+ protected String normalize(final String value) {
48+ return value.toLowerCase();
49+ }
50+
51+ @Override
52+ public FunctionValues getValues(final Map context,
53+ final AtomicReaderContext readerContext) throws IOException {
54+ return new IntDocValues(this) {
55+ @Override
56+ public int intVal(final int docId) {
57+ final IndexSearcher searcher = (IndexSearcher) context
58+ .get("searcher");
59+ final Set<String> fieldSet = new HashSet<String>();
60+ fieldSet.add(field);
61+ try {
62+ final Document doc = searcher.doc(docId, fieldSet);
63+ if (doc != null) {
64+ String value = doc.get(field);
65+ if (normalized) {
66+ value = normalize(value);
67+ }
68+ return StringUtils.countMatches(value, word);
69+ }
70+ } catch (final IOException e) {
71+ // ignore
72+ }
73+ return 0;
74+ }
75+ };
76+ }
77+
78+ @Override
79+ public boolean equals(final Object o) {
80+ if (this.getClass() != o.getClass()) {
81+ return false;
82+ }
83+ final WordFreqValueSource other = (WordFreqValueSource) o;
84+ return field.equals(other.field) && word.equals(other.word)
85+ && normalized == other.normalized;
86+
87+ }
88+
89+ @Override
90+ public int hashCode() {
91+ return (field + word).hashCode() + (normalized ? 1231 : 1237);
92+
93+ }
94+
95+ @Override
96+ public String description() {
97+ return name() + '(' + field + ',' + word + ')';
98+ }
99+
100+ }
101+}
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
--- fess-solr-plugin/tags/fess-solr-plugin-8.2.0/src/test/java/jp/sf/fess/solr/plugin/SuggestConverterCreatorTest.java (nonexistent)
+++ fess-solr-plugin/tags/fess-solr-plugin-8.2.0/src/test/java/jp/sf/fess/solr/plugin/SuggestConverterCreatorTest.java (revision 1795)
@@ -0,0 +1,86 @@
1+/*
2+ * Copyright 2009-2013 the Fess Project and the Others.
3+ *
4+ * Licensed under the Apache License, Version 2.0 (the "License");
5+ * you may not use this file except in compliance with the License.
6+ * You may obtain a copy of the License at
7+ *
8+ * http://www.apache.org/licenses/LICENSE-2.0
9+ *
10+ * Unless required by applicable law or agreed to in writing, software
11+ * distributed under the License is distributed on an "AS IS" BASIS,
12+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
13+ * either express or implied. See the License for the specific language
14+ * governing permissions and limitations under the License.
15+ */
16+
17+package jp.sf.fess.solr.plugin;
18+
19+import static org.hamcrest.core.Is.is;
20+import static org.junit.Assert.assertThat;
21+
22+import java.util.List;
23+
24+import jp.sf.fess.solr.plugin.suggest.SuggestConverterCreator;
25+import jp.sf.fess.suggest.converter.SuggestConverter;
26+
27+import org.junit.Test;
28+
29+public class SuggestConverterCreatorTest {
30+ @Test
31+ public void createTwoInstance() {
32+ final String text = "["
33+ + //
34+ "{\"class\":\"jp.sf.fess.suggest.converter.SymbolConverter\","
35+ + "\"method\":[{\"name\":\"addSymbol\",\"args\":[[\"A\"]]}]}"
36+ + ","
37+ + //
38+ "{\"class\":\"jp.sf.fess.suggest.converter.SymbolConverter\",\"args\":[\"B\",\"E\"],"
39+ + "\"method\":[{\"name\":\"addSymbol\",\"args\":[[\"X\"]]},{\"name\":\"addSymbol\",\"args\":[[\"Y\"]]}]}"
40+ + ","
41+ + //
42+ "{\"class\":\"jp.sf.fess.suggest.converter.ReplaceConverter\","
43+ + "\"method\":[{\"name\":\"addReplaceString\",\"args\":[\"x\",\"X\"]},{\"name\":\"addReplaceString\",\"args\":[\"y\",\"Y\"]}]}"
44+ + //
45+ "]";
46+ final List<SuggestConverter> list = SuggestConverterCreator
47+ .create(text);
48+ assertThat(list.size(), is(3));
49+ assertThat(list.get(0).getClass().getName(),
50+ is("jp.sf.fess.suggest.converter.SymbolConverter"));
51+ assertThat(list.get(0).convert("abcABC"), is("abc__ID0__BC"));
52+ assertThat(list.get(1).getClass().getName(),
53+ is("jp.sf.fess.suggest.converter.SymbolConverter"));
54+ assertThat(list.get(1).convert("xyzXYZ"), is("xyzB0EB1EZ"));
55+ assertThat(list.get(2).getClass().getName(),
56+ is("jp.sf.fess.suggest.converter.ReplaceConverter"));
57+ assertThat(list.get(2).convert("xyzXYZ"), is("XYzXYZ"));
58+
59+ }
60+
61+ @Test
62+ public void createOneInstance() {
63+ final String text = "["
64+ + //
65+ "{\"class\":\"jp.sf.fess.suggest.converter.ICUConverter\",\"args\":[\"Fullwidth-Halfwidth\"]}"
66+ + //
67+ "]";
68+ final List<SuggestConverter> list = SuggestConverterCreator
69+ .create(text);
70+ assertThat(list.size(), is(1));
71+ assertThat(list.get(0).getClass().getName(),
72+ is("jp.sf.fess.suggest.converter.ICUConverter"));
73+
74+ }
75+
76+ @Test
77+ public void createEmpty() {
78+ List<SuggestConverter> list;
79+
80+ list = SuggestConverterCreator.create("");
81+ assertThat(list.size(), is(0));
82+
83+ list = SuggestConverterCreator.create(null);
84+ assertThat(list.size(), is(0));
85+ }
86+}
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
--- fess-solr-plugin/tags/fess-solr-plugin-8.2.0/src/etc/header.txt (nonexistent)
+++ fess-solr-plugin/tags/fess-solr-plugin-8.2.0/src/etc/header.txt (revision 1795)
@@ -0,0 +1,14 @@
1+Copyright 2009-${year} the Fess Project and the Others.
2+
3+Licensed under the Apache License, Version 2.0 (the "License");
4+you may not use this file except in compliance with the License.
5+You may obtain a copy of the License at
6+
7+ http://www.apache.org/licenses/LICENSE-2.0
8+
9+Unless required by applicable law or agreed to in writing, software
10+distributed under the License is distributed on an "AS IS" BASIS,
11+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12+either express or implied. See the License for the specific language
13+governing permissions and limitations under the License.
14+
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
--- fess-solr-plugin/tags/fess-solr-plugin-8.2.0/src/etc/header-definition.xml (nonexistent)
+++ fess-solr-plugin/tags/fess-solr-plugin-8.2.0/src/etc/header-definition.xml (revision 1795)
@@ -0,0 +1,13 @@
1+<?xml version="1.0" encoding="UTF-8"?>
2+<additionalHeaders>
3+ <javadoc_style>
4+ <firstLine>/*</firstLine>
5+ <beforeEachLine> * </beforeEachLine>
6+ <endLine> */</endLine>
7+ <!--skipLine></skipLine-->
8+ <firstLineDetectionPattern>(\s|\t)*/\*.*$</firstLineDetectionPattern>
9+ <lastLineDetectionPattern>.*\*/(\s|\t)*$</lastLineDetectionPattern>
10+ <allowBlankLines>false</allowBlankLines>
11+ <isMultiline>true</isMultiline>
12+ </javadoc_style>
13+</additionalHeaders>
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Show on old repository browser