修订版 | 14 (tree) |
---|---|
时间 | 2010-04-28 20:45:14 |
作者 | (del#42041) |
added java version of edit distance
@@ -0,0 +1,71 @@ | ||
1 | +//package libsvm; | |
2 | +import java.io.*; | |
3 | +import java.util.*; | |
4 | + | |
5 | +class LevenshteinDistance | |
6 | +{ | |
7 | + private int isutf8(String args) throws UnsupportedEncodingException | |
8 | + { | |
9 | + int ret = -1; | |
10 | + byte[] c = args.getBytes("UTF-8"); | |
11 | + if((c[0]&0x80) == 0x00) ret = 1; | |
12 | + else if((c[0]&0xe0) == 0xc0 && (c[1]&0xc0) == 0x80) ret = 2; | |
13 | + else if((c[0]&0xf0) == 0xe0 && (c[1]&0xc0) == 0x80 && (c[2]&0xc0) == 0x80 ) ret = 3; | |
14 | + return(ret); | |
15 | + } | |
16 | + Boolean equal(String s1, String s2) throws UnsupportedEncodingException | |
17 | + { | |
18 | + Boolean ret = false; | |
19 | + if(isutf8(s1)==isutf8(s2)) | |
20 | + { | |
21 | + int f=0; | |
22 | + for(int i=0;i<isutf8(s1);++i) | |
23 | + { | |
24 | + byte[] c1 = s1.getBytes("UTF-8"); | |
25 | + byte[] c2 = s2.getBytes("UTF-8"); | |
26 | + if(c1[i]==c2[i]) ++f; | |
27 | + } | |
28 | + if(f==isutf8(s1)) ret = true; | |
29 | + } | |
30 | + return(ret); | |
31 | + } | |
32 | + int edit(String px, String py) throws UnsupportedEncodingException | |
33 | + { | |
34 | + Vector<Vector> row = new Vector<Vector>(); | |
35 | + int len1=0,len2=0; | |
36 | + int i,j; | |
37 | + int result; | |
38 | + len1 = px.length(); | |
39 | + len2 = py.length(); | |
40 | + for(i=0; i<len1+1; ++i) | |
41 | + { | |
42 | + row.add(new Vector()); | |
43 | + for(j=0; j<len2+1; ++j) | |
44 | + { | |
45 | + row.get(i).add(0); | |
46 | + } | |
47 | + } | |
48 | + | |
49 | + for(i=0;i<len1+1;i++) row.get(i).set(0,i); | |
50 | + for(i=0;i<len2+1;i++) row.get(0).set(i,i); | |
51 | + for(i=1;i<=len1;++i) | |
52 | + { | |
53 | + for(j=1;j<=len2;++j) | |
54 | + { | |
55 | + row.get(i).set(j,Math.min(Math.min( | |
56 | + (Integer)(row.get(i-1).get(j-1)) + (equal(px.substring(i-1),py.substring(j-1))?0:1) , // replace | |
57 | + (Integer)(row.get(i).get(j-1)) + 1), // delete | |
58 | + (Integer)(row.get(i-1).get(j)) + 1 )); // insert | |
59 | + } | |
60 | + } | |
61 | + result=(Integer)(row.get(len1).get(len2)); | |
62 | + | |
63 | + return result; | |
64 | + } | |
65 | + | |
66 | + public static void main(String[] args) throws UnsupportedEncodingException | |
67 | + { | |
68 | + LevenshteinDistance ld = new LevenshteinDistance() ; | |
69 | + System.out.println(ld.edit("あい","あうう")); | |
70 | + } | |
71 | +} | |
\ No newline at end of file |