1 package org.apache.lucene.search;
2
3
19
20 import org.apache.lucene.index.IndexReader;
21 import org.apache.lucene.index.Term;
22 import org.apache.lucene.util.PriorityQueue;
23 import org.apache.lucene.util.ToStringUtils;
24
25 import java.io.IOException;
26
27
30 public class FuzzyQuery extends MultiTermQuery {
31
32 public final static float defaultMinSimilarity = 0.5f;
33 public final static int defaultPrefixLength = 0;
34
35 private float minimumSimilarity;
36 private int prefixLength;
37
38
54 public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength) throws IllegalArgumentException {
55 super(term);
56
57 if (minimumSimilarity >= 1.0f)
58 throw new IllegalArgumentException("minimumSimilarity >= 1");
59 else if (minimumSimilarity < 0.0f)
60 throw new IllegalArgumentException("minimumSimilarity < 0");
61 if (prefixLength < 0)
62 throw new IllegalArgumentException("prefixLength < 0");
63
64 this.minimumSimilarity = minimumSimilarity;
65 this.prefixLength = prefixLength;
66 }
67
68
71 public FuzzyQuery(Term term, float minimumSimilarity) throws IllegalArgumentException {
72 this(term, minimumSimilarity, defaultPrefixLength);
73 }
74
75
78 public FuzzyQuery(Term term) {
79 this(term, defaultMinSimilarity, defaultPrefixLength);
80 }
81
82
86 public float getMinSimilarity() {
87 return minimumSimilarity;
88 }
89
90
95 public int getPrefixLength() {
96 return prefixLength;
97 }
98
99 protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
100 return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity, prefixLength);
101 }
102
103 public Query rewrite(IndexReader reader) throws IOException {
104 FilteredTermEnum enumerator = getEnum(reader);
105 int maxClauseCount = BooleanQuery.getMaxClauseCount();
106 ScoreTermQueue stQueue = new ScoreTermQueue(maxClauseCount);
107 ScoreTerm reusableST = null;
108
109 try {
110 do {
111 float score = 0.0f;
112 Term t = enumerator.term();
113 if (t != null) {
114 score = enumerator.difference();
115 if (reusableST == null) {
116 reusableST = new ScoreTerm(t, score);
117 } else if (score >= reusableST.score) {
118 reusableST.score = score;
122 reusableST.term = t;
123 } else {
124 continue;
125 }
126
127 reusableST = (ScoreTerm) stQueue.insertWithOverflow(reusableST);
128 }
129 } while (enumerator.next());
130 } finally {
131 enumerator.close();
132 }
133
134 BooleanQuery query = new BooleanQuery(true);
135 int size = stQueue.size();
136 for(int i = 0; i < size; i++){
137 ScoreTerm st = (ScoreTerm) stQueue.pop();
138 TermQuery tq = new TermQuery(st.term); tq.setBoost(getBoost() * st.score); query.add(tq, BooleanClause.Occur.SHOULD); }
142
143 return query;
144 }
145
146 public String toString(String field) {
147 StringBuffer buffer = new StringBuffer();
148 Term term = getTerm();
149 if (!term.field().equals(field)) {
150 buffer.append(term.field());
151 buffer.append(":");
152 }
153 buffer.append(term.text());
154 buffer.append('~');
155 buffer.append(Float.toString(minimumSimilarity));
156 buffer.append(ToStringUtils.boost(getBoost()));
157 return buffer.toString();
158 }
159
160 protected static class ScoreTerm {
161 public Term term;
162 public float score;
163
164 public ScoreTerm(Term term, float score){
165 this.term = term;
166 this.score = score;
167 }
168 }
169
170 protected static class ScoreTermQueue extends PriorityQueue {
171
172 public ScoreTermQueue(int size){
173 initialize(size);
174 }
175
176
179 protected boolean lessThan(Object a, Object b) {
180 ScoreTerm termA = (ScoreTerm)a;
181 ScoreTerm termB = (ScoreTerm)b;
182 if (termA.score == termB.score)
183 return termA.term.compareTo(termB.term) > 0;
184 else
185 return termA.score < termB.score;
186 }
187
188 }
189
190 public boolean equals(Object o) {
191 if (this == o) return true;
192 if (!(o instanceof FuzzyQuery)) return false;
193 if (!super.equals(o)) return false;
194
195 final FuzzyQuery fuzzyQuery = (FuzzyQuery) o;
196
197 if (minimumSimilarity != fuzzyQuery.minimumSimilarity) return false;
198 if (prefixLength != fuzzyQuery.prefixLength) return false;
199
200 return true;
201 }
202
203 public int hashCode() {
204 int result = super.hashCode();
205 result = 29 * result + minimumSimilarity != +0.0f ? Float.floatToIntBits(minimumSimilarity) : 0;
206 result = 29 * result + prefixLength;
207 return result;
208 }
209 }
210