1   package org.apache.lucene.search;
2   
3   /**
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements.  See the NOTICE file distributed with
6    * this work for additional information regarding copyright ownership.
7    * The ASF licenses this file to You under the Apache License, Version 2.0
8    * (the "License"); you may not use this file except in compliance with
9    * the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  import org.apache.lucene.index.IndexReader;
21  import org.apache.lucene.util.ToStringUtils;
22  import org.apache.lucene.search.BooleanClause.Occur;
23  
24  import java.io.IOException;
25  import java.util.*;
26  
27  /** A Query that matches documents matching boolean combinations of other
28    * queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other
29    * BooleanQuerys.
30    */
31  public class BooleanQuery extends Query {
32  
33    
34    private static int maxClauseCount = 1024;
35  
36    /** Thrown when an attempt is made to add more than {@link
37     * #getMaxClauseCount()} clauses. This typically happens if
38     * a PrefixQuery, FuzzyQuery, WildcardQuery, or RangeQuery 
39     * is expanded to many terms during search. 
40     */
41    public static class TooManyClauses extends RuntimeException {
42      public TooManyClauses() {}
43      public String getMessage() {
44        return "maxClauseCount is set to " + maxClauseCount;
45      }
46    }
47  
48    /** Return the maximum number of clauses permitted, 1024 by default.
49     * Attempts to add more than the permitted number of clauses cause {@link
50     * TooManyClauses} to be thrown.
51     * @see #setMaxClauseCount(int)
52     */
53    public static int getMaxClauseCount() { return maxClauseCount; }
54  
55    /** Set the maximum number of clauses permitted per BooleanQuery.
56     * Default value is 1024.
57     * <p>TermQuery clauses are generated from for example prefix queries and
58     * fuzzy queries. Each TermQuery needs some buffer space during search,
59     * so this parameter indirectly controls the maximum buffer requirements for
60     * query search.
61     * <p>When this parameter becomes a bottleneck for a Query one can use a
62     * Filter. For example instead of a {@link RangeQuery} one can use a
63     * {@link RangeFilter}.
64     * <p>Normally the buffers are allocated by the JVM. When using for example
65     * {@link org.apache.lucene.store.MMapDirectory} the buffering is left to
66     * the operating system.
67     */
68    public static void setMaxClauseCount(int maxClauseCount) {
69      if (maxClauseCount < 1)
70        throw new IllegalArgumentException("maxClauseCount must be >= 1");
71      BooleanQuery.maxClauseCount = maxClauseCount;
72    }
73  
74    private ArrayList clauses = new ArrayList();
75    private boolean disableCoord;
76  
77    /** Constructs an empty boolean query. */
78    public BooleanQuery() {}
79  
80    /** Constructs an empty boolean query.
81     *
82     * {@link Similarity#coord(int,int)} may be disabled in scoring, as
83     * appropriate. For example, this score factor does not make sense for most
84     * automatically generated queries, like {@link WildcardQuery} and {@link
85     * FuzzyQuery}.
86     *
87     * @param disableCoord disables {@link Similarity#coord(int,int)} in scoring.
88     */
89    public BooleanQuery(boolean disableCoord) {
90      this.disableCoord = disableCoord;
91    }
92  
93    /** Returns true iff {@link Similarity#coord(int,int)} is disabled in
94     * scoring for this query instance.
95     * @see #BooleanQuery(boolean)
96     */
97    public boolean isCoordDisabled() { return disableCoord; }
98  
99    // Implement coord disabling.
100   // Inherit javadoc.
101   public Similarity getSimilarity(Searcher searcher) {
102     Similarity result = super.getSimilarity(searcher);
103     if (disableCoord) {                           // disable coord as requested
104       result = new SimilarityDelegator(result) {
105           public float coord(int overlap, int maxOverlap) {
106             return 1.0f;
107           }
108         };
109     }
110     return result;
111   }
112 
113   /**
114    * Specifies a minimum number of the optional BooleanClauses
115    * which must be satisfied.
116    *
117    * <p>
118    * By default no optional clauses are necessary for a match
119    * (unless there are no required clauses).  If this method is used,
120    * then the specified number of clauses is required.
121    * </p>
122    * <p>
123    * Use of this method is totally independent of specifying that
124    * any specific clauses are required (or prohibited).  This number will
125    * only be compared against the number of matching optional clauses.
126    * </p>
127    * <p>
128    * EXPERT NOTE: Using this method may force collecting docs in order,
129    * regardless of whether setAllowDocsOutOfOrder(true) has been called.
130    * </p>
131    *
132    * @param min the number of optional clauses that must match
133    * @see #setAllowDocsOutOfOrder
134    */
135   public void setMinimumNumberShouldMatch(int min) {
136     this.minNrShouldMatch = min;
137   }
138   protected int minNrShouldMatch = 0;
139 
140   /**
141    * Gets the minimum number of the optional BooleanClauses
142    * which must be satisifed.
143    */
144   public int getMinimumNumberShouldMatch() {
145     return minNrShouldMatch;
146   }
147 
148   /** Adds a clause to a boolean query.
149    *
150    * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number
151    * @see #getMaxClauseCount()
152    */
153   public void add(Query query, BooleanClause.Occur occur) {
154     add(new BooleanClause(query, occur));
155   }
156 
157   /** Adds a clause to a boolean query.
158    * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number
159    * @see #getMaxClauseCount()
160    */
161   public void add(BooleanClause clause) {
162     if (clauses.size() >= maxClauseCount)
163       throw new TooManyClauses();
164 
165     clauses.add(clause);
166   }
167 
168   /** Returns the set of clauses in this query. */
169   public BooleanClause[] getClauses() {
170     return (BooleanClause[])clauses.toArray(new BooleanClause[clauses.size()]);
171   }
172 
173   /** Returns the list of clauses in this query. */
174   public List clauses() { return clauses; }
175 
176   private class BooleanWeight implements Weight {
177     protected Similarity similarity;
178     protected ArrayList weights = new ArrayList();
179 
180     public BooleanWeight(Searcher searcher)
181       throws IOException {
182       this.similarity = getSimilarity(searcher);
183       for (int i = 0 ; i < clauses.size(); i++) {
184         BooleanClause c = (BooleanClause)clauses.get(i);
185         weights.add(c.getQuery().createWeight(searcher));
186       }
187     }
188 
189     public Query getQuery() { return BooleanQuery.this; }
190     public float getValue() { return getBoost(); }
191 
192     public float sumOfSquaredWeights() throws IOException {
193       float sum = 0.0f;
194       for (int i = 0 ; i < weights.size(); i++) {
195         BooleanClause c = (BooleanClause)clauses.get(i);
196         Weight w = (Weight)weights.get(i);
197         // call sumOfSquaredWeights for all clauses in case of side effects
198         float s = w.sumOfSquaredWeights();         // sum sub weights
199         if (!c.isProhibited())
200           // only add to sum for non-prohibited clauses
201           sum += s;
202       }
203 
204       sum *= getBoost() * getBoost();             // boost each sub-weight
205 
206       return sum ;
207     }
208 
209 
210     public void normalize(float norm) {
211       norm *= getBoost();                         // incorporate boost
212       for (int i = 0 ; i < weights.size(); i++) {
213         Weight w = (Weight)weights.get(i);
214         // normalize all clauses, (even if prohibited in case of side affects)
215         w.normalize(norm);
216       }
217     }
218 
219     /** @return Returns BooleanScorer2 that uses and provides skipTo(),
220      *          and scores documents in document number order.
221      */
222     public Scorer scorer(IndexReader reader) throws IOException {
223       BooleanScorer2 result = new BooleanScorer2(similarity,
224                                                  minNrShouldMatch,
225                                                  allowDocsOutOfOrder);
226 
227       for (int i = 0 ; i < weights.size(); i++) {
228         BooleanClause c = (BooleanClause)clauses.get(i);
229         Weight w = (Weight)weights.get(i);
230         Scorer subScorer = w.scorer(reader);
231         if (subScorer != null)
232           result.add(subScorer, c.isRequired(), c.isProhibited());
233         else if (c.isRequired())
234           return null;
235       }
236 
237       return result;
238     }
239 
240     public Explanation explain(IndexReader reader, int doc)
241       throws IOException {
242       final int minShouldMatch =
243         BooleanQuery.this.getMinimumNumberShouldMatch();
244       ComplexExplanation sumExpl = new ComplexExplanation();
245       sumExpl.setDescription("sum of:");
246       int coord = 0;
247       int maxCoord = 0;
248       float sum = 0.0f;
249       boolean fail = false;
250       int shouldMatchCount = 0;
251       for (int i = 0 ; i < weights.size(); i++) {
252         BooleanClause c = (BooleanClause)clauses.get(i);
253         Weight w = (Weight)weights.get(i);
254         Explanation e = w.explain(reader, doc);
255         if (!c.isProhibited()) maxCoord++;
256         if (e.isMatch()) {
257           if (!c.isProhibited()) {
258             sumExpl.addDetail(e);
259             sum += e.getValue();
260             coord++;
261           } else {
262             Explanation r =
263               new Explanation(0.0f, "match on prohibited clause (" + c.getQuery().toString() + ")");
264             r.addDetail(e);
265             sumExpl.addDetail(r);
266             fail = true;
267           }
268           if (c.getOccur().equals(Occur.SHOULD))
269             shouldMatchCount++;
270         } else if (c.isRequired()) {
271           Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")");
272           r.addDetail(e);
273           sumExpl.addDetail(r);
274           fail = true;
275         }
276       }
277       if (fail) {
278         sumExpl.setMatch(Boolean.FALSE);
279         sumExpl.setValue(0.0f);
280         sumExpl.setDescription
281           ("Failure to meet condition(s) of required/prohibited clause(s)");
282         return sumExpl;
283       } else if (shouldMatchCount < minShouldMatch) {
284         sumExpl.setMatch(Boolean.FALSE);
285         sumExpl.setValue(0.0f);
286         sumExpl.setDescription("Failure to match minimum number "+
287                                "of optional clauses: " + minShouldMatch);
288         return sumExpl;
289       }
290       
291       sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE);
292       sumExpl.setValue(sum);
293       
294       float coordFactor = similarity.coord(coord, maxCoord);
295       if (coordFactor == 1.0f)                      // coord is no-op
296         return sumExpl;                             // eliminate wrapper
297       else {
298         ComplexExplanation result = new ComplexExplanation(sumExpl.isMatch(),
299                                                            sum*coordFactor,
300                                                            "product of:");
301         result.addDetail(sumExpl);
302         result.addDetail(new Explanation(coordFactor,
303                                          "coord("+coord+"/"+maxCoord+")"));
304         return result;
305       }
306     }
307   }
308 
309   /** Whether hit docs may be collected out of docid order. */
310   private static boolean allowDocsOutOfOrder = false;
311 
312   /**
313    * Expert: Indicates whether hit docs may be collected out of docid
314    * order.
315    *
316    * <p>
317    * Background: although the contract of the Scorer class requires that
318    * documents be iterated in order of doc id, this was not true in early
319    * versions of Lucene.  Many pieces of functionality in the current
320    * Lucene code base have undefined behavior if this contract is not
321    * upheld, but in some specific simple cases may be faster.  (For
322    * example: disjunction queries with less than 32 prohibited clauses;
323    * This setting has no effect for other queries.)
324    * </p>
325    *
326    * <p>
327    * Specifics: By setting this option to true, calls to 
328    * {@link HitCollector#collect(int,float)} might be
329    * invoked first for docid N and only later for docid N-1.
330    * Being static, this setting is system wide.
331    * </p>
332    */
333   public static void setAllowDocsOutOfOrder(boolean allow) {
334     allowDocsOutOfOrder = allow;
335   }  
336   
337   /**
338    * Whether hit docs may be collected out of docid order.
339    * @see #setAllowDocsOutOfOrder(boolean)
340    */
341   public static boolean getAllowDocsOutOfOrder() {
342     return allowDocsOutOfOrder;
343   }  
344   
345   /**
346    * @deprecated Use {@link #setAllowDocsOutOfOrder(boolean)} instead. 
347    */
348   public static void setUseScorer14(boolean use14) {
349     setAllowDocsOutOfOrder(use14);
350   }
351   
352   /**
353    * @deprecated Use {@link #getAllowDocsOutOfOrder()} instead.
354    */
355   public static boolean getUseScorer14() {
356     return getAllowDocsOutOfOrder();
357   }
358 
359   protected Weight createWeight(Searcher searcher) throws IOException {
360     return new BooleanWeight(searcher);
361   }
362 
363   public Query rewrite(IndexReader reader) throws IOException {
364     if (minNrShouldMatch == 0 && clauses.size() == 1) {                    // optimize 1-clause queries
365       BooleanClause c = (BooleanClause)clauses.get(0);
366       if (!c.isProhibited()) {            // just return clause
367 
368         Query query = c.getQuery().rewrite(reader);    // rewrite first
369 
370         if (getBoost() != 1.0f) {                 // incorporate boost
371           if (query == c.getQuery())                   // if rewrite was no-op
372             query = (Query)query.clone();         // then clone before boost
373           query.setBoost(getBoost() * query.getBoost());
374         }
375 
376         return query;
377       }
378     }
379 
380     BooleanQuery clone = null;                    // recursively rewrite
381     for (int i = 0 ; i < clauses.size(); i++) {
382       BooleanClause c = (BooleanClause)clauses.get(i);
383       Query query = c.getQuery().rewrite(reader);
384       if (query != c.getQuery()) {                     // clause rewrote: must clone
385         if (clone == null)
386           clone = (BooleanQuery)this.clone();
387         clone.clauses.set(i, new BooleanClause(query, c.getOccur()));
388       }
389     }
390     if (clone != null) {
391       return clone;                               // some clauses rewrote
392     } else
393       return this;                                // no clauses rewrote
394   }
395 
396   // inherit javadoc
397   public void extractTerms(Set terms) {
398       for (Iterator i = clauses.iterator(); i.hasNext();) {
399           BooleanClause clause = (BooleanClause) i.next();
400           clause.getQuery().extractTerms(terms);
401         }
402   }
403 
404   public Object clone() {
405     BooleanQuery clone = (BooleanQuery)super.clone();
406     clone.clauses = (ArrayList)this.clauses.clone();
407     return clone;
408   }
409 
410   /** Prints a user-readable version of this query. */
411   public String toString(String field) {
412     StringBuffer buffer = new StringBuffer();
413     boolean needParens=(getBoost() != 1.0) || (getMinimumNumberShouldMatch()>0) ;
414     if (needParens) {
415       buffer.append("(");
416     }
417 
418     for (int i = 0 ; i < clauses.size(); i++) {
419       BooleanClause c = (BooleanClause)clauses.get(i);
420       if (c.isProhibited())
421         buffer.append("-");
422       else if (c.isRequired())
423         buffer.append("+");
424 
425       Query subQuery = c.getQuery();
426       if (subQuery instanceof BooleanQuery) {     // wrap sub-bools in parens
427         buffer.append("(");
428         buffer.append(c.getQuery().toString(field));
429         buffer.append(")");
430       } else
431         buffer.append(c.getQuery().toString(field));
432 
433       if (i != clauses.size()-1)
434         buffer.append(" ");
435     }
436 
437     if (needParens) {
438       buffer.append(")");
439     }
440 
441     if (getMinimumNumberShouldMatch()>0) {
442       buffer.append('~');
443       buffer.append(getMinimumNumberShouldMatch());
444     }
445 
446     if (getBoost() != 1.0f)
447     {
448       buffer.append(ToStringUtils.boost(getBoost()));
449     }
450 
451     return buffer.toString();
452   }
453 
454   /** Returns true iff <code>o</code> is equal to this. */
455   public boolean equals(Object o) {
456     if (!(o instanceof BooleanQuery))
457       return false;
458     BooleanQuery other = (BooleanQuery)o;
459     return (this.getBoost() == other.getBoost())
460         && this.clauses.equals(other.clauses)
461         && this.getMinimumNumberShouldMatch() == other.getMinimumNumberShouldMatch();
462   }
463 
464   /** Returns a hash code value for this object.*/
465   public int hashCode() {
466     return Float.floatToIntBits(getBoost()) ^ clauses.hashCode()
467            + getMinimumNumberShouldMatch();
468   }
469 
470 }
471