1   package org.apache.lucene.index;
2   
3   /**
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements.  See the NOTICE file distributed with
6    * this work for additional information regarding copyright ownership.
7    * The ASF licenses this file to You under the Apache License, Version 2.0
8    * (the "License"); you may not use this file except in compliance with
9    * the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  import org.apache.lucene.document.Document;
21  import org.apache.lucene.document.FieldSelector;
22  import org.apache.lucene.search.Similarity;
23  import org.apache.lucene.store.*;
24  
25  import java.io.File;
26  import java.io.FileOutputStream;
27  import java.io.IOException;
28  import java.util.Arrays;
29  import java.util.Collection;
30  
31  /** IndexReader is an abstract class, providing an interface for accessing an
32   index.  Search of an index is done entirely through this abstract interface,
33   so that any subclass which implements it is searchable.
34  
35   <p> Concrete subclasses of IndexReader are usually constructed with a call to
36   one of the static <code>open()</code> methods, e.g. {@link #open(String)}.
37  
38   <p> For efficiency, in this API documents are often referred to via
39   <i>document numbers</i>, non-negative integers which each name a unique
40   document in the index.  These document numbers are ephemeral--they may change
41   as documents are added to and deleted from an index.  Clients should thus not
42   rely on a given document having the same number between sessions.
43  
44   <p> An IndexReader can be opened on a directory for which an IndexWriter is
45   opened already, but it cannot be used to delete documents from the index then.
46  
47   <p>
48   <b>NOTE</b>: for backwards API compatibility, several methods are not listed 
49   as abstract, but have no useful implementations in this base class and 
50   instead always throw UnsupportedOperationException.  Subclasses are 
51   strongly encouraged to override these methods, but in many cases may not 
52   need to.
53   </p>
54  
55   <p>
56  
57   <b>NOTE</b>: as of 2.4, it's possible to open a read-only
58   IndexReader using one of the static open methods that
59   accepts the boolean readOnly parameter.  Such a reader has
60   better concurrency as it's not necessary to synchronize on
61   the isDeleted method.  Currently the default for readOnly
62   is false, meaning if not specified you will get a
63   read/write IndexReader.  But in 3.0 this default will
64   change to true, meaning you must explicitly specify false
65   if you want to make changes with the resulting IndexReader.
66   </p>
67  
68   @version $Id: IndexReader.java 695510 2008-09-15 15:33:15Z otis $
69  */
70  public abstract class IndexReader {
71  
  // Value passed as the readOnly argument by every open(...) overload that
  // does not take an explicit readOnly flag.
  // NOTE: in 3.0 this will change to true
  final static boolean READ_ONLY_DEFAULT = false;
74  
75    /**
76     * Constants describing field properties, for example used for
77     * {@link IndexReader#getFieldNames(FieldOption)}.
78     */
79    public static final class FieldOption {
80      private String option;
81      private FieldOption() { }
82      private FieldOption(String option) {
83        this.option = option;
84      }
85      public String toString() {
86        return this.option;
87      }
88      /** All fields */
89      public static final FieldOption ALL = new FieldOption ("ALL");
90      /** All indexed fields */
91      public static final FieldOption INDEXED = new FieldOption ("INDEXED");
92      /** All fields that store payloads */
93      public static final FieldOption STORES_PAYLOADS = new FieldOption ("STORES_PAYLOADS");
94      /** All fields that omit tf */
95      public static final FieldOption OMIT_TF = new FieldOption ("OMIT_TF");
96      /** All fields which are not indexed */
97      public static final FieldOption UNINDEXED = new FieldOption ("UNINDEXED");
98      /** All fields which are indexed with termvectors enabled */
99      public static final FieldOption INDEXED_WITH_TERMVECTOR = new FieldOption ("INDEXED_WITH_TERMVECTOR");
100     /** All fields which are indexed but don't have termvectors enabled */
101     public static final FieldOption INDEXED_NO_TERMVECTOR = new FieldOption ("INDEXED_NO_TERMVECTOR");
102     /** All fields with termvectors enabled. Please note that only standard termvector fields are returned */
103     public static final FieldOption TERMVECTOR = new FieldOption ("TERMVECTOR");
104     /** All fields with termvectors with position values enabled */
105     public static final FieldOption TERMVECTOR_WITH_POSITION = new FieldOption ("TERMVECTOR_WITH_POSITION");
106     /** All fields with termvectors with offset values enabled */
107     public static final FieldOption TERMVECTOR_WITH_OFFSET = new FieldOption ("TERMVECTOR_WITH_OFFSET");
108     /** All fields with termvectors with offset values and position values enabled */
109     public static final FieldOption TERMVECTOR_WITH_POSITION_OFFSET = new FieldOption ("TERMVECTOR_WITH_POSITION_OFFSET");
110   }
111 
  // NOTE(review): ensureOpen() does not consult this flag; it decides
  // open/closed from refCount. Presumably maintained by close() further down
  // in the file -- confirm.
  private boolean closed;
  // NOTE(review): presumably true while this reader holds uncommitted
  // modifications; visible to subclasses -- confirm against commit().
  protected boolean hasChanges;
  
  // Number of outstanding references. Set to 1 by the no-arg constructor,
  // changed only by incRef()/decRef(), and read without the lock by
  // ensureOpen(), which treats a value <= 0 as "closed".
  private volatile int refCount;
116   
117   // for testing
118   synchronized int getRefCount() {
119     return refCount;
120   }
121   
122   /**
123    * Expert: increments the refCount of this IndexReader
124    * instance.  RefCounts are used to determine when a
125    * reader can be closed safely, i.e. as soon as there are
126    * no more references.  Be sure to always call a
127    * corresponding {@link #decRef}, in a finally clause;
128    * otherwise the reader may never be closed.  Note that
129    * {@link #close} simply calls decRef(), which means that
130    * the IndexReader will not really be closed until {@link
131    * #decRef} has been called for all outstanding
132    * references.
133    *
134    * @see #decRef
135    */
136   public synchronized void incRef() {
137     assert refCount > 0;
138     ensureOpen();
139     refCount++;
140   }
141 
142   /**
143    * Expert: decreases the refCount of this IndexReader
144    * instance.  If the refCount drops to 0, then pending
145    * changes (if any) are committed to the index and this
146    * reader is closed.
147    * 
148    * @throws IOException in case an IOException occurs in commit() or doClose()
149    *
150    * @see #incRef
151    */
152   public synchronized void decRef() throws IOException {
153     assert refCount > 0;
154     ensureOpen();
155     if (refCount == 1) {
156       commit();
157       doClose();
158     }
159     refCount--;
160   }
161   
  /** 
   * Directory handed to the legacy IndexReader(Directory) constructor.  Stays
   * <code>null</code> when the no-arg constructor is used, in which case the
   * base-class {@link #directory()} throws UnsupportedOperationException.
   *
   * @deprecated will be deleted when IndexReader(Directory) is deleted
   * @see #directory()
   */
  private Directory directory;
167 
  /**
   * Legacy Constructor for backwards compatibility.
   *
   * <p>
   * This Constructor should not be used, it exists for backwards 
   * compatibility only to support legacy subclasses that did not "own" 
   * a specific directory, but needed to specify something to be returned 
   * by the directory() method.  Future subclasses should delegate to the 
   * no arg constructor and implement the directory() method as appropriate.
   * </p>
   *
   * <p>
   * Delegates to the no-arg constructor first, so the reference count starts
   * at 1 exactly as for any other reader, then records the directory.
   * </p>
   * 
   * @param directory Directory to be returned by the directory() method
   * @see #directory()
   * @deprecated - use IndexReader()
   */
  protected IndexReader(Directory directory) {
    this();
    this.directory = directory;
  }
186   
187   protected IndexReader() { 
188     refCount = 1;
189   }
190   
191   /**
192    * @throws AlreadyClosedException if this IndexReader is closed
193    */
194   protected final void ensureOpen() throws AlreadyClosedException {
195     if (refCount <= 0) {
196       throw new AlreadyClosedException("this IndexReader is closed");
197     }
198   }
199 
200   /** Returns a read/write IndexReader reading the index in an FSDirectory in the named
201    path.  <b>NOTE</b>: starting in 3.0 this will return a readOnly IndexReader.
202    * @throws CorruptIndexException if the index is corrupt
203    * @throws IOException if there is a low-level IO error
204    * @param path the path to the index directory */
205   public static IndexReader open(String path) throws CorruptIndexException, IOException {
206     return open(FSDirectory.getDirectory(path), true, null, null, READ_ONLY_DEFAULT);
207   }
208 
209   /** Returns a read/write IndexReader reading the index in an FSDirectory in the named
210    * path.  <b>NOTE</b>: starting in 3.0 this will return a readOnly IndexReader.
211    * @param path the path to the index directory
212    * @throws CorruptIndexException if the index is corrupt
213    * @throws IOException if there is a low-level IO error
214    */
215   public static IndexReader open(File path) throws CorruptIndexException, IOException {
216     return open(FSDirectory.getDirectory(path), true, null, null, READ_ONLY_DEFAULT);
217   }
218 
219   /** Returns a read/write IndexReader reading the index in
220    * the given Directory. <b>NOTE</b>: starting in 3.0 this
221    * will return a readOnly IndexReader.
222    * @param directory the index directory
223    * @throws CorruptIndexException if the index is corrupt
224    * @throws IOException if there is a low-level IO error
225    */
226   public static IndexReader open(final Directory directory) throws CorruptIndexException, IOException {
227     return open(directory, false, null, null, READ_ONLY_DEFAULT);
228   }
229 
230   /** Returns a read/write or read only IndexReader reading the index in the given Directory.
231    * @param directory the index directory
232    * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
233    * @throws CorruptIndexException if the index is corrupt
234    * @throws IOException if there is a low-level IO error
235    */
236   public static IndexReader open(final Directory directory, boolean readOnly) throws CorruptIndexException, IOException {
237     return open(directory, false, null, null, readOnly);
238   }
239 
240   /** Expert: returns a read/write IndexReader reading the index in the given
241    * {@link IndexCommit}.  <b>NOTE</b>: starting in 3.0 this
242    * will return a readOnly IndexReader.
243    * @param commit the commit point to open
244    * @throws CorruptIndexException if the index is corrupt
245    * @throws IOException if there is a low-level IO error
246    */
247   public static IndexReader open(final IndexCommit commit) throws CorruptIndexException, IOException {
248     return open(commit.getDirectory(), false, null, commit, READ_ONLY_DEFAULT);
249   }
250 
251   /** Expert: returns a read/write IndexReader reading the index in the given
252    * Directory, with a custom {@link IndexDeletionPolicy}.
253    * <b>NOTE</b>: starting in 3.0 this will return a
254    * readOnly IndexReader.
255    * @param directory the index directory
256    * @param deletionPolicy a custom deletion policy (only used
257    *  if you use this reader to perform deletes or to set
258    *  norms); see {@link IndexWriter} for details.
259    * @throws CorruptIndexException if the index is corrupt
260    * @throws IOException if there is a low-level IO error
261    */
262   public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy) throws CorruptIndexException, IOException {
263     return open(directory, false, deletionPolicy, null, READ_ONLY_DEFAULT);
264   }
265 
266   /** Expert: returns a read/write or read only IndexReader reading the index in the given
267    * Directory, with a custom {@link IndexDeletionPolicy}.
268    * <b>NOTE</b>: starting in 3.0 this will return a
269    * readOnly IndexReader.
270    * @param directory the index directory
271    * @param deletionPolicy a custom deletion policy (only used
272    *  if you use this reader to perform deletes or to set
273    *  norms); see {@link IndexWriter} for details.
274    * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
275    * @throws CorruptIndexException if the index is corrupt
276    * @throws IOException if there is a low-level IO error
277    */
278   public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws CorruptIndexException, IOException {
279     return open(directory, false, deletionPolicy, null, readOnly);
280   }
281 
282   /** Expert: returns a read/write IndexReader reading the index in the given
283    * Directory, using a specific commit and with a custom
284    * {@link IndexDeletionPolicy}.  <b>NOTE</b>: starting in
285    * 3.0 this will return a readOnly IndexReader.
286    * @param commit the specific {@link IndexCommit} to open;
287    * see {@link IndexReader#listCommits} to list all commits
288    * in a directory
289    * @param deletionPolicy a custom deletion policy (only used
290    *  if you use this reader to perform deletes or to set
291    *  norms); see {@link IndexWriter} for details.
292    * @throws CorruptIndexException if the index is corrupt
293    * @throws IOException if there is a low-level IO error
294    */
295   public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy) throws CorruptIndexException, IOException {
296     return open(commit.getDirectory(), false, deletionPolicy, commit, READ_ONLY_DEFAULT);
297   }
298 
299   /** Expert: returns a read/write or read only IndexReader reading the index in the given
300    * Directory, using a specific commit and with a custom {@link IndexDeletionPolicy}.
301    * @param commit the specific {@link IndexCommit} to open;
302    * see {@link IndexReader#listCommits} to list all commits
303    * in a directory
304    * @param deletionPolicy a custom deletion policy (only used
305    *  if you use this reader to perform deletes or to set
306    *  norms); see {@link IndexWriter} for details.
307    * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
308    * @throws CorruptIndexException if the index is corrupt
309    * @throws IOException if there is a low-level IO error
310    */
311   public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws CorruptIndexException, IOException {
312     return open(commit.getDirectory(), false, deletionPolicy, commit, readOnly);
313   }
314 
315   private static IndexReader open(final Directory directory, final boolean closeDirectory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly) throws CorruptIndexException, IOException {
316     return DirectoryIndexReader.open(directory, closeDirectory, deletionPolicy, commit, readOnly);
317   }
318 
319   /**
320    * Refreshes an IndexReader if the index has changed since this instance 
321    * was (re)opened. 
322    * <p>
323    * Opening an IndexReader is an expensive operation. This method can be used
324    * to refresh an existing IndexReader to reduce these costs. This method 
325    * tries to only load segments that have changed or were created after the 
326    * IndexReader was (re)opened.
327    * <p>
328    * If the index has not changed since this instance was (re)opened, then this
329    * call is a NOOP and returns this instance. Otherwise, a new instance is 
330    * returned. The old instance is <b>not</b> closed and remains usable.<br>
331    * <b>Note:</b> The re-opened reader instance and the old instance might share
332    * the same resources. For this reason no index modification operations 
333    * (e. g. {@link #deleteDocument(int)}, {@link #setNorm(int, String, byte)}) 
334    * should be performed using one of the readers until the old reader instance
335    * is closed. <b>Otherwise, the behavior of the readers is undefined.</b> 
336    * <p>   
337    * You can determine whether a reader was actually reopened by comparing the
338    * old instance with the instance returned by this method: 
339    * <pre>
340    * IndexReader reader = ... 
341    * ...
342    * IndexReader new = r.reopen();
343    * if (new != reader) {
344    *   ...     // reader was reopened
345    *   reader.close(); 
346    * }
347    * reader = new;
348    * ...
349    * </pre>
350    * 
351    * @throws CorruptIndexException if the index is corrupt
352    * @throws IOException if there is a low-level IO error
353    */  
354   public synchronized IndexReader reopen() throws CorruptIndexException, IOException {
355     throw new UnsupportedOperationException("This reader does not support reopen().");
356   }
357 
358   /** 
359    * Returns the directory associated with this index.  The Default 
360    * implementation returns the directory specified by subclasses when 
361    * delegating to the IndexReader(Directory) constructor, or throws an 
362    * UnsupportedOperationException if one was not specified.
363    * @throws UnsupportedOperationException if no directory
364    */
365   public Directory directory() {
366     ensureOpen();
367     if (null != directory) {
368       return directory;
369     } else {
370       throw new UnsupportedOperationException("This reader does not support this method.");  
371     }
372   }
373 
374   /**
375    * Returns the time the index in the named directory was last modified.
376    * Do not use this to check whether the reader is still up-to-date, use
377    * {@link #isCurrent()} instead. 
378    * @throws CorruptIndexException if the index is corrupt
379    * @throws IOException if there is a low-level IO error
380    */
381   public static long lastModified(String directory) throws CorruptIndexException, IOException {
382     return lastModified(new File(directory));
383   }
384 
385   /**
386    * Returns the time the index in the named directory was last modified. 
387    * Do not use this to check whether the reader is still up-to-date, use
388    * {@link #isCurrent()} instead. 
389    * @throws CorruptIndexException if the index is corrupt
390    * @throws IOException if there is a low-level IO error
391    */
392   public static long lastModified(File fileDirectory) throws CorruptIndexException, IOException {
393     return ((Long) new SegmentInfos.FindSegmentsFile(fileDirectory) {
394         public Object doBody(String segmentFileName) {
395           return new Long(FSDirectory.fileModified(fileDirectory, segmentFileName));
396         }
397       }.run()).longValue();
398   }
399 
400   /**
401    * Returns the time the index in the named directory was last modified. 
402    * Do not use this to check whether the reader is still up-to-date, use
403    * {@link #isCurrent()} instead. 
404    * @throws CorruptIndexException if the index is corrupt
405    * @throws IOException if there is a low-level IO error
406    */
407   public static long lastModified(final Directory directory2) throws CorruptIndexException, IOException {
408     return ((Long) new SegmentInfos.FindSegmentsFile(directory2) {
409         public Object doBody(String segmentFileName) throws IOException {
410           return new Long(directory2.fileModified(segmentFileName));
411         }
412       }.run()).longValue();
413   }
414 
415   /**
416    * Reads version number from segments files. The version number is
417    * initialized with a timestamp and then increased by one for each change of
418    * the index.
419    * 
420    * @param directory where the index resides.
421    * @return version number.
422    * @throws CorruptIndexException if the index is corrupt
423    * @throws IOException if there is a low-level IO error
424    */
425   public static long getCurrentVersion(String directory) throws CorruptIndexException, IOException {
426     return getCurrentVersion(new File(directory));
427   }
428 
429   /**
430    * Reads version number from segments files. The version number is
431    * initialized with a timestamp and then increased by one for each change of
432    * the index.
433    * 
434    * @param directory where the index resides.
435    * @return version number.
436    * @throws CorruptIndexException if the index is corrupt
437    * @throws IOException if there is a low-level IO error
438    */
439   public static long getCurrentVersion(File directory) throws CorruptIndexException, IOException {
440     Directory dir = FSDirectory.getDirectory(directory);
441     long version = getCurrentVersion(dir);
442     dir.close();
443     return version;
444   }
445 
446   /**
447    * Reads version number from segments files. The version number is
448    * initialized with a timestamp and then increased by one for each change of
449    * the index.
450    * 
451    * @param directory where the index resides.
452    * @return version number.
453    * @throws CorruptIndexException if the index is corrupt
454    * @throws IOException if there is a low-level IO error
455    */
456   public static long getCurrentVersion(Directory directory) throws CorruptIndexException, IOException {
457     return SegmentInfos.readCurrentVersion(directory);
458   }
459 
460   /**
461    * Version number when this IndexReader was opened. Not implemented in the IndexReader base class.
462    * @throws UnsupportedOperationException unless overridden in subclass
463    */
464   public long getVersion() {
465     throw new UnsupportedOperationException("This reader does not support this method.");
466   }
467 
468   /**<p>For IndexReader implementations that use
469    * TermInfosReader to read terms, this sets the
470    * indexDivisor to subsample the number of indexed terms
471    * loaded into memory.  This has the same effect as {@link
472    * IndexWriter#setTermIndexInterval} except that setting
473    * must be done at indexing time while this setting can be
474    * set per reader.  When set to N, then one in every
475    * N*termIndexInterval terms in the index is loaded into
476    * memory.  By setting this to a value > 1 you can reduce
477    * memory usage, at the expense of higher latency when
478    * loading a TermInfo.  The default value is 1.</p>
479    *
480    * <b>NOTE:</b> you must call this before the term
481    * index is loaded.  If the index is already loaded, 
482    * an IllegalStateException is thrown.
483    * @throws IllegalStateException if the term index has already been loaded into memory
484    */
485   public void setTermInfosIndexDivisor(int indexDivisor) throws IllegalStateException {
486     throw new UnsupportedOperationException("This reader does not support this method.");
487   }
488 
489   /** <p>For IndexReader implementations that use
490    *  TermInfosReader to read terms, this returns the
491    *  current indexDivisor.
492    *  @see #setTermInfosIndexDivisor */
493   public int getTermInfosIndexDivisor() {
494     throw new UnsupportedOperationException("This reader does not support this method.");
495   }
496 
497   /**
498    * Check whether this IndexReader is still using the
499    * current (i.e., most recently committed) version of the
500    * index.  If a writer has committed any changes to the
501    * index since this reader was opened, this will return
502    * <code>false</code>, in which case you must open a new
503    * IndexReader in order to see the changes.  See the
504    * description of the <a href="IndexWriter.html#autoCommit"><code>autoCommit</code></a>
505    * flag which controls when the {@link IndexWriter}
506    * actually commits changes to the index.
507    * 
508    * <p>
509    * Not implemented in the IndexReader base class.
510    * </p>
511    * @throws CorruptIndexException if the index is corrupt
512    * @throws IOException if there is a low-level IO error
513    * @throws UnsupportedOperationException unless overridden in subclass
514    */
515   public boolean isCurrent() throws CorruptIndexException, IOException {
516     throw new UnsupportedOperationException("This reader does not support this method.");
517   }
518 
519   /**
520    * Checks is the index is optimized (if it has a single segment and 
521    * no deletions).  Not implemented in the IndexReader base class.
522    * @return <code>true</code> if the index is optimized; <code>false</code> otherwise
523    * @throws UnsupportedOperationException unless overridden in subclass
524    */
525   public boolean isOptimized() {
526     throw new UnsupportedOperationException("This reader does not support this method.");
527   }
528   
529   /**
530    *  Return an array of term frequency vectors for the specified document.
531    *  The array contains a vector for each vectorized field in the document.
532    *  Each vector contains terms and frequencies for all terms in a given vectorized field.
533    *  If no such fields existed, the method returns null. The term vectors that are
534    * returned my either be of type TermFreqVector or of type TermPositionsVector if
535    * positions or offsets have been stored.
536    * 
537    * @param docNumber document for which term frequency vectors are returned
538    * @return array of term frequency vectors. May be null if no term vectors have been
539    *  stored for the specified document.
540    * @throws IOException if index cannot be accessed
541    * @see org.apache.lucene.document.Field.TermVector
542    */
543   abstract public TermFreqVector[] getTermFreqVectors(int docNumber)
544           throws IOException;
545 
546 
547   /**
548    *  Return a term frequency vector for the specified document and field. The
549    *  returned vector contains terms and frequencies for the terms in
550    *  the specified field of this document, if the field had the storeTermVector
551    *  flag set. If termvectors had been stored with positions or offsets, a 
552    *  TermPositionsVector is returned.
553    * 
554    * @param docNumber document for which the term frequency vector is returned
555    * @param field field for which the term frequency vector is returned.
556    * @return term frequency vector May be null if field does not exist in the specified
557    * document or term vector was not stored.
558    * @throws IOException if index cannot be accessed
559    * @see org.apache.lucene.document.Field.TermVector
560    */
561   abstract public TermFreqVector getTermFreqVector(int docNumber, String field)
562           throws IOException;
563 
564   /**
565    * Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays of
566    * the {@link TermFreqVector}.
567    * @param docNumber The number of the document to load the vector for
568    * @param field The name of the field to load
569    * @param mapper The {@link TermVectorMapper} to process the vector.  Must not be null
570    * @throws IOException if term vectors cannot be accessed or if they do not exist on the field and doc. specified.
571    * 
572    */
573   abstract public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException;
574 
575   /**
576    * Map all the term vectors for all fields in a Document
577    * @param docNumber The number of the document to load the vector for
578    * @param mapper The {@link TermVectorMapper} to process the vector.  Must not be null
579    * @throws IOException if term vectors cannot be accessed or if they do not exist on the field and doc. specified.
580    */
581   abstract public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException;
582 
583   /**
584    * Returns <code>true</code> if an index exists at the specified directory.
585    * If the directory does not exist or if there is no index in it.
586    * <code>false</code> is returned.
587    * @param  directory the directory to check for an index
588    * @return <code>true</code> if an index exists; <code>false</code> otherwise
589    */
590   public static boolean indexExists(String directory) {
591     return indexExists(new File(directory));
592   }
593 
594   /**
595    * Returns <code>true</code> if an index exists at the specified directory.
596    * If the directory does not exist or if there is no index in it.
597    * @param  directory the directory to check for an index
598    * @return <code>true</code> if an index exists; <code>false</code> otherwise
599    */
600 
601   public static boolean indexExists(File directory) {
602     return SegmentInfos.getCurrentSegmentGeneration(directory.list()) != -1;
603   }
604 
605   /**
606    * Returns <code>true</code> if an index exists at the specified directory.
607    * If the directory does not exist or if there is no index in it.
608    * @param  directory the directory to check for an index
609    * @return <code>true</code> if an index exists; <code>false</code> otherwise
610    * @throws IOException if there is a problem with accessing the index
611    */
612   public static boolean indexExists(Directory directory) throws IOException {
613     return SegmentInfos.getCurrentSegmentGeneration(directory) != -1;
614   }
615 
  /**
   * Returns the number of documents in this index.
   * {@link #numDeletedDocs()} subtracts this value from {@link #maxDoc()}.
   */
  public abstract int numDocs();
618 
  /** Returns one greater than the largest possible document number.
   * This may be used to, e.g., determine how big to allocate an array which
   * will have an element for every document number in an index.
   * {@link #numDeletedDocs()} is computed as this value minus {@link #numDocs()}.
   */
  public abstract int maxDoc();
624 
625   /** Returns the number of deleted documents. */
626   public int numDeletedDocs() {
627     return maxDoc() - numDocs();
628   }
629 
630   /** Returns the stored fields of the <code>n</code><sup>th</sup>
631    <code>Document</code> in this index.
632    * @throws CorruptIndexException if the index is corrupt
633    * @throws IOException if there is a low-level IO error
634    */
635   public Document document(int n) throws CorruptIndexException, IOException {
636     ensureOpen();
637     return document(n, null);
638   }
639 
  /**
   * Get the {@link org.apache.lucene.document.Document} at the <code>n</code><sup>th</sup> position. The {@link org.apache.lucene.document.FieldSelector}
   * may be used to determine what {@link org.apache.lucene.document.Field}s to load and how they should be loaded.
   * 
   * <p><b>NOTE:</b> If this Reader (more specifically, the underlying <code>FieldsReader</code>) is closed before the lazy {@link org.apache.lucene.document.Field} is
   * loaded, an exception may be thrown.  If you want the value of a lazy {@link org.apache.lucene.document.Field} to be available after closing, you must
   * explicitly load it or fetch the Document again with a new loader.</p>
   * 
   * <p>{@link #document(int)} calls this method with a null selector.</p>
   *  
   * @param n Get the document at the <code>n</code><sup>th</sup> position
   * @param fieldSelector The {@link org.apache.lucene.document.FieldSelector} to use to determine what Fields should be loaded on the Document.  May be null, in which case all Fields will be loaded.
   * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   * 
   * @see org.apache.lucene.document.Fieldable
   * @see org.apache.lucene.document.FieldSelector
   * @see org.apache.lucene.document.SetBasedFieldSelector
   * @see org.apache.lucene.document.LoadFirstFieldSelector
   */
  //When we convert to JDK 1.5 make this Set<String>
  public abstract Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException;
662   
663   
664 
  /**
   * Returns true if document <i>n</i> has been deleted.
   *
   * @param n the document number to check
   * @return <code>true</code> if document <i>n</i> has been deleted
   */
  public abstract boolean isDeleted(int n);
667 
  /**
   * Returns true if any documents have been deleted.
   *
   * @return <code>true</code> if any documents have been deleted
   */
  public abstract boolean hasDeletions();
670 
671   /** Returns true if there are norms stored for this field. */
672   public boolean hasNorms(String field) throws IOException {
673     // backward compatible implementation.
674     // SegmentReader has an efficient implementation.
675     ensureOpen();
676     return norms(field) != null;
677   }
678 
  /**
   * Returns the byte-encoded normalization factor for the named field of
   * every document.  This is used by the search code to score documents.
   *
   * <p>The default {@link #hasNorms(String)} treats a <code>null</code>
   * return value from this method as "no norms stored for this field".
   *
   * @param field the name of the field whose norms are requested
   * @return the byte-encoded norms for <code>field</code>
   * @throws IOException as declared by this method
   * @see org.apache.lucene.document.Field#setBoost(float)
   */
  public abstract byte[] norms(String field) throws IOException;
685 
  /**
   * Reads the byte-encoded normalization factor for the named field of every
   * document.  This is used by the search code to score documents.
   *
   * @param field the name of the field whose norms are read
   * @param bytes presumably the destination array the norms are read into --
   *        TODO confirm against implementations
   * @param offset presumably the index in <code>bytes</code> at which writing
   *        starts -- TODO confirm against implementations
   * @throws IOException as declared by this method
   * @see org.apache.lucene.document.Field#setBoost(float)
   */
  public abstract void norms(String field, byte[] bytes, int offset)
    throws IOException;
693 
  /**
   * Expert: Resets the normalization factor for the named field of the named
   * document.  The norm represents the product of the field's {@link
   * org.apache.lucene.document.Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String,
   * int) length normalization}.  Thus, to preserve the length normalization
   * values when resetting this, one should base the new value upon the old.
   *
   * <p>The actual update is delegated to {@link #doSetNorm(int, String, byte)}.
   *
   * @param doc the document number whose norm is reset
   * @param field the name of the field whose norm is reset
   * @param value the new byte-encoded norm
   * @see #norms(String)
   * @see Similarity#decodeNorm(byte)
   * @throws StaleReaderException if the index has changed
   *  since this reader was opened
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if there is a low-level IO error
   */
  public synchronized  void setNorm(int doc, String field, byte value)
          throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
    ensureOpen();
    // The write lock is requested before the reader is marked as modified.
    acquireWriteLock();
    // Flag pending changes so that a later commit() calls doCommit().
    hasChanges = true;
    doSetNorm(doc, field, value);
  }
717 
  /**
   * Implements setNorm in subclass.  Called by
   * {@link #setNorm(int, String, byte)} after the write lock has been
   * requested and the reader has been flagged as changed.
   *
   * @param doc the document number whose norm is reset
   * @param field the name of the field whose norm is reset
   * @param value the new byte-encoded norm
   * @throws CorruptIndexException as declared by this method
   * @throws IOException as declared by this method
   */
  protected abstract void doSetNorm(int doc, String field, byte value)
          throws CorruptIndexException, IOException;
721 
722   /** Expert: Resets the normalization factor for the named field of the named
723    * document.
724    *
725    * @see #norms(String)
726    * @see Similarity#decodeNorm(byte)
727    * 
728    * @throws StaleReaderException if the index has changed
729    *  since this reader was opened
730    * @throws CorruptIndexException if the index is corrupt
731    * @throws LockObtainFailedException if another writer
732    *  has this index open (<code>write.lock</code> could not
733    *  be obtained)
734    * @throws IOException if there is a low-level IO error
735    */
736   public void setNorm(int doc, String field, float value)
737           throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
738     ensureOpen();
739     setNorm(doc, field, Similarity.encodeNorm(value));
740   }
741 
  /**
   * Returns an enumeration of all the terms in the index.  The
   * enumeration is ordered by Term.compareTo(): each term is greater
   * than all that precede it in the enumeration.
   *
   * <p>Note that after calling terms(), {@link TermEnum#next()} must be
   * called on the resulting enumeration before calling other methods such as
   * {@link TermEnum#term()}.
   *
   * @return an enumeration of all terms in the index
   * @throws IOException if there is a low-level IO error
   */
  public abstract TermEnum terms() throws IOException;
751 
  /**
   * Returns an enumeration of all terms starting at a given term.  If
   * the given term does not exist, the enumeration is positioned at the
   * first term greater than the supplied term.  The enumeration is
   * ordered by Term.compareTo(): each term is greater than all that
   * precede it in the enumeration.
   *
   * @param t the term at which the enumeration starts
   * @return an enumeration of terms starting at <code>t</code>
   * @throws IOException if there is a low-level IO error
   */
  public abstract TermEnum terms(Term t) throws IOException;
760 
  /**
   * Returns the number of documents containing the term <code>t</code>.
   *
   * @param t the term to look up
   * @return the number of documents containing <code>t</code>
   * @throws IOException if there is a low-level IO error
   */
  public abstract int docFreq(Term t) throws IOException;
765 
766   /** Returns an enumeration of all the documents which contain
767    * <code>term</code>. For each document, the document number, the frequency of
768    * the term in that document is also provided, for use in search scoring.
769    * Thus, this method implements the mapping:
770    * <p><ul>
771    * Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq&gt;<sup>*</sup>
772    * </ul>
773    * <p>The enumeration is ordered by document number.  Each document number
774    * is greater than all that precede it in the enumeration.
775    * @throws IOException if there is a low-level IO error
776    */
777   public TermDocs termDocs(Term term) throws IOException {
778     ensureOpen();
779     TermDocs termDocs = termDocs();
780     termDocs.seek(term);
781     return termDocs;
782   }
783 
  /**
   * Returns an unpositioned {@link TermDocs} enumerator.
   * {@link #termDocs(Term)} calls this method and then seeks the result to
   * the requested term.
   *
   * @return an unpositioned {@link TermDocs} enumerator
   * @throws IOException if there is a low-level IO error
   */
  public abstract TermDocs termDocs() throws IOException;
788 
789   /** Returns an enumeration of all the documents which contain
790    * <code>term</code>.  For each document, in addition to the document number
791    * and frequency of the term in that document, a list of all of the ordinal
792    * positions of the term in the document is available.  Thus, this method
793    * implements the mapping:
794    *
795    * <p><ul>
796    * Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq,
797    * &lt;pos<sub>1</sub>, pos<sub>2</sub>, ...
798    * pos<sub>freq-1</sub>&gt;
799    * &gt;<sup>*</sup>
800    * </ul>
801    * <p> This positional information facilitates phrase and proximity searching.
802    * <p>The enumeration is ordered by document number.  Each document number is
803    * greater than all that precede it in the enumeration.
804    * @throws IOException if there is a low-level IO error
805    */
806   public TermPositions termPositions(Term term) throws IOException {
807     ensureOpen();
808     TermPositions termPositions = termPositions();
809     termPositions.seek(term);
810     return termPositions;
811   }
812 
  /**
   * Returns an unpositioned {@link TermPositions} enumerator.
   * {@link #termPositions(Term)} calls this method and then seeks the result
   * to the requested term.
   *
   * @return an unpositioned {@link TermPositions} enumerator
   * @throws IOException if there is a low-level IO error
   */
  public abstract TermPositions termPositions() throws IOException;
817 
818 
819 
  /**
   * Deletes the document numbered <code>docNum</code>.  Once a document is
   * deleted it will not appear in TermDocs or TermPositions enumerations.
   * Attempts to read its field with the {@link #document}
   * method will result in an error.  The presence of this document may still be
   * reflected in the {@link #docFreq} statistic, though
   * this will be corrected eventually as the index is further modified.
   *
   * <p>The actual deletion is delegated to {@link #doDelete(int)}.
   *
   * @param docNum the number of the document to delete
   * @throws StaleReaderException if the index has changed
   * since this reader was opened
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if there is a low-level IO error
   */
  public synchronized void deleteDocument(int docNum) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
    ensureOpen();
    // The write lock is requested before the reader is marked as modified.
    acquireWriteLock();
    // Flag pending changes so that a later commit() calls doCommit().
    hasChanges = true;
    doDelete(docNum);
  }
841 
842 
  /**
   * Implements deletion of the document numbered <code>docNum</code>.
   * Applications should call {@link #deleteDocument(int)} or {@link #deleteDocuments(Term)}.
   *
   * @param docNum the number of the document to delete
   * @throws CorruptIndexException as declared by this method
   * @throws IOException as declared by this method
   */
  protected abstract void doDelete(int docNum) throws CorruptIndexException, IOException;
847 
848 
849   /** Deletes all documents that have a given <code>term</code> indexed.
850    * This is useful if one uses a document field to hold a unique ID string for
851    * the document.  Then to delete such a document, one merely constructs a
852    * term with the appropriate field and the unique ID string as its text and
853    * passes it to this method.
854    * See {@link #deleteDocument(int)} for information about when this deletion will 
855    * become effective.
856    *
857    * @return the number of documents deleted
858    * @throws StaleReaderException if the index has changed
859    *  since this reader was opened
860    * @throws CorruptIndexException if the index is corrupt
861    * @throws LockObtainFailedException if another writer
862    *  has this index open (<code>write.lock</code> could not
863    *  be obtained)
864    * @throws IOException if there is a low-level IO error
865    */
866   public int deleteDocuments(Term term) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
867     ensureOpen();
868     TermDocs docs = termDocs(term);
869     if (docs == null) return 0;
870     int n = 0;
871     try {
872       while (docs.next()) {
873         deleteDocument(docs.doc());
874         n++;
875       }
876     } finally {
877       docs.close();
878     }
879     return n;
880   }
881 
  /**
   * Undeletes all documents currently marked as deleted in this index.
   * The actual work is delegated to {@link #doUndeleteAll()}.
   *
   * @throws StaleReaderException if the index has changed
   *  since this reader was opened
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public synchronized void undeleteAll() throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
    ensureOpen();
    // The write lock is requested before the reader is marked as modified.
    acquireWriteLock();
    // Flag pending changes so that a later commit() calls doCommit().
    hasChanges = true;
    doUndeleteAll();
  }
898 
  /**
   * Implements actual undeleteAll() in subclass.  Called by
   * {@link #undeleteAll()} after the write lock has been requested and the
   * reader has been flagged as changed.
   *
   * @throws CorruptIndexException as declared by this method
   * @throws IOException as declared by this method
   */
  protected abstract void doUndeleteAll() throws CorruptIndexException, IOException;
901 
  /**
   * Does nothing by default.  Subclasses that require a write lock for
   * index modifications must implement this method.
   *
   * <p>Within this class it is invoked by {@link #setNorm(int, String, byte)},
   * {@link #deleteDocument(int)} and {@link #undeleteAll()} before they
   * modify the index.
   *
   * @throws IOException never thrown by this default implementation; declared
   *         so that overriding implementations may throw it
   */
  protected synchronized void acquireWriteLock() throws IOException {
    /* NOOP */
  }
907   
  /**
   * Flushes pending changes of this reader: after the
   * <code>ensureOpen()</code> check, this simply calls {@link #commit()}.
   *
   * @throws IOException if {@link #commit()} hits a low-level IO error
   */
  public final synchronized void flush() throws IOException {
    ensureOpen();
    commit();
  }
916 
917   /**
918    * Commit changes resulting from delete, undeleteAll, or
919    * setNorm operations
920    *
921    * If an exception is hit, then either no changes or all
922    * changes will have been committed to the index
923    * (transactional semantics).
924    * @throws IOException if there is a low-level IO error
925    */
926   protected final synchronized void commit() throws IOException {
927     if(hasChanges){
928       doCommit();
929     }
930     hasChanges = false;
931   }
932 
  /**
   * Implements commit.  Called by {@link #commit()} only when this reader
   * has pending changes.
   *
   * @throws IOException as declared by this method
   */
  protected abstract void doCommit() throws IOException;
935 
936   /**
937    * Closes files associated with this index.
938    * Also saves any new deletions to disk.
939    * No other methods should be called after this has been called.
940    * @throws IOException if there is a low-level IO error
941    */
942   public final synchronized void close() throws IOException {
943     if (!closed) {
944       decRef();
945       closed = true;
946     }
947   }
948   
  /**
   * Implements close.  Not called directly by {@link #close()} in the code
   * visible here, which goes through <code>decRef()</code>; presumably
   * invoked from there -- TODO confirm.
   *
   * @throws IOException as declared by this method
   */
  protected abstract void doClose() throws IOException;
951 
952 
  /**
   * Get a list of unique field names that exist in this index and have the specified
   * field option information.
   *
   * @param fldOption specifies which field option should be available for the returned fields
   * @return Collection of Strings indicating the names of the fields (a raw
   *         <code>Collection</code>; this file does not use generics)
   * @see IndexReader.FieldOption
   */
  public abstract Collection getFieldNames(FieldOption fldOption);
961 
962   /**
963    * Returns <code>true</code> iff the index in the named directory is
964    * currently locked.
965    * @param directory the directory to check for a lock
966    * @throws IOException if there is a low-level IO error
967    * @deprecated Please use {@link IndexWriter#isLocked(Directory)} instead
968    */
969   public static boolean isLocked(Directory directory) throws IOException {
970     return
971       directory.makeLock(IndexWriter.WRITE_LOCK_NAME).isLocked();
972   }
973 
974   /**
975    * Returns <code>true</code> iff the index in the named directory is
976    * currently locked.
977    * @param directory the directory to check for a lock
978    * @throws IOException if there is a low-level IO error
979    * @deprecated Please use {@link IndexWriter#isLocked(String)} instead
980    */
981   public static boolean isLocked(String directory) throws IOException {
982     Directory dir = FSDirectory.getDirectory(directory);
983     boolean result = isLocked(dir);
984     dir.close();
985     return result;
986   }
987 
988   /**
989    * Forcibly unlocks the index in the named directory.
990    * <P>
991    * Caution: this should only be used by failure recovery code,
992    * when it is known that no other process nor thread is in fact
993    * currently accessing this index.
994    * @deprecated Please use {@link IndexWriter#unlock(Directory)} instead
995    */
996   public static void unlock(Directory directory) throws IOException {
997     directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
998   }
999 
1000  /**
1001   * Expert: return the IndexCommit that this reader has
1002   * opened.  This method is only implemented by those
1003   * readers that correspond to a Directory with its own
1004   * segments_N file.
1005   *
1006   * <p><b>WARNING</b>: this API is new and experimental and
1007   * may suddenly change.</p>
1008   */
1009  public IndexCommit getIndexCommit() throws IOException {
1010    throw new UnsupportedOperationException("This reader does not support this method.");
1011  }
1012  
1013  /**
1014   * Prints the filename and size of each file within a given compound file.
1015   * Add the -extract flag to extract files to the current working directory.
1016   * In order to make the extracted version of the index work, you have to copy
1017   * the segments file from the compound index into the directory where the extracted files are stored.
1018   * @param args Usage: org.apache.lucene.index.IndexReader [-extract] &lt;cfsfile&gt;
1019   */
1020  public static void main(String [] args) {
1021    String filename = null;
1022    boolean extract = false;
1023
1024    for (int i = 0; i < args.length; ++i) {
1025      if (args[i].equals("-extract")) {
1026        extract = true;
1027      } else if (filename == null) {
1028        filename = args[i];
1029      }
1030    }
1031
1032    if (filename == null) {
1033      System.out.println("Usage: org.apache.lucene.index.IndexReader [-extract] <cfsfile>");
1034      return;
1035    }
1036
1037    Directory dir = null;
1038    CompoundFileReader cfr = null;
1039
1040    try {
1041      File file = new File(filename);
1042      String dirname = file.getAbsoluteFile().getParent();
1043      filename = file.getName();
1044      dir = FSDirectory.getDirectory(dirname);
1045      cfr = new CompoundFileReader(dir, filename);
1046
1047      String [] files = cfr.list();
1048      Arrays.sort(files);   // sort the array of filename so that the output is more readable
1049
1050      for (int i = 0; i < files.length; ++i) {
1051        long len = cfr.fileLength(files[i]);
1052
1053        if (extract) {
1054          System.out.println("extract " + files[i] + " with " + len + " bytes to local directory...");
1055          IndexInput ii = cfr.openInput(files[i]);
1056
1057          FileOutputStream f = new FileOutputStream(files[i]);
1058
1059          // read and write with a small buffer, which is more effectiv than reading byte by byte
1060          byte[] buffer = new byte[1024];
1061          int chunk = buffer.length;
1062          while(len > 0) {
1063            final int bufLen = (int) Math.min(chunk, len);
1064            ii.readBytes(buffer, 0, bufLen);
1065            f.write(buffer, 0, bufLen);
1066            len -= bufLen;
1067          }
1068
1069          f.close();
1070          ii.close();
1071        }
1072        else
1073          System.out.println(files[i] + ": " + len + " bytes");
1074      }
1075    } catch (IOException ioe) {
1076      ioe.printStackTrace();
1077    }
1078    finally {
1079      try {
1080        if (dir != null)
1081          dir.close();
1082        if (cfr != null)
1083          cfr.close();
1084      }
1085      catch (IOException ioe) {
1086        ioe.printStackTrace();
1087      }
1088    }
1089  }
1090
1091  /** Returns all commit points that exist in the Directory.
1092   *  Normally, because the default is {@link
1093   *  KeepOnlyLastCommitDeletionPolicy}, there would be only
1094   *  one commit point.  But if you're using a custom {@link
1095   *  IndexDeletionPolicy} then there could be many commits.
1096   *  Once you have a given commit, you can open a reader on
1097   *  it by calling {@link IndexReader#open(IndexCommit)}
1098   *  There must be at least one commit in
1099   *  the Directory, else this method throws {@link
1100   *  java.io.IOException}.  Note that if a commit is in
1101   *  progress while this method is running, that commit
1102   *  may or may not be returned array.  */
1103  public static Collection listCommits(Directory dir) throws IOException {
1104    return DirectoryIndexReader.listCommits(dir);
1105  }
1106}
1107