| IndexWriter.java |
1 package org.apache.lucene.index;
2
3 /**
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20 import org.apache.lucene.analysis.Analyzer;
21 import org.apache.lucene.document.Document;
22 import org.apache.lucene.search.Similarity;
23 import org.apache.lucene.search.Query;
24 import org.apache.lucene.store.Directory;
25 import org.apache.lucene.store.FSDirectory;
26 import org.apache.lucene.store.Lock;
27 import org.apache.lucene.store.LockObtainFailedException;
28 import org.apache.lucene.store.AlreadyClosedException;
29 import org.apache.lucene.util.BitVector;
30 import org.apache.lucene.util.Constants;
31
32 import java.io.File;
33 import java.io.IOException;
34 import java.io.PrintStream;
35 import java.util.List;
36 import java.util.Collection;
37 import java.util.ArrayList;
38 import java.util.HashMap;
39 import java.util.Set;
40 import java.util.HashSet;
41 import java.util.LinkedList;
42 import java.util.Iterator;
43
44 /**
45 An <code>IndexWriter</code> creates and maintains an index.
46
47 <p>The <code>create</code> argument to the
48 <a href="#IndexWriter(org.apache.lucene.store.Directory, org.apache.lucene.analysis.Analyzer, boolean)"><b>constructor</b></a>
49 determines whether a new index is created, or whether an existing index is
50 opened. Note that you
51 can open an index with <code>create=true</code> even while readers are
52 using the index. The old readers will continue to search
53 the "point in time" snapshot they had opened, and won't
54 see the newly created index until they re-open. There are
55 also <a href="#IndexWriter(org.apache.lucene.store.Directory, org.apache.lucene.analysis.Analyzer)"><b>constructors</b></a>
56 with no <code>create</code> argument which
57 will create a new index if there is not already an index at the
58 provided path and otherwise open the existing index.</p>
59
60 <p>In either case, documents are added with <a
61 href="#addDocument(org.apache.lucene.document.Document)"><b>addDocument</b></a>
62 and removed with <a
63 href="#deleteDocuments(org.apache.lucene.index.Term)"><b>deleteDocuments(Term)</b></a>
64 or <a
65 href="#deleteDocuments(org.apache.lucene.search.Query)"><b>deleteDocuments(Query)</b></a>.
66 A document can be updated with <a href="#updateDocument(org.apache.lucene.index.Term, org.apache.lucene.document.Document)"><b>updateDocument</b></a>
67 (which just deletes and then adds the entire document).
68 When finished adding, deleting and updating documents, <a href="#close()"><b>close</b></a> should be called.</p>
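
<p>As a minimal sketch of that lifecycle (the StandardAnalyzer and the
field names used here are illustrative assumptions, not requirements):</p>

<pre>
IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(),
                                     true, IndexWriter.MaxFieldLength.LIMITED);

Document doc = new Document();
doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field("body", "hello world", Field.Store.NO, Field.Index.ANALYZED));
writer.addDocument(doc);                          // buffer a new document

writer.updateDocument(new Term("id", "1"), doc);  // delete-then-add by unique term
writer.deleteDocuments(new Term("id", "2"));      // buffer a delete by term

writer.close();                                   // commit the changes and release the write lock
</pre>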
69
70 <a name="flush"></a>
71 <p>These changes are buffered in memory and periodically
72 flushed to the {@link Directory} (during the above method
73 calls). A flush is triggered when there are enough
74 buffered deletes (see {@link #setMaxBufferedDeleteTerms})
75 or enough added documents since the last flush, whichever
76 is sooner. For the added documents, flushing is triggered
77 either by RAM usage of the documents (see {@link
78 #setRAMBufferSizeMB}) or the number of added documents.
79 The default is to flush when RAM usage hits 16 MB. For
80 best indexing speed you should flush by RAM usage with a
81 large RAM buffer. Note that flushing just moves the
82 internal buffered state in IndexWriter into the index, but
83 these changes are not visible to IndexReader until either
84 {@link #commit()} or {@link #close} is called. A flush may
85 also trigger one or more segment merges which by default
86 run with a background thread so as not to block the
87 addDocument calls (see <a href="#mergePolicy">below</a>
88 for changing the {@link MergeScheduler}).</p>
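
<p>For example, to favor indexing speed by flushing on RAM usage with a
larger buffer (continuing with an already-open <code>writer</code>; the
48 MB value below is only an illustration, not a recommendation):</p>

<pre>
writer.setRAMBufferSizeMB(48.0);   // flush once buffered documents use ~48 MB
// ... addDocument / updateDocument / deleteDocuments calls ...
writer.commit();                   // make the flushed changes visible to newly opened IndexReaders
</pre>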
89
90 <a name="autoCommit"></a>
91 <p>The optional <code>autoCommit</code> argument to the <a
92 href="#IndexWriter(org.apache.lucene.store.Directory,
93 boolean,
94 org.apache.lucene.analysis.Analyzer)"><b>constructors</b></a>
95 controls visibility of the changes to {@link IndexReader}
96 instances reading the same index. When this is
97 <code>false</code>, changes are not visible until {@link
98 #close()} or {@link #commit()} is called. Note that changes will still be
99 flushed to the {@link org.apache.lucene.store.Directory}
100 as new files, but are not committed (no new
101 <code>segments_N</code> file is written referencing the
102 new files, nor are the files sync'd to stable storage)
103 until {@link #close()} or {@link #commit()} is called. If something
104 goes terribly wrong (for example the JVM crashes), then
105 the index will reflect none of the changes made since the
106 last commit, or the starting state if commit was not called.
107 You can also call {@link #rollback}, which closes the writer
108 without committing any changes, and removes any index
109 files that had been flushed but are now unreferenced.
110 This mode is useful for preventing readers from refreshing
111 at a bad time (for example after you've done all your
112 deletes but before you've done your adds). It can also be
113 used to implement simple single-writer transactional
114 semantics ("all or none"). You can do a two-phase commit
115 by calling {@link #prepareCommit()}
116 followed by {@link #commit()}. This is necessary when
117 Lucene is working with an external resource (for example,
118 a database) and both must either commit or rollback the
119 transaction.</p>
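
<p>A rough sketch of such a two-phase commit follows; the
<code>database</code> calls are placeholders for whatever external
resource you coordinate with, not a real API, and exception handling is
simplified for brevity:</p>

<pre>
try {
  writer.prepareCommit();    // phase 1: flush and sync, but do not yet publish
  database.prepareCommit();  // placeholder: phase 1 on the external resource
  writer.commit();           // phase 2: write the new segments_N and publish
  database.commit();
} catch (Exception e) {
  writer.rollback();         // discard everything since the last commit
  database.rollback();
}
</pre>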
120
121 <p>When <code>autoCommit</code> is <code>true</code> then
122 the writer will periodically commit on its own. [<b>Deprecated</b>: Note that in 3.0, IndexWriter will
123 no longer accept autoCommit=true (it will be hardwired to
124 false). You can always call {@link #commit()} yourself
125 when needed]. There is
126 no guarantee when exactly an auto commit will occur (it
127 used to be after every flush, but it is now after every
128 completed merge, as of 2.4). If you want to force a
129 commit, call {@link #commit()}, or, close the writer. Once
130 a commit has finished, newly opened {@link IndexReader} instances will
131 see the changes to the index as of that commit. When
132 running in this mode, be careful not to refresh your
133 readers while an optimize or segment merges are taking place,
134 as this can tie up substantial disk space.</p>
135
136 <p>Regardless of <code>autoCommit</code>, an {@link
137 IndexReader} or {@link org.apache.lucene.search.IndexSearcher} will only see the
138 index as of the "point in time" that it was opened. Any
139 changes committed to the index after the reader was opened
140 are not visible until the reader is re-opened.</p>
141
142 <p>If an index will not have more documents added for a while and optimal search
143 performance is desired, then either the full <a href="#optimize()"><b>optimize</b></a>
144 method or partial {@link #optimize(int)} method should be
145 called before the index is closed.</p>
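
<p>For example (continuing with an already-open <code>writer</code>):</p>

<pre>
writer.optimize();   // or writer.optimize(5) to merge down to at most 5 segments
writer.close();
</pre>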
146
147 <p>Opening an <code>IndexWriter</code> creates a lock file for the directory in use. Trying to open
148 another <code>IndexWriter</code> on the same directory will lead to a
149 {@link LockObtainFailedException}. The {@link LockObtainFailedException}
150 is also thrown if an IndexReader on the same directory is used to delete documents
151 from the index.</p>
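
<p>One possible way to recover from a stale lock left behind by a crashed
process is sketched below; only do this if you are certain no other
writer is actually open on the directory:</p>

<pre>
IndexWriter writer;
try {
  writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.LIMITED);
} catch (LockObtainFailedException e) {
  if (IndexWriter.isLocked(directory)) {
    IndexWriter.unlock(directory);   // forcibly clears write.lock; unsafe if another writer exists
  }
  writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.LIMITED);
}
</pre>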
152
153 <a name="deletionPolicy"></a>
154 <p>Expert: <code>IndexWriter</code> allows an optional
155 {@link IndexDeletionPolicy} implementation to be
156 specified. You can use this to control when prior commits
157 are deleted from the index. The default policy is {@link
158 KeepOnlyLastCommitDeletionPolicy} which removes all prior
159 commits as soon as a new commit is done (this matches
160 behavior before 2.2). Creating your own policy can allow
161 you to explicitly keep previous "point in time" commits
162 alive in the index for some time, to allow readers to
163 refresh to the new commit without having the old commit
164 deleted out from under them. This is necessary on
165 filesystems like NFS that do not support "delete on last
166 close" semantics, which Lucene's "point in time" search
167 normally relies on. </p>
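
<p>A minimal sketch of such a policy, one that simply never deletes any
commit (you would prune old commits yourself, out of band):</p>

<pre>
class KeepAllDeletionPolicy implements IndexDeletionPolicy {
  public void onInit(List commits) { /* keep every commit found at startup */ }
  public void onCommit(List commits) { /* keep every commit made by this writer */ }
}

IndexWriter writer = new IndexWriter(directory, analyzer,
    new KeepAllDeletionPolicy(), IndexWriter.MaxFieldLength.LIMITED);
</pre>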
168
169 <a name="mergePolicy"></a> <p>Expert:
170 <code>IndexWriter</code> allows you to separately change
171 the {@link MergePolicy} and the {@link MergeScheduler}.
172 The {@link MergePolicy} is invoked whenever there are
173 changes to the segments in the index. Its role is to
174 select which merges to do, if any, and return a {@link
175 MergePolicy.MergeSpecification} describing the merges. It
176 also selects merges to do for optimize(). (The default is
177 {@link LogByteSizeMergePolicy}.) Then, the {@link
178 MergeScheduler} is invoked with the requested merges and
179 it decides when and how to run the merges. The default is
180 {@link ConcurrentMergeScheduler}. </p>
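
<p>For example, a sketch that bounds merged segment size and runs merges
in the calling thread (continuing with an open <code>writer</code>; the
512 MB value is only illustrative):</p>

<pre>
LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy();
mergePolicy.setMaxMergeMB(512.0);                     // don't merge segments larger than ~512 MB
writer.setMergePolicy(mergePolicy);
writer.setMergeScheduler(new SerialMergeScheduler()); // merge synchronously instead of in background threads
</pre>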
181 */
182
183 /*
184 * Clarification: Check Points (and commits)
185 * Being able to set autoCommit=false allows IndexWriter to flush and
186 * write new index files to the directory without writing a new segments_N
187 * file which references these new files. It also means that the state of
188 * the in-memory SegmentInfos object is different from the most recent
189 * segments_N file written to the directory.
190 *
191 * Each time the SegmentInfos is changed, and matches the (possibly
192 * modified) directory files, we have a new "check point".
193 * If the modified/new SegmentInfos is written to disk - as a new
194 * (generation of) segments_N file - this check point is also an
195 * IndexCommit.
196 *
197 * With autoCommit=true, every checkPoint is also a CommitPoint.
198 * With autoCommit=false, some checkPoints may not be commits.
199 *
200 * A new checkpoint always replaces the previous checkpoint and
201 * becomes the new "front" of the index. This allows the IndexFileDeleter
202 * to delete files that are referenced only by stale checkpoints.
203 * (files that were created since the last commit, but are no longer
204 * referenced by the "front" of the index). For this, IndexFileDeleter
205 * keeps track of the last non-commit checkpoint.
206 */
207 public class IndexWriter {
208
209 /**
210 * Default value for the write lock timeout (1,000 milliseconds).
211 * @see #setDefaultWriteLockTimeout
212 */
213 public static long WRITE_LOCK_TIMEOUT = 1000;
214
215 private long writeLockTimeout = WRITE_LOCK_TIMEOUT;
216
217 /**
218 * Name of the write lock in the index.
219 */
220 public static final String WRITE_LOCK_NAME = "write.lock";
221
222 /**
223 * @deprecated
224 * @see LogMergePolicy#DEFAULT_MERGE_FACTOR
225 */
226 public final static int DEFAULT_MERGE_FACTOR = LogMergePolicy.DEFAULT_MERGE_FACTOR;
227
228 /**
229 * Value to denote a flush trigger is disabled
230 */
231 public final static int DISABLE_AUTO_FLUSH = -1;
232
233 /**
234 * Disabled by default (because IndexWriter flushes by RAM usage
235 * by default). Change using {@link #setMaxBufferedDocs(int)}.
236 */
237 public final static int DEFAULT_MAX_BUFFERED_DOCS = DISABLE_AUTO_FLUSH;
238
239 /**
240 * Default value is 16 MB (which means flush when buffered
241 * docs consume 16 MB RAM). Change using {@link #setRAMBufferSizeMB}.
242 */
243 public final static double DEFAULT_RAM_BUFFER_SIZE_MB = 16.0;
244
245 /**
246 * Disabled by default (because IndexWriter flushes by RAM usage
247 * by default). Change using {@link #setMaxBufferedDeleteTerms(int)}.
248 */
249 public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = DISABLE_AUTO_FLUSH;
250
251 /**
252 * @deprecated
253 * @see LogDocMergePolicy#DEFAULT_MAX_MERGE_DOCS
254 */
255 public final static int DEFAULT_MAX_MERGE_DOCS = LogDocMergePolicy.DEFAULT_MAX_MERGE_DOCS;
256
257 /**
258 * Default value is 10,000. Change using {@link #setMaxFieldLength(int)}.
259 */
260 public final static int DEFAULT_MAX_FIELD_LENGTH = 10000;
261
262 /**
263 * Default value is 128. Change using {@link #setTermIndexInterval(int)}.
264 */
265 public final static int DEFAULT_TERM_INDEX_INTERVAL = 128;
266
267 /**
268 * Absolute hard maximum length for a term. If a term
269 * arrives from the analyzer longer than this length, it
270 * is skipped and a message is printed to infoStream, if
271 * set (see {@link #setInfoStream}).
272 */
273 public final static int MAX_TERM_LENGTH = DocumentsWriter.MAX_TERM_LENGTH;
274
275 /**
276 * Default for {@link #getMaxSyncPauseSeconds}. On
277 * Windows this defaults to 10.0 seconds; elsewhere it's
278 * 0.
279 */
280 public final static double DEFAULT_MAX_SYNC_PAUSE_SECONDS;
281 static {
282 if (Constants.WINDOWS)
283 DEFAULT_MAX_SYNC_PAUSE_SECONDS = 10.0;
284 else
285 DEFAULT_MAX_SYNC_PAUSE_SECONDS = 0.0;
286 }
287
288 // The normal read buffer size defaults to 1024, but
289 // increasing this during merging seems to yield
290 // performance gains. However we don't want to increase
291 // it too much because there are quite a few
292 // BufferedIndexInputs created during merging. See
293 // LUCENE-888 for details.
294 private final static int MERGE_READ_BUFFER_SIZE = 4096;
295
296 // Used for printing messages
297 private static Object MESSAGE_ID_LOCK = new Object();
298 private static int MESSAGE_ID = 0;
299 private int messageID = -1;
300 volatile private boolean hitOOM;
301
302 private Directory directory; // where this index resides
303 private Analyzer analyzer; // how to analyze text
304
305 private Similarity similarity = Similarity.getDefault(); // how to normalize
306
307 private volatile long changeCount; // increments every time a change is completed
308 private long lastCommitChangeCount; // last changeCount that was committed
309
310 private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fall back to if the commit fails
311 private HashMap rollbackSegments;
312
313 volatile SegmentInfos pendingCommit; // set when a commit is pending (after prepareCommit() & before commit())
314 volatile long pendingCommitChangeCount;
315
316 private SegmentInfos localRollbackSegmentInfos; // segmentInfos we will fall back to if the commit fails
317 private boolean localAutoCommit; // saved autoCommit during local transaction
318 private int localFlushedDocCount; // saved docWriter.getFlushedDocCount during local transaction
319 private boolean autoCommit = true; // false if we should commit only on close
320
321 private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
322
323 private DocumentsWriter docWriter;
324 private IndexFileDeleter deleter;
325
326 private Set segmentsToOptimize = new HashSet(); // used by optimize to note those needing optimization
327
328 private Lock writeLock;
329
330 private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
331
332 private boolean closeDir;
333 private boolean closed;
334 private boolean closing;
335
336 // Holds all SegmentInfo instances currently involved in
337 // merges
338 private HashSet mergingSegments = new HashSet();
339
340 private MergePolicy mergePolicy = new LogByteSizeMergePolicy();
341 private MergeScheduler mergeScheduler = new ConcurrentMergeScheduler();
342 private LinkedList pendingMerges = new LinkedList();
343 private Set runningMerges = new HashSet();
344 private List mergeExceptions = new ArrayList();
345 private long mergeGen;
346 private boolean stopMerges;
347
348 private int flushCount;
349 private int flushDeletesCount;
350 private double maxSyncPauseSeconds = DEFAULT_MAX_SYNC_PAUSE_SECONDS;
351
352 // Used to only allow one addIndexes to proceed at once
353 // TODO: use ReadWriteLock once we are on 5.0
354 private int readCount; // count of how many threads are holding read lock
355 private Thread writeThread; // non-null if any thread holds write lock
356 private int upgradeCount;
357
358 synchronized void acquireWrite() {
359 assert writeThread != Thread.currentThread();
360 while(writeThread != null || readCount > 0)
361 doWait();
362
363 // We could have been closed while we were waiting:
364 ensureOpen();
365
366 writeThread = Thread.currentThread();
367 }
368
369 synchronized void releaseWrite() {
370 assert Thread.currentThread() == writeThread;
371 writeThread = null;
372 notifyAll();
373 }
374
375 synchronized void acquireRead() {
376 final Thread current = Thread.currentThread();
377 while(writeThread != null && writeThread != current)
378 doWait();
379
380 readCount++;
381 }
382
383 // Allows one readLock to upgrade to a writeLock even if
384 // there are other readLocks as long as all other
385 // readLocks are also blocked in this method:
386 synchronized void upgradeReadToWrite() {
387 assert readCount > 0;
388 upgradeCount++;
389 while(readCount > upgradeCount || writeThread != null) {
390 doWait();
391 }
392
393 writeThread = Thread.currentThread();
394 readCount--;
395 upgradeCount--;
396 }
397
398 synchronized void releaseRead() {
399 readCount--;
400 assert readCount >= 0;
401 notifyAll();
402 }
403
404 /**
405 * Used internally to throw an {@link
406 * AlreadyClosedException} if this IndexWriter has been
407 * closed.
408 * @throws AlreadyClosedException if this IndexWriter is
409 */
410 protected synchronized final void ensureOpen(boolean includePendingClose) throws AlreadyClosedException {
411 if (closed || (includePendingClose && closing)) {
412 throw new AlreadyClosedException("this IndexWriter is closed");
413 }
414 }
415
416 protected synchronized final void ensureOpen() throws AlreadyClosedException {
417 ensureOpen(true);
418 }
419
420 /**
421 * Prints a message to the infoStream (if non-null),
422 * prefixed with the identifying information for this
423 * writer and the thread that's calling it.
424 */
425 public void message(String message) {
426 if (infoStream != null)
427 infoStream.println("IW " + messageID + " [" + Thread.currentThread().getName() + "]: " + message);
428 }
429
430 private synchronized void setMessageID(PrintStream infoStream) {
431 if (infoStream != null && messageID == -1) {
432 synchronized(MESSAGE_ID_LOCK) {
433 messageID = MESSAGE_ID++;
434 }
435 }
436 this.infoStream = infoStream;
437 }
438
439 /**
440 * Casts current mergePolicy to LogMergePolicy, and throws
441 * an exception if the mergePolicy is not a LogMergePolicy.
442 */
443 private LogMergePolicy getLogMergePolicy() {
444 if (mergePolicy instanceof LogMergePolicy)
445 return (LogMergePolicy) mergePolicy;
446 else
447 throw new IllegalArgumentException("this method can only be called when the merge policy is the default LogMergePolicy");
448 }
449
450 /** <p>Get the current setting of whether newly flushed
451 * segments will use the compound file format. Note that
452 * this just returns the value previously set with
453 * setUseCompoundFile(boolean), or the default value
454 * (true). You cannot use this to query the status of
455 * previously flushed segments.</p>
456 *
457 * <p>Note that this method is a convenience method: it
458 * just calls mergePolicy.getUseCompoundFile as long as
459 * mergePolicy is an instance of {@link LogMergePolicy}.
460 * Otherwise an IllegalArgumentException is thrown.</p>
461 *
462 * @see #setUseCompoundFile(boolean)
463 */
464 public boolean getUseCompoundFile() {
465 return getLogMergePolicy().getUseCompoundFile();
466 }
467
468 /** <p>Setting to turn on usage of a compound file. When on,
469 * multiple files for each segment are merged into a
470 * single file when a new segment is flushed.</p>
471 *
472 * <p>Note that this method is a convenience method: it
473 * just calls mergePolicy.setUseCompoundFile as long as
474 * mergePolicy is an instance of {@link LogMergePolicy}.
475 * Otherwise an IllegalArgumentException is thrown.</p>
476 */
477 public void setUseCompoundFile(boolean value) {
478 getLogMergePolicy().setUseCompoundFile(value);
479 getLogMergePolicy().setUseCompoundDocStore(value);
480 }
481
482 /** Expert: Set the Similarity implementation used by this IndexWriter.
483 *
484 * @see Similarity#setDefault(Similarity)
485 */
486 public void setSimilarity(Similarity similarity) {
487 ensureOpen();
488 this.similarity = similarity;
489 docWriter.setSimilarity(similarity);
490 }
491
492 /** Expert: Return the Similarity implementation used by this IndexWriter.
493 *
494 * <p>This defaults to the current value of {@link Similarity#getDefault()}.
495 */
496 public Similarity getSimilarity() {
497 ensureOpen();
498 return this.similarity;
499 }
500
501 /** Expert: Set the interval between indexed terms. Large values cause less
502 * memory to be used by IndexReader, but slow random-access to terms. Small
503 * values cause more memory to be used by an IndexReader, and speed
504 * random-access to terms.
505 *
506 * This parameter determines the amount of computation required per query
507 * term, regardless of the number of documents that contain that term. In
508 * particular, it is the maximum number of other terms that must be
509 * scanned before a term is located and its frequency and position information
510 * may be processed. In a large index with user-entered query terms, query
511 * processing time is likely to be dominated not by term lookup but rather
512 * by the processing of frequency and positional data. In a small index
513 * or when many uncommon query terms are generated (e.g., by wildcard
514 * queries) term lookup may become a dominant cost.
515 *
516 * In particular, <code>numUniqueTerms/interval</code> terms are read into
517 * memory by an IndexReader, and, on average, <code>interval/2</code> terms
518 * must be scanned for each random term access.
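   *
   * For example (illustrative arithmetic only), with the default interval
   * of 128 and 12,800,000 unique terms in the index:
   * <pre>
   *   index terms held in memory:   12,800,000 / 128 = 100,000
   *   average scan per term lookup:        128 / 2   = 64 terms
   * </pre>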
519 *
520 * @see #DEFAULT_TERM_INDEX_INTERVAL
521 */
522 public void setTermIndexInterval(int interval) {
523 ensureOpen();
524 this.termIndexInterval = interval;
525 }
526
527 /** Expert: Return the interval between indexed terms.
528 *
529 * @see #setTermIndexInterval(int)
530 */
531 public int getTermIndexInterval() {
532 // We pass false because this method is called by SegmentMerger while we are in the process of closing
533 ensureOpen(false);
534 return termIndexInterval;
535 }
536
537 /**
538 * Constructs an IndexWriter for the index in <code>path</code>.
539 * Text will be analyzed with <code>a</code>. If <code>create</code>
540 * is true, then a new, empty index will be created in
541 * <code>path</code>, replacing the index already there,
542 * if any.
543 *
544 * <p><b>NOTE</b>: autoCommit (see <a
545 * href="#autoCommit">above</a>) is set to false with this
546 * constructor.
547 *
548 * @param path the path to the index directory
549 * @param a the analyzer to use
550 * @param create <code>true</code> to create the index or overwrite
551 * the existing one; <code>false</code> to append to the existing
552 * index
553 * @param mfl Maximum field length: LIMITED, UNLIMITED, or user-specified
554 * via the MaxFieldLength constructor.
555 * @throws CorruptIndexException if the index is corrupt
556 * @throws LockObtainFailedException if another writer
557 * has this index open (<code>write.lock</code> could not
558 * be obtained)
559 * @throws IOException if the directory cannot be read/written to, or
560 * if it does not exist and <code>create</code> is
561 * <code>false</code> or if there is any other low-level
562 * IO error
563 */
564 public IndexWriter(String path, Analyzer a, boolean create, MaxFieldLength mfl)
565 throws CorruptIndexException, LockObtainFailedException, IOException {
566 init(FSDirectory.getDirectory(path), a, create, true, null, false, mfl.getLimit());
567 }
568
569 /**
570 * Constructs an IndexWriter for the index in <code>path</code>.
571 * Text will be analyzed with <code>a</code>. If <code>create</code>
572 * is true, then a new, empty index will be created in
573 * <code>path</code>, replacing the index already there, if any.
574 *
575 * @param path the path to the index directory
576 * @param a the analyzer to use
577 * @param create <code>true</code> to create the index or overwrite
578 * the existing one; <code>false</code> to append to the existing
579 * index
580 * @throws CorruptIndexException if the index is corrupt
581 * @throws LockObtainFailedException if another writer
582 * has this index open (<code>write.lock</code> could not
583 * be obtained)
584 * @throws IOException if the directory cannot be read/written to, or
585 * if it does not exist and <code>create</code> is
586 * <code>false</code> or if there is any other low-level
587 * IO error
588 * @deprecated This constructor will be removed in the 3.0 release.
589 * Use {@link
590 * #IndexWriter(String,Analyzer,boolean,MaxFieldLength)}
591 * instead, and call {@link #commit()} when needed.
592 */
593 public IndexWriter(String path, Analyzer a, boolean create)
594 throws CorruptIndexException, LockObtainFailedException, IOException {
595 init(FSDirectory.getDirectory(path), a, create, true, null, true, DEFAULT_MAX_FIELD_LENGTH);
596 }
597
598 /**
599 * Constructs an IndexWriter for the index in <code>path</code>.
600 * Text will be analyzed with <code>a</code>. If <code>create</code>
601 * is true, then a new, empty index will be created in
602 * <code>path</code>, replacing the index already there, if any.
603 *
604 * <p><b>NOTE</b>: autoCommit (see <a
605 * href="#autoCommit">above</a>) is set to false with this
606 * constructor.
607 *
608 * @param path the path to the index directory
609 * @param a the analyzer to use
610 * @param create <code>true</code> to create the index or overwrite
611 * the existing one; <code>false</code> to append to the existing
612 * index
613 * @param mfl Maximum field length: LIMITED, UNLIMITED, or user-specified
614 * via the MaxFieldLength constructor.
615 * @throws CorruptIndexException if the index is corrupt
616 * @throws LockObtainFailedException if another writer
617 * has this index open (<code>write.lock</code> could not
618 * be obtained)
619 * @throws IOException if the directory cannot be read/written to, or
620 * if it does not exist and <code>create</code> is
621 * <code>false</code> or if there is any other low-level
622 * IO error
623 */
624 public IndexWriter(File path, Analyzer a, boolean create, MaxFieldLength mfl)
625 throws CorruptIndexException, LockObtainFailedException, IOException {
626 init(FSDirectory.getDirectory(path), a, create, true, null, false, mfl.getLimit());
627 }
628
629 /**
630 * Constructs an IndexWriter for the index in <code>path</code>.
631 * Text will be analyzed with <code>a</code>. If <code>create</code>
632 * is true, then a new, empty index will be created in
633 * <code>path</code>, replacing the index already there, if any.
634 *
635 * @param path the path to the index directory
636 * @param a the analyzer to use
637 * @param create <code>true</code> to create the index or overwrite
638 * the existing one; <code>false</code> to append to the existing
639 * index
640 * @throws CorruptIndexException if the index is corrupt
641 * @throws LockObtainFailedException if another writer
642 * has this index open (<code>write.lock</code> could not
643 * be obtained)
644 * @throws IOException if the directory cannot be read/written to, or
645 * if it does not exist and <code>create</code> is
646 * <code>false</code> or if there is any other low-level
647 * IO error
648 * @deprecated This constructor will be removed in the 3.0 release.
649 * Use {@link
650 * #IndexWriter(File,Analyzer,boolean,MaxFieldLength)}
651 * instead, and call {@link #commit()} when needed.
652 */
653 public IndexWriter(File path, Analyzer a, boolean create)
654 throws CorruptIndexException, LockObtainFailedException, IOException {
655 init(FSDirectory.getDirectory(path), a, create, true, null, true, DEFAULT_MAX_FIELD_LENGTH);
656 }
657
658 /**
659 * Constructs an IndexWriter for the index in <code>d</code>.
660 * Text will be analyzed with <code>a</code>. If <code>create</code>
661 * is true, then a new, empty index will be created in
662 * <code>d</code>, replacing the index already there, if any.
663 *
664 * <p><b>NOTE</b>: autoCommit (see <a
665 * href="#autoCommit">above</a>) is set to false with this
666 * constructor.
667 *
668 * @param d the index directory
669 * @param a the analyzer to use
670 * @param create <code>true</code> to create the index or overwrite
671 * the existing one; <code>false</code> to append to the existing
672 * index
673 * @param mfl Maximum field length: LIMITED, UNLIMITED, or user-specified
674 * via the MaxFieldLength constructor.
675 * @throws CorruptIndexException if the index is corrupt
676 * @throws LockObtainFailedException if another writer
677 * has this index open (<code>write.lock</code> could not
678 * be obtained)
679 * @throws IOException if the directory cannot be read/written to, or
680 * if it does not exist and <code>create</code> is
681 * <code>false</code> or if there is any other low-level
682 * IO error
683 */
684 public IndexWriter(Directory d, Analyzer a, boolean create, MaxFieldLength mfl)
685 throws CorruptIndexException, LockObtainFailedException, IOException {
686 init(d, a, create, false, null, false, mfl.getLimit());
687 }
688
689 /**
690 * Constructs an IndexWriter for the index in <code>d</code>.
691 * Text will be analyzed with <code>a</code>. If <code>create</code>
692 * is true, then a new, empty index will be created in
693 * <code>d</code>, replacing the index already there, if any.
694 *
695 * @param d the index directory
696 * @param a the analyzer to use
697 * @param create <code>true</code> to create the index or overwrite
698 * the existing one; <code>false</code> to append to the existing
699 * index
700 * @throws CorruptIndexException if the index is corrupt
701 * @throws LockObtainFailedException if another writer
702 * has this index open (<code>write.lock</code> could not
703 * be obtained)
704 * @throws IOException if the directory cannot be read/written to, or
705 * if it does not exist and <code>create</code> is
706 * <code>false</code> or if there is any other low-level
707 * IO error
708 * @deprecated This constructor will be removed in the 3.0 release.
709 * Use {@link #IndexWriter(Directory,Analyzer,boolean,MaxFieldLength)}
710 * instead, and call {@link #commit()} when needed.
711 */
712 public IndexWriter(Directory d, Analyzer a, boolean create)
713 throws CorruptIndexException, LockObtainFailedException, IOException {
714 init(d, a, create, false, null, true, DEFAULT_MAX_FIELD_LENGTH);
715 }
716
717 /**
718 * Constructs an IndexWriter for the index in
719 * <code>path</code>, first creating it if it does not
720 * already exist. Text will be analyzed with
721 * <code>a</code>.
722 *
723 * <p><b>NOTE</b>: autoCommit (see <a
724 * href="#autoCommit">above</a>) is set to false with this
725 * constructor.
726 *
727 * @param path the path to the index directory
728 * @param a the analyzer to use
729 * @param mfl Maximum field length: LIMITED, UNLIMITED, or user-specified
730 * via the MaxFieldLength constructor.
731 * @throws CorruptIndexException if the index is corrupt
732 * @throws LockObtainFailedException if another writer
733 * has this index open (<code>write.lock</code> could not
734 * be obtained)
735 * @throws IOException if the directory cannot be
736 * read/written to or if there is any other low-level
737 * IO error
738 */
739 public IndexWriter(String path, Analyzer a, MaxFieldLength mfl)
740 throws CorruptIndexException, LockObtainFailedException, IOException {
741 init(FSDirectory.getDirectory(path), a, true, null, false, mfl.getLimit());
742 }
743
744 /**
745 * Constructs an IndexWriter for the index in
746 * <code>path</code>, first creating it if it does not
747 * already exist. Text will be analyzed with
748 * <code>a</code>.
749 *
750 * @param path the path to the index directory
751 * @param a the analyzer to use
752 * @throws CorruptIndexException if the index is corrupt
753 * @throws LockObtainFailedException if another writer
754 * has this index open (<code>write.lock</code> could not
755 * be obtained)
756 * @throws IOException if the directory cannot be
757 * read/written to or if there is any other low-level
758 * IO error
759 * @deprecated This constructor will be removed in the 3.0 release.
760 * Use {@link #IndexWriter(String,Analyzer,MaxFieldLength)}
761 * instead, and call {@link #commit()} when needed.
762 */
763 public IndexWriter(String path, Analyzer a)
764 throws CorruptIndexException, LockObtainFailedException, IOException {
765 init(FSDirectory.getDirectory(path), a, true, null, true, DEFAULT_MAX_FIELD_LENGTH);
766 }
767
768 /**
769 * Constructs an IndexWriter for the index in
770 * <code>path</code>, first creating it if it does not
771 * already exist. Text will be analyzed with
772 * <code>a</code>.
773 *
774 * <p><b>NOTE</b>: autoCommit (see <a
775 * href="#autoCommit">above</a>) is set to false with this
776 * constructor.
777 *
778 * @param path the path to the index directory
779 * @param a the analyzer to use
780 * @param mfl Maximum field length: LIMITED, UNLIMITED, or user-specified
781 * via the MaxFieldLength constructor.
782 * @throws CorruptIndexException if the index is corrupt
783 * @throws LockObtainFailedException if another writer
784 * has this index open (<code>write.lock</code> could not
785 * be obtained)
786 * @throws IOException if the directory cannot be
787 * read/written to or if there is any other low-level
788 * IO error
789 */
790 public IndexWriter(File path, Analyzer a, MaxFieldLength mfl)
791 throws CorruptIndexException, LockObtainFailedException, IOException {
792 init(FSDirectory.getDirectory(path), a, true, null, false, mfl.getLimit());
793 }
794
795 /**
796 * Constructs an IndexWriter for the index in
797 * <code>path</code>, first creating it if it does not
798 * already exist. Text will be analyzed with
799 * <code>a</code>.
800 *
801 * @param path the path to the index directory
802 * @param a the analyzer to use
803 * @throws CorruptIndexException if the index is corrupt
804 * @throws LockObtainFailedException if another writer
805 * has this index open (<code>write.lock</code> could not
806 * be obtained)
807 * @throws IOException if the directory cannot be
808 * read/written to or if there is any other low-level
809 * IO error
810 * @deprecated This constructor will be removed in the 3.0 release.
811 * Use {@link #IndexWriter(File,Analyzer,MaxFieldLength)}
812 * instead, and call {@link #commit()} when needed.
813 */
814 public IndexWriter(File path, Analyzer a)
815 throws CorruptIndexException, LockObtainFailedException, IOException {
816 init(FSDirectory.getDirectory(path), a, true, null, true, DEFAULT_MAX_FIELD_LENGTH);
817 }
818
819 /**
820 * Constructs an IndexWriter for the index in
821 * <code>d</code>, first creating it if it does not
822 * already exist. Text will be analyzed with
823 * <code>a</code>.
824 *
825 * <p><b>NOTE</b>: autoCommit (see <a
826 * href="#autoCommit">above</a>) is set to false with this
827 * constructor.
828 *
829 * @param d the index directory
830 * @param a the analyzer to use
831 * @param mfl Maximum field length: LIMITED, UNLIMITED, or user-specified
832 * via the MaxFieldLength constructor.
833 * @throws CorruptIndexException if the index is corrupt
834 * @throws LockObtainFailedException if another writer
835 * has this index open (<code>write.lock</code> could not
836 * be obtained)
837 * @throws IOException if the directory cannot be
838 * read/written to or if there is any other low-level
839 * IO error
840 */
841 public IndexWriter(Directory d, Analyzer a, MaxFieldLength mfl)
842 throws CorruptIndexException, LockObtainFailedException, IOException {
843 init(d, a, false, null, false, mfl.getLimit());
844 }
845
846 /**
847 * Constructs an IndexWriter for the index in
848 * <code>d</code>, first creating it if it does not
849 * already exist. Text will be analyzed with
850 * <code>a</code>.
851 *
852 * @param d the index directory
853 * @param a the analyzer to use
854 * @throws CorruptIndexException if the index is corrupt
855 * @throws LockObtainFailedException if another writer
856 * has this index open (<code>write.lock</code> could not
857 * be obtained)
858 * @throws IOException if the directory cannot be
859 * read/written to or if there is any other low-level
860 * IO error
861 * @deprecated This constructor will be removed in the 3.0 release.
862 * Use {@link
863 * #IndexWriter(Directory,Analyzer,MaxFieldLength)}
864 * instead, and call {@link #commit()} when needed.
865 */
866 public IndexWriter(Directory d, Analyzer a)
867 throws CorruptIndexException, LockObtainFailedException, IOException {
868 init(d, a, false, null, true, DEFAULT_MAX_FIELD_LENGTH);
869 }
870
871 /**
872 * Constructs an IndexWriter for the index in
873 * <code>d</code>, first creating it if it does not
874 * already exist. Text will be analyzed with
875 * <code>a</code>.
876 *
877 * @param d the index directory
878 * @param autoCommit see <a href="#autoCommit">above</a>
879 * @param a the analyzer to use
880 * @throws CorruptIndexException if the index is corrupt
881 * @throws LockObtainFailedException if another writer
882 * has this index open (<code>write.lock</code> could not
883 * be obtained)
884 * @throws IOException if the directory cannot be
885 * read/written to or if there is any other low-level
886 * IO error
887 * @deprecated This constructor will be removed in the 3.0 release.
888 * Use {@link
889 * #IndexWriter(Directory,Analyzer,MaxFieldLength)}
890 * instead, and call {@link #commit()} when needed.
891 */
892 public IndexWriter(Directory d, boolean autoCommit, Analyzer a)
893 throws CorruptIndexException, LockObtainFailedException, IOException {
894 init(d, a, false, null, autoCommit, DEFAULT_MAX_FIELD_LENGTH);
895 }
896
897 /**
898 * Constructs an IndexWriter for the index in <code>d</code>.
899 * Text will be analyzed with <code>a</code>. If <code>create</code>
900 * is true, then a new, empty index will be created in
901 * <code>d</code>, replacing the index already there, if any.
902 *
903 * @param d the index directory
904 * @param autoCommit see <a href="#autoCommit">above</a>
905 * @param a the analyzer to use
906 * @param create <code>true</code> to create the index or overwrite
907 * the existing one; <code>false</code> to append to the existing
908 * index
909 * @throws CorruptIndexException if the index is corrupt
910 * @throws LockObtainFailedException if another writer
911 * has this index open (<code>write.lock</code> could not
912 * be obtained)
913 * @throws IOException if the directory cannot be read/written to, or
914 * if it does not exist and <code>create</code> is
915 * <code>false</code> or if there is any other low-level
916 * IO error
917 * @deprecated This constructor will be removed in the 3.0 release.
918 * Use {@link
919 * #IndexWriter(Directory,Analyzer,boolean,MaxFieldLength)}
920 * instead, and call {@link #commit()} when needed.
921 */
922 public IndexWriter(Directory d, boolean autoCommit, Analyzer a, boolean create)
923 throws CorruptIndexException, LockObtainFailedException, IOException {
924 init(d, a, create, false, null, autoCommit, DEFAULT_MAX_FIELD_LENGTH);
925 }
926
927 /**
928 * Expert: constructs an IndexWriter with a custom {@link
929 * IndexDeletionPolicy}, for the index in <code>d</code>,
930 * first creating it if it does not already exist. Text
931 * will be analyzed with <code>a</code>.
932 *
933 * <p><b>NOTE</b>: autoCommit (see <a
934 * href="#autoCommit">above</a>) is set to false with this
935 * constructor.
936 *
937 * @param d the index directory
938 * @param a the analyzer to use
939 * @param deletionPolicy see <a href="#deletionPolicy">above</a>
940 * @param mfl Maximum field length: LIMITED, UNLIMITED, or user-specified
 * via the MaxFieldLength constructor.
941 * @throws CorruptIndexException if the index is corrupt
942 * @throws LockObtainFailedException if another writer
943 * has this index open (<code>write.lock</code> could not
944 * be obtained)
945 * @throws IOException if the directory cannot be
946 * read/written to or if there is any other low-level
947 * IO error
948 */
949 public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
950 throws CorruptIndexException, LockObtainFailedException, IOException {
951 init(d, a, false, deletionPolicy, false, mfl.getLimit());
952 }
953
954 /**
955 * Expert: constructs an IndexWriter with a custom {@link
956 * IndexDeletionPolicy}, for the index in <code>d</code>,
957 * first creating it if it does not already exist. Text
958 * will be analyzed with <code>a</code>.
959 *
960 * @param d the index directory
961 * @param autoCommit see <a href="#autoCommit">above</a>
962 * @param a the analyzer to use
963 * @param deletionPolicy see <a href="#deletionPolicy">above</a>
964 * @throws CorruptIndexException if the index is corrupt
965 * @throws LockObtainFailedException if another writer
966 * has this index open (<code>write.lock</code> could not
967 * be obtained)
968 * @throws IOException if the directory cannot be
969 * read/written to or if there is any other low-level
970 * IO error
971 * @deprecated This constructor will be removed in the 3.0 release.
972 * Use {@link
973 * #IndexWriter(Directory,Analyzer,IndexDeletionPolicy,MaxFieldLength)}
974 * instead, and call {@link #commit()} when needed.
975 */
976 public IndexWriter(Directory d, boolean autoCommit, Analyzer a, IndexDeletionPolicy deletionPolicy)
977 throws CorruptIndexException, LockObtainFailedException, IOException {
978 init(d, a, false, deletionPolicy, autoCommit, DEFAULT_MAX_FIELD_LENGTH);
979 }
980
981 /**
982 * Expert: constructs an IndexWriter with a custom {@link
983 * IndexDeletionPolicy}, for the index in <code>d</code>.
984 * Text will be analyzed with <code>a</code>. If
985 * <code>create</code> is true, then a new, empty index
986 * will be created in <code>d</code>, replacing the index
987 * already there, if any.
988 *
989 * <p><b>NOTE</b>: autoCommit (see <a
990 * href="#autoCommit">above</a>) is set to false with this
991 * constructor.
992 *
993 * @param d the index directory
994 * @param a the analyzer to use
995 * @param create <code>true</code> to create the index or overwrite
996 * the existing one; <code>false</code> to append to the existing
997 * index
998 * @param deletionPolicy see <a href="#deletionPolicy">above</a>
999 * @param mfl Maximum field length: LIMITED, UNLIMITED, or user-specified
 * via the MaxFieldLength constructor.
1000 * @throws CorruptIndexException if the index is corrupt
1001 * @throws LockObtainFailedException if another writer
1002 * has this index open (<code>write.lock</code> could not
1003 * be obtained)
1004 * @throws IOException if the directory cannot be read/written to, or
1005 * if it does not exist and <code>create</code> is
1006 * <code>false</code> or if there is any other low-level
1007 * IO error
1008 */
1009 public IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
1010 throws CorruptIndexException, LockObtainFailedException, IOException {
1011 init(d, a, create, false, deletionPolicy, false, mfl.getLimit());
1012 }
1013
1014 /**
1015 * Expert: constructs an IndexWriter with a custom {@link
1016 * IndexDeletionPolicy}, for the index in <code>d</code>.
1017 * Text will be analyzed with <code>a</code>. If
1018 * <code>create</code> is true, then a new, empty index
1019 * will be created in <code>d</code>, replacing the index
1020 * already there, if any.
1021 *
1022 * @param d the index directory
1023 * @param autoCommit see <a href="#autoCommit">above</a>
1024 * @param a the analyzer to use
1025 * @param create <code>true</code> to create the index or overwrite
1026 * the existing one; <code>false</code> to append to the existing
1027 * index
1028 * @param deletionPolicy see <a href="#deletionPolicy">above</a>
1029 * @throws CorruptIndexException if the index is corrupt
1030 * @throws LockObtainFailedException if another writer
1031 * has this index open (<code>write.lock</code> could not
1032 * be obtained)
1033 * @throws IOException if the directory cannot be read/written to, or
1034 * if it does not exist and <code>create</code> is
1035 * <code>false</code> or if there is any other low-level
1036 * IO error
1037 * @deprecated This constructor will be removed in the 3.0 release.
1038 * Use {@link
1039 * #IndexWriter(Directory,Analyzer,boolean,IndexDeletionPolicy,MaxFieldLength)}
1040 * instead, and call {@link #commit()} when needed.
1041 */
1042 public IndexWriter(Directory d, boolean autoCommit, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy)
1043 throws CorruptIndexException, LockObtainFailedException, IOException {
1044 init(d, a, create, false, deletionPolicy, autoCommit, DEFAULT_MAX_FIELD_LENGTH);
1045 }
1046
1047 private void init(Directory d, Analyzer a, boolean closeDir, IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength)
1048 throws CorruptIndexException, LockObtainFailedException, IOException {
1049 if (IndexReader.indexExists(d)) {
1050 init(d, a, false, closeDir, deletionPolicy, autoCommit, maxFieldLength);
1051 } else {
1052 init(d, a, true, closeDir, deletionPolicy, autoCommit, maxFieldLength);
1053 }
1054 }
1055
1056 private void init(Directory d, Analyzer a, final boolean create, boolean closeDir, IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength)
1057 throws CorruptIndexException, LockObtainFailedException, IOException {
1058 this.closeDir = closeDir;
1059 directory = d;
1060 analyzer = a;
1061 setMessageID(defaultInfoStream);
1062 this.maxFieldLength = maxFieldLength;
1063
1064 if (create) {
1065 // Clear the write lock in case it's leftover:
1066 directory.clearLock(WRITE_LOCK_NAME);
1067 }
1068
1069 Lock writeLock = directory.makeLock(WRITE_LOCK_NAME);
1070 if (!writeLock.obtain(writeLockTimeout)) // obtain write lock
1071 throw new LockObtainFailedException("Index locked for write: " + writeLock);
1072 this.writeLock = writeLock; // save it
1073
1074 try {
1075 if (create) {
1076 // Try to read first. This is to allow create
1077 // against an index that's currently open for
1078 // searching. In this case we write the next
1079 // segments_N file with no segments:
1080 try {
1081 segmentInfos.read(directory);
1082 segmentInfos.clear();
1083 } catch (IOException e) {
1084 // Likely this means it's a fresh directory
1085 }
1086 segmentInfos.commit(directory);
1087 } else {
1088 segmentInfos.read(directory);
1089
1090 // We assume that this segments_N was previously
1091 // properly sync'd:
1092 for(int i=0;i<segmentInfos.size();i++) {
1093 final SegmentInfo info = segmentInfos.info(i);
1094 List files = info.files();
1095 for(int j=0;j<files.size();j++)
1096 synced.add(files.get(j));
1097 }
1098 }
1099
1100 this.autoCommit = autoCommit;
1101 setRollbackSegmentInfos(segmentInfos);
1102
1103 docWriter = new DocumentsWriter(directory, this);
1104 docWriter.setInfoStream(infoStream);
1105 docWriter.setMaxFieldLength(maxFieldLength);
1106
1107 // Default deleter (for backwards compatibility) is
1108 // KeepOnlyLastCommitDeleter:
1109 deleter = new IndexFileDeleter(directory,
1110 deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy,
1111 segmentInfos, infoStream, docWriter);
1112
1113 pushMaxBufferedDocs();
1114
1115 if (infoStream != null) {
1116 message("init: create=" + create);
1117 messageState();
1118 }
1119
1120 } catch (IOException e) {
1121 this.writeLock.release();
1122 this.writeLock = null;
1123 throw e;
1124 }
1125 }
1126
1127 private synchronized void setRollbackSegmentInfos(SegmentInfos infos) {
1128 rollbackSegmentInfos = (SegmentInfos) infos.clone();
1129 assert !hasExternalSegments(rollbackSegmentInfos);
1130 rollbackSegments = new HashMap();
1131 final int size = rollbackSegmentInfos.size();
1132 for(int i=0;i<size;i++)
1133 rollbackSegments.put(rollbackSegmentInfos.info(i), new Integer(i));
1134 }
1135
1136 /**
1137 * Expert: set the merge policy used by this writer.
1138 */
1139 public void setMergePolicy(MergePolicy mp) {
1140 ensureOpen();
1141 if (mp == null)
1142 throw new NullPointerException("MergePolicy must be non-null");
1143
1144 if (mergePolicy != mp)
1145 mergePolicy.close();
1146 mergePolicy = mp;
1147 pushMaxBufferedDocs();
1148 if (infoStream != null)
1149 message("setMergePolicy " + mp);
1150 }
1151
1152 /**
1153 * Expert: returns the current MergePolicy in use by this writer.
1154 * @see #setMergePolicy
1155 */
1156 public MergePolicy getMergePolicy() {
1157 ensureOpen();
1158 return mergePolicy;
1159 }
1160
1161 /**
1162 * Expert: set the merge scheduler used by this writer.
1163 */
1164 synchronized public void setMergeScheduler(MergeScheduler mergeScheduler) throws CorruptIndexException, IOException {
1165 ensureOpen();
1166 if (mergeScheduler == null)
1167 throw new NullPointerException("MergeScheduler must be non-null");
1168
1169 if (this.mergeScheduler != mergeScheduler) {
1170 finishMerges(true);
1171 this.mergeScheduler.close();
1172 }
1173 this.mergeScheduler = mergeScheduler;
1174 if (infoStream != null)
1175 message("setMergeScheduler " + mergeScheduler);
1176 }
1177
1178 /**
1179 * Expert: returns the current MergeScheduler in use by this
1180 * writer.
1181 * @see #setMergeScheduler
1182 */
1183 public MergeScheduler getMergeScheduler() {
1184 ensureOpen();
1185 return mergeScheduler;
1186 }
1187
1188 /** <p>Determines the largest segment (measured by
1189 * document count) that may be merged with other segments.
1190 * Small values (e.g., less than 10,000) are best for
1191 * interactive indexing, as this limits the length of
1192 * pauses while indexing to a few seconds. Larger values
1193 * are best for batched indexing and speedier
1194 * searches.</p>
1195 *
1196 * <p>The default value is {@link Integer#MAX_VALUE}.</p>
1197 *
1198 * <p>Note that this method is a convenience method: it
1199 * just calls mergePolicy.setMaxMergeDocs as long as
1200 * mergePolicy is an instance of {@link LogMergePolicy}.
1201 * Otherwise an IllegalArgumentException is thrown.</p>
1202 *
1203 * <p>The default merge policy ({@link
1204 * LogByteSizeMergePolicy}) also allows you to set this
1205 * limit by net size (in MB) of the segment, using {@link
1206 * LogByteSizeMergePolicy#setMaxMergeMB}.</p>
1207 */
1208 public void setMaxMergeDocs(int maxMergeDocs) {
1209 getLogMergePolicy().setMaxMergeDocs(maxMergeDocs);
1210 }
1211
1212 /**
1213 * <p>Returns the largest segment (measured by document
1214 * count) that may be merged with other segments.</p>
1215 *
1216 * <p>Note that this method is a convenience method: it
1217 * just calls mergePolicy.getMaxMergeDocs as long as
1218 * mergePolicy is an instance of {@link LogMergePolicy}.
1219 * Otherwise an IllegalArgumentException is thrown.</p>
1220 *
1221 * @see #setMaxMergeDocs
1222 */
1223 public int getMaxMergeDocs() {
1224 return getLogMergePolicy().getMaxMergeDocs();
1225 }
1226
1227 /**
1228 * The maximum number of terms that will be indexed for a single field in a
1229 * document. This limits the amount of memory required for indexing, so that
1230 * collections with very large files will not crash the indexing process by
1231 * running out of memory. This setting refers to the number of running terms,
1232 * not to the number of different terms.<p/>
1233 * <strong>Note:</strong> this silently truncates large documents, excluding from the
1234 * index all terms that occur further in the document. If you know your source
1235 * documents are large, be sure to set this value high enough to accommodate
1236 * the expected size. If you set it to Integer.MAX_VALUE, then the only limit
1237 * is your memory, but you should anticipate an OutOfMemoryError.<p/>
1238 * By default, no more than {@link #DEFAULT_MAX_FIELD_LENGTH} terms
1239 * will be indexed for a field.
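   *
   * <p>For example (illustrative only; raising the limit increases memory use):</p>
   * <pre>
   *   writer.setMaxFieldLength(Integer.MAX_VALUE);  // index every term of very large documents
   * </pre>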
1240 */
1241 public void setMaxFieldLength(int maxFieldLength) {
1242 ensureOpen();
1243 this.maxFieldLength = maxFieldLength;
1244 docWriter.setMaxFieldLength(maxFieldLength);
1245 if (infoStream != null)
1246 message("setMaxFieldLength " + maxFieldLength);
1247 }
1248
1249 /**
1250 * Returns the maximum number of terms that will be
1251 * indexed for a single field in a document.
1252 * @see #setMaxFieldLength
1253 */
1254 public int getMaxFieldLength() {
1255 ensureOpen();
1256 return maxFieldLength;
1257 }
1258
1259 /** Determines the minimal number of documents required
1260 * before the buffered in-memory documents are flushed as
1261 * a new Segment. Large values generally give faster
1262 * indexing.
1263 *
1264 * <p>When this is set, the writer will flush every
1265 * maxBufferedDocs added documents. Pass in {@link
1266 * #DISABLE_AUTO_FLUSH} to prevent triggering a flush due
1267 * to number of buffered documents. Note that if flushing
1268 * by RAM usage is also enabled, then the flush will be
1269 * triggered by whichever comes first.</p>
1270 *
1271 * <p>Disabled by default (writer flushes by RAM usage).</p>
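   *
   * <p>For example (the value below is only illustrative):</p>
   * <pre>
   *   writer.setMaxBufferedDocs(1000);  // flush after every 1,000 buffered documents
   * </pre>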
1272 *
1273 * @throws IllegalArgumentException if maxBufferedDocs is
1274 * enabled but smaller than 2, or if it would disable maxBufferedDocs
1275 * while ramBufferSize is already disabled
1276 * @see #setRAMBufferSizeMB
1277 */
1278 public void setMaxBufferedDocs(int maxBufferedDocs) {
1279 ensureOpen();
1280 if (maxBufferedDocs != DISABLE_AUTO_FLUSH && maxBufferedDocs < 2)
1281 throw new IllegalArgumentException(
1282 "maxBufferedDocs must at least be 2 when enabled");
1283 if (maxBufferedDocs == DISABLE_AUTO_FLUSH
1284 && getRAMBufferSizeMB() == DISABLE_AUTO_FLUSH)
1285 throw new IllegalArgumentException(
1286 "at least one of ramBufferSize and maxBufferedDocs must be enabled");
1287 docWriter.setMaxBufferedDocs(maxBufferedDocs);
1288 pushMaxBufferedDocs();
1289 if (infoStream != null)
1290 message("setMaxBufferedDocs " + maxBufferedDocs);
1291 }
1292
1293 /**
1294 * If we are flushing by doc count (not by RAM usage), and
1295 * using LogDocMergePolicy then push maxBufferedDocs down
1296 * as its minMergeDocs, to keep backwards compatibility.
1297 */
1298 private void pushMaxBufferedDocs() {
1299 if (docWriter.getMaxBufferedDocs() != DISABLE_AUTO_FLUSH) {
1300 final MergePolicy mp = mergePolicy;
1301 if (mp instanceof LogDocMergePolicy) {
1302 LogDocMergePolicy lmp = (LogDocMergePolicy) mp;
1303 final int maxBufferedDocs = docWriter.getMaxBufferedDocs();
1304 if (lmp.getMinMergeDocs() != maxBufferedDocs) {
1305 if (infoStream != null)
1306 message("now push maxBufferedDocs " + maxBufferedDocs + " to LogDocMergePolicy");
1307 lmp.setMinMergeDocs(maxBufferedDocs);
1308 }
1309 }
1310 }
1311 }
1312
1313 /**
1314 * Returns the number of buffered added documents that will
1315 * trigger a flush if enabled.
1316 * @see #setMaxBufferedDocs
1317 */
1318 public int getMaxBufferedDocs() {
1319 ensureOpen();
1320 return docWriter.getMaxBufferedDocs();
1321 }
1322
1323 /** Determines the amount of RAM that may be used for
1324 * buffering added documents before they are flushed as a
1325 * new Segment. Generally for faster indexing performance
1326 * it's best to flush by RAM usage instead of document
1327 * count and use as large a RAM buffer as you can.
1328 *
1329 * <p>When this is set, the writer will flush whenever
1330 * buffered documents use this much RAM. Pass in {@link
1331 * #DISABLE_AUTO_FLUSH} to prevent triggering a flush due
1332 * to RAM usage. Note that if flushing by document count
1333 * is also enabled, then the flush will be triggered by
1334 * whichever comes first.</p>
1335 *
1336 * <p> The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.</p>
1337 *
1338 * @throws IllegalArgumentException if ramBufferSize is
1339 * enabled but non-positive, or if it would disable ramBufferSize
1340 * while maxBufferedDocs is already disabled
1341 */
1342 public void setRAMBufferSizeMB(double mb) {
1343 if (mb != DISABLE_AUTO_FLUSH && mb <= 0.0)
1344 throw new IllegalArgumentException(
1345 "ramBufferSize should be > 0.0 MB when enabled");
1346 if (mb == DISABLE_AUTO_FLUSH && getMaxBufferedDocs() == DISABLE_AUTO_FLUSH)
1347 throw new IllegalArgumentException(
1348 "at least one of ramBufferSize and maxBufferedDocs must be enabled");
1349 docWriter.setRAMBufferSizeMB(mb);
1350 if (infoStream != null)
1351 message("setRAMBufferSizeMB " + mb);
1352 }
1353
1354 /**
1355 * Returns the value set by {@link #setRAMBufferSizeMB} if enabled.
1356 */
1357 public double getRAMBufferSizeMB() {
1358 return docWriter.getRAMBufferSizeMB();
1359 }
1360
1361 /**
1362 * <p>Determines the minimal number of delete terms required before the buffered
1363 * in-memory delete terms are applied and flushed. If there are documents
1364 * buffered in memory at the time, they are merged and a new segment is
1365 * created.</p>
1366 *
1367 * <p>Disabled by default (writer flushes by RAM usage).</p>
1368 *
1369 * @throws IllegalArgumentException if maxBufferedDeleteTerms
1370 * is enabled but smaller than 1
1371 * @see #setRAMBufferSizeMB
1372 */
1373 public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
1374 ensureOpen();
1375 if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH
1376 && maxBufferedDeleteTerms < 1)
1377 throw new IllegalArgumentException(
1378 "maxBufferedDeleteTerms must at least be 1 when enabled");
1379 docWriter.setMaxBufferedDeleteTerms(maxBufferedDeleteTerms);
1380 if (infoStream != null)
1381 message("setMaxBufferedDeleteTerms " + maxBufferedDeleteTerms);
1382 }
1383
1384 /**
1385 * Returns the number of buffered deleted terms that will
1386 * trigger a flush if enabled.
1387 * @see #setMaxBufferedDeleteTerms
1388 */
1389 public int getMaxBufferedDeleteTerms() {
1390 ensureOpen();
1391 return docWriter.getMaxBufferedDeleteTerms();
1392 }
1393
1394 /** Determines how often segment indices are merged by addDocument(). With
1395 * smaller values, less RAM is used while indexing, and searches on
1396 * unoptimized indices are faster, but indexing speed is slower. With larger
1397 * values, more RAM is used during indexing, and while searches on unoptimized
1398 * indices are slower, indexing is faster. Thus larger values (> 10) are best
1399 * for batch index creation, and smaller values (< 10) for indices that are
1400 * interactively maintained.
1401 *
1402 * <p>Note that this method is a convenience method: it
1403 * just calls mergePolicy.setMergeFactor as long as
1404 * mergePolicy is an instance of {@link LogMergePolicy}.
1405 * Otherwise an IllegalArgumentException is thrown.</p>
1406 *
1407 * <p>This must never be less than 2. The default value is 10.
1408 */
1409 public void setMergeFactor(int mergeFactor) {
1410 getLogMergePolicy().setMergeFactor(mergeFactor);
1411 }
1412
1413 /**
1414 * <p>Returns the number of segments that are merged at
1415 * once and also controls the total number of segments
1416 * allowed to accumulate in the index.</p>
1417 *
1418 * <p>Note that this method is a convenience method: it
1419 * just calls mergePolicy.getMergeFactor as long as
1420 * mergePolicy is an instance of {@link LogMergePolicy}.
1421 * Otherwise an IllegalArgumentException is thrown.</p>
1422 *
1423 * @see #setMergeFactor
1424 */
1425 public int getMergeFactor() {
1426 return getLogMergePolicy().getMergeFactor();
1427 }
1428
1429 /**
1430 * Expert: returns max delay inserted before syncing a
1431 * commit point. On Windows, at least, pausing before
1432 * syncing can increase net indexing throughput. The
1433 * delay is variable based on size of the segment's files,
1434 * and is only inserted when using
1435 * ConcurrentMergeScheduler for merges.
1436 * @deprecated This will be removed in 3.0, when
1437 * autoCommit=true is removed from IndexWriter.
1438 */
1439 public double getMaxSyncPauseSeconds() {
1440 return maxSyncPauseSeconds;
1441 }
1442
1443 /**
1444 * Expert: sets the max delay before syncing a commit
1445 * point.
1446 * @see #getMaxSyncPauseSeconds
1447 * @deprecated This will be removed in 3.0, when
1448 * autoCommit=true is removed from IndexWriter.
1449 */
1450 public void setMaxSyncPauseSeconds(double seconds) {
1451 maxSyncPauseSeconds = seconds;
1452 }
1453
1454 /** If non-null, this will be the default infoStream used
1455 * by a newly instantiated IndexWriter.
1456 * @see #setInfoStream
1457 */
1458 public static void setDefaultInfoStream(PrintStream infoStream) {
1459 IndexWriter.defaultInfoStream = infoStream;
1460 }
1461
1462 /**
1463 * Returns the current default infoStream for newly
1464 * instantiated IndexWriters.
1465 * @see #setDefaultInfoStream
1466 */
1467 public static PrintStream getDefaultInfoStream() {
1468 return IndexWriter.defaultInfoStream;
1469 }
1470
1471 /** If non-null, information about merges, deletes and a
1472 * message when maxFieldLength is reached will be printed
1473 * to this.
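*
* <p>For example, a sketch that echoes diagnostics to the console:</p>
*
* <pre>
* writer.setInfoStream(System.out);
* </pre>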
1474 */
1475 public void setInfoStream(PrintStream infoStream) {
1476 ensureOpen();
1477 setMessageID(infoStream);
1478 docWriter.setInfoStream(infoStream);
1479 deleter.setInfoStream(infoStream);
1480 if (infoStream != null)
1481 messageState();
1482 }
1483
1484 private void messageState() {
1485 message("setInfoStream: dir=" + directory +
1486 " autoCommit=" + autoCommit +
1487 " mergePolicy=" + mergePolicy +
1488 " mergeScheduler=" + mergeScheduler +
1489 " ramBufferSizeMB=" + docWriter.getRAMBufferSizeMB() +
1490 " maxBufferedDocs=" + docWriter.getMaxBufferedDocs() +
1491 " maxBufferedDeleteTerms=" + docWriter.getMaxBufferedDeleteTerms() +
1492 " maxFieldLength=" + maxFieldLength +
1493 " index=" + segString());
1494 }
1495
1496 /**
1497 * Returns the current infoStream in use by this writer.
1498 * @see #setInfoStream
1499 */
1500 public PrintStream getInfoStream() {
1501 ensureOpen();
1502 return infoStream;
1503 }
1504
1505 /**
1506 * Sets the maximum time to wait for a write lock (in milliseconds) for this instance of IndexWriter.
1507 * @see #setDefaultWriteLockTimeout to change the default value for all instances of IndexWriter.
1508 */
1509 public void setWriteLockTimeout(long writeLockTimeout) {
1510 ensureOpen();
1511 this.writeLockTimeout = writeLockTimeout;
1512 }
1513
1514 /**
1515 * Returns allowed timeout when acquiring the write lock.
1516 * @see #setWriteLockTimeout
1517 */
1518 public long getWriteLockTimeout() {
1519 ensureOpen();
1520 return writeLockTimeout;
1521 }
1522
1523 /**
1524 * Sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in
1525 * milliseconds).
1526 */
1527 public static void setDefaultWriteLockTimeout(long writeLockTimeout) {
1528 IndexWriter.WRITE_LOCK_TIMEOUT = writeLockTimeout;
1529 }
1530
1531 /**
1532 * Returns default write lock timeout for newly
1533 * instantiated IndexWriters.
1534 * @see #setDefaultWriteLockTimeout
1535 */
1536 public static long getDefaultWriteLockTimeout() {
1537 return IndexWriter.WRITE_LOCK_TIMEOUT;
1538 }
1539
1540 /**
1541 * Commits all changes to an index and closes all
1542 * associated files. Note that this may be a costly
1543 * operation, so, try to re-use a single writer instead of
1544 * closing and opening a new one. See {@link #commit()} for
1545 * caveats about write caching done by some IO devices.
1546 *
1547 * <p> If an Exception is hit during close, eg due to disk
1548 * full or some other reason, then both the on-disk index
1549 * and the internal state of the IndexWriter instance will
1550 * be consistent. However, the close will not be complete
1551 * even though part of it (flushing buffered documents)
1552 * may have succeeded, so the write lock will still be
1553 * held.</p>
1554 *
1555 * <p> If you can correct the underlying cause (eg free up
1556 * some disk space) then you can call close() again.
1557 * Failing that, if you want to force the write lock to be
1558 * released (dangerous, because you may then lose buffered
1559 * docs in the IndexWriter instance) then you can do
1560 * something like this:</p>
1561 *
1562 * <pre>
1563 * try {
1564 * writer.close();
1565 * } finally {
1566 * if (IndexWriter.isLocked(directory)) {
1567 * IndexWriter.unlock(directory);
1568 * }
1569 * }
1570 * </pre>
1571 *
1572 * after which, you must be certain not to use the writer
1573 * instance anymore.</p>
1574 * @throws CorruptIndexException if the index is corrupt
1575 * @throws IOException if there is a low-level IO error
1576 */
1577 public void close() throws CorruptIndexException, IOException {
1578 close(true);
1579 }
1580
1581 /**
1582 * Closes the index with or without waiting for currently
1583 * running merges to finish. This is only meaningful when
1584 * using a MergeScheduler that runs merges in background
1585 * threads.
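*
* <p>For example, a sketch that closes without waiting for running
* background merges to complete (pending merges are asked to abort):</p>
*
* <pre>
* writer.close(false);
* </pre>
*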
1586 * @param waitForMerges if true, this call will block
1587 * until all merges complete; else, it will ask all
1588 * running merges to abort, wait until those merges have
1589 * finished (which should be at most a few seconds), and
1590 * then return.
1591 */
1592 public void close(boolean waitForMerges) throws CorruptIndexException, IOException {
1593
1594 // Ensure that only one thread actually gets to do the closing:
1595 if (shouldClose()) {
1596 // If any methods have hit OutOfMemoryError, then abort
1597 // on close, in case the internal state of IndexWriter
1598 // or DocumentsWriter is corrupt
1599 if (hitOOM)
1600 rollbackInternal();
1601 else
1602 closeInternal(waitForMerges);
1603 }
1604 }
1605
1606 // Returns true if this thread should attempt to close, or
1607 // false if IndexWriter is now closed; else, waits until
1608 // another thread finishes closing
1609 synchronized private boolean shouldClose() {
1610 while(true) {
1611 if (!closed) {
1612 if (!closing) {
1613 closing = true;
1614 return true;
1615 } else {
1616 // Another thread is presently trying to close;
1617 // wait until it finishes one way (closes
1618 // successfully) or another (fails to close)
1619 doWait();
1620 }
1621 } else
1622 return false;
1623 }
1624 }
1625
1626 private void closeInternal(boolean waitForMerges) throws CorruptIndexException, IOException {
1627
1628 docWriter.pauseAllThreads();
1629
1630 try {
1631 if (infoStream != null)
1632 message("now flush at close");
1633
1634 docWriter.close();
1635
1636 // Only allow a new merge to be triggered if we are
1637 // going to wait for merges:
1638 flush(waitForMerges, true, true);
1639
1640 if (waitForMerges)
1641 // Give merge scheduler last chance to run, in case
1642 // any pending merges are waiting:
1643 mergeScheduler.merge(this);
1644
1645 mergePolicy.close();
1646
1647 finishMerges(waitForMerges);
1648
1649 mergeScheduler.close();
1650
1651 if (infoStream != null)
1652 message("now call final commit()");
1653
1654 commit(0);
1655
1656 if (infoStream != null)
1657 message("at close: " + segString());
1658
1659 synchronized(this) {
1660 docWriter = null;
1661 deleter.close();
1662 }
1663
1664 if (closeDir)
1665 directory.close();
1666
1667 if (writeLock != null) {
1668 writeLock.release(); // release write lock
1669 writeLock = null;
1670 }
1671 synchronized(this) {
1672 closed = true;
1673 }
1674 } catch (OutOfMemoryError oom) {
1675 hitOOM = true;
1676 throw oom;
1677 } finally {
1678 synchronized(this) {
1679 closing = false;
1680 notifyAll();
1681 if (!closed) {
1682 if (docWriter != null)
1683 docWriter.resumeAllThreads();
1684 if (infoStream != null)
1685 message("hit exception while closing");
1686 }
1687 }
1688 }
1689 }
1690
1691 /** Tells the docWriter to close its currently open shared
1692 * doc stores (stored fields & vectors files).
1693 * Return value specifies whether new doc store files are compound or not.
1694 */
1695 private synchronized boolean flushDocStores() throws IOException {
1696
1697 boolean useCompoundDocStore = false;
1698
1699 String docStoreSegment;
1700
1701 boolean success = false;
1702 try {
1703 docStoreSegment = docWriter.closeDocStore();
1704 success = true;
1705 } finally {
1706 if (!success) {
1707 if (infoStream != null)
1708 message("hit exception closing doc store segment");
1709 }
1710 }
1711
1712 useCompoundDocStore = mergePolicy.useCompoundDocStore(segmentInfos);
1713
1714 if (useCompoundDocStore && docStoreSegment != null && docWriter.closedFiles().size() != 0) {
1715 // Now build compound doc store file
1716
1717 success = false;
1718
1719 final int numSegments = segmentInfos.size();
1720 final String compoundFileName = docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION;
1721
1722 try {
1723 CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, compoundFileName);
1724 final Iterator it = docWriter.closedFiles().iterator();
1725 while(it.hasNext())
1726 cfsWriter.addFile((String) it.next());
1727
1728 // Perform the merge
1729 cfsWriter.close();
1730 success = true;
1731
1732 } finally {
1733 if (!success) {
1734 if (infoStream != null)
1735 message("hit exception building compound file doc store for segment " + docStoreSegment);
1736 deleter.deleteFile(compoundFileName);
1737 }
1738 }
1739
1740 for(int i=0;i<numSegments;i++) {
1741 SegmentInfo si = segmentInfos.info(i);
1742 if (si.getDocStoreOffset() != -1 &&
1743 si.getDocStoreSegment().equals(docStoreSegment))
1744 si.setDocStoreIsCompoundFile(true);
1745 }
1746
1747 checkpoint();
1748
1749 // In case the files we just merged into a CFS were
1750 // not previously checkpointed:
1751 deleter.deleteNewFiles(docWriter.closedFiles());
1752 }
1753
1754 return useCompoundDocStore;
1755 }
1756
1757 /** Release the write lock, if needed. */
1758 protected void finalize() throws Throwable {
1759 try {
1760 if (writeLock != null) {
1761 writeLock.release(); // release write lock
1762 writeLock = null;
1763 }
1764 } finally {
1765 super.finalize();
1766 }
1767 }
1768
1769 /** Returns the Directory used by this index. */
1770 public Directory getDirectory() {
1771 // Pass false because the flush during closing calls getDirectory
1772 ensureOpen(false);
1773 return directory;
1774 }
1775
1776 /** Returns the analyzer used by this index. */
1777 public Analyzer getAnalyzer() {
1778 ensureOpen();
1779 return analyzer;
1780 }
1781
1782 /** Returns the number of documents currently in this
1783 * index, not counting deletions.
1784 * @deprecated Please use {@link #maxDoc()} (same as this
1785 * method) or {@link #numDocs()} (also takes deletions
1786 * into account), instead. */
1787 public synchronized int docCount() {
1788 ensureOpen();
1789 return maxDoc();
1790 }
1791
1792 /** Returns total number of docs in this index, including
1793 * docs not yet flushed (still in the RAM buffer),
1794 * not counting deletions.
1795 * @see #numDocs */
1796 public synchronized int maxDoc() {
1797 int count;
1798 if (docWriter != null)
1799 count = docWriter.getNumDocsInRAM();
1800 else
1801 count = 0;
1802
1803 for (int i = 0; i < segmentInfos.size(); i++)
1804 count += segmentInfos.info(i).docCount;
1805 return count;
1806 }
1807
1808 /** Returns total number of docs in this index, including
1809 * docs not yet flushed (still in the RAM buffer), and
1810 * taking deletions into account. <b>NOTE:</b> buffered deletions
1811 * are not counted. If you really need these to be
1812 * counted you should call {@link #commit()} first.
1813 * @see #maxDoc */
1814 public synchronized int numDocs() throws IOException {
1815 int count;
1816 if (docWriter != null)
1817 count = docWriter.getNumDocsInRAM();
1818 else
1819 count = 0;
1820
1821 for (int i = 0; i < segmentInfos.size(); i++) {
1822 final SegmentInfo info = segmentInfos.info(i);
1823 count += info.docCount - info.getDelCount();
1824 }
1825 return count;
1826 }
1827
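/** Returns true if this index has deletions, counting both
* deletions buffered in this writer and deletions already
* flushed to the index. */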
1828 public synchronized boolean hasDeletions() throws IOException {
1829 ensureOpen();
1830 if (docWriter.hasDeletes())
1831 return true;
1832 for (int i = 0; i < segmentInfos.size(); i++)
1833 if (segmentInfos.info(i).hasDeletions())
1834 return true;
1835 return false;
1836 }
1837
1838 /**
1839 * The maximum number of terms that will be indexed for a single field in a
1840 * document. This limits the amount of memory required for indexing, so that
1841 * collections with very large files will not crash the indexing process by
1842 * running out of memory.<p/>
1843 * Note that this effectively truncates large documents, excluding from the
1844 * index terms that occur further in the document. If you know your source
1845 * documents are large, be sure to set this value high enough to accommodate
1846 * the expected size. If you set it to Integer.MAX_VALUE, then the only limit
1847 * is your memory, but you should anticipate an OutOfMemoryError.<p/>
1848 * By default, no more than 10,000 terms will be indexed for a field.
1849 *
1850 * @see MaxFieldLength
1851 */
1852 private int maxFieldLength;
1853
1854 /**
1855 * Adds a document to this index. If the document contains more than
1856 * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
1857 * discarded.
1858 *
1859 * <p> Note that if an Exception is hit (for example disk full)
1860 * then the index will be consistent, but this document
1861 * may not have been added. Furthermore, it's possible
1862 * the index will have one segment in non-compound format
1863 * even when using compound files (when a merge has
1864 * partially succeeded).</p>
1865 *
1866 * <p> This method periodically flushes pending documents
1867 * to the Directory (see <a href="#flush">above</a>), and
1868 * also periodically triggers segment merges in the index
1869 * according to the {@link MergePolicy} in use.</p>
1870 *
1871 * <p>Merges temporarily consume space in the
1872 * directory. The amount of space required is up to 1X the
1873 * size of all segments being merged, when no
1874 * readers/searchers are open against the index, and up to
1875 * 2X the size of all segments being merged when
1876 * readers/searchers are open against the index (see
1877 * {@link #optimize()} for details). The sequence of
1878 * primitive merge operations performed is governed by the
1879 * merge policy.
1880 *
1881 * <p>Note that each term in the document can be no longer
1882 * than 16383 characters, otherwise an
1883 * IllegalArgumentException will be thrown.</p>
1884 *
1885 * <p>Note that it's possible to create an invalid Unicode
1886 * string in java if a UTF16 surrogate pair is malformed.
1887 * In this case, the invalid characters are silently
1888 * replaced with the Unicode replacement character
1889 * U+FFFD.</p>
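*
* <p>A minimal usage sketch (the field name and flags below are
* illustrative only):</p>
*
* <pre>
* Document doc = new Document();
* doc.add(new Field("contents", "some text to index",
* Field.Store.NO, Field.Index.TOKENIZED));
* writer.addDocument(doc);
* </pre>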
1890 *
1891 * @throws CorruptIndexException if the index is corrupt
1892 * @throws IOException if there is a low-level IO error
1893 */
1894 public void addDocument(Document doc) throws CorruptIndexException, IOException {
1895 addDocument(doc, analyzer);
1896 }
1897
1898 /**
1899 * Adds a document to this index, using the provided analyzer instead of the
1900 * value of {@link #getAnalyzer()}. If the document contains more than
1901 * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
1902 * discarded.
1903 *
1904 * <p>See {@link #addDocument(Document)} for details on
1905 * index and IndexWriter state after an Exception, and
1906 * flushing/merging temporary free space requirements.</p>
1907 *
1908 * @throws CorruptIndexException if the index is corrupt
1909 * @throws IOException if there is a low-level IO error
1910 */
1911 public void addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException {
1912 ensureOpen();
1913 boolean doFlush = false;
1914 boolean success = false;
1915 try {
1916 try {
1917 doFlush = docWriter.addDocument(doc, analyzer);
1918 success = true;
1919 } finally {
1920 if (!success) {
1921
1922 if (infoStream != null)
1923 message("hit exception adding document");
1924
1925 synchronized (this) {
1926 // If docWriter has some aborted files that were
1927 // never incref'd, then we clean them up here
1928 if (docWriter != null) {
1929 final Collection files = docWriter.abortedFiles();
1930 if (files != null)
1931 deleter.deleteNewFiles(files);
1932 }
1933 }
1934 }
1935 }
1936 if (doFlush)
1937 flush(true, false, false);
1938 } catch (OutOfMemoryError oom) {
1939 hitOOM = true;
1940 throw oom;
1941 }
1942 }
1943
1944 /**
1945 * Deletes the document(s) containing <code>term</code>.
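*
* <p>For example, assuming documents carry a unique
* <code>id</code> field (a hypothetical schema):</p>
*
* <pre>
* writer.deleteDocuments(new Term("id", "42"));
* </pre>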
1946 * @param term the term to identify the documents to be deleted
1947 * @throws CorruptIndexException if the index is corrupt
1948 * @throws IOException if there is a low-level IO error
1949 */
1950 public void deleteDocuments(Term term) throws CorruptIndexException, IOException {
1951 ensureOpen();
1952 try {
1953 boolean doFlush = docWriter.bufferDeleteTerm(term);
1954 if (doFlush)
1955 flush(true, false, false);
1956 } catch (OutOfMemoryError oom) {
1957 hitOOM = true;
1958 throw oom;
1959 }
1960 }
1961
1962 /**
1963 * Deletes the document(s) containing any of the
1964 * terms. All deletes are flushed at the same time.
1965 * @param terms array of terms to identify the documents
1966 * to be deleted
1967 * @throws CorruptIndexException if the index is corrupt
1968 * @throws IOException if there is a low-level IO error
1969 */
1970 public void deleteDocuments(Term[] terms) throws CorruptIndexException, IOException {
1971 ensureOpen();
1972 try {
1973 boolean doFlush = docWriter.bufferDeleteTerms(terms);
1974 if (doFlush)
1975 flush(true, false, false);
1976 } catch (OutOfMemoryError oom) {
1977 hitOOM = true;
1978 throw oom;
1979 }
1980 }
1981
1982 /**
1983 * Deletes the document(s) matching the provided query.
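*
* <p>For example, using a {@link org.apache.lucene.search.TermQuery}
* over a hypothetical <code>category</code> field:</p>
*
* <pre>
* writer.deleteDocuments(new TermQuery(new Term("category", "obsolete")));
* </pre>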
1984 * @param query the query to identify the documents to be deleted
1985 * @throws CorruptIndexException if the index is corrupt
1986 * @throws IOException if there is a low-level IO error
1987 */
1988 public void deleteDocuments(Query query) throws CorruptIndexException, IOException {
1989 ensureOpen();
1990 boolean doFlush = docWriter.bufferDeleteQuery(query);
1991 if (doFlush)
1992 flush(true, false, false);
1993 }
1994
1995 /**
1996 * Deletes the document(s) matching any of the provided queries.
1997 * All deletes are flushed at the same time.
1998 * @param queries array of queries to identify the documents
1999 * to be deleted
2000 * @throws CorruptIndexException if the index is corrupt
2001 * @throws IOException if there is a low-level IO error
2002 */
2003 public void deleteDocuments(Query[] queries) throws CorruptIndexException, IOException {
2004 ensureOpen();
2005 boolean doFlush = docWriter.bufferDeleteQueries(queries);
2006 if (doFlush)
2007 flush(true, false, false);
2008 }
2009
2010 /**
2011 * Updates a document by first deleting the document(s)
2012 * containing <code>term</code> and then adding the new
2013 * document. The delete and then add are atomic as seen
2014 * by a reader on the same index (flush may happen only after
2015 * the add).
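*
* <p>For example, assuming a unique <code>id</code> field identifies
* each document (both the field and <code>newVersionOfDoc</code> are
* hypothetical):</p>
*
* <pre>
* writer.updateDocument(new Term("id", "42"), newVersionOfDoc);
* </pre>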
2016 * @param term the term to identify the document(s) to be
2017 * deleted
2018 * @param doc the document to be added
2019 * @throws CorruptIndexException if the index is corrupt
2020 * @throws IOException if there is a low-level IO error
2021 */
2022 public void updateDocument(Term term, Document doc) throws CorruptIndexException, IOException {
2023 ensureOpen();
2024 updateDocument(term, doc, getAnalyzer());
2025 }
2026
2027 /**
2028 * Updates a document by first deleting the document(s)
2029 * containing <code>term</code> and then adding the new
2030 * document. The delete and then add are atomic as seen
2031 * by a reader on the same index (flush may happen only after
2032 * the add).
2033 * @param term the term to identify the document(s) to be
2034 * deleted
2035 * @param doc the document to be added
2036 * @param analyzer the analyzer to use when analyzing the document
2037 * @throws CorruptIndexException if the index is corrupt
2038 * @throws IOException if there is a low-level IO error
2039 */
2040 public void updateDocument(Term term, Document doc, Analyzer analyzer)
2041 throws CorruptIndexException, IOException {
2042 ensureOpen();
2043 try {
2044 boolean doFlush = false;
2045 boolean success = false;
2046 try {
2047 doFlush = docWriter.updateDocument(term, doc, analyzer);
2048 success = true;
2049 } finally {
2050 if (!success) {
2051
2052 if (infoStream != null)
2053 message("hit exception updating document");
2054
2055 synchronized (this) {
2056 // If docWriter has some aborted files that were
2057 // never incref'd, then we clean them up here
2058 final Collection files = docWriter.abortedFiles();
2059 if (files != null)
2060 deleter.deleteNewFiles(files);
2061 }
2062 }
2063 }
2064 if (doFlush)
2065 flush(true, false, false);
2066 } catch (OutOfMemoryError oom) {
2067 hitOOM = true;
2068 throw oom;
2069 }
2070 }
2071
2072 // for test purposes
2073 final synchronized int getSegmentCount(){
2074 return segmentInfos.size();
2075 }
2076
2077 // for test purposes
2078 final synchronized int getNumBufferedDocuments(){
2079 return docWriter.getNumDocsInRAM();
2080 }
2081
2082 // for test purposes
2083 final synchronized int getDocCount(int i) {
2084 if (i >= 0 && i < segmentInfos.size()) {
2085 return segmentInfos.info(i).docCount;
2086 } else {
2087 return -1;
2088 }
2089 }
2090
2091 // for test purposes
2092 final synchronized int getFlushCount() {
2093 return flushCount;
2094 }
2095
2096 // for test purposes
2097 final synchronized int getFlushDeletesCount() {
2098 return flushDeletesCount;
2099 }
2100
2101 final String newSegmentName() {
2102 // Cannot synchronize on IndexWriter because that causes
2103 // deadlock
2104 synchronized(segmentInfos) {
2105 // Important to increment changeCount so that the
2106 // segmentInfos is written on close. Otherwise we
2107 // could close, re-open and re-return the same segment
2108 // name that was previously returned which can cause
2109 // problems at least with ConcurrentMergeScheduler.
2110 changeCount++;
2111 return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX);
2112 }
2113 }
2114
2115 /** If non-null, information about merges will be printed to this.
2116 */
2117 private PrintStream infoStream = null;
2118 private static PrintStream defaultInfoStream = null;
2119
2120 /**
2121 * Requests an "optimize" operation on an index, priming the index
2122 * for the fastest available search. Traditionally this has meant
2123 * merging all segments into a single segment as is done in the
2124 * default merge policy, but individual merge policies may implement
2125 * optimize in different ways.
2126 *
2127 * @see LogMergePolicy#findMergesForOptimize
2128 *
2129 * <p>It is recommended that this method be called upon completion of indexing. In
2130 * environments with frequent updates, optimize is best done during low volume times, if at all.
2131 *
2132 * </p>
2133 * <p>See http://www.gossamer-threads.com/lists/lucene/java-dev/47895 for more discussion. </p>
2134 *
2135 * <p>Note that this can require substantial temporary free
2136 * space in the Directory (see <a target="_top"
2137 * href="http://issues.apache.org/jira/browse/LUCENE-764">LUCENE-764</a>
2138 * for details):</p>
2139 *
2140 * <ul>
2141 * <li>
2142 *
2143 * <p>If no readers/searchers are open against the index,
2144 * then free space required is up to 1X the total size of
2145 * the starting index. For example, if the starting
2146 * index is 10 GB, then you must have up to 10 GB of free
2147 * space before calling optimize.</p>
2148 *
2149 * <li>
2150 *
2151 * <p>If readers/searchers are using the index, then free
2152 * space required is up to 2X the size of the starting
2153 * index. This is because in addition to the 1X used by
2154 * optimize, the original 1X of the starting index is
2155 * still consuming space in the Directory as the readers
2156 * are holding the segments files open. Even on Unix,
2157 * where it will appear as if the files are gone ("ls"
2158 * won't list them), they still consume storage due to
2159 * "delete on last close" semantics.</p>
2160 *
2161 * <p>Furthermore, if some but not all readers re-open
2162 * while the optimize is underway, this will cause > 2X
2163 * temporary space to be consumed as those new readers
2164 * will then hold open the partially optimized segments at
2165 * that time. It is best not to re-open readers while
2166 * optimize is running.</p>
2167 *
2168 * </ul>
2169 *
2170 * <p>The actual temporary usage could be much less than
2171 * these figures (it depends on many factors).</p>
2172 *
2173 * <p>In general, once the optimize completes, the total size of the
2174 * index will be less than the size of the starting index.
2175 * It could be quite a bit smaller (if there were many
2176 * pending deletes) or just slightly smaller.</p>
2177 *
2178 * <p>If an Exception is hit during optimize(), for example
2179 * due to disk full, the index will not be corrupt and no
2180 * documents will have been lost. However, it may have
2181 * been partially optimized (some segments were merged but
2182 * not all), and it's possible that one of the segments in
2183 * the index will be in non-compound format even when
2184 * using compound file format. This will occur when the
2185 * Exception is hit during conversion of the segment into
2186 * compound format.</p>
2187 *
2188 * <p>This call will optimize those segments present in
2189 * the index when the call started. If other threads are
2190 * still adding documents and flushing segments, those
2191 * newly created segments will not be optimized unless you
2192 * call optimize again.</p>
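*
* <p>A typical sequence at the end of a batch indexing run (a sketch):</p>
*
* <pre>
* writer.optimize();
* writer.close();
* </pre>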
2193 *
2194 * @throws CorruptIndexException if the index is corrupt
2195 * @throws IOException if there is a low-level IO error
2196 */
2197 public void optimize() throws CorruptIndexException, IOException {
2198 optimize(true);
2199 }
2200
2201 /**
2202 * Optimize the index down to <= maxNumSegments. If
2203 * maxNumSegments==1 then this is the same as {@link
2204 * #optimize()}.
2205 * @param maxNumSegments maximum number of segments left
2206 * in the index after optimization finishes
2207 */
2208 public void optimize(int maxNumSegments) throws CorruptIndexException, IOException {
2209 optimize(maxNumSegments, true);
2210 }
2211
2212 /** Just like {@link #optimize()}, except you can specify
2213 * whether the call should block until the optimize
2214 * completes. This is only meaningful with a
2215 * {@link MergeScheduler} that is able to run merges in
2216 * background threads. */
2217 public void optimize(boolean doWait) throws CorruptIndexException, IOException {
2218 optimize(1, doWait);
2219 }
2220
2221 /** Just like {@link #optimize(int)}, except you can
2222 * specify whether the call should block until the
2223 * optimize completes. This is only meaningful with a
2224 * {@link MergeScheduler} that is able to run merges in
2225 * background threads. */
2226 public void optimize(int maxNumSegments, boolean doWait) throws CorruptIndexException, IOException {
2227 ensureOpen();
2228
2229 if (maxNumSegments < 1)
2230 throw new IllegalArgumentException("maxNumSegments must be >= 1; got " + maxNumSegments);
2231
2232 if (infoStream != null)
2233 message("optimize: index now " + segString());
2234
2235 flush(true, false, true);
2236
2237 synchronized(this) {
2238 resetMergeExceptions();
2239 segmentsToOptimize = new HashSet();
2240 final int numSegments = segmentInfos.size();
2241 for(int i=0;i<numSegments;i++)
2242 segmentsToOptimize.add(segmentInfos.info(i));
2243
2244 // Now mark all pending & running merges as optimize
2245 // merge:
2246 Iterator it = pendingMerges.iterator();
2247 while(it.hasNext()) {
2248 final MergePolicy.OneMerge merge = (MergePolicy.OneMerge) it.next();
2249 merge.optimize = true;
2250 merge.maxNumSegmentsOptimize = maxNumSegments;
2251 }
2252
2253 it = runningMerges.iterator();
2254 while(it.hasNext()) {
2255 final MergePolicy.OneMerge merge = (MergePolicy.OneMerge) it.next();
2256 merge.optimize = true;
2257 merge.maxNumSegmentsOptimize = maxNumSegments;
2258 }
2259 }
2260
2261 maybeMerge(maxNumSegments, true);
2262
2263 if (doWait) {
2264 synchronized(this) {
2265 while(true) {
2266 if (mergeExceptions.size() > 0) {
2267 // Forward any exceptions in background merge
2268 // threads to the current thread:
2269 final int size = mergeExceptions.size();
2270 for(int i=0;i<size;i++) {
2271 final MergePolicy.OneMerge merge = (MergePolicy.OneMerge) mergeExceptions.get(0);
2272 if (merge.optimize) {
2273 IOException err = new IOException("background merge hit exception: " + merge.segString(directory));
2274 final Throwable t = merge.getException();
2275 if (t != null)
2276 err.initCause(t);
2277 throw err;
2278 }
2279 }
2280 }
2281
2282 if (optimizeMergesPending())
2283 doWait();
2284 else
2285 break;
2286 }
2287 }
2288
2289 // If close is called while we are still
2290 // running, throw an exception so the calling
2291 // thread will know the optimize did not
2292 // complete
2293 ensureOpen();
2294 }
2295
2296 // NOTE: in the ConcurrentMergeScheduler case, when
2297 // doWait is false, we can return immediately while
2298 // background threads accomplish the optimization
2299 }
2300
2301 /** Returns true if any merges in pendingMerges or
2302 * runningMerges are optimization merges. */
2303 private synchronized boolean optimizeMergesPending() {
2304 Iterator it = pendingMerges.iterator();
2305 while(it.hasNext())
2306 if (((MergePolicy.OneMerge) it.next()).optimize)
2307 return true;
2308
2309 it = runningMerges.iterator();
2310 while(it.hasNext())
2311 if (((MergePolicy.OneMerge) it.next()).optimize)
2312 return true;
2313
2314 return false;
2315 }
2316
2317 /** Just like {@link #expungeDeletes()}, except you can
2318 * specify whether the call should block until the
2319 * operation completes. This is only meaningful with a
2320 * {@link MergeScheduler} that is able to run merges in
2321 * background threads. */
2322 public void expungeDeletes(boolean doWait)
2323 throws CorruptIndexException, IOException {
2324 ensureOpen();
2325
2326 if (infoStream != null)
2327 message("expungeDeletes: index now " + segString());
2328
2329 MergePolicy.MergeSpecification spec;
2330
2331 synchronized(this) {
2332 spec = mergePolicy.findMergesToExpungeDeletes(segmentInfos, this);
2333 if (spec != null) {
2334 final int numMerges = spec.merges.size();
2335 for(int i=0;i<numMerges;i++)
2336 registerMerge((MergePolicy.OneMerge) spec.merges.get(i));
2337 }
2338 }
2339
2340 mergeScheduler.merge(this);
2341
2342 if (spec != null && doWait) {
2343 final int numMerges = spec.merges.size();
2344 synchronized(this) {
2345 boolean running = true;
2346 while(running) {
2347
2348 // Check each merge that MergePolicy asked us to
2349 // do, to see if any of them are still running and
2350 // if any of them have hit an exception.
2351 running = false;
2352 for(int i=0;i<numMerges;i++) {
2353 final MergePolicy.OneMerge merge = (MergePolicy.OneMerge) spec.merges.get(i);
2354 if (pendingMerges.contains(merge) || runningMerges.contains(merge))
2355 running = true;
2356 Throwable t = merge.getException();
2357 if (t != null) {
2358 IOException ioe = new IOException("background merge hit exception: " + merge.segString(directory));
2359 ioe.initCause(t);
2360 throw ioe;
2361 }
2362 }
2363
2364 // If any of our merges are still running, wait:
2365 if (running)
2366 doWait();
2367 }
2368 }
2369 }
2370
2371 // NOTE: in the ConcurrentMergeScheduler case, when
2372 // doWait is false, we can return immediately while
2373 // background threads accomplish the optimization
2374 }
2375
2376
2377 /** Expunges all deletes from the index. When an index
2378 * has many document deletions (or updates to existing
2379 * documents), it's best to either call optimize or
2380 * expungeDeletes to remove all unused data in the index
2381 * associated with the deleted documents. To see how
2382 * many deletions you have pending in your index, call
2383 * {@link IndexReader#numDeletedDocs}.
2384 * This saves disk space and memory usage while
2385 * searching. expungeDeletes should be somewhat faster
2386 * than optimize since it does not insist on reducing the
2387 * index to a single segment (though, this depends on the
2388 * {@link MergePolicy}; see {@link
2389 * MergePolicy#findMergesToExpungeDeletes}.). Note that
2390 * this call does not first commit any buffered
2391 * documents, so you must do so yourself if necessary.
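*
* <p>A typical sequence (a sketch; the <code>id</code> field is a
* hypothetical unique key):</p>
*
* <pre>
* writer.deleteDocuments(new Term("id", "42"));
* writer.commit();
* writer.expungeDeletes();
* </pre>
*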
2392 * See also {@link #expungeDeletes(boolean)} */
2393 public void expungeDeletes() throws CorruptIndexException, IOException {
2394 expungeDeletes(true);
2395 }
2396
2397 /**
2398 * Expert: asks the mergePolicy whether any merges are
2399 * necessary now and if so, runs the requested merges and
2400 * then iterates (testing again whether merges are needed) until no
2401 * more merges are returned by the mergePolicy.
2402 *
2403 * Explicit calls to maybeMerge() are usually not
2404 * necessary. The most common case is when merge policy
2405 * parameters have changed.
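*
* <p>For example, after changing a merge policy parameter (the value
* below is illustrative):</p>
*
* <pre>
* writer.setMergeFactor(5);
* writer.maybeMerge();
* </pre>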
2406 */
2407 public final void maybeMerge() throws CorruptIndexException, IOException {
2408 maybeMerge(false);
2409 }
2410
2411 private final void maybeMerge(boolean optimize) throws CorruptIndexException, IOException {
2412 maybeMerge(1, optimize);
2413 }
2414
2415 private final void maybeMerge(int maxNumSegmentsOptimize, boolean optimize) throws CorruptIndexException, IOException {
2416 updatePendingMerges(maxNumSegmentsOptimize, optimize);
2417 mergeScheduler.merge(this);
2418 }
2419
2420 private synchronized void updatePendingMerges(int maxNumSegmentsOptimize, boolean optimize)
2421 throws CorruptIndexException, IOException {
2422 assert !optimize || maxNumSegmentsOptimize > 0;
2423
2424 if (stopMerges)
2425 return;
2426
2427 final MergePolicy.MergeSpecification spec;
2428 if (optimize) {
2429 spec = mergePolicy.findMergesForOptimize(segmentInfos, this, maxNumSegmentsOptimize, segmentsToOptimize);
2430
2431 if (spec != null) {
2432 final int numMerges = spec.merges.size();
2433 for(int i=0;i<numMerges;i++) {
2434 final MergePolicy.OneMerge merge = ((MergePolicy.OneMerge) spec.merges.get(i));
2435 merge.optimize = true;
2436 merge.maxNumSegmentsOptimize = maxNumSegmentsOptimize;
2437 }
2438 }
2439
2440 } else
2441 spec = mergePolicy.findMerges(segmentInfos, this);
2442
2443 if (spec != null) {
2444 final int numMerges = spec.merges.size();
2445 for(int i=0;i<numMerges;i++)
2446 registerMerge((MergePolicy.OneMerge) spec.merges.get(i));
2447 }
2448 }
2449
2450 /** Expert: the {@link MergeScheduler} calls this method
2451 * to retrieve the next merge requested by the
2452 * MergePolicy */
2453 synchronized MergePolicy.OneMerge getNextMerge() {
2454 if (pendingMerges.size() == 0)
2455 return null;
2456 else {
2457 // Advance the merge from pending to running
2458 MergePolicy.OneMerge merge = (MergePolicy.OneMerge) pendingMerges.removeFirst();
2459 runningMerges.add(merge);
2460 return merge;
2461 }
2462 }
2463
2464 /** Like getNextMerge() except only returns a merge if it's
2465 * external. */
2466 private synchronized MergePolicy.OneMerge getNextExternalMerge() {
2467 if (pendingMerges.size() == 0)
2468 return null;
2469 else {
2470 Iterator it = pendingMerges.iterator();
2471 while(it.hasNext()) {
2472 MergePolicy.OneMerge merge = (MergePolicy.OneMerge) it.next();
2473 if (merge.isExternal) {
2474 // Advance the merge from pending to running
2475 it.remove();
2476 runningMerges.add(merge);
2477 return merge;
2478 }
2479 }
2480
2481 // All existing merges do not involve external segments
2482 return null;
2483 }
2484 }
2485
2486 /*
2487 * Begin a transaction. During a transaction, any segment
2488 * merges that happen (or RAM buffer flushes) will not
2489 * write a new segments file and will not remove any files
2490 * that were present at the start of the transaction. You
2491 * must make a matched (try/finally) call to
2492 * commitTransaction() or rollbackTransaction() to finish
2493 * the transaction.
2494 *
2495 * Note that buffered documents and delete terms are not handled
2496 * within the transaction, so they must be flushed before the
2497 * transaction is started.
2498 */
2499 private synchronized void startTransaction(boolean haveReadLock) throws IOException {
2500
2501 boolean success = false;
2502 try {
2503 if (infoStream != null)
2504 message("now start transaction");
2505
2506 assert docWriter.getNumBufferedDeleteTerms() == 0 :
2507 "calling startTransaction with buffered delete terms not supported: numBufferedDeleteTerms=" + docWriter.getNumBufferedDeleteTerms();
2508 assert docWriter.getNumDocsInRAM() == 0 :
2509 "calling startTransaction with buffered documents not supported: numDocsInRAM=" + docWriter.getNumDocsInRAM();
2510
2511 ensureOpen();
2512
2513 // If a transaction is trying to roll back (because
2514 // addIndexes hit an exception) then wait here until
2515 // that's done:
2516 synchronized(this) {
2517 while(stopMerges)
2518 doWait();
2519 }
2520 success = true;
2521 } finally {
2522 // Release the write lock if our caller held it, on
2523 // hitting an exception
2524 if (!success && haveReadLock)
2525 releaseRead();
2526 }
2527
2528 if (haveReadLock) {
2529 upgradeReadToWrite();
2530 } else {
2531 acquireWrite();
2532 }
2533
2534 success = false;
2535 try {
2536 localRollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
2537
2538 assert !hasExternalSegments(segmentInfos);
2539
2540 localAutoCommit = autoCommit;
2541 localFlushedDocCount = docWriter.getFlushedDocCount();
2542
2543 if (localAutoCommit) {
2544
2545 if (infoStream != null)
2546 message("flush at startTransaction");
2547
2548 flush(true, false, false);
2549
2550 // Turn off auto-commit during our local transaction:
2551 autoCommit = false;
2552 } else
2553 // We must "protect" our files at this point from
2554 // deletion in case we need to rollback:
2555 deleter.incRef(segmentInfos, false);
2556
2557 success = true;
2558 } finally {
2559 if (!success)
2560 finishAddIndexes();
2561 }
2562 }
2563
2564 /*
2565 * Rolls back the transaction and restores state to where
2566 * we were at the start.
2567 */
2568 private synchronized void rollbackTransaction() throws IOException {
2569
2570 if (infoStream != null)
2571 message("now rollback transaction");
2572
2573 // First restore autoCommit in case we hit an exception below:
2574 autoCommit = localAutoCommit;
2575 docWriter.setFlushedDocCount(localFlushedDocCount);
2576
2577 // Must finish merges before rolling back segmentInfos
2578 // so merges don't hit exceptions on trying to commit
2579 // themselves, don't get files deleted out from under
2580 // them, etc:
2581 finishMerges(false);
2582
2583 // Keep the same segmentInfos instance but replace all
2584 // of its SegmentInfo instances. This is so the next
2585 // attempt to commit using this instance of IndexWriter
2586 // will always write to a new generation ("write once").
2587 segmentInfos.clear();
2588 segmentInfos.addAll(localRollbackSegmentInfos);
2589 localRollbackSegmentInfos = null;
2590
2591 // This must come after we rollback segmentInfos, so
2592 // that if a commit() kicks off it does not see the
2593 // segmentInfos with external segments
2594 finishAddIndexes();
2595
2596 // Ask deleter to locate unreferenced files we had
2597 // created & remove them:
2598 deleter.checkpoint(segmentInfos, false);
2599
2600 if (!autoCommit)
2601 // Remove the incRef we did in startTransaction:
2602 deleter.decRef(segmentInfos);
2603
2604 // Also ask deleter to remove any newly created files
2605 // that were never incref'd; this "garbage" is created
2606 // when a merge kicks off but aborts part way through
2607 // before it had a chance to incRef the files it had
2608 // partially created
2609 deleter.refresh();
2610
2611 notifyAll();
2612
2613 assert !hasExternalSegments();
2614 }
2615
2616 /*
2617 * Commits the transaction. This will write the new
2618 * segments file and remove any pending deletions we have
2619 * accumulated during the transaction
2620 */
2621 private synchronized void commitTransaction() throws IOException {
2622
2623 if (infoStream != null)
2624 message("now commit transaction");
2625
2626 // First restore autoCommit in case we hit an exception below:
2627 autoCommit = localAutoCommit;
2628
2629 // Give deleter a chance to remove files now:
2630 checkpoint();
2631
2632 if (autoCommit) {
2633 boolean success = false;
2634 try {
2635 commit(0);
2636 success = true;
2637 } finally {
2638 if (!success) {
2639 if (infoStream != null)
2640 message("hit exception committing transaction");
2641 rollbackTransaction();
2642 }
2643 }
2644 } else
2645 // Remove the incRef we did in startTransaction.
2646 deleter.decRef(localRollbackSegmentInfos);
2647
2648 localRollbackSegmentInfos = null;
2649
2650 assert !hasExternalSegments();
2651
2652 finishAddIndexes();
2653 }
2654
2655 /**
2656 * @deprecated Please use {@link #rollback} instead.
2657 */
2658 public void abort() throws IOException {
2659 rollback();
2660 }
2661
2662 /**
2663 * Close the <code>IndexWriter</code> without committing
2664 * any changes that have occurred since the last commit
2665 * (or since it was opened, if commit hasn't been called).
2666 * This removes any temporary files that had been
2667 * created, after which the state of the index will be the
2668 * same as it was when this writer was first opened. This
2669 * can only be called when this IndexWriter was opened
2670 * with <code>autoCommit=false</code>. This also clears a
2671 * previous call to {@link #prepareCommit}.
2672 * @throws IllegalStateException if this is called when
2673 * the writer was opened with <code>autoCommit=true</code>.
2674 * @throws IOException if there is a low-level IO error
2675 */
2676 public void rollback() throws IOException {
2677 ensureOpen();
2678 if (autoCommit)
2679 throw new IllegalStateException("rollback() can only be called when IndexWriter was opened with autoCommit=false");
2680
2681 // Ensure that only one thread actually gets to do the closing:
2682 if (shouldClose())
2683 rollbackInternal();
2684 }
2685
2686 private void rollbackInternal() throws IOException {
2687
2688 boolean success = false;
2689
2690 docWriter.pauseAllThreads();
2691
2692 try {
2693 finishMerges(false);
2694
2695 // Must pre-close these two, in case they increment
2696 // changeCount so that we can then set it to false
2697 // before calling closeInternal
2698 mergePolicy.close();
2699 mergeScheduler.close();
2700
2701 synchronized(this) {
2702
2703 if (pendingCommit != null) {
2704 pendingCommit.rollbackCommit(directory);
2705 deleter.decRef(pendingCommit);
2706 pendingCommit = null;
2707 notifyAll();
2708 }
2709
2710 // Keep the same segmentInfos instance but replace all
2711 // of its SegmentInfo instances. This is so the next
2712 // attempt to commit using this instance of IndexWriter
2713 // will always write to a new generation ("write
2714 // once").
2715 segmentInfos.clear();
2716 segmentInfos.addAll(rollbackSegmentInfos);
2717
2718 assert !hasExternalSegments();
2719
2720 docWriter.abort();
2721
2722 assert testPoint("rollback before checkpoint");
2723
2724 // Ask deleter to locate unreferenced files & remove
2725 // them:
2726 deleter.checkpoint(segmentInfos, false);
2727 deleter.refresh();
2728 }
2729
2730 lastCommitChangeCount = changeCount;
2731
2732 success = true;
2733 } catch (OutOfMemoryError oom) {
2734 hitOOM = true;
2735 throw oom;
2736 } finally {
2737 synchronized(this) {
2738 if (!success) {
2739 docWriter.resumeAllThreads();
2740 closing = false;
2741 notifyAll();
2742 if (infoStream != null)
2743 message("hit exception during rollback");
2744 }
2745 }
2746 }
2747
2748 closeInternal(false);
2749 }
2750
2751 private synchronized void finishMerges(boolean waitForMerges) throws IOException {
2752 if (!waitForMerges) {
2753
2754 stopMerges = true;
2755
2756 // Abort all pending & running merges:
2757 Iterator it = pendingMerges.iterator();
2758 while(it.hasNext()) {
2759 final MergePolicy.OneMerge merge = (MergePolicy.OneMerge) it.next();
2760 if (infoStream != null)
2761 message("now abort pending merge " + merge.segString(directory));
2762 merge.abort();
2763 mergeFinish(merge);
2764 }
2765 pendingMerges.clear();
2766
2767 it = runningMerges.iterator();
2768 while(it.hasNext()) {
2769 final MergePolicy.OneMerge merge = (MergePolicy.OneMerge) it.next();
2770 if (infoStream != null)
2771 message("now abort running merge " + merge.segString(directory));
2772 merge.abort();
2773 }
2774
2775 // Ensure any running addIndexes finishes. It's fine
2776 // if a new one attempts to start because its merges
2777 // will quickly see the stopMerges == true and abort.
2778 acquireRead();
2779 releaseRead();
2780
2781 // These merges periodically check whether they have
2782 // been aborted, and stop if so. We wait here to make
2783 // sure they all stop. It should not take very long
2784 // because the merge threads periodically check if
2785 // they are aborted.
2786 while(runningMerges.size() > 0) {
2787 if (infoStream != null)
2788 message("now wait for " + runningMerges.size() + " running merge to abort");
2789 doWait();
2790 }
2791
2792 stopMerges = false;
2793 notifyAll();
2794
2795 assert 0 == mergingSegments.size();
2796
2797 if (infoStream != null)
2798 message("all running merges have aborted");
2799
2800 } else {
2801 // Ensure any running addIndexes finishes. It's fine
2802 // if a new one attempts to start because from our
2803 // caller above the call will see that we are in the
2804 // process of closing, and will throw an
2805 // AlreadyClosedException.
2806 acquireRead();
2807 releaseRead();
2808 while(pendingMerges.size() > 0 || runningMerges.size() > 0)
2809 doWait();
2810 assert 0 == mergingSegments.size();
2811 }
2812 }
2813
2814 /*
2815 * Called whenever the SegmentInfos has been updated and
2816 * the index files referenced exist (correctly) in the
2817 * index directory.
2818 */
2819 private synchronized void checkpoint() throws IOException {
2820 changeCount++;
2821 deleter.checkpoint(segmentInfos, false);
2822 }
2823
2824 private void finishAddIndexes() {
2825 releaseWrite();
2826 }
2827
2828 private void blockAddIndexes(boolean includePendingClose) {
2829
2830 acquireRead();
2831
2832 boolean success = false;
2833 try {
2834
2835 // Make sure we are still open since we could have
2836 // waited quite a while for last addIndexes to finish
2837 ensureOpen(includePendingClose);
2838 success = true;
2839 } finally {
2840 if (!success)
2841 releaseRead();
2842 }
2843 }
2844
2845 private void resumeAddIndexes() {
2846 releaseRead();
2847 }
2848
2849 /** Merges all segments from an array of indexes into this index.
2850 * @deprecated Use {@link #addIndexesNoOptimize} instead,
2851 * then separately call {@link #optimize} afterwards if
2852 * you need to.
2853 * @throws CorruptIndexException if the index is corrupt
2854 * @throws IOException if there is a low-level IO error
2855 */
2856 public void addIndexes(Directory[] dirs)
2857 throws CorruptIndexException, IOException {
2858
2859 ensureOpen();
2860
2861 noDupDirs(dirs);
2862
2863 // Do not allow add docs or deletes while we are running:
2864 docWriter.pauseAllThreads();
2865
2866 try {
2867
2868 if (infoStream != null)
2869 message("flush at addIndexes");
2870 flush(true, false, true);
2871
2872 boolean success = false;
2873
2874 startTransaction(false);
2875
2876 try {
2877
2878 int docCount = 0;
2879 synchronized(this) {
2880 ensureOpen();
2881 for (int i = 0; i < dirs.length; i++) {
2882 SegmentInfos sis = new SegmentInfos(); // read infos from dir
2883 sis.read(dirs[i]);
2884 for (int j = 0; j < sis.size(); j++) {
2885 final SegmentInfo info = sis.info(j);
2886 docCount += info.docCount;
2887 assert !segmentInfos.contains(info);
2888 segmentInfos.add(info); // add each info
2889 }
2890 }
2891 }
2892
2893 // Notify DocumentsWriter that the flushed count just increased
2894 docWriter.updateFlushedDocCount(docCount);
2895
2896 optimize();
2897
2898 success = true;
2899 } finally {
2900 if (success) {
2901 commitTransaction();
2902 } else {
2903 rollbackTransaction();
2904 }
2905 }
2906 } catch (OutOfMemoryError oom) {
2907 hitOOM = true;
2908 throw oom;
2909 } finally {
2910 docWriter.resumeAllThreads();
2911 }
2912 }
2913
2914 private synchronized void resetMergeExceptions() {
2915 mergeExceptions = new ArrayList();
2916 mergeGen++;
2917 }
2918
2919 private void noDupDirs(Directory[] dirs) {
2920 HashSet dups = new HashSet();
2921 for(int i=0;i<dirs.length;i++) {
2922 if (dups.contains(dirs[i]))
2923 throw new IllegalArgumentException("Directory " + dirs[i] + " appears more than once");
2924 if (dirs[i] == directory)
2925 throw new IllegalArgumentException("Cannot add directory to itself");
2926 dups.add(dirs[i]);
2927 }
2928 }
2929
2930 /**
2931 * Merges all segments from an array of indexes into this
2932 * index.
2933 *
2934 * <p>This may be used to parallelize batch indexing. A large document
2935 * collection can be broken into sub-collections. Each sub-collection can be
2936 * indexed in parallel, on a different thread, process or machine. The
2937 * complete index can then be created by merging sub-collection indexes
2938 * with this method.
2939 *
2940 * <p><b>NOTE:</b> the index in each Directory must not be
2941 * changed (opened by a writer) while this method is
2942 * running. This method does not acquire a write lock in
2943 * each input Directory, so it is up to the caller to
2944 * enforce this.
2945 *
2946 * <p><b>NOTE:</b> while this is running, any attempts to
2947 * add or delete documents (with another thread) will be
2948 * paused until this method completes.
2949 *
2950 * <p>This method is transactional in how Exceptions are
2951 * handled: it does not commit a new segments_N file until
2952 * all indexes are added. This means if an Exception
2953 * occurs (for example disk full), then either no indexes
2954 * will have been added or they all will have been.</p>
2955 *
2956 * <p>Note that this requires temporary free space in the
2957 * Directory up to 2X the sum of all input indexes
2958 * (including the starting index). If readers/searchers
2959 * are open against the starting index, then temporary
2960 * free space required will be higher by the size of the
2961 * starting index (see {@link #optimize()} for details).
2962 * </p>
2963 *
2964 * <p>Once this completes, the final size of the index
2965 * will be less than the sum of all input index sizes
2966 * (including the starting index). It could be quite a
2967 * bit smaller (if there were many pending deletes) or
2968 * just slightly smaller.</p>
2969 *
2970 * <p>
2971 * This requires this index not be among those to be added.
2972 *
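* <p>A sketch of merging two sub-indexes built elsewhere into this
* writer's index (the paths below are hypothetical):</p>
*
* <pre>
* Directory part1 = FSDirectory.getDirectory("/tmp/index-part1");
* Directory part2 = FSDirectory.getDirectory("/tmp/index-part2");
* writer.addIndexesNoOptimize(new Directory[] { part1, part2 });
* </pre>
*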
2973 * @throws CorruptIndexException if the index is corrupt
2974 * @throws IOException if there is a low-level IO error
2975 */
2976 public void addIndexesNoOptimize(Directory[] dirs)
2977 throws CorruptIndexException, IOException {
2978
2979 ensureOpen();
2980
2981 noDupDirs(dirs);
2982
2983 // Do not allow add docs or deletes while we are running:
2984 docWriter.pauseAllThreads();
2985
2986 try {
2987 if (infoStream != null)
2988 message("flush at addIndexesNoOptimize");
2989 flush(true, false, true);
2990
2991 boolean success = false;
2992
2993 startTransaction(false);
2994
2995 try {
2996
2997 int docCount = 0;
2998 synchronized(this) {
2999 ensureOpen();
3000
3001 for (int i = 0; i < dirs.length; i++) {
3002 if (directory == dirs[i]) {
3003 // cannot add this index: segments may be deleted in merge before added
3004 throw new IllegalArgumentException("Cannot add this index to itself");
3005 }
3006
3007 SegmentInfos sis = new SegmentInfos(); // read infos from dir
3008 sis.read(dirs[i]);
3009 for (int j = 0; j < sis.size(); j++) {
3010 SegmentInfo info = sis.info(j);
3011 assert !segmentInfos.contains(info): "dup info dir=" + info.dir + " name=" + info.name;
3012 docCount += info.docCount;
3013 segmentInfos.add(info); // add each info
3014 }
3015 }
3016 }
3017
3018 // Notify DocumentsWriter that the flushed count just increased
3019 docWriter.updateFlushedDocCount(docCount);
3020
3021 maybeMerge();
3022
3023 ensureOpen();
3024
3025 // If after merging there remain segments in the index
3026 // that are in a different directory, just copy these
3027 // over into our index. This is necessary (before
3028 // finishing the transaction) to avoid leaving the
3029 // index in an unusable (inconsistent) state.
3030 resolveExternalSegments();
3031
3032 ensureOpen();
3033
3034 success = true;
3035
3036 } finally {
3037 if (success) {
3038 commitTransaction();
3039 } else {
3040 rollbackTransaction();
3041 }
3042 }
3043 } catch (OutOfMemoryError oom) {
3044 hitOOM = true;
3045 throw oom;
3046 } finally {
3047 docWriter.resumeAllThreads();
3048 }
3049 }
3050
3051 private boolean hasExternalSegments() {
3052 return hasExternalSegments(segmentInfos);
3053 }
3054
3055 private boolean hasExternalSegments(SegmentInfos infos) {
3056 final int numSegments = infos.size();
3057 for(int i=0;i<numSegments;i++)
3058 if (infos.info(i).dir != directory)
3059 return true;
3060 return false;
3061 }
3062
3063 /* If any of our segments are using a directory != ours
3064 * then we have to either copy them over one by one, merge
3065 * them (if merge policy has chosen to) or wait until
3066 * currently running merges (in the background) complete.
3067 * We don't return until the SegmentInfos has no more
3068 * external segments. Currently this is only used by
3069 * addIndexesNoOptimize(). */
3070 private void resolveExternalSegments() throws CorruptIndexException, IOException {
3071
3072 boolean any = false;
3073
3074 boolean done = false;
3075
3076 while(!done) {
3077 SegmentInfo info = null;
3078 MergePolicy.OneMerge merge = null;
3079 synchronized(this) {
3080
3081 if (stopMerges)
3082 throw new MergePolicy.MergeAbortedException("rollback() was called or addIndexes* hit an unhandled exception");
3083
3084 final int numSegments = segmentInfos.size();
3085
3086 done = true;
3087 for(int i=0;i<numSegments;i++) {
3088 info = segmentInfos.info(i);
3089 if (info.dir != directory) {
3090 done = false;
3091 final MergePolicy.OneMerge newMerge = new MergePolicy.OneMerge(segmentInfos.range(i, 1+i), info.getUseCompoundFile());
3092
3093 // Returns true if no running merge conflicts
3094 // with this one (and, records this merge as
3095 // pending), ie, this segment is not currently
3096 // being merged:
3097 if (registerMerge(newMerge)) {
3098 merge = newMerge;
3099
3100 // If this segment is not currently being
3101 // merged, then advance it to running & run
3102 // the merge ourself (below):
3103 pendingMerges.remove(merge);
3104 runningMerges.add(merge);
3105 break;
3106 }
3107 }
3108 }
3109
3110 if (!done && merge == null)
3111 // We are not yet done (external segments still
3112           // exist in segmentInfos), but all such segments
3113 // are currently "covered" by a pending or running
3114 // merge. We now try to grab any pending merge
3115 // that involves external segments:
3116 merge = getNextExternalMerge();
3117
3118 if (!done && merge == null)
3119 // We are not yet done, and, all external segments
3120 // fall under merges that the merge scheduler is
3121 // currently running. So, we now wait and check
3122 // back to see if the merge has completed.
3123 doWait();
3124 }
3125
3126 if (merge != null) {
3127 any = true;
3128 merge(merge);
3129 }
3130 }
3131
3132 if (any)
3133 // Sometimes, on copying an external segment over,
3134 // more merges may become necessary:
3135 mergeScheduler.merge(this);
3136 }
3137
3138 /** Merges the provided indexes into this index.
3139 * <p>After this completes, the index is optimized. </p>
3140 * <p>The provided IndexReaders are not closed.</p>
3141  *
3142 * <p><b>NOTE:</b> the index in each Directory must not be
3143 * changed (opened by a writer) while this method is
3144 * running. This method does not acquire a write lock in
3145 * each input Directory, so it is up to the caller to
3146 * enforce this.
3147 *
3148 * <p><b>NOTE:</b> while this is running, any attempts to
3149 * add or delete documents (with another thread) will be
3150 * paused until this method completes.
3151 *
3152 * <p>See {@link #addIndexesNoOptimize(Directory[])} for
3153 * details on transactional semantics, temporary free
3154 * space required in the Directory, and non-CFS segments
3155 * on an Exception.</p>
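      *
      * <p>A minimal usage sketch, assuming an already open
      * IndexWriter <code>writer</code> and two illustrative source
      * directories <code>otherDir1</code> and <code>otherDir2</code>:</p>
      *
      * <pre>
      *   IndexReader r1 = IndexReader.open(otherDir1);
      *   IndexReader r2 = IndexReader.open(otherDir2);
      *   writer.addIndexes(new IndexReader[] { r1, r2 });
      *   r1.close();
      *   r2.close();
      * </pre>
      *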
3156 * @throws CorruptIndexException if the index is corrupt
3157 * @throws IOException if there is a low-level IO error
3158 */
3159 public void addIndexes(IndexReader[] readers)
3160 throws CorruptIndexException, IOException {
3161
3162 ensureOpen();
3163
3164     // Do not allow adding docs or deletes while we are running:
3165 docWriter.pauseAllThreads();
3166
3167 // We must pre-acquire a read lock here (and upgrade to
3168 // write lock in startTransaction below) so that no
3169 // other addIndexes is allowed to start up after we have
3170 // flushed & optimized but before we then start our
3171 // transaction. This is because the merging below
3172 // requires that only one segment is present in the
3173 // index:
3174 acquireRead();
3175
3176 try {
3177
3178 SegmentInfo info = null;
3179 String mergedName = null;
3180 SegmentMerger merger = null;
3181
3182 boolean success = false;
3183
3184 try {
3185 flush(true, false, true);
3186 optimize(); // start with zero or 1 seg
3187 success = true;
3188 } finally {
3189 // Take care to release the read lock if we hit an
3190 // exception before starting the transaction
3191 if (!success)
3192 releaseRead();
3193 }
3194
3195 // true means we already have a read lock; if this
3196 // call hits an exception it will release the write
3197 // lock:
3198 startTransaction(true);
3199
3200 try {
3201 mergedName = newSegmentName();
3202 merger = new SegmentMerger(this, mergedName, null);
3203
3204 IndexReader sReader = null;
3205 synchronized(this) {
3206 if (segmentInfos.size() == 1) { // add existing index, if any
3207 sReader = SegmentReader.get(true, segmentInfos.info(0));
3208 }
3209 }
3210
3211 success = false;
3212
3213 try {
3214 if (sReader != null)
3215 merger.add(sReader);
3216
3217 for (int i = 0; i < readers.length; i++) // add new indexes
3218 merger.add(readers[i]);
3219
3220 int docCount = merger.merge(); // merge 'em
3221
3222 if(sReader != null) {
3223 sReader.close();
3224 sReader = null;
3225 }
3226
3227 synchronized(this) {
3228 segmentInfos.clear(); // pop old infos & add new
3229 info = new SegmentInfo(mergedName, docCount, directory, false, true,
3230 -1, null, false, merger.hasProx());
3231 segmentInfos.add(info);
3232 }
3233
3234 // Notify DocumentsWriter that the flushed count just increased
3235 docWriter.updateFlushedDocCount(docCount);
3236
3237 success = true;
3238
3239 } finally {
3240 if (sReader != null) {
3241 sReader.close();
3242 }
3243 }
3244 } finally {
3245 if (!success) {
3246 if (infoStream != null)
3247 message("hit exception in addIndexes during merge");
3248 rollbackTransaction();
3249 } else {
3250 commitTransaction();
3251 }
3252 }
3253
3254 if (mergePolicy instanceof LogMergePolicy && getUseCompoundFile()) {
3255
3256 List files = null;
3257
3258 synchronized(this) {
3259 // Must incRef our files so that if another thread
3260 // is running merge/optimize, it doesn't delete our
3261           // segment's files before we have a chance to
3262 // finish making the compound file.
3263 if (segmentInfos.contains(info)) {
3264 files = info.files();
3265 deleter.incRef(files);
3266 }
3267 }
3268
3269 if (files != null) {
3270
3271 success = false;
3272
3273 startTransaction(false);
3274
3275 try {
3276 merger.createCompoundFile(mergedName + ".cfs");
3277 synchronized(this) {
3278 info.setUseCompoundFile(true);
3279 }
3280
3281 success = true;
3282
3283 } finally {
3284
3285 deleter.decRef(files);
3286
3287 if (!success) {
3288 if (infoStream != null)
3289 message("hit exception building compound file in addIndexes during merge");
3290
3291 rollbackTransaction();
3292 } else {
3293 commitTransaction();
3294 }
3295 }
3296 }
3297 }
3298 } catch (OutOfMemoryError oom) {
3299 hitOOM = true;
3300 throw oom;
3301 } finally {
3302 docWriter.resumeAllThreads();
3303 }
3304 }
3305
3306 // This is called after pending added and deleted
3307 // documents have been flushed to the Directory but before
3308 // the change is committed (new segments_N file written).
3309 void doAfterFlush()
3310 throws IOException {
3311 }
3312
3313 /**
3314 * Flush all in-memory buffered updates (adds and deletes)
3315 * to the Directory.
3316 * <p>Note: while this will force buffered docs to be
3317 * pushed into the index, it will not make these docs
3318  * visible to a reader.  Use {@link #commit()} instead.</p>
3319 * @throws CorruptIndexException if the index is corrupt
3320 * @throws IOException if there is a low-level IO error
3321  * @deprecated please call {@link #commit()} instead
3322 */
3323 public final void flush() throws CorruptIndexException, IOException {
3324 if (hitOOM)
3325 throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot flush");
3326
3327 flush(true, false, true);
3328 }
3329
3330 /** <p>Expert: prepare for commit. This does the first
3331 * phase of 2-phase commit. You can only call this when
3332 * autoCommit is false. This method does all steps
3333 * necessary to commit changes since this writer was
3334 * opened: flushes pending added and deleted docs, syncs
3335 * the index files, writes most of next segments_N file.
3336 * After calling this you must call either {@link
3337 * #commit()} to finish the commit, or {@link
3338 * #rollback()} to revert the commit and undo all changes
3339 * done since the writer was opened.</p>
3340 *
3341 * You can also just call {@link #commit()} directly
3342  * without calling prepareCommit first, in which case
3343  * commit() will internally call prepareCommit.
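      *
      * <p>A minimal two-phase-commit sketch, assuming an open
      * IndexWriter <code>writer</code> and a hypothetical second
      * participant <code>otherResource</code> being coordinated with
      * the index:</p>
      *
      * <pre>
      *   try {
      *     writer.prepareCommit();
      *     otherResource.prepareCommit();  // hypothetical second participant
      *     writer.commit();
      *     otherResource.commit();
      *   } catch (IOException ioe) {
      *     // revert everything done since the writer was opened
      *     writer.rollback();
      *   }
      * </pre>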
3344 */
3345 public final void prepareCommit() throws CorruptIndexException, IOException {
3346 ensureOpen();
3347 prepareCommit(false);
3348 }
3349
3350 private final void prepareCommit(boolean internal) throws CorruptIndexException, IOException {
3351
3352 if (hitOOM)
3353 throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot commit");
3354
3355 if (autoCommit && !internal)
3356 throw new IllegalStateException("this method can only be used when autoCommit is false");
3357
3358 if (!autoCommit && pendingCommit != null)
3359 throw new IllegalStateException("prepareCommit was already called with no corresponding call to commit");
3360
3361 message("prepareCommit: flush");
3362
3363 flush(true, true, true);
3364
3365 startCommit(0);
3366 }
3367
3368 private void commit(long sizeInBytes) throws IOException {
3369 startCommit(sizeInBytes);
3370 finishCommit();
3371 }
3372
3373 /**
3374 * <p>Commits all pending updates (added & deleted
3375 * documents) to the index, and syncs all referenced index
3376 * files, such that a reader will see the changes and the
3377 * index updates will survive an OS or machine crash or
3378 * power loss. Note that this does not wait for any
3379 * running background merges to finish. This may be a
3380 * costly operation, so you should test the cost in your
3381 * application and do it only when really necessary.</p>
3382 *
3383 * <p> Note that this operation calls Directory.sync on
3384 * the index files. That call should not return until the
3385 * file contents & metadata are on stable storage. For
3386 * FSDirectory, this calls the OS's fsync. But, beware:
3387 * some hardware devices may in fact cache writes even
3388 * during fsync, and return before the bits are actually
3389 * on stable storage, to give the appearance of faster
3390 * performance. If you have such a device, and it does
3391 * not have a battery backup (for example) then on power
3392 * loss it may still lose data. Lucene cannot guarantee
3393 * consistency on such devices. </p>
3394 *
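      * <p>A minimal sketch of the typical add-then-commit cycle,
      * assuming an open IndexWriter <code>writer</code> and a
      * Document <code>doc</code> built elsewhere:</p>
      *
      * <pre>
      *   writer.addDocument(doc);
      *   writer.commit();  // changes are now durable and visible to newly opened readers
      * </pre>
      *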
3395 * @see #prepareCommit
3396 */
3397
3398 public final void commit() throws CorruptIndexException, IOException {
3399
3400 ensureOpen();
3401
3402 if (infoStream != null)
3403 message("commit: start");
3404
3405 if (autoCommit || pendingCommit == null) {
3406 if (infoStream != null)
3407 message("commit: now prepare");
3408 prepareCommit(true);
3409 } else if (infoStream != null)
3410 message("commit: already prepared");
3411
3412 finishCommit();
3413 }
3414
3415 private synchronized final void finishCommit() throws CorruptIndexException, IOException {
3416
3417 if (pendingCommit != null) {
3418 try {
3419 message("commit: pendingCommit != null");
3420 pendingCommit.finishCommit(directory);
3421 lastCommitChangeCount = pendingCommitChangeCount;
3422 segmentInfos.updateGeneration(pendingCommit);
3423 setRollbackSegmentInfos(pendingCommit);
3424 deleter.checkpoint(pendingCommit, true);
3425 } finally {
3426 deleter.decRef(pendingCommit);
3427 pendingCommit = null;
3428 notifyAll();
3429 }
3430
3431 } else
3432 message("commit: pendingCommit == null; skip");
3433
3434 message("commit: done");
3435 }
3436
3437 /**
3438  * Flush all in-memory buffered updates (adds and deletes)
3439 * to the Directory.
3440 * @param triggerMerge if true, we may merge segments (if
3441 * deletes or docs were flushed) if necessary
3442 * @param flushDocStores if false we are allowed to keep
3443 * doc stores open to share with the next segment
3444 * @param flushDeletes whether pending deletes should also
3445 * be flushed
3446 */
3447 protected final void flush(boolean triggerMerge, boolean flushDocStores, boolean flushDeletes) throws CorruptIndexException, IOException {
3448 // We can be called during close, when closing==true, so we must pass false to ensureOpen:
3449 ensureOpen(false);
3450 if (doFlush(flushDocStores, flushDeletes) && triggerMerge)
3451 maybeMerge();
3452 }
3453
3454 // TODO: this method should not have to be entirely
3455 // synchronized, ie, merges should be allowed to commit
3456 // even while a flush is happening
3457 private synchronized final boolean doFlush(boolean flushDocStores, boolean flushDeletes) throws CorruptIndexException, IOException {
3458
3459 ensureOpen(false);
3460
3461 assert testPoint("startDoFlush");
3462
3463 flushCount++;
3464
3465 flushDeletes |= docWriter.deletesFull();
3466
3467 // When autoCommit=true we must always flush deletes
3468 // when flushing a segment; otherwise deletes may become
3469 // visible before their corresponding added document
3470 // from an updateDocument call
3471 flushDeletes |= autoCommit;
3472
3473 // Make sure no threads are actively adding a document.
3474 // Returns true if docWriter is currently aborting, in
3475 // which case we skip flushing this segment
3476 if (docWriter.pauseAllThreads()) {
3477 docWriter.resumeAllThreads();
3478 return false;
3479 }
3480
3481 try {
3482
3483 SegmentInfo newSegment = null;
3484
3485 final int numDocs = docWriter.getNumDocsInRAM();
3486
3487 // Always flush docs if there are any
3488 boolean flushDocs = numDocs > 0;
3489
3490 // With autoCommit=true we always must flush the doc
3491 // stores when we flush
3492 flushDocStores |= autoCommit;
3493 String docStoreSegment = docWriter.getDocStoreSegment();
3494 if (docStoreSegment == null)
3495 flushDocStores = false;
3496
3497 int docStoreOffset = docWriter.getDocStoreOffset();
3498
3499 // docStoreOffset should only be non-zero when
3500 // autoCommit == false
3501 assert !autoCommit || 0 == docStoreOffset;
3502
3503 boolean docStoreIsCompoundFile = false;
3504
3505 if (infoStream != null) {
3506 message(" flush: segment=" + docWriter.getSegment() +
3507 " docStoreSegment=" + docWriter.getDocStoreSegment() +
3508 " docStoreOffset=" + docStoreOffset +
3509 " flushDocs=" + flushDocs +
3510 " flushDeletes=" + flushDeletes +
3511 " flushDocStores=" + flushDocStores +
3512 " numDocs=" + numDocs +
3513 " numBufDelTerms=" + docWriter.getNumBufferedDeleteTerms());
3514 message(" index before flush " + segString());
3515 }
3516
3517 // Check if the doc stores must be separately flushed
3518 // because other segments, besides the one we are about
3519 // to flush, reference it
3520 if (flushDocStores && (!flushDocs || !docWriter.getSegment().equals(docWriter.getDocStoreSegment()))) {
3521 // We must separately flush the doc store
3522 if (infoStream != null)
3523 message(" flush shared docStore segment " + docStoreSegment);
3524
3525 docStoreIsCompoundFile = flushDocStores();
3526 flushDocStores = false;
3527 }
3528
3529 String segment = docWriter.getSegment();
3530
3531 // If we are flushing docs, segment must not be null:
3532 assert segment != null || !flushDocs;
3533
3534 if (flushDocs) {
3535
3536 boolean success = false;
3537 final int flushedDocCount;
3538
3539 try {
3540 flushedDocCount = docWriter.flush(flushDocStores);
3541 success = true;
3542 } finally {
3543 if (!success) {
3544 if (infoStream != null)
3545 message("hit exception flushing segment " + segment);
3546 deleter.refresh(segment);
3547 }
3548 }
3549
3550 if (0 == docStoreOffset && flushDocStores) {
3551 // This means we are flushing private doc stores
3552 // with this segment, so it will not be shared
3553 // with other segments
3554 assert docStoreSegment != null;
3555 assert docStoreSegment.equals(segment);
3556 docStoreOffset = -1;
3557 docStoreIsCompoundFile = false;
3558 docStoreSegment = null;
3559 }
3560
3561 // Create new SegmentInfo, but do not add to our
3562 // segmentInfos until deletes are flushed
3563 // successfully.
3564 newSegment = new SegmentInfo(segment,
3565 flushedDocCount,
3566 directory, false, true,
3567 docStoreOffset, docStoreSegment,
3568 docStoreIsCompoundFile,
3569 docWriter.hasProx());
3570 }
3571
3572 docWriter.pushDeletes();
3573
3574 if (flushDocs)
3575 segmentInfos.add(newSegment);
3576
3577 if (flushDeletes) {
3578 flushDeletesCount++;
3579 applyDeletes();
3580 }
3581
3582 doAfterFlush();
3583
3584 if (flushDocs)
3585 checkpoint();
3586
3587 if (flushDocs && mergePolicy.useCompoundFile(segmentInfos, newSegment)) {
3588 // Now build compound file
3589 boolean success = false;
3590 try {
3591 docWriter.createCompoundFile(segment);
3592 success = true;
3593 } finally {
3594 if (!success) {
3595 if (infoStream != null)
3596 message("hit exception creating compound file for newly flushed segment " + segment);
3597 deleter.deleteFile(segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
3598 }
3599 }
3600
3601 newSegment.setUseCompoundFile(true);
3602 checkpoint();
3603 }
3604
3605 return flushDocs;
3606
3607 } catch (OutOfMemoryError oom) {
3608 hitOOM = true;
3609 throw oom;
3610 } finally {
3611 docWriter.clearFlushPending();
3612 docWriter.resumeAllThreads();
3613 }
3614 }
3615
3616 /** Expert: Return the total size of all index files currently cached in memory.
3617  * Useful for memory management together with {@link #setRAMBufferSizeMB}.
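      *
      * <p>A minimal sketch of application-driven flushing based on
      * RAM use, assuming an open IndexWriter <code>writer</code>; the
      * 32 MB threshold is illustrative:</p>
      *
      * <pre>
      *   writer.addDocument(doc);
      *   if (writer.ramSizeInBytes() &gt; 32*1024*1024)
      *     writer.commit();
      * </pre>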
3618 */
3619 public final long ramSizeInBytes() {
3620 ensureOpen();
3621 return docWriter.getRAMUsed();
3622 }
3623
3624 /** Expert: Return the number of documents currently
3625 * buffered in RAM. */
3626 public final synchronized int numRamDocs() {
3627 ensureOpen();
3628 return docWriter.getNumDocsInRAM();
3629 }
3630
3631 private int ensureContiguousMerge(MergePolicy.OneMerge merge) {
3632
3633 int first = segmentInfos.indexOf(merge.segments.info(0));
3634 if (first == -1)
3635 throw new MergePolicy.MergeException("could not find segment " + merge.segments.info(0).name + " in current segments", directory);
3636
3637 final int numSegments = segmentInfos.size();
3638
3639 final int numSegmentsToMerge = merge.segments.size();
3640 for(int i=0;i<numSegmentsToMerge;i++) {
3641 final SegmentInfo info = merge.segments.info(i);
3642
3643 if (first + i >= numSegments || !segmentInfos.info(first+i).equals(info)) {
3644 if (segmentInfos.indexOf(info) == -1)
3645 throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the index", directory);
3646 else
3647 throw new MergePolicy.MergeException("MergePolicy selected non-contiguous segments to merge (" + merge.segString(directory) + " vs " + segString() + "), which IndexWriter (currently) cannot handle",
3648 directory);
3649 }
3650 }
3651
3652 return first;
3653 }
3654
3655 /** Carefully merges deletes for the segments we just
3656 * merged. This is tricky because, although merging will
3657 * clear all deletes (compacts the documents), new
3658 * deletes may have been flushed to the segments since
3659 * the merge was started. This method "carries over"
3660 * such new deletes onto the newly merged segment, and
3661 * saves the resulting deletes file (incrementing the
3662 * delete generation for merge.info). If no deletes were
3663 * flushed, no new deletes file is saved. */
3664 synchronized private void commitMergedDeletes(MergePolicy.OneMerge merge) throws IOException {
3665
3666 assert testPoint("startCommitMergeDeletes");
3667
3668 final SegmentInfos sourceSegmentsClone = merge.segmentsClone;
3669 final SegmentInfos sourceSegments = merge.segments;
3670
3671 if (infoStream != null)
3672 message("commitMergeDeletes " + merge.segString(directory));
3673
3674 // Carefully merge deletes that occurred after we
3675 // started merging:
3676
3677 BitVector deletes = null;
3678 int docUpto = 0;
3679 int delCount = 0;
3680
3681 final int numSegmentsToMerge = sourceSegments.size();
3682 for(int i=0;i<numSegmentsToMerge;i++) {
3683 final SegmentInfo previousInfo = sourceSegmentsClone.info(i);
3684 final SegmentInfo currentInfo = sourceSegments.info(i);
3685
3686 assert currentInfo.docCount == previousInfo.docCount;
3687
3688 final int docCount = currentInfo.docCount;
3689
3690 if (previousInfo.hasDeletions()) {
3691
3692 // There were deletes on this segment when the merge
3693 // started. The merge has collapsed away those
3694 // deletes, but, if new deletes were flushed since
3695         // the merge started, we must now carefully carry
3696         // over any newly flushed deletes, mapping them to
3697         // the new docIDs.
3698
3699 assert currentInfo.hasDeletions();
3700
3701 // Load deletes present @ start of merge, for this segment:
3702 BitVector previousDeletes = new BitVector(previousInfo.dir, previousInfo.getDelFileName());
3703
3704 if (!currentInfo.getDelFileName().equals(previousInfo.getDelFileName())) {
3705 // This means this segment has had new deletes
3706 // committed since we started the merge, so we
3707 // must merge them:
3708 if (deletes == null)
3709 deletes = new BitVector(merge.info.docCount);
3710
3711 BitVector currentDeletes = new BitVector(currentInfo.dir, currentInfo.getDelFileName());
3712 for(int j=0;j<docCount;j++) {
3713 if (previousDeletes.get(j))
3714 assert currentDeletes.get(j);
3715 else {
3716 if (currentDeletes.get(j)) {
3717 deletes.set(docUpto);
3718 delCount++;
3719 }
3720 docUpto++;
3721 }
3722 }
3723 } else
3724 docUpto += docCount - previousDeletes.count();
3725
3726 } else if (currentInfo.hasDeletions()) {
3727 // This segment had no deletes before but now it
3728 // does:
3729 if (deletes == null)
3730 deletes = new BitVector(merge.info.docCount);
3731 BitVector currentDeletes = new BitVector(directory, currentInfo.getDelFileName());
3732
3733 for(int j=0;j<docCount;j++) {
3734 if (currentDeletes.get(j)) {
3735 deletes.set(docUpto);
3736 delCount++;
3737 }
3738 docUpto++;
3739 }
3740
3741 } else
3742 // No deletes before or after
3743 docUpto += currentInfo.docCount;
3744 }
3745
3746 if (deletes != null) {
3747 merge.info.advanceDelGen();
3748 message("commit merge deletes to " + merge.info.getDelFileName());
3749 deletes.write(directory, merge.info.getDelFileName());
3750 merge.info.setDelCount(delCount);
3751 assert delCount == deletes.count();
3752 }
3753 }
3754
3755 /* FIXME if we want to support non-contiguous segment merges */
3756 synchronized private boolean commitMerge(MergePolicy.OneMerge merge, SegmentMerger merger, int mergedDocCount) throws IOException {
3757
3758 assert testPoint("startCommitMerge");
3759
3760 if (hitOOM)
3761 return false;
3762
3763 if (infoStream != null)
3764 message("commitMerge: " + merge.segString(directory) + " index=" + segString());
3765
3766 assert merge.registerDone;
3767
3768 // If merge was explicitly aborted, or, if rollback() or
3769 // rollbackTransaction() had been called since our merge
3770 // started (which results in an unqualified
3771 // deleter.refresh() call that will remove any index
3772 // file that current segments does not reference), we
3773 // abort this merge
3774 if (merge.isAborted()) {
3775 if (infoStream != null)
3776 message("commitMerge: skipping merge " + merge.segString(directory) + ": it was aborted");
3777
3778 deleter.refresh(merge.info.name);
3779 return false;
3780 }
3781
3782 final int start = ensureContiguousMerge(merge);
3783
3784 commitMergedDeletes(merge);
3785
3786 docWriter.remapDeletes(segmentInfos, merger.getDocMaps(), merger.getDelCounts(), merge, mergedDocCount);
3787
3788 // Simple optimization: if the doc store we are using
3789     // has been closed and is now in compound format (but
3790 // wasn't when we started), then we will switch to the
3791 // compound format as well:
3792 final String mergeDocStoreSegment = merge.info.getDocStoreSegment();
3793 if (mergeDocStoreSegment != null && !merge.info.getDocStoreIsCompoundFile()) {
3794 final int size = segmentInfos.size();
3795 for(int i=0;i<size;i++) {
3796 final SegmentInfo info = segmentInfos.info(i);
3797 final String docStoreSegment = info.getDocStoreSegment();
3798 if (docStoreSegment != null &&
3799 docStoreSegment.equals(mergeDocStoreSegment) &&
3800 info.getDocStoreIsCompoundFile()) {
3801 merge.info.setDocStoreIsCompoundFile(true);
3802 break;
3803 }
3804 }
3805 }
3806
3807 merge.info.setHasProx(merger.hasProx());
3808
3809 segmentInfos.subList(start, start + merge.segments.size()).clear();
3810 assert !segmentInfos.contains(merge.info);
3811 segmentInfos.add(start, merge.info);
3812
3813 // Must checkpoint before decrefing so any newly
3814 // referenced files in the new merge.info are incref'd
3815 // first:
3816 checkpoint();
3817
3818 decrefMergeSegments(merge);
3819
3820 if (merge.optimize)
3821 segmentsToOptimize.add(merge.info);
3822 return true;
3823 }
3824
3825 private void decrefMergeSegments(MergePolicy.OneMerge merge) throws IOException {
3826 final SegmentInfos sourceSegmentsClone = merge.segmentsClone;
3827 final int numSegmentsToMerge = sourceSegmentsClone.size();
3828 assert merge.increfDone;
3829 merge.increfDone = false;
3830 for(int i=0;i<numSegmentsToMerge;i++) {
3831 final SegmentInfo previousInfo = sourceSegmentsClone.info(i);
3832 // Decref all files for this SegmentInfo (this
3833 // matches the incref in mergeInit):
3834 if (previousInfo.dir == directory)
3835 deleter.decRef(previousInfo.files());
3836 }
3837 }
3838
3839 final private void handleMergeException(Throwable t, MergePolicy.OneMerge merge) throws IOException {
3840 // Set the exception on the merge, so if
3841 // optimize() is waiting on us it sees the root
3842 // cause exception:
3843 merge.setException(t);
3844 addMergeException(merge);
3845
3846 if (t instanceof MergePolicy.MergeAbortedException) {
3847 // We can ignore this exception (it happens when
3848 // close(false) or rollback is called), unless the
3849 // merge involves segments from external directories,
3850 // in which case we must throw it so, for example, the
3851 // rollbackTransaction code in addIndexes* is
3852 // executed.
3853 if (merge.isExternal)
3854 throw (MergePolicy.MergeAbortedException) t;
3855 } else if (t instanceof IOException)
3856 throw (IOException) t;
3857 else if (t instanceof RuntimeException)
3858 throw (RuntimeException) t;
3859 else if (t instanceof Error)
3860 throw (Error) t;
3861 else
3862 // Should not get here
3863 throw new RuntimeException(t);
3864 }
3865
3866 /**
3867 * Merges the indicated segments, replacing them in the stack with a
3868 * single segment.
3869 */
3870
3871 final void merge(MergePolicy.OneMerge merge)
3872 throws CorruptIndexException, IOException {
3873
3874 boolean success = false;
3875
3876 try {
3877 try {
3878 try {
3879 mergeInit(merge);
3880
3881 if (infoStream != null)
3882 message("now merge\n merge=" + merge.segString(directory) + "\n merge=" + merge + "\n index=" + segString());
3883
3884 mergeMiddle(merge);
3885 success = true;
3886 } catch (Throwable t) {
3887 handleMergeException(t, merge);
3888 }
3889 } finally {
3890 synchronized(this) {
3891 try {
3892
3893 mergeFinish(merge);
3894
3895 if (!success) {
3896 if (infoStream != null)
3897 message("hit exception during merge");
3898 if (merge.info != null && !segmentInfos.contains(merge.info))
3899 deleter.refresh(merge.info.name);
3900 }
3901
3902 // This merge (and, generally, any change to the
3903 // segments) may now enable new merges, so we call
3904 // merge policy & update pending merges.
3905 if (success && !merge.isAborted() && !closed && !closing)
3906 updatePendingMerges(merge.maxNumSegmentsOptimize, merge.optimize);
3907 } finally {
3908 runningMerges.remove(merge);
3909 }
3910 }
3911 }
3912 } catch (OutOfMemoryError oom) {
3913 hitOOM = true;
3914 throw oom;
3915 }
3916 }
3917
3918 /** Checks whether this merge involves any segments
3919 * already participating in a merge. If not, this merge
3920 * is "registered", meaning we record that its segments
3921 * are now participating in a merge, and true is
3922 * returned. Else (the merge conflicts) false is
3923 * returned. */
3924 final synchronized boolean registerMerge(MergePolicy.OneMerge merge) throws MergePolicy.MergeAbortedException {
3925
3926 if (merge.registerDone)
3927 return true;
3928
3929 if (stopMerges) {
3930 merge.abort();
3931 throw new MergePolicy.MergeAbortedException("merge is aborted: " + merge.segString(directory));
3932 }
3933
3934 final int count = merge.segments.size();
3935 boolean isExternal = false;
3936 for(int i=0;i<count;i++) {
3937 final SegmentInfo info = merge.segments.info(i);
3938 if (mergingSegments.contains(info))
3939 return false;
3940 if (segmentInfos.indexOf(info) == -1)
3941 return false;
3942 if (info.dir != directory)
3943 isExternal = true;
3944 }
3945
3946 ensureContiguousMerge(merge);
3947
3948 pendingMerges.add(merge);
3949
3950 if (infoStream != null)
3951 message("add merge to pendingMerges: " + merge.segString(directory) + " [total " + pendingMerges.size() + " pending]");
3952
3953 merge.mergeGen = mergeGen;
3954 merge.isExternal = isExternal;
3955
3956 // OK it does not conflict; now record that this merge
3957 // is running (while synchronized) to avoid race
3958     // condition where two conflicting merges from different
3959     // threads start
3960 for(int i=0;i<count;i++)
3961 mergingSegments.add(merge.segments.info(i));
3962
3963 // Merge is now registered
3964 merge.registerDone = true;
3965 return true;
3966 }
3967
3968 /** Does initial setup for a merge, which is fast but holds
3969 * the synchronized lock on IndexWriter instance. */
3970 final synchronized void mergeInit(MergePolicy.OneMerge merge) throws IOException {
3971 boolean success = false;
3972 try {
3973 _mergeInit(merge);
3974 success = true;
3975 } finally {
3976 if (!success) {
3977 mergeFinish(merge);
3978 runningMerges.remove(merge);
3979 }
3980 }
3981 }
3982
3983 final synchronized private void _mergeInit(MergePolicy.OneMerge merge) throws IOException {
3984
3985 assert testPoint("startMergeInit");
3986
3987 assert merge.registerDone;
3988 assert !merge.optimize || merge.maxNumSegmentsOptimize > 0;
3989
3990 if (merge.info != null)
3991 // mergeInit already done
3992 return;
3993
3994 if (merge.isAborted())
3995 return;
3996
3997 boolean changed = applyDeletes();
3998
3999 // If autoCommit == true then all deletes should have
4000 // been flushed when we flushed the last segment
4001 assert !changed || !autoCommit;
4002
4003 final SegmentInfos sourceSegments = merge.segments;
4004 final int end = sourceSegments.size();
4005
4006 // Check whether this merge will allow us to skip
4007 // merging the doc stores (stored field & vectors).
4008 // This is a very substantial optimization (saves tons
4009 // of IO) that can only be applied with
4010 // autoCommit=false.
4011
4012 Directory lastDir = directory;
4013 String lastDocStoreSegment = null;
4014 int next = -1;
4015
4016 boolean mergeDocStores = false;
4017 boolean doFlushDocStore = false;
4018 final String currentDocStoreSegment = docWriter.getDocStoreSegment();
4019
4020 // Test each segment to be merged: check if we need to
4021 // flush/merge doc stores
4022 for (int i = 0; i < end; i++) {
4023 SegmentInfo si = sourceSegments.info(i);
4024
4025 // If it has deletions we must merge the doc stores
4026 if (si.hasDeletions())
4027 mergeDocStores = true;
4028
4029 // If it has its own (private) doc stores we must
4030 // merge the doc stores
4031 if (-1 == si.getDocStoreOffset())
4032 mergeDocStores = true;
4033
4034 // If it has a different doc store segment than
4035 // previous segments, we must merge the doc stores
4036 String docStoreSegment = si.getDocStoreSegment();
4037 if (docStoreSegment == null)
4038 mergeDocStores = true;
4039 else if (lastDocStoreSegment == null)
4040 lastDocStoreSegment = docStoreSegment;
4041 else if (!lastDocStoreSegment.equals(docStoreSegment))
4042 mergeDocStores = true;
4043
4044       // Segments' docStoreOffsets must be in-order,
4045 // contiguous. For the default merge policy now
4046 // this will always be the case but for an arbitrary
4047 // merge policy this may not be the case
4048 if (-1 == next)
4049 next = si.getDocStoreOffset() + si.docCount;
4050 else if (next != si.getDocStoreOffset())
4051 mergeDocStores = true;
4052 else
4053 next = si.getDocStoreOffset() + si.docCount;
4054
4055 // If the segment comes from a different directory
4056 // we must merge
4057 if (lastDir != si.dir)
4058 mergeDocStores = true;
4059
4060 // If the segment is referencing the current "live"
4061 // doc store outputs then we must merge
4062 if (si.getDocStoreOffset() != -1 && currentDocStoreSegment != null && si.getDocStoreSegment().equals(currentDocStoreSegment)) {
4063 doFlushDocStore = true;
4064 }
4065 }
4066
4067 final int docStoreOffset;
4068 final String docStoreSegment;
4069 final boolean docStoreIsCompoundFile;
4070
4071 if (mergeDocStores) {
4072 docStoreOffset = -1;
4073 docStoreSegment = null;
4074 docStoreIsCompoundFile = false;
4075 } else {
4076 SegmentInfo si = sourceSegments.info(0);
4077 docStoreOffset = si.getDocStoreOffset();
4078 docStoreSegment = si.getDocStoreSegment();
4079 docStoreIsCompoundFile = si.getDocStoreIsCompoundFile();
4080 }
4081
4082 if (mergeDocStores && doFlushDocStore) {
4083 // SegmentMerger intends to merge the doc stores
4084 // (stored fields, vectors), and at least one of the
4085 // segments to be merged refers to the currently
4086 // live doc stores.
4087
4088 // TODO: if we know we are about to merge away these
4089 // newly flushed doc store files then we should not
4090 // make compound file out of them...
4091 if (infoStream != null)
4092 message("now flush at merge");
4093 doFlush(true, false);
4094 //flush(false, true, false);
4095 }
4096
4097 // We must take a full copy at this point so that we can
4098 // properly merge deletes in commitMerge()
4099 merge.segmentsClone = (SegmentInfos) merge.segments.clone();
4100
4101 for (int i = 0; i < end; i++) {
4102 SegmentInfo si = merge.segmentsClone.info(i);
4103
4104 // IncRef all files for this segment info to make sure
4105 // they are not removed while we are trying to merge.
4106 if (si.dir == directory)
4107 deleter.incRef(si.files());
4108 }
4109
4110 merge.increfDone = true;
4111
4112 merge.mergeDocStores = mergeDocStores;
4113
4114 // Bind a new segment name here so even with
4115     // ConcurrentMergeScheduler we keep deterministic segment
4116 // names.
4117 merge.info = new SegmentInfo(newSegmentName(), 0,
4118 directory, false, true,
4119 docStoreOffset,
4120 docStoreSegment,
4121 docStoreIsCompoundFile,
4122 false);
4123
4124 // Also enroll the merged segment into mergingSegments;
4125 // this prevents it from getting selected for a merge
4126 // after our merge is done but while we are building the
4127 // CFS:
4128 mergingSegments.add(merge.info);
4129 }
4130
4131 /** This is called after merging a segment and before
4132 * building its CFS. Return true if the files should be
4133 * sync'd. If you return false, then the source segment
4134 * files that were merged cannot be deleted until the CFS
4135 * file is built & sync'd. So, returning false consumes
4136    * more transient disk space, but saves the cost of
4137    * syncing files which will shortly be deleted
4138    * anyway.
4139 * @deprecated -- this will be removed in 3.0 when
4140 * autoCommit is hardwired to false */
4141 private synchronized boolean doCommitBeforeMergeCFS(MergePolicy.OneMerge merge) throws IOException {
4142 long freeableBytes = 0;
4143 final int size = merge.segments.size();
4144 for(int i=0;i<size;i++) {
4145 final SegmentInfo info = merge.segments.info(i);
4146 // It's only important to sync if the most recent
4147 // commit actually references this segment, because if
4148 // it doesn't, even without syncing we will free up
4149 // the disk space:
4150 Integer loc = (Integer) rollbackSegments.get(info);
4151 if (loc != null) {
4152 final SegmentInfo oldInfo = rollbackSegmentInfos.info(loc.intValue());
4153 if (oldInfo.getUseCompoundFile() != info.getUseCompoundFile())
4154 freeableBytes += info.sizeInBytes();
4155 }
4156 }
4157 // If we would free up more than 1/3rd of the index by
4158 // committing now, then do so:
4159 long totalBytes = 0;
4160 final int numSegments = segmentInfos.size();
4161 for(int i=0;i<numSegments;i++)
4162 totalBytes += segmentInfos.info(i).sizeInBytes();
4163 if (3*freeableBytes > totalBytes)
4164 return true;
4165 else
4166 return false;
4167 }
4168
4169   /** Does finishing for a merge, which is fast but holds
4170 * the synchronized lock on IndexWriter instance. */
4171 final synchronized void mergeFinish(MergePolicy.OneMerge merge) throws IOException {
4172
4173 // Optimize, addIndexes or finishMerges may be waiting
4174 // on merges to finish.
4175 notifyAll();
4176
4177 if (merge.increfDone)
4178 decrefMergeSegments(merge);
4179
4180 assert merge.registerDone;
4181
4182 final SegmentInfos sourceSegments = merge.segments;
4183 final int end = sourceSegments.size();
4184 for(int i=0;i<end;i++)
4185 mergingSegments.remove(sourceSegments.info(i));
4186 mergingSegments.remove(merge.info);
4187 merge.registerDone = false;
4188 }
4189
4190 /** Does the actual (time-consuming) work of the merge,
4191 * but without holding synchronized lock on IndexWriter
4192 * instance */
4193 final private int mergeMiddle(MergePolicy.OneMerge merge)
4194 throws CorruptIndexException, IOException {
4195
4196 merge.checkAborted(directory);
4197
4198 final String mergedName = merge.info.name;
4199
4200 SegmentMerger merger = null;
4201
4202 int mergedDocCount = 0;
4203
4204 SegmentInfos sourceSegments = merge.segments;
4205 SegmentInfos sourceSegmentsClone = merge.segmentsClone;
4206 final int numSegments = sourceSegments.size();
4207
4208 if (infoStream != null)
4209 message("merging " + merge.segString(directory));
4210
4211 merger = new SegmentMerger(this, mergedName, merge);
4212
4213 // This is try/finally to make sure merger's readers are
4214 // closed:
4215 try {
4216 int totDocCount = 0;
4217
4218 for (int i = 0; i < numSegments; i++) {
4219 SegmentInfo si = sourceSegmentsClone.info(i);
4220 IndexReader reader = SegmentReader.get(true, si, MERGE_READ_BUFFER_SIZE, merge.mergeDocStores); // no need to set deleter (yet)
4221 merger.add(reader);
4222 totDocCount += reader.numDocs();
4223 }
4224 if (infoStream != null) {
4225 message("merge: total "+totDocCount+" docs");
4226 }
4227
4228 merge.checkAborted(directory);
4229
4230 // This is where all the work happens:
4231 mergedDocCount = merge.info.docCount = merger.merge(merge.mergeDocStores);
4232
4233 assert mergedDocCount == totDocCount;
4234
4235 } finally {
4236 // close readers before we attempt to delete
4237 // now-obsolete segments
4238 if (merger != null) {
4239 merger.closeReaders();
4240 }
4241 }
4242
4243 if (!commitMerge(merge, merger, mergedDocCount))
4244 // commitMerge will return false if this merge was aborted
4245 return 0;
4246
4247 if (merge.useCompoundFile) {
4248
4249 // Maybe force a sync here to allow reclaiming of the
4250 // disk space used by the segments we just merged:
4251 if (autoCommit && doCommitBeforeMergeCFS(merge)) {
4252 final long size;
4253 synchronized(this) {
4254 size = merge.info.sizeInBytes();
4255 }
4256 commit(size);
4257 }
4258
4259 boolean success = false;
4260 final String compoundFileName = mergedName + "." + IndexFileNames.COMPOUND_FILE_EXTENSION;
4261
4262 try {
4263 merger.createCompoundFile(compoundFileName);
4264 success = true;
4265 } catch (IOException ioe) {
4266 synchronized(this) {
4267 if (merge.isAborted()) {
4268 // This can happen if rollback or close(false)
4269 // is called -- fall through to logic below to
4270 // remove the partially created CFS:
4271 success = true;
4272 } else
4273 handleMergeException(ioe, merge);
4274 }
4275 } catch (Throwable t) {
4276 handleMergeException(t, merge);
4277 } finally {
4278 if (!success) {
4279 if (infoStream != null)
4280 message("hit exception creating compound file during merge");
4281 synchronized(this) {
4282 deleter.deleteFile(compoundFileName);
4283 }
4284 }
4285 }
4286
4287 if (merge.isAborted()) {
4288 if (infoStream != null)
4289 message("abort merge after building CFS");
4290 deleter.deleteFile(compoundFileName);
4291 return 0;
4292 }
4293
4294 synchronized(this) {
4295 if (segmentInfos.indexOf(merge.info) == -1 || merge.isAborted()) {
4296 // Our segment (committed in non-compound
4297 // format) got merged away while we were
4298 // building the compound format.
4299 deleter.deleteFile(compoundFileName);
4300 } else {
4301 merge.info.setUseCompoundFile(true);
4302 checkpoint();
4303 }
4304 }
4305 }
4306
4307     // Force a sync after committing the merge.  Once this
4308 // sync completes then all index files referenced by the
4309 // current segmentInfos are on stable storage so if the
4310 // OS/machine crashes, or power cord is yanked, the
4311 // index will be intact. Note that this is just one
4312 // (somewhat arbitrary) policy; we could try other
4313 // policies like only sync if it's been > X minutes or
4314 // more than Y bytes have been written, etc.
4315 if (autoCommit) {
4316 final long size;
4317 synchronized(this) {
4318 size = merge.info.sizeInBytes();
4319 }
4320 commit(size);
4321 }
4322
4323 return mergedDocCount;
4324 }
4325
4326 synchronized void addMergeException(MergePolicy.OneMerge merge) {
4327 assert merge.getException() != null;
4328 if (!mergeExceptions.contains(merge) && mergeGen == merge.mergeGen)
4329 mergeExceptions.add(merge);
4330 }
4331
4332 // Apply buffered deletes to all segments.
4333 private final synchronized boolean applyDeletes() throws CorruptIndexException, IOException {
4334 assert testPoint("startApplyDeletes");
4335 SegmentInfos rollback = (SegmentInfos) segmentInfos.clone();
4336 boolean success = false;
4337 boolean changed;
4338 try {
4339 changed = docWriter.applyDeletes(segmentInfos);
4340 success = true;
4341 } finally {
4342 if (!success) {
4343 if (infoStream != null)
4344 message("hit exception flushing deletes");
4345
4346 // Carefully remove any partially written .del
4347 // files
4348 final int size = rollback.size();
4349 for(int i=0;i<size;i++) {
4350 final String newDelFileName = segmentInfos.info(i).getDelFileName();
4351 final String delFileName = rollback.info(i).getDelFileName();
4352 if (newDelFileName != null && !newDelFileName.equals(delFileName))
4353 deleter.deleteFile(newDelFileName);
4354 }
4355
4356 // Fully replace the segmentInfos since flushed
4357 // deletes could have changed any of the
4358 // SegmentInfo instances:
4359 segmentInfos.clear();
4360 segmentInfos.addAll(rollback);
4361 }
4362 }
4363
4364 if (changed)
4365 checkpoint();
4366 return changed;
4367 }
4368
4369 // For test purposes.
4370 final synchronized int getBufferedDeleteTermsSize() {
4371 return docWriter.getBufferedDeleteTerms().size();
4372 }
4373
4374 // For test purposes.
4375 final synchronized int getNumBufferedDeleteTerms() {
4376 return docWriter.getNumBufferedDeleteTerms();
4377 }
4378
4379 // utility routines for tests
4380 SegmentInfo newestSegment() {
4381 return segmentInfos.info(segmentInfos.size()-1);
4382 }
4383
4384 public synchronized String segString() {
4385 return segString(segmentInfos);
4386 }
4387
4388 private synchronized String segString(SegmentInfos infos) {
4389 StringBuffer buffer = new StringBuffer();
4390 final int count = infos.size();
4391 for(int i = 0; i < count; i++) {
4392 if (i > 0) {
4393 buffer.append(' ');
4394 }
4395 final SegmentInfo info = infos.info(i);
4396 buffer.append(info.segString(directory));
4397 if (info.dir != directory)
4398 buffer.append("**");
4399 }
4400 return buffer.toString();
4401 }
4402
4403 // Files that have been sync'd already
4404 private HashSet synced = new HashSet();
4405
4406 // Files that are now being sync'd
4407 private HashSet syncing = new HashSet();
4408
4409 private boolean startSync(String fileName, Collection pending) {
4410 synchronized(synced) {
4411 if (!synced.contains(fileName)) {
4412 if (!syncing.contains(fileName)) {
4413 syncing.add(fileName);
4414 return true;
4415 } else {
4416 pending.add(fileName);
4417 return false;
4418 }
4419 } else
4420 return false;
4421 }
4422 }
4423
4424 private void finishSync(String fileName, boolean success) {
4425 synchronized(synced) {
4426 assert syncing.contains(fileName);
4427 syncing.remove(fileName);
4428 if (success)
4429 synced.add(fileName);
4430 synced.notifyAll();
4431 }
4432 }
4433
4434 /** Blocks until all files in syncing are sync'd */
4435 private boolean waitForAllSynced(Collection syncing) throws IOException {
4436 synchronized(synced) {
4437 Iterator it = syncing.iterator();
4438 while(it.hasNext()) {
4439 final String fileName = (String) it.next();
4440 while(!synced.contains(fileName)) {
4441 if (!syncing.contains(fileName))
4442 // There was an error because a file that was
4443 // previously syncing failed to appear in synced
4444 return false;
4445 else
4446 try {
4447 synced.wait();
4448 } catch (InterruptedException ie) {
4449 continue;
4450 }
4451 }
4452 }
4453 return true;
4454 }
4455 }
4456
4457 /** Pauses before syncing. On Windows, at least, it's
4458    * best (performance-wise) to pause in order to let the OS
4459 * flush writes to disk on its own, before forcing a
4460 * sync.
4461 * @deprecated -- this will be removed in 3.0 when
4462 * autoCommit is hardwired to false */
4463 private void syncPause(long sizeInBytes) {
4464 if (mergeScheduler instanceof ConcurrentMergeScheduler && maxSyncPauseSeconds > 0) {
4465 // Rough heuristic: for every 10 MB, we pause for 1
4466 // second, up until the max
4467 long pauseTime = (long) (1000*sizeInBytes/10/1024/1024);
4468 final long maxPauseTime = (long) (maxSyncPauseSeconds*1000);
4469 if (pauseTime > maxPauseTime)
4470 pauseTime = maxPauseTime;
4471 final int sleepCount = (int) (pauseTime / 100);
4472 for(int i=0;i<sleepCount;i++) {
4473 synchronized(this) {
4474 if (stopMerges || closing)
4475 break;
4476 }
4477 try {
4478 Thread.sleep(100);
4479 } catch (InterruptedException ie) {
4480 Thread.currentThread().interrupt();
4481 }
4482 }
4483 }
4484 }
4485
4486 private synchronized void doWait() {
4487 try {
4488 // NOTE: the callers of this method should in theory
4489       // be able to simply call wait(), but, as a defense
4490       // against thread timing hazards where notifyAll()
4491       // fails to be called, we wait for at most 1 second
4492       // and then return so the caller can check if the wait
4493       // conditions are satisfied:
4494 wait(1000);
4495 } catch (InterruptedException ie) {
4496 Thread.currentThread().interrupt();
4497 }
4498 }
4499
4500 /** Walk through all files referenced by the current
4501 * segmentInfos and ask the Directory to sync each file,
4502 * if it wasn't already. If that succeeds, then we
4503 * prepare a new segments_N file but do not fully commit
4504 * it. */
4505 private void startCommit(long sizeInBytes) throws IOException {
4506
4507 assert testPoint("startStartCommit");
4508
4509 if (hitOOM)
4510 return;
4511
4512 try {
4513
4514 if (infoStream != null)
4515 message("startCommit(): start sizeInBytes=" + sizeInBytes);
4516
4517 if (sizeInBytes > 0)
4518 syncPause(sizeInBytes);
4519
4520 SegmentInfos toSync = null;
4521 final long myChangeCount;
4522
4523 synchronized(this) {
4524
4525 // sizeInBytes > 0 means this is an autoCommit at
4526 // the end of a merge. If at this point stopMerges
4527 // is true (which means a rollback() or
4528 // rollbackTransaction() is waiting for us to
4529 // finish), we skip the commit to avoid deadlock
4530 if (sizeInBytes > 0 && stopMerges)
4531 return;
4532
4533 // Wait for any running addIndexes to complete
4534 // first, then block any from running until we've
4535 // copied the segmentInfos we intend to sync:
4536 blockAddIndexes(false);
4537
4538 assert !hasExternalSegments();
4539
4540 try {
4541
4542 assert lastCommitChangeCount <= changeCount;
4543
4544 if (changeCount == lastCommitChangeCount) {
4545 if (infoStream != null)
4546 message(" skip startCommit(): no changes pending");
4547 return;
4548 }
4549
4550 // First, we clone & incref the segmentInfos we intend
4551 // to sync, then, without locking, we sync() each file
4552 // referenced by toSync, in the background. Multiple
4553 // threads can be doing this at once, if say a large
4554 // merge and a small merge finish at the same time:
4555
4556 if (infoStream != null)
4557 message("startCommit index=" + segString(segmentInfos) + " changeCount=" + changeCount);
4558
4559 toSync = (SegmentInfos) segmentInfos.clone();
4560 deleter.incRef(toSync, false);
4561 myChangeCount = changeCount;
4562 } finally {
4563 resumeAddIndexes();
4564 }
4565 }
4566
4567 assert testPoint("midStartCommit");
4568
4569 boolean setPending = false;
4570
4571 try {
4572
4573 // Loop until all files toSync references are sync'd:
4574 while(true) {
4575
4576 final Collection pending = new ArrayList();
4577
4578 for(int i=0;i<toSync.size();i++) {
4579 final SegmentInfo info = toSync.info(i);
4580 final List files = info.files();
4581 for(int j=0;j<files.size();j++) {
4582 final String fileName = (String) files.get(j);
4583 if (startSync(fileName, pending)) {
4584 boolean success = false;
4585 try {
4586 // Because we incRef'd this commit point, above,
4587 // the file had better exist:
4588 assert directory.fileExists(fileName): "file '" + fileName + "' does not exist dir=" + directory;
4589 message("now sync " + fileName);
4590 directory.sync(fileName);
4591 success = true;
4592 } finally {
4593 finishSync(fileName, success);
4594 }
4595 }
4596 }
4597 }
4598
4599 // All files that I require are either synced or being
4600 // synced by other threads. If they are being synced,
4601 // we must at this point block until they are done.
4602 // If this returns false, that means an error in
4603 // another thread resulted in failing to actually
4604 // sync one of our files, so we repeat:
4605 if (waitForAllSynced(pending))
4606 break;
4607 }
4608
4609 assert testPoint("midStartCommit2");
4610
4611 synchronized(this) {
4612 // If someone saved a newer version of segments file
4613 // since I first started syncing my version, I can
4614 // safely skip saving myself since I've been
4615 // superseded:
4616
4617 while(true) {
4618 if (myChangeCount <= lastCommitChangeCount) {
4619 if (infoStream != null) {
4620 message("sync superseded by newer infos");
4621 }
4622 break;
4623 } else if (pendingCommit == null) {
4624 // My turn to commit
4625
4626 if (segmentInfos.getGeneration() > toSync.getGeneration())
4627 toSync.updateGeneration(segmentInfos);
4628
4629 boolean success = false;
4630 try {
4631
4632 // Exception here means nothing is prepared
4633 // (this method unwinds everything it did on
4634 // an exception)
4635 try {
4636 toSync.prepareCommit(directory);
4637 } finally {
4638 // Have our master segmentInfos record the
4639 // generations we just prepared. We do this
4640 // on error or success so we don't
4641 // double-write a segments_N file.
4642 segmentInfos.updateGeneration(toSync);
4643 }
4644
4645 assert pendingCommit == null;
4646 setPending = true;
4647 pendingCommit = toSync;
4648 pendingCommitChangeCount = myChangeCount;
4649 success = true;
4650 } finally {
4651 if (!success && infoStream != null)
4652 message("hit exception committing segments file");
4653 }
4654 break;
4655 } else {
4656 // Must wait for other commit to complete
4657 doWait();
4658 }
4659 }
4660 }
4661
4662 message("done all syncs");
4663
4664 assert testPoint("midStartCommitSuccess");
4665
4666 } finally {
4667 synchronized(this) {
4668 if (!setPending)
4669 deleter.decRef(toSync);
4670 }
4671 }
4672 } catch (OutOfMemoryError oom) {
4673 hitOOM = true;
4674 throw oom;
4675 }
4676 assert testPoint("finishStartCommit");
4677 }
4678
4679 /**
4680 * Returns <code>true</code> iff the index in the named directory is
4681 * currently locked.
4682 * @param directory the directory to check for a lock
4683 * @throws IOException if there is a low-level IO error
4684 */
4685 public static boolean isLocked(Directory directory) throws IOException {
4686 return directory.makeLock(WRITE_LOCK_NAME).isLocked();
4687 }
4688
4689 /**
4690 * Returns <code>true</code> iff the index in the named directory is
4691 * currently locked.
4692 * @param directory the directory to check for a lock
4693 * @throws IOException if there is a low-level IO error
4694 */
4695 public static boolean isLocked(String directory) throws IOException {
4696 Directory dir = FSDirectory.getDirectory(directory);
4697 try {
4698 return isLocked(dir);
4699 } finally {
4700 dir.close();
4701 }
4702 }
4703
4704 /**
4705 * Forcibly unlocks the index in the named directory.
4706 * <P>
4707 * Caution: this should only be used by failure recovery code,
4708    * when it is known that no other process or thread is in fact
4709 * currently accessing this index.
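      *
      * <p>A minimal recovery sketch, assuming a Directory
      * <code>dir</code> that is known not to be in use by any live
      * IndexWriter or other process:</p>
      *
      * <pre>
      *   if (IndexWriter.isLocked(dir))
      *     IndexWriter.unlock(dir);
      * </pre>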
4710 */
4711 public static void unlock(Directory directory) throws IOException {
4712 directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
4713 }
4714
4715 /**
4716 * Specifies maximum field length in {@link IndexWriter} constructors.
4717 * {@link #setMaxFieldLength(int)} overrides the value set by
4718 * the constructor.
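      *
      * <p>A minimal sketch, assuming a Directory <code>dir</code> and
      * an Analyzer <code>analyzer</code> created elsewhere; the
      * 10000-term limit and the constructor used are illustrative:</p>
      *
      * <pre>
      *   IndexWriter writer = new IndexWriter(dir, analyzer,
      *       new IndexWriter.MaxFieldLength(10000));
      * </pre>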
4719 */
4720 public static final class MaxFieldLength {
4721
4722 private int limit;
4723 private String name;
4724
4725 /**
4726 * Private type-safe-enum-pattern constructor.
4727 *
4728 * @param name instance name
4729 * @param limit maximum field length
4730 */
4731 private MaxFieldLength(String name, int limit) {
4732 this.name = name;
4733 this.limit = limit;
4734 }
4735
4736 /**
4737      * Public constructor to allow users to specify the maximum field length limit.
4738 *
4739 * @param limit The maximum field length
4740 */
4741 public MaxFieldLength(int limit) {
4742 this("User-specified", limit);
4743 }
4744
4745 public int getLimit() {
4746 return limit;
4747 }
4748
4749 public String toString()
4750 {
4751 return name + ":" + limit;
4752 }
4753
4754 /** Sets the maximum field length to {@link Integer#MAX_VALUE}. */
4755 public static final MaxFieldLength UNLIMITED
4756 = new MaxFieldLength("UNLIMITED", Integer.MAX_VALUE);
4757
4758 /**
4759 * Sets the maximum field length to
4760 * {@link #DEFAULT_MAX_FIELD_LENGTH}
4761 * */
4762 public static final MaxFieldLength LIMITED
4763 = new MaxFieldLength("LIMITED", DEFAULT_MAX_FIELD_LENGTH);
4764 }
4765
4766 // Used only by assert for testing. Current points:
4767 // startDoFlush
4768 // startCommitMerge
4769 // startStartCommit
4770 // midStartCommit
4771 // midStartCommit2
4772 // midStartCommitSuccess
4773 // finishStartCommit
4774 // startCommitMergeDeletes
4775 // startMergeInit
4776 // startApplyDeletes
4777 // DocumentsWriter.ThreadState.init start
4778 boolean testPoint(String name) {
4779 return true;
4780 }
4781}
4782| IndexWriter.java |