1   package org.apache.lucene.analysis;
2   
3   /**
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements.  See the NOTICE file distributed with
6    * this work for additional information regarding copyright ownership.
7    * The ASF licenses this file to You under the Apache License, Version 2.0
8    * (the "License"); you may not use this file except in compliance with
9    * the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  import org.apache.lucene.index.Payload;
21  
22  import java.io.IOException;
23  
24  /** A TokenStream enumerates the sequence of tokens, either from
25    fields of a document or from query text.
26    <p>
27    This is an abstract class.  Concrete subclasses are:
28    <ul>
29    <li>{@link Tokenizer}, a TokenStream
30    whose input is a Reader; and
31    <li>{@link TokenFilter}, a TokenStream
32    whose input is another TokenStream.
33    </ul>
34    NOTE: subclasses must override {@link #next(Token)}.  It's
35    also OK to instead override {@link #next()} but that
36    method is now deprecated in favor of {@link #next(Token)}.
37    */
38  
39  public abstract class TokenStream {
40  
41    /** Returns the next token in the stream, or null at EOS.
42     *  @deprecated The returned Token is a "full private copy" (not
43     *  re-used across calls to next()) but will be slower
44     *  than calling {@link #next(Token)} instead.. */
45    public Token next() throws IOException {
46      final Token reusableToken = new Token();
47      Token nextToken = next(reusableToken);
48  
49      if (nextToken != null) {
50        Payload p = nextToken.getPayload();
51        if (p != null) {
52          nextToken.setPayload((Payload) p.clone());
53        }
54      }
55  
56      return nextToken;
57    }
58  
59    /** Returns the next token in the stream, or null at EOS.
60     *  When possible, the input Token should be used as the
61     *  returned Token (this gives fastest tokenization
62     *  performance), but this is not required and a new Token
63     *  may be returned. Callers may re-use a single Token
64     *  instance for successive calls to this method.
65     *  <p>
66     *  This implicitly defines a "contract" between 
67     *  consumers (callers of this method) and 
68     *  producers (implementations of this method 
69     *  that are the source for tokens):
70     *  <ul>
71     *   <li>A consumer must fully consume the previously 
72     *       returned Token before calling this method again.</li>
73     *   <li>A producer must call {@link Token#clear()}
74     *       before setting the fields in it & returning it</li>
75     *  </ul>
76     *  Also, the producer must make no assumptions about a
77     *  Token after it has been returned: the caller may
78     *  arbitrarily change it.  If the producer needs to hold
79     *  onto the token for subsequent calls, it must clone()
80     *  it before storing it.
81     *  Note that a {@link TokenFilter} is considered a consumer.
82     *  @param reusableToken a Token that may or may not be used to
83     *  return; this parameter should never be null (the callee
84     *  is not required to check for null before using it, but it is a
85     *  good idea to assert that it is not null.)
86     *  @return next token in the stream or null if end-of-stream was hit
87     */
88    public Token next(final Token reusableToken) throws IOException {
89      // We don't actually use inputToken, but still add this assert
90      assert reusableToken != null;
91      return next();
92    }
93  
94    /** Resets this stream to the beginning. This is an
95     *  optional operation, so subclasses may or may not
96     *  implement this method. Reset() is not needed for
97     *  the standard indexing process. However, if the Tokens 
98     *  of a TokenStream are intended to be consumed more than 
99     *  once, it is necessary to implement reset().  Note that
100    *  if your TokenStream caches tokens and feeds them back
101    *  again after a reset, it is imperative that you
102    *  clone the tokens when you store them away (on the
103    *  first pass) as well as when you return them (on future
104    *  passes after reset()).
105    */
106   public void reset() throws IOException {}
107   
108   /** Releases resources associated with this stream. */
109   public void close() throws IOException {}
110 }
111