| TokenStream.java |
1 package org.apache.lucene.analysis;
2
3 /**
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20 import org.apache.lucene.index.Payload;
21
22 import java.io.IOException;
23
24 /** A TokenStream enumerates the sequence of tokens, either from
25 fields of a document or from query text.
26 <p>
27 This is an abstract class. Concrete subclasses are:
28 <ul>
29 <li>{@link Tokenizer}, a TokenStream
30 whose input is a Reader; and
31 <li>{@link TokenFilter}, a TokenStream
32 whose input is another TokenStream.
33 </ul>
34 NOTE: subclasses must override {@link #next(Token)}. It's
35 also OK to instead override {@link #next()} but that
36 method is now deprecated in favor of {@link #next(Token)}.
37 */
38
39 public abstract class TokenStream {
40
41 /** Returns the next token in the stream, or null at EOS.
42 * @deprecated The returned Token is a "full private copy" (not
43 * re-used across calls to next()) but will be slower
44 * than calling {@link #next(Token)} instead.. */
45 public Token next() throws IOException {
46 final Token reusableToken = new Token();
47 Token nextToken = next(reusableToken);
48
49 if (nextToken != null) {
50 Payload p = nextToken.getPayload();
51 if (p != null) {
52 nextToken.setPayload((Payload) p.clone());
53 }
54 }
55
56 return nextToken;
57 }
58
59 /** Returns the next token in the stream, or null at EOS.
60 * When possible, the input Token should be used as the
61 * returned Token (this gives fastest tokenization
62 * performance), but this is not required and a new Token
63 * may be returned. Callers may re-use a single Token
64 * instance for successive calls to this method.
65 * <p>
66 * This implicitly defines a "contract" between
67 * consumers (callers of this method) and
68 * producers (implementations of this method
69 * that are the source for tokens):
70 * <ul>
71 * <li>A consumer must fully consume the previously
72 * returned Token before calling this method again.</li>
73 * <li>A producer must call {@link Token#clear()}
74 * before setting the fields in it & returning it</li>
75 * </ul>
76 * Also, the producer must make no assumptions about a
77 * Token after it has been returned: the caller may
78 * arbitrarily change it. If the producer needs to hold
79 * onto the token for subsequent calls, it must clone()
80 * it before storing it.
81 * Note that a {@link TokenFilter} is considered a consumer.
82 * @param reusableToken a Token that may or may not be used to
83 * return; this parameter should never be null (the callee
84 * is not required to check for null before using it, but it is a
85 * good idea to assert that it is not null.)
86 * @return next token in the stream or null if end-of-stream was hit
87 */
88 public Token next(final Token reusableToken) throws IOException {
89 // We don't actually use inputToken, but still add this assert
90 assert reusableToken != null;
91 return next();
92 }
93
94 /** Resets this stream to the beginning. This is an
95 * optional operation, so subclasses may or may not
96 * implement this method. Reset() is not needed for
97 * the standard indexing process. However, if the Tokens
98 * of a TokenStream are intended to be consumed more than
99 * once, it is necessary to implement reset(). Note that
100 * if your TokenStream caches tokens and feeds them back
101 * again after a reset, it is imperative that you
102 * clone the tokens when you store them away (on the
103 * first pass) as well as when you return them (on future
104 * passes after reset()).
105 */
106 public void reset() throws IOException {}
107
108 /** Releases resources associated with this stream. */
109 public void close() throws IOException {}
110 }
111 | TokenStream.java |