xref: /aosp_15_r20/external/icing/icing/index/index-processor.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_INDEX_INDEX_PROCESSOR_H_
16 #define ICING_INDEX_INDEX_PROCESSOR_H_
17 
18 #include <memory>
19 #include <utility>
20 #include <vector>
21 
22 #include "icing/text_classifier/lib3/utils/base/status.h"
23 #include "icing/index/data-indexing-handler.h"
24 #include "icing/proto/logging.pb.h"
25 #include "icing/store/document-id.h"
26 #include "icing/util/clock.h"
27 #include "icing/util/tokenized-document.h"
28 
29 namespace icing {
30 namespace lib {
31 
32 class IndexProcessor {
33  public:
34   explicit IndexProcessor(std::vector<std::unique_ptr<DataIndexingHandler>>&&
35                               data_indexing_handlers,
36                           const Clock* clock, bool recovery_mode = false)
data_indexing_handlers_(std::move (data_indexing_handlers))37       : data_indexing_handlers_(std::move(data_indexing_handlers)),
38         clock_(*clock),
39         recovery_mode_(recovery_mode) {}
40 
41   // Add tokenized document to the index, associated with document_id. If the
42   // number of tokens in the document exceeds max_tokens_per_document, then only
43   // the first max_tokens_per_document will be added to the index. All tokens of
44   // length exceeding max_token_length will be shortened to max_token_length.
45   //
46   // old_document_id is provided. If valid, then it means the document with
47   // the same (namespace, uri) exists previously, and it is updated with new
48   // contents at this round. Each indexing handler should decide whether
49   // migrating existing data from old_document_id to (new) document_id according
50   // to each index's data logic.
51   //
52   // Indexing a document *may* trigger an index merge. If a merge fails, then
53   // all content in the index will be lost.
54   //
55   // If put_document_stats is present, the fields related to indexing will be
56   // populated.
57   //
58   // Returns:
59   //   - OK on success.
60   //   - Any DataIndexingHandler errors.
61   libtextclassifier3::Status IndexDocument(
62       const TokenizedDocument& tokenized_document, DocumentId document_id,
63       DocumentId old_document_id,
64       PutDocumentStatsProto* put_document_stats = nullptr);
65 
66  private:
67   std::vector<std::unique_ptr<DataIndexingHandler>> data_indexing_handlers_;
68   const Clock& clock_;  // Does not own.
69   bool recovery_mode_;
70 };
71 
72 }  // namespace lib
73 }  // namespace icing
74 
75 #endif  // ICING_INDEX_INDEX_PROCESSOR_H_
76