1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_INDEX_INDEX_PROCESSOR_H_ 16 #define ICING_INDEX_INDEX_PROCESSOR_H_ 17 18 #include <memory> 19 #include <utility> 20 #include <vector> 21 22 #include "icing/text_classifier/lib3/utils/base/status.h" 23 #include "icing/index/data-indexing-handler.h" 24 #include "icing/proto/logging.pb.h" 25 #include "icing/store/document-id.h" 26 #include "icing/util/clock.h" 27 #include "icing/util/tokenized-document.h" 28 29 namespace icing { 30 namespace lib { 31 32 class IndexProcessor { 33 public: 34 explicit IndexProcessor(std::vector<std::unique_ptr<DataIndexingHandler>>&& 35 data_indexing_handlers, 36 const Clock* clock, bool recovery_mode = false) data_indexing_handlers_(std::move (data_indexing_handlers))37 : data_indexing_handlers_(std::move(data_indexing_handlers)), 38 clock_(*clock), 39 recovery_mode_(recovery_mode) {} 40 41 // Add tokenized document to the index, associated with document_id. If the 42 // number of tokens in the document exceeds max_tokens_per_document, then only 43 // the first max_tokens_per_document will be added to the index. All tokens of 44 // length exceeding max_token_length will be shortened to max_token_length. 45 // 46 // old_document_id is provided. If valid, then it means the document with 47 // the same (namespace, uri) exists previously, and it is updated with new 48 // contents at this round. Each indexing handler should decide whether 49 // migrating existing data from old_document_id to (new) document_id according 50 // to each index's data logic. 51 // 52 // Indexing a document *may* trigger an index merge. If a merge fails, then 53 // all content in the index will be lost. 54 // 55 // If put_document_stats is present, the fields related to indexing will be 56 // populated. 57 // 58 // Returns: 59 // - OK on success. 60 // - Any DataIndexingHandler errors. 61 libtextclassifier3::Status IndexDocument( 62 const TokenizedDocument& tokenized_document, DocumentId document_id, 63 DocumentId old_document_id, 64 PutDocumentStatsProto* put_document_stats = nullptr); 65 66 private: 67 std::vector<std::unique_ptr<DataIndexingHandler>> data_indexing_handlers_; 68 const Clock& clock_; // Does not own. 69 bool recovery_mode_; 70 }; 71 72 } // namespace lib 73 } // namespace icing 74 75 #endif // ICING_INDEX_INDEX_PROCESSOR_H_ 76