// Copyright (C) 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef ICING_SCHEMA_SECTION_MANAGER_H_
#define ICING_SCHEMA_SECTION_MANAGER_H_

#include <memory>
#include <string>
#include <string_view>
#include <vector>

#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/proto/document.pb.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/key-mapper.h"

namespace icing {
namespace lib {

// This class provides section-related operations. It assigns sections according
// to type configs and extracts section / sections from documents.
// The actual instance is created together with JoinablePropertyManager and both
// of them are wrapped into SchemaTypeManager.
//
// Note: SectionManager assumes schema type ids are consecutive integers
// starting from 0, so it maintains a vector with size
// schema_type_mapper_->num_keys() that maps schema type id to a list (2nd level
// vector) of SectionMetadatas. Therefore, all schema type ids stored in
// schema_type_mapper_ must be in range [0, schema_type_mapper_->num_keys() - 1]
// and unique.
43 class SectionManager { 44 public: 45 // Builder class to create a SectionManager which does not take ownership of 46 // any input components, and all pointers must refer to valid objects that 47 // outlive the created SectionManager instance. 48 class Builder { 49 public: Builder(const KeyMapper<SchemaTypeId> & schema_type_mapper)50 explicit Builder(const KeyMapper<SchemaTypeId>& schema_type_mapper) 51 : schema_type_mapper_(schema_type_mapper), 52 section_metadata_cache_(schema_type_mapper.num_keys()) {} 53 54 // Checks and appends a new SectionMetadata for the schema type id if the 55 // given property config is indexable. 56 // 57 // Returns: 58 // - OK on success 59 // - INVALID_ARGUMENT_ERROR if schema type id is invalid (not in range [0, 60 // schema_type_mapper_.num_keys() - 1]) 61 // - OUT_OF_RANGE_ERROR if # of indexable properties in a single Schema 62 // exceeds the threshold (kTotalNumSections) 63 libtextclassifier3::Status ProcessSchemaTypePropertyConfig( 64 SchemaTypeId schema_type_id, const PropertyConfigProto& property_config, 65 std::string&& property_path); 66 67 // Builds and returns a SectionManager instance. Build()68 std::unique_ptr<SectionManager> Build() && { 69 return std::unique_ptr<SectionManager>(new SectionManager( 70 schema_type_mapper_, std::move(section_metadata_cache_))); 71 } 72 73 private: 74 const KeyMapper<SchemaTypeId>& schema_type_mapper_; // Does not own. 75 std::vector<std::vector<SectionMetadata>> section_metadata_cache_; 76 }; 77 78 SectionManager(const SectionManager&) = delete; 79 SectionManager& operator=(const SectionManager&) = delete; 80 81 // Returns the SectionMetadata associated with the SectionId that's in the 82 // SchemaTypeId. 
83 // 84 // Returns: 85 // pointer to SectionMetadata on success 86 // INVALID_ARGUMENT if schema type id or section is invalid 87 libtextclassifier3::StatusOr<const SectionMetadata*> GetSectionMetadata( 88 SchemaTypeId schema_type_id, SectionId section_id) const; 89 90 // Extracts all sections of different types from the given document and group 91 // them by type. 92 // - Sections are sorted by section id in ascending order. 93 // - Section ids start from 0. 94 // - Sections with empty content won't be returned. 95 // 96 // Returns: 97 // A SectionGroup instance on success 98 // NOT_FOUND if the type config name of document is not present in 99 // schema_type_mapper_ 100 libtextclassifier3::StatusOr<SectionGroup> ExtractSections( 101 const DocumentProto& document) const; 102 103 // Returns: 104 // - On success, the section metadatas for the specified type 105 // - NOT_FOUND if the type config name is not present in schema_type_mapper_ 106 libtextclassifier3::StatusOr<const std::vector<SectionMetadata>*> 107 GetMetadataList(const std::string& type_config_name) const; 108 109 private: SectionManager(const KeyMapper<SchemaTypeId> & schema_type_mapper,std::vector<std::vector<SectionMetadata>> && section_metadata_cache)110 explicit SectionManager( 111 const KeyMapper<SchemaTypeId>& schema_type_mapper, 112 std::vector<std::vector<SectionMetadata>>&& section_metadata_cache) 113 : schema_type_mapper_(schema_type_mapper), 114 section_metadata_cache_(std::move(section_metadata_cache)) {} 115 116 // Maps schema types to a densely-assigned unique id. 117 const KeyMapper<SchemaTypeId>& schema_type_mapper_; // Does not own 118 119 // The index of section_metadata_cache_ corresponds to a schema type's 120 // SchemaTypeId. At that SchemaTypeId index, we store an inner vector. The 121 // inner vector's index corresponds to a section's SectionId. At the SectionId 122 // index, we store the SectionMetadata of that section. 
123 // 124 // For example, pretend "email" had a SchemaTypeId of 0 and it had a section 125 // called "subject" with a SectionId of 1. Then there would exist a vector 126 // that holds the "subject" property's SectionMetadata at index 1. This vector 127 // would be stored at index 0 of the section_metadata_cache_ vector. 128 const std::vector<std::vector<SectionMetadata>> section_metadata_cache_; 129 }; 130 131 } // namespace lib 132 } // namespace icing 133 134 #endif // ICING_SCHEMA_SECTION_MANAGER_H_ 135