1 // Copyright (C) 2021 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_SCORING_SECTION_WEIGHTS_H_ 16 #define ICING_SCORING_SECTION_WEIGHTS_H_ 17 18 #include <unordered_map> 19 20 #include "icing/text_classifier/lib3/utils/base/statusor.h" 21 #include "icing/proto/scoring.pb.h" 22 #include "icing/schema/schema-store.h" 23 #include "icing/store/document-store.h" 24 25 namespace icing { 26 namespace lib { 27 28 inline constexpr double kDefaultSectionWeight = 1.0; 29 30 // Provides functions for setting and retrieving section weights for schema 31 // type properties. Section weights are used to promote and demote term matches 32 // in sections when scoring results. Section weights are provided by property 33 // path, and can range from (0, DBL_MAX]. The SectionId is matched to the 34 // property path by going over the schema type's section metadata. Weights that 35 // correspond to a valid property path are then normalized against the maxmium 36 // section weight, and put into map for quick access for scorers. By default, 37 // a section is given a raw, pre-normalized weight of 1.0. 38 class SectionWeights { 39 public: 40 // SectionWeights instances should not be copied. 41 SectionWeights(const SectionWeights&) = delete; 42 SectionWeights& operator=(const SectionWeights&) = delete; 43 44 // Factory function to create a SectionWeights instance. Raw weights are 45 // provided through the ScoringSpecProto. Provided property paths for weights 46 // are validated against the schema type's section metadata. If the property 47 // path doesn't exist, the property weight is ignored. If a weight is 0 or 48 // negative, an invalid argument error is returned. Raw weights are then 49 // normalized against the maximum weight for that schema type. 50 // 51 // Returns: 52 // A SectionWeights instance on success 53 // FAILED_PRECONDITION on any null pointer input 54 // INVALID_ARGUMENT if a provided weight for a property path is less than or 55 // equal to 0. 56 static libtextclassifier3::StatusOr<std::unique_ptr<SectionWeights>> Create( 57 const SchemaStore* schema_store, const ScoringSpecProto& scoring_spec); 58 59 // Returns the normalized section weight by SchemaTypeId and SectionId. If 60 // the SchemaTypeId, or the SectionId for a SchemaTypeId, is not found in the 61 // normalized weights map, the default weight is returned instead. 62 double GetNormalizedSectionWeight(SchemaTypeId schema_type_id, 63 SectionId section_id) const; 64 65 private: 66 // Holds the normalized section weights for a schema type, as well as the 67 // normalized default weight for sections that have no weight set. 68 struct NormalizedSectionWeights { 69 std::unordered_map<SectionId, double> section_weights; 70 double default_weight; 71 }; 72 SectionWeights(const std::unordered_map<SchemaTypeId,NormalizedSectionWeights> schema_section_weight_map)73 explicit SectionWeights( 74 const std::unordered_map<SchemaTypeId, NormalizedSectionWeights> 75 schema_section_weight_map) 76 : schema_section_weight_map_(std::move(schema_section_weight_map)) {} 77 78 // Creates a map of section ids to normalized weights from the raw property 79 // path weight map and section metadata and calculates the normalized default 80 // section weight. 81 static inline SectionWeights::NormalizedSectionWeights 82 ExtractNormalizedSectionWeights( 83 const std::unordered_map<std::string, double>& raw_weights, 84 const std::vector<SectionMetadata>& metadata_list); 85 86 // A map of (SchemaTypeId -> SectionId -> Normalized Weight), allows for fast 87 // look up of normalized weights. This is precomputed when creating a 88 // SectionWeights instance. 89 std::unordered_map<SchemaTypeId, NormalizedSectionWeights> 90 schema_section_weight_map_; 91 }; 92 93 } // namespace lib 94 } // namespace icing 95 96 #endif // ICING_SCORING_SECTION_WEIGHTS_H_ 97