xref: /aosp_15_r20/external/icing/icing/scoring/section-weights.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2021 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_SCORING_SECTION_WEIGHTS_H_
16 #define ICING_SCORING_SECTION_WEIGHTS_H_
17 
18 #include <unordered_map>
19 
20 #include "icing/text_classifier/lib3/utils/base/statusor.h"
21 #include "icing/proto/scoring.pb.h"
22 #include "icing/schema/schema-store.h"
23 #include "icing/store/document-store.h"
24 
25 namespace icing {
26 namespace lib {
27 
// Raw (pre-normalization) weight used for a section that has no explicit
// weight configured.
inline constexpr double kDefaultSectionWeight{1.0};
29 
30 // Provides functions for setting and retrieving section weights for schema
31 // type properties. Section weights are used to promote and demote term matches
32 // in sections when scoring results. Section weights are provided by property
33 // path, and can range from (0, DBL_MAX]. The SectionId is matched to the
34 // property path by going over the schema type's section metadata. Weights that
35 // correspond to a valid property path are then normalized against the maxmium
36 // section weight, and put into map for quick access for scorers. By default,
37 // a section is given a raw, pre-normalized weight of 1.0.
38 class SectionWeights {
39  public:
40   // SectionWeights instances should not be copied.
41   SectionWeights(const SectionWeights&) = delete;
42   SectionWeights& operator=(const SectionWeights&) = delete;
43 
44   // Factory function to create a SectionWeights instance. Raw weights are
45   // provided through the ScoringSpecProto. Provided property paths for weights
46   // are validated against the schema type's section metadata. If the property
47   // path doesn't exist, the property weight is ignored. If a weight is 0 or
48   // negative, an invalid argument error is returned. Raw weights are then
49   // normalized against the maximum weight for that schema type.
50   //
51   // Returns:
52   //   A SectionWeights instance on success
53   //   FAILED_PRECONDITION on any null pointer input
54   //   INVALID_ARGUMENT if a provided weight for a property path is less than or
55   // equal to 0.
56   static libtextclassifier3::StatusOr<std::unique_ptr<SectionWeights>> Create(
57       const SchemaStore* schema_store, const ScoringSpecProto& scoring_spec);
58 
59   // Returns the normalized section weight by SchemaTypeId and SectionId. If
60   // the SchemaTypeId, or the SectionId for a SchemaTypeId, is not found in the
61   // normalized weights map, the default weight is returned instead.
62   double GetNormalizedSectionWeight(SchemaTypeId schema_type_id,
63                                     SectionId section_id) const;
64 
65  private:
66   // Holds the normalized section weights for a schema type, as well as the
67   // normalized default weight for sections that have no weight set.
68   struct NormalizedSectionWeights {
69     std::unordered_map<SectionId, double> section_weights;
70     double default_weight;
71   };
72 
SectionWeights(const std::unordered_map<SchemaTypeId,NormalizedSectionWeights> schema_section_weight_map)73   explicit SectionWeights(
74       const std::unordered_map<SchemaTypeId, NormalizedSectionWeights>
75           schema_section_weight_map)
76       : schema_section_weight_map_(std::move(schema_section_weight_map)) {}
77 
78   // Creates a map of section ids to normalized weights from the raw property
79   // path weight map and section metadata and calculates the normalized default
80   // section weight.
81   static inline SectionWeights::NormalizedSectionWeights
82   ExtractNormalizedSectionWeights(
83       const std::unordered_map<std::string, double>& raw_weights,
84       const std::vector<SectionMetadata>& metadata_list);
85 
86   // A map of (SchemaTypeId -> SectionId -> Normalized Weight), allows for fast
87   // look up of normalized weights. This is precomputed when creating a
88   // SectionWeights instance.
89   std::unordered_map<SchemaTypeId, NormalizedSectionWeights>
90       schema_section_weight_map_;
91 };
92 
93 }  // namespace lib
94 }  // namespace icing
95 
96 #endif  // ICING_SCORING_SECTION_WEIGHTS_H_
97