1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_SCORING_SCORED_DOCUMENT_HIT_H_ 16 #define ICING_SCORING_SCORED_DOCUMENT_HIT_H_ 17 18 #include <memory> 19 #include <utility> 20 #include <vector> 21 22 #include "icing/schema/section.h" 23 #include "icing/store/document-id.h" 24 25 namespace icing { 26 namespace lib { 27 28 class JoinedScoredDocumentHit; 29 30 // A data class containing information about the document, hit sections, and a 31 // score. The score is calculated against both the document and the hit 32 // sections. 33 class ScoredDocumentHit { 34 public: 35 class Converter { 36 public: 37 JoinedScoredDocumentHit operator()( 38 ScoredDocumentHit&& scored_doc_hit) const; 39 }; 40 ScoredDocumentHit(DocumentId document_id,SectionIdMask hit_section_id_mask,double score)41 ScoredDocumentHit(DocumentId document_id, SectionIdMask hit_section_id_mask, 42 double score) 43 : document_id_(document_id), 44 hit_section_id_mask_(hit_section_id_mask), 45 score_(score) {} 46 ScoredDocumentHit(DocumentId document_id,SectionIdMask hit_section_id_mask,double score,std::vector<double> additional_scores)47 ScoredDocumentHit(DocumentId document_id, SectionIdMask hit_section_id_mask, 48 double score, std::vector<double> additional_scores) 49 : document_id_(document_id), 50 hit_section_id_mask_(hit_section_id_mask), 51 score_(score) { 52 SetAdditionalScores(std::move(additional_scores)); 53 } 54 ScoredDocumentHit(const ScoredDocumentHit & other)55 ScoredDocumentHit(const ScoredDocumentHit& other) 56 : document_id_(other.document_id_), 57 hit_section_id_mask_(other.hit_section_id_mask_), 58 score_(other.score_) { 59 if (other.additional_scores_ != nullptr) { 60 SetAdditionalScores(*other.additional_scores_); 61 } 62 } 63 64 ScoredDocumentHit& operator=(const ScoredDocumentHit& other) { 65 document_id_ = other.document_id_; 66 hit_section_id_mask_ = other.hit_section_id_mask_; 67 score_ = other.score_; 68 if (other.additional_scores_ != nullptr) { 69 SetAdditionalScores(*other.additional_scores_); 70 } 71 return *this; 72 } 73 ScoredDocumentHit(ScoredDocumentHit && other)74 ScoredDocumentHit(ScoredDocumentHit&& other) { Swap(&other); } 75 ScoredDocumentHit& operator=(ScoredDocumentHit&& other) { 76 Swap(&other); 77 return *this; 78 } 79 80 bool operator<(const ScoredDocumentHit& other) const { 81 if (score() < other.score()) return true; 82 if (score() > other.score()) return false; 83 return document_id() < other.document_id(); 84 } 85 document_id()86 DocumentId document_id() const { return document_id_; } 87 hit_section_id_mask()88 SectionIdMask hit_section_id_mask() const { return hit_section_id_mask_; } 89 score()90 double score() const { return score_; } 91 92 // nullptr if no additional scores. additional_scores()93 const std::vector<double>* additional_scores() const { 94 return additional_scores_.get(); 95 } 96 97 private: Swap(ScoredDocumentHit * other)98 void Swap(ScoredDocumentHit* other) { 99 std::swap(document_id_, other->document_id_); 100 std::swap(hit_section_id_mask_, other->hit_section_id_mask_); 101 std::swap(score_, other->score_); 102 std::swap(additional_scores_, other->additional_scores_); 103 } 104 SetAdditionalScores(std::vector<double> additional_scores)105 void SetAdditionalScores(std::vector<double> additional_scores) { 106 if (additional_scores.empty()) { 107 additional_scores_.reset(); 108 return; 109 } 110 additional_scores_ = 111 std::make_unique<std::vector<double>>(std::move(additional_scores)); 112 } 113 114 DocumentId document_id_; 115 SectionIdMask hit_section_id_mask_; 116 double score_; 117 // nullptr if no additional scores. 118 std::unique_ptr<std::vector<double>> additional_scores_; 119 }; 120 121 // A custom comparator for ScoredDocumentHit that determines which 122 // ScoredDocumentHit is better (should come first) based off of 123 // ScoredDocumentHit itself and the order of its score. 124 // 125 // Returns true if left is better than right according to score and order. 126 // Comparison is based off of score with ties broken by 127 // ScoredDocumentHit.document_id(). 128 class ScoredDocumentHitComparator { 129 public: 130 explicit ScoredDocumentHitComparator(bool is_descending = true) is_descending_(is_descending)131 : is_descending_(is_descending) {} 132 operator()133 bool operator()(const ScoredDocumentHit& lhs, 134 const ScoredDocumentHit& rhs) const { 135 // STL comparator requirement: equal MUST return false. 136 // If writing `return is_descending_ == !(lhs < rhs)`: 137 // - When lhs == rhs, !(lhs < rhs) is true 138 // - If is_descending_ is true, then we return true for equal case! 139 if (is_descending_) { 140 return rhs < lhs; 141 } 142 return lhs < rhs; 143 } 144 145 private: 146 bool is_descending_; 147 }; 148 149 // A data class containing information about a composite document after joining, 150 // including final score, parent ScoredDocumentHit, and a vector of all child 151 // ScoredDocumentHits. The final score is calculated by the strategy specified 152 // in join spec/rank strategy. It could be aggregated score, raw parent doc 153 // score, or anything else. 154 // 155 // ScoredDocumentHitsRanker may store ScoredDocumentHit or 156 // JoinedScoredDocumentHit. 157 // - We could've created a virtual class for them and ScoredDocumentHitsRanker 158 // uses the abstract type. 159 // - However, Icing lib caches ScoredDocumentHitsRanker (which contains a list 160 // of (Joined)ScoredDocumentHits) in ResultState. Inheriting the virtual class 161 // makes both classes have additional 8 bytes for vtable, which increases 40% 162 // and 15% memory usage respectively. 163 // - Also since JoinedScoredDocumentHit is a super-set of ScoredDocumentHit, 164 // let's avoid the common virtual class and instead implement a convert 165 // function (original type -> JoinedScoredDocumentHit) for each class, so 166 // ScoredDocumentHitsRanker::PopNext can return a common type (i.e. 167 // JoinedScoredDocumentHit). 168 class JoinedScoredDocumentHit { 169 public: 170 class Converter { 171 public: operator()172 JoinedScoredDocumentHit operator()( 173 JoinedScoredDocumentHit&& scored_doc_hit) const { 174 return scored_doc_hit; 175 } 176 }; 177 JoinedScoredDocumentHit(double final_score,ScoredDocumentHit parent_scored_document_hit,std::vector<ScoredDocumentHit> child_scored_document_hits)178 explicit JoinedScoredDocumentHit( 179 double final_score, ScoredDocumentHit parent_scored_document_hit, 180 std::vector<ScoredDocumentHit> child_scored_document_hits) 181 : final_score_(final_score), 182 parent_scored_document_hit_(std::move(parent_scored_document_hit)), 183 child_scored_document_hits_(std::move(child_scored_document_hits)) {} 184 185 bool operator<(const JoinedScoredDocumentHit& other) const { 186 if (final_score_ != other.final_score_) { 187 return final_score_ < other.final_score_; 188 } 189 return parent_scored_document_hit_ < other.parent_scored_document_hit_; 190 } 191 final_score()192 double final_score() const { return final_score_; } 193 parent_scored_document_hit()194 const ScoredDocumentHit& parent_scored_document_hit() const { 195 return parent_scored_document_hit_; 196 } 197 child_scored_document_hits()198 const std::vector<ScoredDocumentHit>& child_scored_document_hits() const { 199 return child_scored_document_hits_; 200 } 201 202 private: 203 double final_score_; 204 ScoredDocumentHit parent_scored_document_hit_; 205 std::vector<ScoredDocumentHit> child_scored_document_hits_; 206 }; 207 208 } // namespace lib 209 } // namespace icing 210 211 #endif // ICING_SCORING_SCORED_DOCUMENT_HIT_H_ 212