1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_SCORING_SCORER_H_ 16 #define ICING_SCORING_SCORER_H_ 17 18 #include <memory> 19 #include <string> 20 #include <unordered_map> 21 #include <vector> 22 23 #include "icing/index/hit/doc-hit-info.h" 24 #include "icing/index/iterator/doc-hit-info-iterator.h" 25 26 namespace icing { 27 namespace lib { 28 29 // Scorer calculates scores for documents. 30 class Scorer { 31 public: 32 virtual ~Scorer() = default; 33 34 // Returns a non-negative score of a document. The score can be a 35 // document-associated score which comes from the DocumentProto directly, an 36 // accumulated score, a relevance score, or even an inferred score. If it 37 // fails to find or calculate a score, the user-provided default score will be 38 // returned. 39 // 40 // Some examples of possible scores: 41 // 1. Document-associated scores: document score, creation timestamp score. 42 // 2. Accumulated scores: usage count score. 43 // 3. Inferred scores: a score calculated by a machine learning model. 44 // 4. Relevance score: computed as BM25F score. 45 // 46 // NOTE: This method is performance-sensitive as it's called for every 47 // potential result document. We're trying to avoid returning StatusOr<double> 48 // to save a little more time and memory. 49 virtual double GetScore(const DocHitInfo& hit_info, 50 const DocHitInfoIterator* query_it = nullptr) = 0; 51 52 // Returns additional score as specified in 53 // scoring_spec.additional_advanced_scoring_expressions(). As a result, only 54 // AdvancedScorer can produce additional scores. 55 // 56 // NOTE: This method is performance-sensitive as it's called for every 57 // potential result document. We're trying to avoid returning 58 // StatusOr<std::vector<double>> to save a little more time and memory. GetAdditionalScores(const DocHitInfo & hit_info,const DocHitInfoIterator * query_it)59 virtual std::vector<double> GetAdditionalScores( 60 const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) { 61 return {}; 62 } 63 64 // Currently only overriden by the RelevanceScoreScorer. 65 // NOTE: the query_term_iterators map must 66 // outlive the scorer, see bm25f-calculator for more details. PrepareToScore(std::unordered_map<std::string,std::unique_ptr<DocHitInfoIterator>> * query_term_iterators)67 virtual void PrepareToScore( 68 std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>* 69 query_term_iterators) {} 70 }; 71 72 } // namespace lib 73 } // namespace icing 74 75 #endif // ICING_SCORING_SCORER_H_ 76