diff options
Diffstat (limited to 'icing/index/lite/lite-index.h')
-rw-r--r-- | icing/index/lite/lite-index.h | 83 |
1 files changed, 63 insertions, 20 deletions
diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h index 916a14b..288602a 100644 --- a/icing/index/lite/lite-index.h +++ b/icing/index/lite/lite-index.h @@ -20,6 +20,7 @@ #define ICING_INDEX_LITE_INDEX_H_ #include <cstdint> +#include <iterator> #include <limits> #include <memory> #include <string> @@ -48,7 +49,6 @@ #include "icing/store/document-id.h" #include "icing/store/namespace-id.h" #include "icing/store/suggestion-result-checker.h" -#include "icing/util/bit-util.h" #include "icing/util/crc32.h" namespace icing { @@ -63,6 +63,9 @@ class LiteIndex { // An entry in the hit buffer. using Options = LiteIndexOptions; + // Offset for the LiteIndex_Header in the hit buffer mmap. + static constexpr uint32_t kHeaderFileOffset = 0; + // Updates checksum of subcomponents. ~LiteIndex(); @@ -152,8 +155,8 @@ class LiteIndex { // Add all hits with term_id from the sections specified in section_id_mask, // skipping hits in non-prefix sections if only_from_prefix_sections is true, // to hits_out. If hits_out is nullptr, no hits will be added. The - // corresponding hit term frequencies will also be added if term_frequency_out - // is nullptr. + // corresponding hit term frequencies will also not be added if + // term_frequency_out is nullptr. // // Only those hits which belongs to the given namespaces will be counted and // fetched. A nullptr namespace checker will disable this check. @@ -181,15 +184,29 @@ class LiteIndex { uint32_t size() const ICING_LOCKS_EXCLUDED(mutex_) { absl_ports::shared_lock l(&mutex_); - return sizeLocked(); + return size_impl(); } bool WantsMerge() const ICING_LOCKS_EXCLUDED(mutex_) { absl_ports::shared_lock l(&mutex_); - return is_full() || sizeLocked() >= (options_.hit_buffer_want_merge_bytes / - sizeof(TermIdHitPair::Value)); + return is_full() || size_impl() >= (options_.hit_buffer_want_merge_bytes / + sizeof(TermIdHitPair::Value)); + } + + // Whether or not the HitBuffer's unsorted tail size exceeds the sort + // threshold. + bool HasUnsortedHitsExceedingSortThreshold() const + ICING_LOCKS_EXCLUDED(mutex_) { + absl_ports::shared_lock l(&mutex_); + return HasUnsortedHitsExceedingSortThresholdImpl(); } + // Sort hits stored in the index. + void SortHits() ICING_LOCKS_EXCLUDED(mutex_) { + absl_ports::unique_lock l(&mutex_); + SortHitsImpl(); + }; + class const_iterator { friend class LiteIndex; @@ -326,17 +343,13 @@ class LiteIndex { // Check if the hit buffer has reached its capacity. bool is_full() const ICING_SHARED_LOCKS_REQUIRED(mutex_); - uint32_t sizeLocked() const ICING_SHARED_LOCKS_REQUIRED(mutex_) { - return header_->cur_size(); - } - // Non-locking implementation for empty(). bool empty_impl() const ICING_SHARED_LOCKS_REQUIRED(mutex_) { return size_impl() == 0; } // Non-locking implementation for size(). - bool size_impl() const ICING_SHARED_LOCKS_REQUIRED(mutex_) { + uint32_t size_impl() const ICING_SHARED_LOCKS_REQUIRED(mutex_) { return header_->cur_size(); } @@ -352,18 +365,48 @@ class LiteIndex { NamespaceId namespace_id) ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); - // Whether or not the HitBuffer requires sorting. - bool NeedSort() ICING_LOCKS_EXCLUDED(mutex_) { - absl_ports::shared_lock l(&mutex_); - return header_->cur_size() - header_->searchable_end() > 0; + // We need to sort during querying time when: + // 1. Sorting at indexing time is not enabled and there is an unsorted tail + // section in the HitBuffer. + // 2. The unsorted tail size exceeds the hit_buffer_sort_threshold, regardless + // of whether or not hit_buffer_sort_at_indexing is enabled. This is to + // prevent performing sequential search on a large unsorted tail section, + // which would result in bad query performance. + // This is more of a sanity check. We should not really be encountering + // this case. + bool NeedSortAtQuerying() const ICING_SHARED_LOCKS_REQUIRED(mutex_) { + return HasUnsortedHitsExceedingSortThresholdImpl() || + (!options_.hit_buffer_sort_at_indexing && + header_->cur_size() - header_->searchable_end() > 0); } - // Sort hits stored in the index. - void SortHits() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + // Non-locking implementation for HasUnsortedHitsExceedingSortThresholdImpl(). + bool HasUnsortedHitsExceedingSortThresholdImpl() const + ICING_SHARED_LOCKS_REQUIRED(mutex_) { + return header_->cur_size() - header_->searchable_end() >= + (options_.hit_buffer_sort_threshold_bytes / + sizeof(TermIdHitPair::Value)); + } - // Returns the position of the first element with term_id, or the searchable - // end of the hit buffer if term_id is not present. - uint32_t Seek(uint32_t term_id) const ICING_SHARED_LOCKS_REQUIRED(mutex_); + // Non-locking implementation for SortHits(). + void SortHitsImpl() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Calculates and adds the score for a fetched hit to total_score_out, while + // updating last_document_id (which keeps track of the last added docId so + // far), and is_last_document_desired (which keeps track of whether that last + // added docId belongs to the query's desired namespace.) + // + // Also appends the hit to hits_out and term_frequency_out if the vectors are + // not null. + void ScoreAndAppendFetchedHit( + const Hit& hit, SectionIdMask section_id_mask, + bool only_from_prefix_sections, + SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by, + const SuggestionResultChecker* suggestion_result_checker, + DocumentId& last_document_id, bool& is_last_document_desired, + int& total_score_out, std::vector<DocHitInfo>* hits_out, + std::vector<Hit::TermFrequencyArray>* term_frequency_out) const + ICING_SHARED_LOCKS_REQUIRED(mutex_); // File descriptor that points to where the header and hit buffer are written // to. |