diff options
Diffstat (limited to 'icing/index/hit/hit.cc')
-rw-r--r-- | icing/index/hit/hit.cc | 96 |
1 files changed, 56 insertions, 40 deletions
diff --git a/icing/index/hit/hit.cc b/icing/index/hit/hit.cc index 493e62b..7b7f2f6 100644 --- a/icing/index/hit/hit.cc +++ b/icing/index/hit/hit.cc @@ -14,42 +14,19 @@ #include "icing/index/hit/hit.h" +#include <cstring> +#include <limits> + +#include "icing/schema/section.h" #include "icing/store/document-id.h" #include "icing/util/bit-util.h" +#include "icing/util/logging.h" namespace icing { namespace lib { namespace { -enum FlagOffset { - // This hit, whether exact or not, came from a prefixed section and will - // need to be backfilled into branching posting lists if/when those are - // created. - kInPrefixSection = 0, - // This hit represents a prefix of a longer term. If exact matches are - // required, then this hit should be ignored. - kPrefixHit = 1, - // Whether or not the hit has a term_frequency other than - // kDefaultTermFrequency. - kHasTermFrequency = 2, - kNumFlags = 3, -}; - -static_assert(kDocumentIdBits + kSectionIdBits + kNumFlags < - sizeof(Hit::Value) * 8, - "Hit::kInvalidValue contains risky value and we should have at " - "least one unused bit to avoid potential bugs. Please follow the " - "process mentioned in hit.h to correct the value of " - "Hit::kInvalidValue and remove this static_assert afterwards."); - -static_assert(kDocumentIdBits + kSectionIdBits + kNumFlags <= - sizeof(Hit::Value) * 8, - "HitOverflow"); -static_assert(kDocumentIdBits == 22, ""); -static_assert(kSectionIdBits == 6, ""); -static_assert(kNumFlags == 3, ""); - inline DocumentId InvertDocumentId(DocumentId document_id) { static_assert(kMaxDocumentId <= (std::numeric_limits<DocumentId>::max() - 1), "(kMaxDocumentId + 1) must not overflow."); @@ -88,10 +65,28 @@ SectionId BasicHit::section_id() const { /*len=*/kSectionIdBits); } +Hit::Hit(Value value, Flags flags, TermFrequency term_frequency) + : flags_(flags), term_frequency_(term_frequency) { + memcpy(value_.data(), &value, sizeof(value)); + if (!CheckFlagsAreConsistent()) { + ICING_VLOG(1) + << "Creating Hit that has inconsistent flag values across its fields: " + << "Hit(value=" << value << ", flags=" << flags + << "term_frequency=" << term_frequency << ")"; + } +} + Hit::Hit(SectionId section_id, DocumentId document_id, Hit::TermFrequency term_frequency, bool is_in_prefix_section, bool is_prefix_hit) : term_frequency_(term_frequency) { + // We compute flags first as the value's has_flags bit depends on the flags_ + // field. + Flags temp_flags = 0; + bit_util::BitfieldSet(term_frequency != kDefaultTermFrequency, + kHasTermFrequency, /*len=*/1, &temp_flags); + flags_ = temp_flags; + // Values are stored so that when sorted, they appear in document_id // descending, section_id ascending, order. Also, all else being // equal, non-prefix hits sort before prefix hits. So inverted @@ -99,30 +94,29 @@ Hit::Hit(SectionId section_id, DocumentId document_id, // (uninverted) section_id. Value temp_value = 0; bit_util::BitfieldSet(InvertDocumentId(document_id), - kSectionIdBits + kNumFlags, kDocumentIdBits, + kSectionIdBits + kNumFlagsInValueField, kDocumentIdBits, + &temp_value); + bit_util::BitfieldSet(section_id, kNumFlagsInValueField, kSectionIdBits, &temp_value); - bit_util::BitfieldSet(section_id, kNumFlags, kSectionIdBits, &temp_value); - bit_util::BitfieldSet(term_frequency != kDefaultTermFrequency, - kHasTermFrequency, /*len=*/1, &temp_value); bit_util::BitfieldSet(is_prefix_hit, kPrefixHit, /*len=*/1, &temp_value); bit_util::BitfieldSet(is_in_prefix_section, kInPrefixSection, /*len=*/1, &temp_value); - value_ = temp_value; + + bool has_flags = flags_ != kNoEnabledFlags; + bit_util::BitfieldSet(has_flags, kHasFlags, /*len=*/1, &temp_value); + + memcpy(value_.data(), &temp_value, sizeof(temp_value)); } DocumentId Hit::document_id() const { DocumentId inverted_document_id = bit_util::BitfieldGet( - value(), kSectionIdBits + kNumFlags, kDocumentIdBits); + value(), kSectionIdBits + kNumFlagsInValueField, kDocumentIdBits); // Undo the document_id inversion. return InvertDocumentId(inverted_document_id); } SectionId Hit::section_id() const { - return bit_util::BitfieldGet(value(), kNumFlags, kSectionIdBits); -} - -bool Hit::has_term_frequency() const { - return bit_util::BitfieldGet(value(), kHasTermFrequency, 1); + return bit_util::BitfieldGet(value(), kNumFlagsInValueField, kSectionIdBits); } bool Hit::is_prefix_hit() const { @@ -133,6 +127,27 @@ bool Hit::is_in_prefix_section() const { return bit_util::BitfieldGet(value(), kInPrefixSection, 1); } +bool Hit::has_flags() const { + return bit_util::BitfieldGet(value(), kHasFlags, 1); +} + +bool Hit::has_term_frequency() const { + return bit_util::BitfieldGet(flags(), kHasTermFrequency, 1); +} + +bool Hit::CheckFlagsAreConsistent() const { + bool has_flags = flags_ != kNoEnabledFlags; + bool has_flags_enabled_in_value = + bit_util::BitfieldGet(value(), kHasFlags, /*len=*/1); + + bool has_term_frequency = term_frequency_ != kDefaultTermFrequency; + bool has_term_frequency_enabled_in_flags = + bit_util::BitfieldGet(flags(), kHasTermFrequency, /*len=*/1); + + return has_flags == has_flags_enabled_in_value && + has_term_frequency == has_term_frequency_enabled_in_flags; +} + Hit Hit::TranslateHit(Hit old_hit, DocumentId new_document_id) { return Hit(old_hit.section_id(), new_document_id, old_hit.term_frequency(), old_hit.is_in_prefix_section(), old_hit.is_prefix_hit()); @@ -140,7 +155,8 @@ Hit Hit::TranslateHit(Hit old_hit, DocumentId new_document_id) { bool Hit::EqualsDocumentIdAndSectionId::operator()(const Hit& hit1, const Hit& hit2) const { - return (hit1.value() >> kNumFlags) == (hit2.value() >> kNumFlags); + return (hit1.value() >> kNumFlagsInValueField) == + (hit2.value() >> kNumFlagsInValueField); } } // namespace lib |