diff options
Diffstat (limited to 'icing/index/iterator/doc-hit-info-iterator-section-restrict.cc')
-rw-r--r-- | icing/index/iterator/doc-hit-info-iterator-section-restrict.cc | 230 |
1 files changed, 113 insertions, 117 deletions
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc index b850a9b..35dc0b9 100644 --- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc +++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc @@ -16,137 +16,142 @@ #include <cstdint> #include <memory> +#include <set> #include <string> #include <string_view> +#include <unordered_map> #include <utility> +#include <vector> #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/absl_ports/canonical_errors.h" #include "icing/absl_ports/str_cat.h" +#include "icing/absl_ports/str_join.h" #include "icing/index/hit/doc-hit-info.h" #include "icing/index/iterator/doc-hit-info-iterator.h" +#include "icing/index/iterator/section-restrict-data.h" +#include "icing/proto/search.pb.h" #include "icing/schema/schema-store.h" #include "icing/schema/section.h" #include "icing/store/document-filter-data.h" #include "icing/store/document-id.h" #include "icing/store/document-store.h" +#include "icing/util/status-macros.h" namespace icing { namespace lib { +// An iterator that simply takes ownership of SectionRestrictData. +class SectionRestrictDataHolderIterator : public DocHitInfoIterator { + public: + explicit SectionRestrictDataHolderIterator( + std::unique_ptr<DocHitInfoIterator> delegate, + std::unique_ptr<SectionRestrictData> data) + : delegate_(std::move(delegate)), data_(std::move(data)) {} + + libtextclassifier3::Status Advance() override { + auto result = delegate_->Advance(); + doc_hit_info_ = delegate_->doc_hit_info(); + return result; + } + + libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override { + ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_delegate, + std::move(*delegate_).TrimRightMostNode()); + if (trimmed_delegate.iterator_ != nullptr) { + trimmed_delegate.iterator_ = + std::make_unique<SectionRestrictDataHolderIterator>( + std::move(trimmed_delegate.iterator_), std::move(data_)); + } + return trimmed_delegate; + } + + void MapChildren(const ChildrenMapper& mapper) override { + delegate_ = mapper(std::move(delegate_)); + } + + CallStats GetCallStats() const override { return delegate_->GetCallStats(); } + + std::string ToString() const override { return delegate_->ToString(); } + + void PopulateMatchedTermsStats( + std::vector<TermMatchInfo>* matched_terms_stats, + SectionIdMask filtering_section_mask) const override { + return delegate_->PopulateMatchedTermsStats(matched_terms_stats, + filtering_section_mask); + } + + private: + std::unique_ptr<DocHitInfoIterator> delegate_; + std::unique_ptr<SectionRestrictData> data_; +}; + DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict( - std::unique_ptr<DocHitInfoIterator> delegate, + std::unique_ptr<DocHitInfoIterator> delegate, SectionRestrictData* data) + : delegate_(std::move(delegate)), data_(data) {} + +std::unique_ptr<DocHitInfoIterator> +DocHitInfoIteratorSectionRestrict::ApplyRestrictions( + std::unique_ptr<DocHitInfoIterator> iterator, const DocumentStore* document_store, const SchemaStore* schema_store, - std::set<std::string> target_sections, int64_t current_time_ms) - : delegate_(std::move(delegate)), - document_store_(*document_store), - schema_store_(*schema_store), - current_time_ms_(current_time_ms) { - type_property_filters_[std::string(SchemaStore::kSchemaTypeWildcard)] = + std::set<std::string> target_sections, int64_t current_time_ms) { + std::unordered_map<std::string, std::set<std::string>> type_property_filters; + type_property_filters[std::string(SchemaStore::kSchemaTypeWildcard)] = std::move(target_sections); + auto data = std::make_unique<SectionRestrictData>( + document_store, schema_store, current_time_ms, type_property_filters); + std::unique_ptr<DocHitInfoIterator> result = + ApplyRestrictions(std::move(iterator), data.get()); + return std::make_unique<SectionRestrictDataHolderIterator>(std::move(result), + std::move(data)); } -DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict( - std::unique_ptr<DocHitInfoIterator> delegate, +std::unique_ptr<DocHitInfoIterator> +DocHitInfoIteratorSectionRestrict::ApplyRestrictions( + std::unique_ptr<DocHitInfoIterator> iterator, const DocumentStore* document_store, const SchemaStore* schema_store, - const SearchSpecProto& search_spec, - int64_t current_time_ms) - : delegate_(std::move(delegate)), - document_store_(*document_store), - schema_store_(*schema_store), - current_time_ms_(current_time_ms) { + const SearchSpecProto& search_spec, int64_t current_time_ms) { + std::unordered_map<std::string, std::set<std::string>> type_property_filters; // TODO(b/294274922): Add support for polymorphism in type property filters. for (const TypePropertyMask& type_property_mask : - search_spec.type_property_filters()) { - type_property_filters_[type_property_mask.schema_type()] = + search_spec.type_property_filters()) { + type_property_filters[type_property_mask.schema_type()] = std::set<std::string>(type_property_mask.paths().begin(), type_property_mask.paths().end()); } + auto data = std::make_unique<SectionRestrictData>( + document_store, schema_store, current_time_ms, type_property_filters); + std::unique_ptr<DocHitInfoIterator> result = + ApplyRestrictions(std::move(iterator), data.get()); + return std::make_unique<SectionRestrictDataHolderIterator>(std::move(result), + std::move(data)); } -DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict( - std::unique_ptr<DocHitInfoIterator> delegate, - const DocumentStore* document_store, const SchemaStore* schema_store, - std::unordered_map<std::string, std::set<std::string>> - type_property_filters, - std::unordered_map<std::string, SectionIdMask> type_property_masks, - int64_t current_time_ms) - : delegate_(std::move(delegate)), - document_store_(*document_store), - schema_store_(*schema_store), - current_time_ms_(current_time_ms), - type_property_filters_(std::move(type_property_filters)), - type_property_masks_(std::move(type_property_masks)) {} - -SectionIdMask DocHitInfoIteratorSectionRestrict::GenerateSectionMask( - const std::string& schema_type, - const std::set<std::string>& target_sections) const { - SectionIdMask section_mask = kSectionIdMaskNone; - auto section_metadata_list_or = - schema_store_.GetSectionMetadata(schema_type); - if (!section_metadata_list_or.ok()) { - // The current schema doesn't have section metadata. - return kSectionIdMaskNone; - } - const std::vector<SectionMetadata>* section_metadata_list = - section_metadata_list_or.ValueOrDie(); - for (const SectionMetadata& section_metadata : *section_metadata_list) { - if (target_sections.find(section_metadata.path) != - target_sections.end()) { - section_mask |= UINT64_C(1) << section_metadata.id; +std::unique_ptr<DocHitInfoIterator> +DocHitInfoIteratorSectionRestrict::ApplyRestrictions( + std::unique_ptr<DocHitInfoIterator> iterator, SectionRestrictData* data) { + ChildrenMapper mapper; + mapper = [&data, &mapper](std::unique_ptr<DocHitInfoIterator> iterator) + -> std::unique_ptr<DocHitInfoIterator> { + if (iterator->is_leaf()) { + return std::make_unique<DocHitInfoIteratorSectionRestrict>( + std::move(iterator), data); + } else { + iterator->MapChildren(mapper); + return iterator; } - } - return section_mask; -} - -SectionIdMask DocHitInfoIteratorSectionRestrict:: - ComputeAndCacheSchemaTypeAllowedSectionsMask( - const std::string& schema_type) { - if (const auto type_property_mask_itr = - type_property_masks_.find(schema_type); - type_property_mask_itr != type_property_masks_.end()) { - return type_property_mask_itr->second; - } - - // Section id mask of schema_type is never calculated before, so - // calculate it here and put it into type_property_masks_. - // - If type property filters of schema_type or wildcard (*) are - // specified, then create a mask according to the filters. - // - Otherwise, create a mask to match all properties. - SectionIdMask new_section_id_mask = kSectionIdMaskAll; - if (const auto itr = type_property_filters_.find(schema_type); - itr != type_property_filters_.end()) { - // Property filters defined for given schema type - new_section_id_mask = GenerateSectionMask( - schema_type, itr->second); - } else if (const auto wildcard_itr = type_property_filters_.find( - std::string(SchemaStore::kSchemaTypeWildcard)); - wildcard_itr != type_property_filters_.end()) { - // Property filters defined for wildcard entry - new_section_id_mask = GenerateSectionMask( - schema_type, wildcard_itr->second); - } else { - // Do not cache the section mask if no property filters apply to this schema - // type to avoid taking up unnecessary space. - return kSectionIdMaskAll; - } - - type_property_masks_[schema_type] = new_section_id_mask; - return new_section_id_mask; + }; + return mapper(std::move(iterator)); } libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() { doc_hit_info_ = DocHitInfo(kInvalidDocumentId); - hit_intersect_section_ids_mask_ = kSectionIdMaskNone; while (delegate_->Advance().ok()) { DocumentId document_id = delegate_->doc_hit_info().document_id(); - SectionIdMask section_id_mask = - delegate_->doc_hit_info().hit_section_ids_mask(); - - auto data_optional = document_store_.GetAliveDocumentFilterData( - document_id, current_time_ms_); + auto data_optional = data_->document_store().GetAliveDocumentFilterData( + document_id, data_->current_time_ms()); if (!data_optional) { // Ran into some error retrieving information on this hit, skip continue; @@ -154,34 +159,35 @@ libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() { // Guaranteed that the DocumentFilterData exists at this point SchemaTypeId schema_type_id = data_optional.value().schema_type_id(); - auto schema_type_or = schema_store_.GetSchemaType(schema_type_id); + auto schema_type_or = data_->schema_store().GetSchemaType(schema_type_id); if (!schema_type_or.ok()) { // Ran into error retrieving schema type, skip continue; } const std::string* schema_type = std::move(schema_type_or).ValueOrDie(); SectionIdMask allowed_sections_mask = - ComputeAndCacheSchemaTypeAllowedSectionsMask(*schema_type); + data_->ComputeAllowedSectionsMask(*schema_type); // A hit can be in multiple sections at once, need to check which of the // section ids match the sections allowed by type_property_masks_. This can // be done by doing a bitwise and of the section_id_mask in the doc hit and // the allowed_sections_mask. - hit_intersect_section_ids_mask_ = section_id_mask & allowed_sections_mask; + SectionIdMask section_id_mask = + delegate_->doc_hit_info().hit_section_ids_mask() & + allowed_sections_mask; // Return this document if: // - the sectionIdMask is not empty after applying property filters, or // - no property filters apply for its schema type (allowed_sections_mask // == kSectionIdMaskAll). This is needed to ensure that in case of empty // query (which uses doc-hit-info-iterator-all-document-id), where - // section_id_mask (and hence hit_intersect_section_ids_mask_) is - // kSectionIdMaskNone, doc hits with no property restrictions don't get - // filtered out. Doc hits for schema types for whom property filters are - // specified will still get filtered out. - if (allowed_sections_mask == kSectionIdMaskAll - || hit_intersect_section_ids_mask_ != kSectionIdMaskNone) { + // section_id_mask is kSectionIdMaskNone, doc hits with no property + // restrictions don't get filtered out. Doc hits for schema types for + // whom property filters are specified will still get filtered out. + if (allowed_sections_mask == kSectionIdMaskAll || + section_id_mask != kSectionIdMaskNone) { doc_hit_info_ = delegate_->doc_hit_info(); - doc_hit_info_.set_hit_section_ids_mask(hit_intersect_section_ids_mask_); + doc_hit_info_.set_hit_section_ids_mask(section_id_mask); return libtextclassifier3::Status::OK; } // Didn't find a matching section name for this hit. Continue. @@ -200,14 +206,14 @@ DocHitInfoIteratorSectionRestrict::TrimRightMostNode() && { // type_property_filters_ when code flow reaches here. If the InternalError // below is returned, that means TrimRightMostNode hasn't been called in the // right context. - const auto it = type_property_filters_.find("*"); - if (it == type_property_filters_.end()) { + const auto it = data_->type_property_filters().find("*"); + if (it == data_->type_property_filters().end()) { return absl_ports::InternalError( "A wildcard entry should always be present in type property filters " "whenever TrimRightMostNode() is called for " "DocHitInfoIteratorSectionRestrict"); } - std::set<std::string>& target_sections = it->second; + const std::set<std::string>& target_sections = it->second; if (target_sections.empty()) { return absl_ports::InternalError( "Target sections should not be empty whenever TrimRightMostNode() is " @@ -222,24 +228,14 @@ DocHitInfoIteratorSectionRestrict::TrimRightMostNode() && { trimmed_delegate.iterator_ = std::unique_ptr<DocHitInfoIteratorSectionRestrict>( new DocHitInfoIteratorSectionRestrict( - std::move(trimmed_delegate.iterator_), &document_store_, - &schema_store_, std::move(type_property_filters_), - std::move(type_property_masks_), current_time_ms_)); + std::move(trimmed_delegate.iterator_), std::move(data_))); return std::move(trimmed_delegate); } -int32_t DocHitInfoIteratorSectionRestrict::GetNumBlocksInspected() const { - return delegate_->GetNumBlocksInspected(); -} - -int32_t DocHitInfoIteratorSectionRestrict::GetNumLeafAdvanceCalls() const { - return delegate_->GetNumLeafAdvanceCalls(); -} - std::string DocHitInfoIteratorSectionRestrict::ToString() const { std::string output = ""; - for (auto it = type_property_filters_.cbegin(); - it != type_property_filters_.cend(); it++) { + for (auto it = data_->type_property_filters().cbegin(); + it != data_->type_property_filters().cend(); it++) { std::string paths = absl_ports::StrJoin(it->second, ","); output += (it->first) + ":" + (paths) + "; "; } |