diff options
Diffstat (limited to 'icing/index/iterator/doc-hit-info-iterator-section-restrict.cc')
-rw-r--r-- | icing/index/iterator/doc-hit-info-iterator-section-restrict.cc | 217 |
1 files changed, 165 insertions, 52 deletions
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc index 227a185..35dc0b9 100644 --- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc +++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc @@ -16,46 +16,142 @@ #include <cstdint> #include <memory> +#include <set> #include <string> #include <string_view> +#include <unordered_map> #include <utility> +#include <vector> #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/absl_ports/canonical_errors.h" #include "icing/absl_ports/str_cat.h" +#include "icing/absl_ports/str_join.h" #include "icing/index/hit/doc-hit-info.h" #include "icing/index/iterator/doc-hit-info-iterator.h" +#include "icing/index/iterator/section-restrict-data.h" +#include "icing/proto/search.pb.h" #include "icing/schema/schema-store.h" #include "icing/schema/section.h" #include "icing/store/document-filter-data.h" #include "icing/store/document-id.h" #include "icing/store/document-store.h" +#include "icing/util/status-macros.h" namespace icing { namespace lib { +// An iterator that simply takes ownership of SectionRestrictData. +class SectionRestrictDataHolderIterator : public DocHitInfoIterator { + public: + explicit SectionRestrictDataHolderIterator( + std::unique_ptr<DocHitInfoIterator> delegate, + std::unique_ptr<SectionRestrictData> data) + : delegate_(std::move(delegate)), data_(std::move(data)) {} + + libtextclassifier3::Status Advance() override { + auto result = delegate_->Advance(); + doc_hit_info_ = delegate_->doc_hit_info(); + return result; + } + + libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override { + ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_delegate, + std::move(*delegate_).TrimRightMostNode()); + if (trimmed_delegate.iterator_ != nullptr) { + trimmed_delegate.iterator_ = + std::make_unique<SectionRestrictDataHolderIterator>( + std::move(trimmed_delegate.iterator_), std::move(data_)); + } + return trimmed_delegate; + } + + void MapChildren(const ChildrenMapper& mapper) override { + delegate_ = mapper(std::move(delegate_)); + } + + CallStats GetCallStats() const override { return delegate_->GetCallStats(); } + + std::string ToString() const override { return delegate_->ToString(); } + + void PopulateMatchedTermsStats( + std::vector<TermMatchInfo>* matched_terms_stats, + SectionIdMask filtering_section_mask) const override { + return delegate_->PopulateMatchedTermsStats(matched_terms_stats, + filtering_section_mask); + } + + private: + std::unique_ptr<DocHitInfoIterator> delegate_; + std::unique_ptr<SectionRestrictData> data_; +}; + DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict( - std::unique_ptr<DocHitInfoIterator> delegate, + std::unique_ptr<DocHitInfoIterator> delegate, SectionRestrictData* data) + : delegate_(std::move(delegate)), data_(data) {} + +std::unique_ptr<DocHitInfoIterator> +DocHitInfoIteratorSectionRestrict::ApplyRestrictions( + std::unique_ptr<DocHitInfoIterator> iterator, const DocumentStore* document_store, const SchemaStore* schema_store, - std::set<std::string> target_sections, int64_t current_time_ms) - : delegate_(std::move(delegate)), - document_store_(*document_store), - schema_store_(*schema_store), - target_sections_(std::move(target_sections)), - current_time_ms_(current_time_ms) {} + std::set<std::string> target_sections, int64_t current_time_ms) { + std::unordered_map<std::string, std::set<std::string>> type_property_filters; + type_property_filters[std::string(SchemaStore::kSchemaTypeWildcard)] = + std::move(target_sections); + auto data = std::make_unique<SectionRestrictData>( + document_store, schema_store, current_time_ms, type_property_filters); + std::unique_ptr<DocHitInfoIterator> result = + ApplyRestrictions(std::move(iterator), data.get()); + return std::make_unique<SectionRestrictDataHolderIterator>(std::move(result), + std::move(data)); +} + +std::unique_ptr<DocHitInfoIterator> +DocHitInfoIteratorSectionRestrict::ApplyRestrictions( + std::unique_ptr<DocHitInfoIterator> iterator, + const DocumentStore* document_store, const SchemaStore* schema_store, + const SearchSpecProto& search_spec, int64_t current_time_ms) { + std::unordered_map<std::string, std::set<std::string>> type_property_filters; + // TODO(b/294274922): Add support for polymorphism in type property filters. + for (const TypePropertyMask& type_property_mask : + search_spec.type_property_filters()) { + type_property_filters[type_property_mask.schema_type()] = + std::set<std::string>(type_property_mask.paths().begin(), + type_property_mask.paths().end()); + } + auto data = std::make_unique<SectionRestrictData>( + document_store, schema_store, current_time_ms, type_property_filters); + std::unique_ptr<DocHitInfoIterator> result = + ApplyRestrictions(std::move(iterator), data.get()); + return std::make_unique<SectionRestrictDataHolderIterator>(std::move(result), + std::move(data)); +} + +std::unique_ptr<DocHitInfoIterator> +DocHitInfoIteratorSectionRestrict::ApplyRestrictions( + std::unique_ptr<DocHitInfoIterator> iterator, SectionRestrictData* data) { + ChildrenMapper mapper; + mapper = [&data, &mapper](std::unique_ptr<DocHitInfoIterator> iterator) + -> std::unique_ptr<DocHitInfoIterator> { + if (iterator->is_leaf()) { + return std::make_unique<DocHitInfoIteratorSectionRestrict>( + std::move(iterator), data); + } else { + iterator->MapChildren(mapper); + return iterator; + } + }; + return mapper(std::move(iterator)); +} libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() { doc_hit_info_ = DocHitInfo(kInvalidDocumentId); - hit_intersect_section_ids_mask_ = kSectionIdMaskNone; while (delegate_->Advance().ok()) { DocumentId document_id = delegate_->doc_hit_info().document_id(); - SectionIdMask section_id_mask = - delegate_->doc_hit_info().hit_section_ids_mask(); - - auto data_optional = document_store_.GetAliveDocumentFilterData( - document_id, current_time_ms_); + auto data_optional = data_->document_store().GetAliveDocumentFilterData( + document_id, data_->current_time_ms()); if (!data_optional) { // Ran into some error retrieving information on this hit, skip continue; @@ -63,34 +159,35 @@ libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() { // Guaranteed that the DocumentFilterData exists at this point SchemaTypeId schema_type_id = data_optional.value().schema_type_id(); - - // A hit can be in multiple sections at once, need to check which of the - // section ids match the target sections - while (section_id_mask != 0) { - // There was a hit in this section id - SectionId section_id = __builtin_ctzll(section_id_mask); - - auto section_metadata_or = - schema_store_.GetSectionMetadata(schema_type_id, section_id); - - if (section_metadata_or.ok()) { - const SectionMetadata* section_metadata = - section_metadata_or.ValueOrDie(); - - if (target_sections_.find(section_metadata->path) != - target_sections_.end()) { - // The hit was in the target section name, return OK/found - hit_intersect_section_ids_mask_ |= UINT64_C(1) << section_id; - } - } - - // Mark this section as checked - section_id_mask &= ~(UINT64_C(1) << section_id); + auto schema_type_or = data_->schema_store().GetSchemaType(schema_type_id); + if (!schema_type_or.ok()) { + // Ran into error retrieving schema type, skip + continue; } + const std::string* schema_type = std::move(schema_type_or).ValueOrDie(); + SectionIdMask allowed_sections_mask = + data_->ComputeAllowedSectionsMask(*schema_type); - if (hit_intersect_section_ids_mask_ != kSectionIdMaskNone) { + // A hit can be in multiple sections at once, need to check which of the + // section ids match the sections allowed by type_property_masks_. This can + // be done by doing a bitwise and of the section_id_mask in the doc hit and + // the allowed_sections_mask. + SectionIdMask section_id_mask = + delegate_->doc_hit_info().hit_section_ids_mask() & + allowed_sections_mask; + + // Return this document if: + // - the sectionIdMask is not empty after applying property filters, or + // - no property filters apply for its schema type (allowed_sections_mask + // == kSectionIdMaskAll). This is needed to ensure that in case of empty + // query (which uses doc-hit-info-iterator-all-document-id), where + // section_id_mask is kSectionIdMaskNone, doc hits with no property + // restrictions don't get filtered out. Doc hits for schema types for + // whom property filters are specified will still get filtered out. + if (allowed_sections_mask == kSectionIdMaskAll || + section_id_mask != kSectionIdMaskNone) { doc_hit_info_ = delegate_->doc_hit_info(); - doc_hit_info_.set_hit_section_ids_mask(hit_intersect_section_ids_mask_); + doc_hit_info_.set_hit_section_ids_mask(section_id_mask); return libtextclassifier3::Status::OK; } // Didn't find a matching section name for this hit. Continue. @@ -104,30 +201,46 @@ libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode> DocHitInfoIteratorSectionRestrict::TrimRightMostNode() && { ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_delegate, std::move(*delegate_).TrimRightMostNode()); + // TrimRightMostNode is only used by suggestion processor to process query + // expression, so an entry for wildcard should always be present in + // type_property_filters_ when code flow reaches here. If the InternalError + // below is returned, that means TrimRightMostNode hasn't been called in the + // right context. + const auto it = data_->type_property_filters().find("*"); + if (it == data_->type_property_filters().end()) { + return absl_ports::InternalError( + "A wildcard entry should always be present in type property filters " + "whenever TrimRightMostNode() is called for " + "DocHitInfoIteratorSectionRestrict"); + } + const std::set<std::string>& target_sections = it->second; + if (target_sections.empty()) { + return absl_ports::InternalError( + "Target sections should not be empty whenever TrimRightMostNode() is " + "called for DocHitInfoIteratorSectionRestrict"); + } if (trimmed_delegate.iterator_ == nullptr) { // TODO(b/228240987): Update TrimmedNode and downstream code to handle // multiple section restricts. - trimmed_delegate.target_section_ = std::move(*target_sections_.begin()); + trimmed_delegate.target_section_ = std::move(*target_sections.begin()); return trimmed_delegate; } trimmed_delegate.iterator_ = - std::make_unique<DocHitInfoIteratorSectionRestrict>( - std::move(trimmed_delegate.iterator_), &document_store_, - &schema_store_, std::move(target_sections_), current_time_ms_); + std::unique_ptr<DocHitInfoIteratorSectionRestrict>( + new DocHitInfoIteratorSectionRestrict( + std::move(trimmed_delegate.iterator_), std::move(data_))); return std::move(trimmed_delegate); } -int32_t DocHitInfoIteratorSectionRestrict::GetNumBlocksInspected() const { - return delegate_->GetNumBlocksInspected(); -} - -int32_t DocHitInfoIteratorSectionRestrict::GetNumLeafAdvanceCalls() const { - return delegate_->GetNumLeafAdvanceCalls(); -} - std::string DocHitInfoIteratorSectionRestrict::ToString() const { - return absl_ports::StrCat("(", absl_ports::StrJoin(target_sections_, ","), - "): ", delegate_->ToString()); + std::string output = ""; + for (auto it = data_->type_property_filters().cbegin(); + it != data_->type_property_filters().cend(); it++) { + std::string paths = absl_ports::StrJoin(it->second, ","); + output += (it->first) + ":" + (paths) + "; "; + } + std::string result = "{" + output.substr(0, output.size() - 2) + "}: "; + return absl_ports::StrCat(result, delegate_->ToString()); } } // namespace lib |