aboutsummaryrefslogtreecommitdiff
path: root/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
diff options
context:
space:
mode:
Diffstat (limited to 'icing/index/iterator/doc-hit-info-iterator-section-restrict.cc')
-rw-r--r-- icing/index/iterator/doc-hit-info-iterator-section-restrict.cc 217
1 files changed, 165 insertions, 52 deletions
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
index 227a185..35dc0b9 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
@@ -16,46 +16,142 @@
#include <cstdint>
#include <memory>
+#include <set>
#include <string>
#include <string_view>
+#include <unordered_map>
#include <utility>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
+#include "icing/absl_ports/str_join.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/iterator/section-restrict-data.h"
+#include "icing/proto/search.pb.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
+#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
+// Wrapper iterator whose only extra job is to own a SectionRestrictData
+// instance. Every iterator operation is forwarded to the wrapped delegate; the
+// data is held in a unique_ptr member and is released together with this
+// object.
+class SectionRestrictDataHolderIterator : public DocHitInfoIterator {
+ public:
+  explicit SectionRestrictDataHolderIterator(
+      std::unique_ptr<DocHitInfoIterator> delegate,
+      std::unique_ptr<SectionRestrictData> data)
+      : delegate_(std::move(delegate)), data_(std::move(data)) {}
+
+  // Advances the delegate, mirrors its current hit into doc_hit_info_, and
+  // reports the delegate's status unchanged.
+  libtextclassifier3::Status Advance() override {
+    libtextclassifier3::Status status = delegate_->Advance();
+    doc_hit_info_ = delegate_->doc_hit_info();
+    return status;
+  }
+
+  // Trims the delegate. If an iterator remains after trimming, it is wrapped
+  // in a new holder that takes over ownership of the data.
+  libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override {
+    ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed,
+                           std::move(*delegate_).TrimRightMostNode());
+    if (trimmed.iterator_ == nullptr) {
+      return trimmed;
+    }
+    trimmed.iterator_ = std::make_unique<SectionRestrictDataHolderIterator>(
+        std::move(trimmed.iterator_), std::move(data_));
+    return trimmed;
+  }
+
+  // The delegate is this iterator's only child.
+  void MapChildren(const ChildrenMapper& mapper) override {
+    delegate_ = mapper(std::move(delegate_));
+  }
+
+  CallStats GetCallStats() const override {
+    return delegate_->GetCallStats();
+  }
+
+  std::string ToString() const override {
+    return delegate_->ToString();
+  }
+
+  void PopulateMatchedTermsStats(
+      std::vector<TermMatchInfo>* matched_terms_stats,
+      SectionIdMask filtering_section_mask) const override {
+    delegate_->PopulateMatchedTermsStats(matched_terms_stats,
+                                         filtering_section_mask);
+  }
+
+ private:
+  std::unique_ptr<DocHitInfoIterator> delegate_;
+  std::unique_ptr<SectionRestrictData> data_;
+};
+
DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict(
- std::unique_ptr<DocHitInfoIterator> delegate,
+ std::unique_ptr<DocHitInfoIterator> delegate, SectionRestrictData* data)
+ : delegate_(std::move(delegate)), data_(data) {}
+
+// Overload that restricts every schema type to target_sections: the set is
+// stored under the SchemaStore::kSchemaTypeWildcard key, a SectionRestrictData
+// is built from it, and the restricted iterator tree is returned inside a
+// holder iterator that owns that data.
+std::unique_ptr<DocHitInfoIterator>
+DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::unique_ptr<DocHitInfoIterator> iterator,
const DocumentStore* document_store, const SchemaStore* schema_store,
- std::set<std::string> target_sections, int64_t current_time_ms)
- : delegate_(std::move(delegate)),
- document_store_(*document_store),
- schema_store_(*schema_store),
- target_sections_(std::move(target_sections)),
- current_time_ms_(current_time_ms) {}
+ std::set<std::string> target_sections, int64_t current_time_ms) {
+ std::unordered_map<std::string, std::set<std::string>> filters_by_type;
+ filters_by_type.insert_or_assign(
+ std::string(SchemaStore::kSchemaTypeWildcard), std::move(target_sections));
+ auto restrict_data = std::make_unique<SectionRestrictData>(
+ document_store, schema_store, current_time_ms, filters_by_type);
+ // Take the raw pointer before ownership moves into the holder below.
+ SectionRestrictData* raw_data = restrict_data.get();
+ std::unique_ptr<DocHitInfoIterator> restricted =
+ ApplyRestrictions(std::move(iterator), raw_data);
+ return std::make_unique<SectionRestrictDataHolderIterator>(
+ std::move(restricted), std::move(restrict_data));
+}
+
+// Overload that derives the per-schema-type property filters from
+// search_spec.type_property_filters(), builds a SectionRestrictData from them,
+// and returns the restricted iterator tree inside a holder iterator that owns
+// that data.
+std::unique_ptr<DocHitInfoIterator>
+DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+    std::unique_ptr<DocHitInfoIterator> iterator,
+    const DocumentStore* document_store, const SchemaStore* schema_store,
+    const SearchSpecProto& search_spec, int64_t current_time_ms) {
+  // TODO(b/294274922): Add support for polymorphism in type property filters.
+  std::unordered_map<std::string, std::set<std::string>> filters_by_type;
+  for (const TypePropertyMask& mask : search_spec.type_property_filters()) {
+    // A later mask for the same schema type replaces an earlier one.
+    filters_by_type.insert_or_assign(
+        mask.schema_type(),
+        std::set<std::string>(mask.paths().begin(), mask.paths().end()));
+  }
+  auto restrict_data = std::make_unique<SectionRestrictData>(
+      document_store, schema_store, current_time_ms, filters_by_type);
+  // Take the raw pointer before ownership moves into the holder below.
+  SectionRestrictData* raw_data = restrict_data.get();
+  std::unique_ptr<DocHitInfoIterator> restricted =
+      ApplyRestrictions(std::move(iterator), raw_data);
+  return std::make_unique<SectionRestrictDataHolderIterator>(
+      std::move(restricted), std::move(restrict_data));
+}
+
+// Walks the iterator tree rooted at `iterator` and wraps every leaf in a
+// DocHitInfoIteratorSectionRestrict that refers to `data`. Non-leaf nodes are
+// kept as they are and have their children remapped through MapChildren().
+// Only the raw pointer is handed to the wrappers; no ownership of `data` is
+// taken here.
+std::unique_ptr<DocHitInfoIterator>
+DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+    std::unique_ptr<DocHitInfoIterator> iterator, SectionRestrictData* data) {
+  // Declared before it is assigned so the lambda can refer to itself when
+  // recursing into children.
+  ChildrenMapper wrap_leaves;
+  wrap_leaves = [&data, &wrap_leaves](std::unique_ptr<DocHitInfoIterator> node)
+      -> std::unique_ptr<DocHitInfoIterator> {
+    if (!node->is_leaf()) {
+      node->MapChildren(wrap_leaves);
+      return node;
+    }
+    return std::make_unique<DocHitInfoIteratorSectionRestrict>(std::move(node),
+                                                               data);
+  };
+  return wrap_leaves(std::move(iterator));
+}
libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() {
doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
while (delegate_->Advance().ok()) {
DocumentId document_id = delegate_->doc_hit_info().document_id();
- SectionIdMask section_id_mask =
- delegate_->doc_hit_info().hit_section_ids_mask();
-
- auto data_optional = document_store_.GetAliveDocumentFilterData(
- document_id, current_time_ms_);
+ auto data_optional = data_->document_store().GetAliveDocumentFilterData(
+ document_id, data_->current_time_ms());
if (!data_optional) {
// Ran into some error retrieving information on this hit, skip
continue;
@@ -63,34 +159,35 @@ libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() {
// Guaranteed that the DocumentFilterData exists at this point
SchemaTypeId schema_type_id = data_optional.value().schema_type_id();
-
- // A hit can be in multiple sections at once, need to check which of the
- // section ids match the target sections
- while (section_id_mask != 0) {
- // There was a hit in this section id
- SectionId section_id = __builtin_ctzll(section_id_mask);
-
- auto section_metadata_or =
- schema_store_.GetSectionMetadata(schema_type_id, section_id);
-
- if (section_metadata_or.ok()) {
- const SectionMetadata* section_metadata =
- section_metadata_or.ValueOrDie();
-
- if (target_sections_.find(section_metadata->path) !=
- target_sections_.end()) {
- // The hit was in the target section name, return OK/found
- hit_intersect_section_ids_mask_ |= UINT64_C(1) << section_id;
- }
- }
-
- // Mark this section as checked
- section_id_mask &= ~(UINT64_C(1) << section_id);
+ auto schema_type_or = data_->schema_store().GetSchemaType(schema_type_id);
+ if (!schema_type_or.ok()) {
+ // Ran into error retrieving schema type, skip
+ continue;
}
+ const std::string* schema_type = std::move(schema_type_or).ValueOrDie();
+ SectionIdMask allowed_sections_mask =
+ data_->ComputeAllowedSectionsMask(*schema_type);
- if (hit_intersect_section_ids_mask_ != kSectionIdMaskNone) {
+ // A hit can be in multiple sections at once, so we need to check which of
+ // its section ids are allowed by the type property filters. This is done
+ // with a bitwise AND of the hit's section id mask and
+ // allowed_sections_mask.
+ SectionIdMask section_id_mask =
+ delegate_->doc_hit_info().hit_section_ids_mask() &
+ allowed_sections_mask;
+
+ // Return this document if:
+ // - the section id mask is not empty after applying property filters, or
+ // - no property filters apply to its schema type (allowed_sections_mask
+ // == kSectionIdMaskAll). This is needed so that, for an empty query
+ // (which uses doc-hit-info-iterator-all-document-id) where
+ // section_id_mask is kSectionIdMaskNone, doc hits with no property
+ // restrictions don't get filtered out. Doc hits for schema types for
+ // which property filters are specified will still get filtered out.
+ if (allowed_sections_mask == kSectionIdMaskAll ||
+ section_id_mask != kSectionIdMaskNone) {
doc_hit_info_ = delegate_->doc_hit_info();
- doc_hit_info_.set_hit_section_ids_mask(hit_intersect_section_ids_mask_);
+ doc_hit_info_.set_hit_section_ids_mask(section_id_mask);
return libtextclassifier3::Status::OK;
}
// Didn't find a matching section name for this hit. Continue.
@@ -104,30 +201,46 @@ libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
DocHitInfoIteratorSectionRestrict::TrimRightMostNode() && {
 ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_delegate,
 std::move(*delegate_).TrimRightMostNode());
+ // TrimRightMostNode is only used by the suggestion processor to process a
+ // query expression, so a wildcard entry should always be present in the
+ // type property filters when code flow reaches here. If the InternalError
+ // below is returned, TrimRightMostNode hasn't been called in the right
+ // context.
+ //
+ // Look the entry up with the same key constant that ApplyRestrictions()
+ // uses to insert it, rather than a separate string literal.
+ const auto it = data_->type_property_filters().find(
+ std::string(SchemaStore::kSchemaTypeWildcard));
+ if (it == data_->type_property_filters().end()) {
+ return absl_ports::InternalError(
+ "A wildcard entry should always be present in type property filters "
+ "whenever TrimRightMostNode() is called for "
+ "DocHitInfoIteratorSectionRestrict");
+ }
+ const std::set<std::string>& target_sections = it->second;
+ if (target_sections.empty()) {
+ return absl_ports::InternalError(
+ "Target sections should not be empty whenever TrimRightMostNode() is "
+ "called for DocHitInfoIteratorSectionRestrict");
+ }
 if (trimmed_delegate.iterator_ == nullptr) {
 // TODO(b/228240987): Update TrimmedNode and downstream code to handle
 // multiple section restricts.
- trimmed_delegate.target_section_ = std::move(*target_sections_.begin());
+ // target_sections is a const reference into the shared filter data, so
+ // the first section is copied; a std::move here could not move anything.
+ trimmed_delegate.target_section_ = *target_sections.begin();
 return trimmed_delegate;
 }
 trimmed_delegate.iterator_ =
- std::make_unique<DocHitInfoIteratorSectionRestrict>(
- std::move(trimmed_delegate.iterator_), &document_store_,
- &schema_store_, std::move(target_sections_), current_time_ms_);
+ // data_ is a non-owning raw pointer, so it is passed on as is.
+ std::make_unique<DocHitInfoIteratorSectionRestrict>(
+ std::move(trimmed_delegate.iterator_), data_);
 return std::move(trimmed_delegate);
}
-int32_t DocHitInfoIteratorSectionRestrict::GetNumBlocksInspected() const {
- return delegate_->GetNumBlocksInspected();
-}
-
-int32_t DocHitInfoIteratorSectionRestrict::GetNumLeafAdvanceCalls() const {
- return delegate_->GetNumLeafAdvanceCalls();
-}
-
std::string DocHitInfoIteratorSectionRestrict::ToString() const {
- return absl_ports::StrCat("(", absl_ports::StrJoin(target_sections_, ","),
- "): ", delegate_->ToString());
+ // Renders as "{type1:path1,path2; type2:path3}: <delegate>". Entries follow
+ // the iteration order of the type property filter map; an empty map renders
+ // as "{}: <delegate>".
+ std::string entries;
+ for (const auto& filter : data_->type_property_filters()) {
+ // Every entry contains a ':', so a non-empty string means at least one
+ // entry has already been written and a separator is needed.
+ if (!entries.empty()) {
+ entries += "; ";
+ }
+ entries += filter.first + ":" + absl_ports::StrJoin(filter.second, ",");
+ }
+ std::string prefix = "{" + entries + "}: ";
+ return absl_ports::StrCat(prefix, delegate_->ToString());
}
} // namespace lib