diff options
Diffstat (limited to 'icing/schema/schema-property-iterator.cc')
-rw-r--r-- | icing/schema/schema-property-iterator.cc | 136 |
1 files changed, 125 insertions, 11 deletions
diff --git a/icing/schema/schema-property-iterator.cc b/icing/schema/schema-property-iterator.cc index e1078c2..8fc245c 100644 --- a/icing/schema/schema-property-iterator.cc +++ b/icing/schema/schema-property-iterator.cc @@ -14,9 +14,17 @@ #include "icing/schema/schema-property-iterator.h" +#include <algorithm> +#include <string> +#include <unordered_set> +#include <utility> +#include <vector> + #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/absl_ports/canonical_errors.h" #include "icing/absl_ports/str_cat.h" +#include "icing/proto/schema.pb.h" +#include "icing/schema/property-util.h" namespace icing { namespace lib { @@ -27,16 +35,63 @@ libtextclassifier3::Status SchemaPropertyIterator::Advance() { // When finishing iterating all properties of the current level, pop it // from the stack (levels_), return to the previous level and resume the // iteration. - parent_type_config_names_.erase(levels_.back().GetSchemaTypeName()); + parent_type_config_names_.erase( + parent_type_config_names_.find(levels_.back().GetSchemaTypeName())); levels_.pop_back(); continue; } const PropertyConfigProto& curr_property_config = levels_.back().GetCurrentPropertyConfig(); + std::string curr_property_path = levels_.back().GetCurrentPropertyPath(); + + // Iterate through the sorted_top_level_indexable_nested_properties_ in + // order until we find the first element that is >= curr_property_path. + while (current_top_level_indexable_nested_properties_idx_ < + sorted_top_level_indexable_nested_properties_.size() && + sorted_top_level_indexable_nested_properties_.at( + current_top_level_indexable_nested_properties_idx_) < + curr_property_path) { + // If an element in sorted_top_level_indexable_nested_properties_ < the + // current property path, it means that we've already iterated past the + // possible position for it without seeing it. + // It's not a valid property path in our schema definition. Add it to + // unknown_indexable_nested_properties_ and advance + // current_top_level_indexable_nested_properties_idx_. + unknown_indexable_nested_property_paths_.push_back( + sorted_top_level_indexable_nested_properties_.at( + current_top_level_indexable_nested_properties_idx_)); + ++current_top_level_indexable_nested_properties_idx_; + } + if (curr_property_config.data_type() != PropertyConfigProto::DataType::DOCUMENT) { // We've advanced to a leaf property. + // Set whether this property is indexable according to its level's + // indexable config. If this property is declared in + // indexable_nested_properties_list of the top-level schema, it is also + // nested indexable. + std::string* current_indexable_nested_prop = + current_top_level_indexable_nested_properties_idx_ < + sorted_top_level_indexable_nested_properties_.size() + ? &sorted_top_level_indexable_nested_properties_.at( + current_top_level_indexable_nested_properties_idx_) + : nullptr; + if (current_indexable_nested_prop == nullptr || + *current_indexable_nested_prop > curr_property_path) { + // Current property is not in the indexable list. Set it as indexable if + // its schema level is indexable AND it is an indexable property. + bool is_property_indexable = + levels_.back().GetLevelNestedIndexable() && + SchemaUtil::IsIndexedProperty(curr_property_config); + levels_.back().SetCurrentPropertyIndexable(is_property_indexable); + } else if (*current_indexable_nested_prop == curr_property_path) { + // Current property is in the indexable list. Set its indexable config + // to true. This property will consume a sectionId regardless of whether + // or not it is actually indexable. + levels_.back().SetCurrentPropertyIndexable(true); + ++current_top_level_indexable_nested_properties_idx_; + } return libtextclassifier3::Status::OK; } @@ -55,28 +110,87 @@ libtextclassifier3::Status SchemaPropertyIterator::Advance() { return absl_ports::NotFoundError(absl_ports::StrCat( "Type config not found: ", curr_property_config.schema_type())); } + const SchemaTypeConfigProto& nested_type_config = + nested_type_config_iter->second; - if (parent_type_config_names_.count( - nested_type_config_iter->second.schema_type()) > 0) { + if (levels_.back().GetLevelNestedIndexable()) { + // We should set sorted_top_level_indexable_nested_properties_ to the list + // defined by the current level. + // GetLevelNestedIndexable() is true either because: + // 1. We're looking at a document property of the top-level schema -- + // The first LevelInfo for the iterator is initialized with + // all_nested_properties_indexable_ = true. + // 2. All previous levels set index_nested_properties = true: + // This indicates that upper-level schema types want to follow nested + // properties definition of its document subtypes. If this is the first + // subtype level that defines a list, we should set it as + // top_level_indexable_nested_properties_ for the current top-level + // schema. + sorted_top_level_indexable_nested_properties_.clear(); + sorted_top_level_indexable_nested_properties_.reserve( + curr_property_config.document_indexing_config() + .indexable_nested_properties_list() + .size()); + for (const std::string& property : + curr_property_config.document_indexing_config() + .indexable_nested_properties_list()) { + // Concat the current property name to each property to get the full + // property path expression for each indexable nested property. + sorted_top_level_indexable_nested_properties_.push_back( + property_util::ConcatenatePropertyPathExpr(curr_property_path, + property)); + } + current_top_level_indexable_nested_properties_idx_ = 0; + // Sort elements and dedupe + std::sort(sorted_top_level_indexable_nested_properties_.begin(), + sorted_top_level_indexable_nested_properties_.end()); + auto last = + std::unique(sorted_top_level_indexable_nested_properties_.begin(), + sorted_top_level_indexable_nested_properties_.end()); + sorted_top_level_indexable_nested_properties_.erase( + last, sorted_top_level_indexable_nested_properties_.end()); + } + + bool is_cycle = + parent_type_config_names_.find(nested_type_config.schema_type()) != + parent_type_config_names_.end(); + bool is_parent_property_path = + current_top_level_indexable_nested_properties_idx_ < + sorted_top_level_indexable_nested_properties_.size() && + property_util::IsParentPropertyPath( + curr_property_path, + sorted_top_level_indexable_nested_properties_.at( + current_top_level_indexable_nested_properties_idx_)); + if (is_cycle && !is_parent_property_path) { // Cycle detected. The schema definition is guaranteed to be valid here // since it must have already been validated during SchemaUtil::Validate, // which would have rejected any schema with bad cycles. // + // There are no properties in the indexable_nested_properties_list that + // are a part of this circular reference. // We do not need to iterate this type further so we simply move on to // other properties in the parent type. continue; } - std::string curr_property_path = levels_.back().GetCurrentPropertyPath(); - bool is_nested_indexable = levels_.back().GetCurrentNestedIndexable() && - curr_property_config.document_indexing_config() - .index_nested_properties(); - levels_.push_back(LevelInfo(nested_type_config_iter->second, + bool all_nested_properties_indexable = + levels_.back().GetLevelNestedIndexable() && + curr_property_config.document_indexing_config() + .index_nested_properties(); + levels_.push_back(LevelInfo(nested_type_config, std::move(curr_property_path), - is_nested_indexable)); - parent_type_config_names_.insert( - nested_type_config_iter->second.schema_type()); + all_nested_properties_indexable)); + parent_type_config_names_.insert(nested_type_config.schema_type()); } + + // Before returning, move all remaining uniterated properties from + // sorted_top_level_indexable_nested_properties_ into + // unknown_indexable_nested_properties_. + std::move(sorted_top_level_indexable_nested_properties_.begin() + + current_top_level_indexable_nested_properties_idx_, + sorted_top_level_indexable_nested_properties_.end(), + std::back_inserter(unknown_indexable_nested_property_paths_)); + return absl_ports::OutOfRangeError("End of iterator"); } |