diff options
Diffstat (limited to 'icing/icing-search-engine.cc')
-rw-r--r-- | icing/icing-search-engine.cc | 321 |
1 files changed, 217 insertions, 104 deletions
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc index 467c943..72be4e9 100644 --- a/icing/icing-search-engine.cc +++ b/icing/icing-search-engine.cc @@ -40,8 +40,10 @@ #include "icing/index/integer-section-indexing-handler.h" #include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/index/numeric/integer-index.h" -#include "icing/index/string-section-indexing-handler.h" +#include "icing/index/term-indexing-handler.h" #include "icing/join/join-processor.h" +#include "icing/join/qualified-id-join-index-impl-v1.h" +#include "icing/join/qualified-id-join-index-impl-v2.h" #include "icing/join/qualified-id-join-index.h" #include "icing/join/qualified-id-join-indexing-handler.h" #include "icing/legacy/index/icing-filesystem.h" @@ -87,6 +89,7 @@ #include "icing/transform/normalizer.h" #include "icing/util/clock.h" #include "icing/util/crc32.h" +#include "icing/util/data-loss.h" #include "icing/util/logging.h" #include "icing/util/status-macros.h" #include "icing/util/tokenized-document.h" @@ -227,6 +230,29 @@ libtextclassifier3::Status ValidateSuggestionSpec( return libtextclassifier3::Status::OK; } +bool IsV2QualifiedIdJoinIndexEnabled(const IcingSearchEngineOptions& options) { + return options.use_new_qualified_id_join_index() && + options.document_store_namespace_id_fingerprint(); +} + +libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>> +CreateQualifiedIdJoinIndex(const Filesystem& filesystem, + std::string qualified_id_join_index_dir, + const IcingSearchEngineOptions& options) { + if (IsV2QualifiedIdJoinIndexEnabled(options)) { + // V2 + return QualifiedIdJoinIndexImplV2::Create( + filesystem, std::move(qualified_id_join_index_dir), + options.pre_mapping_fbv()); + } else { + // V1 + // TODO(b/275121148): deprecate this part after rollout v2. + return QualifiedIdJoinIndexImplV1::Create( + filesystem, std::move(qualified_id_join_index_dir), + options.pre_mapping_fbv(), options.use_persistent_hash_map()); + } +} + // Version file is a single file under base_dir containing version info of the // existing data. std::string MakeVersionFilePath(const std::string& base_dir) { @@ -639,22 +665,33 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers( "Could not delete directories: ", index_dir, ", ", integer_index_dir, ", ", qualified_id_join_index_dir, " and ", doc_store_dir)); } - ICING_RETURN_IF_ERROR(InitializeDocumentStore( - /*force_recovery_and_revalidate_documents=*/false, initialize_stats)); - index_init_status = InitializeIndex(initialize_stats); + ICING_ASSIGN_OR_RETURN( + bool document_store_derived_files_regenerated, + InitializeDocumentStore( + /*force_recovery_and_revalidate_documents=*/false, + initialize_stats)); + index_init_status = InitializeIndex( + document_store_derived_files_regenerated, initialize_stats); if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) { return index_init_status; } } else if (filesystem_->FileExists(marker_filepath.c_str())) { // If the marker file is still around then something wonky happened when we // last tried to set the schema. + // + // Since we're going to rebuild all indices in this case, the return value + // of InitializeDocumentStore (document_store_derived_files_regenerated) is + // unused. ICING_RETURN_IF_ERROR(InitializeDocumentStore( /*force_recovery_and_revalidate_documents=*/true, initialize_stats)); // We're going to need to build the index from scratch. So just delete its // directory now. // Discard index directory and instantiate a new one. - Index::Options index_options(index_dir, options_.index_merge_size()); + Index::Options index_options( + index_dir, options_.index_merge_size(), + options_.lite_index_sort_at_indexing(), options_.lite_index_sort_size(), + options_.build_property_existence_metadata_hits()); if (!filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) || !filesystem_->CreateDirectoryRecursively(index_dir.c_str())) { return absl_ports::InternalError( @@ -682,9 +719,8 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers( *filesystem_, qualified_id_join_index_dir)); ICING_ASSIGN_OR_RETURN( qualified_id_join_index_, - QualifiedIdJoinIndex::Create( - *filesystem_, std::move(qualified_id_join_index_dir), - options_.pre_mapping_fbv(), options_.use_persistent_hash_map())); + CreateQualifiedIdJoinIndex( + *filesystem_, std::move(qualified_id_join_index_dir), options_)); std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer(); IndexRestorationResult restore_result = RestoreIndexIfNeeded(); @@ -709,9 +745,12 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers( initialize_stats->set_qualified_id_join_index_restoration_cause( InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC); } else if (version_state_change != version_util::StateChange::kCompatible) { - ICING_RETURN_IF_ERROR(InitializeDocumentStore( - /*force_recovery_and_revalidate_documents=*/true, initialize_stats)); - index_init_status = InitializeIndex(initialize_stats); + ICING_ASSIGN_OR_RETURN(bool document_store_derived_files_regenerated, + InitializeDocumentStore( + /*force_recovery_and_revalidate_documents=*/true, + initialize_stats)); + index_init_status = InitializeIndex( + document_store_derived_files_regenerated, initialize_stats); if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) { return index_init_status; } @@ -727,9 +766,13 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers( initialize_stats->set_qualified_id_join_index_restoration_cause( InitializeStatsProto::VERSION_CHANGED); } else { - ICING_RETURN_IF_ERROR(InitializeDocumentStore( - /*force_recovery_and_revalidate_documents=*/false, initialize_stats)); - index_init_status = InitializeIndex(initialize_stats); + ICING_ASSIGN_OR_RETURN( + bool document_store_derived_files_regenerated, + InitializeDocumentStore( + /*force_recovery_and_revalidate_documents=*/false, + initialize_stats)); + index_init_status = InitializeIndex( + document_store_derived_files_regenerated, initialize_stats); if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) { return index_init_status; } @@ -763,7 +806,7 @@ libtextclassifier3::Status IcingSearchEngine::InitializeSchemaStore( return libtextclassifier3::Status::OK; } -libtextclassifier3::Status IcingSearchEngine::InitializeDocumentStore( +libtextclassifier3::StatusOr<bool> IcingSearchEngine::InitializeDocumentStore( bool force_recovery_and_revalidate_documents, InitializeStatsProto* initialize_stats) { ICING_RETURN_ERROR_IF_NULL(initialize_stats); @@ -785,10 +828,11 @@ libtextclassifier3::Status IcingSearchEngine::InitializeDocumentStore( options_.compression_level(), initialize_stats)); document_store_ = std::move(create_result.document_store); - return libtextclassifier3::Status::OK; + return create_result.derived_files_regenerated; } libtextclassifier3::Status IcingSearchEngine::InitializeIndex( + bool document_store_derived_files_regenerated, InitializeStatsProto* initialize_stats) { ICING_RETURN_ERROR_IF_NULL(initialize_stats); @@ -798,7 +842,10 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex( return absl_ports::InternalError( absl_ports::StrCat("Could not create directory: ", index_dir)); } - Index::Options index_options(index_dir, options_.index_merge_size()); + Index::Options index_options( + index_dir, options_.index_merge_size(), + options_.lite_index_sort_at_indexing(), options_.lite_index_sort_size(), + options_.build_property_existence_metadata_hits()); // Term index InitializeStatsProto::RecoveryCause index_recovery_cause; @@ -858,29 +905,44 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex( std::string qualified_id_join_index_dir = MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir()); InitializeStatsProto::RecoveryCause qualified_id_join_index_recovery_cause; - auto qualified_id_join_index_or = QualifiedIdJoinIndex::Create( - *filesystem_, qualified_id_join_index_dir, options_.pre_mapping_fbv(), - options_.use_persistent_hash_map()); - if (!qualified_id_join_index_or.ok()) { + if (document_store_derived_files_regenerated && + IsV2QualifiedIdJoinIndexEnabled(options_)) { + // V2 qualified id join index depends on document store derived files, so we + // have to rebuild it from scratch if + // document_store_derived_files_regenerated is true. ICING_RETURN_IF_ERROR(QualifiedIdJoinIndex::Discard( *filesystem_, qualified_id_join_index_dir)); - qualified_id_join_index_recovery_cause = InitializeStatsProto::IO_ERROR; - - // Try recreating it from scratch and rebuild everything. ICING_ASSIGN_OR_RETURN( qualified_id_join_index_, - QualifiedIdJoinIndex::Create( - *filesystem_, std::move(qualified_id_join_index_dir), - options_.pre_mapping_fbv(), options_.use_persistent_hash_map())); - } else { - // Qualified id join index was created fine. - qualified_id_join_index_ = - std::move(qualified_id_join_index_or).ValueOrDie(); - // If a recover does have to happen, then it must be because the index is - // out of sync with the document store. + CreateQualifiedIdJoinIndex( + *filesystem_, std::move(qualified_id_join_index_dir), options_)); + qualified_id_join_index_recovery_cause = - InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH; + InitializeStatsProto::DEPENDENCIES_CHANGED; + } else { + auto qualified_id_join_index_or = CreateQualifiedIdJoinIndex( + *filesystem_, qualified_id_join_index_dir, options_); + if (!qualified_id_join_index_or.ok()) { + ICING_RETURN_IF_ERROR(QualifiedIdJoinIndex::Discard( + *filesystem_, qualified_id_join_index_dir)); + + qualified_id_join_index_recovery_cause = InitializeStatsProto::IO_ERROR; + + // Try recreating it from scratch and rebuild everything. + ICING_ASSIGN_OR_RETURN( + qualified_id_join_index_, + CreateQualifiedIdJoinIndex( + *filesystem_, std::move(qualified_id_join_index_dir), options_)); + } else { + // Qualified id join index was created fine. + qualified_id_join_index_ = + std::move(qualified_id_join_index_or).ValueOrDie(); + // If a recover does have to happen, then it must be because the index is + // out of sync with the document store. + qualified_id_join_index_recovery_cause = + InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH; + } } std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer(); @@ -1552,33 +1614,41 @@ OptimizeResultProto IcingSearchEngine::Optimize() { // TODO(b/143646633): figure out if we need to optimize index and doc store // at the same time. std::unique_ptr<Timer> optimize_doc_store_timer = clock_->GetNewTimer(); - libtextclassifier3::StatusOr<std::vector<DocumentId>> - document_id_old_to_new_or = OptimizeDocumentStore(optimize_stats); + libtextclassifier3::StatusOr<DocumentStore::OptimizeResult> + optimize_result_or = OptimizeDocumentStore(optimize_stats); optimize_stats->set_document_store_optimize_latency_ms( optimize_doc_store_timer->GetElapsedMilliseconds()); - if (!document_id_old_to_new_or.ok() && - !absl_ports::IsDataLoss(document_id_old_to_new_or.status())) { + if (!optimize_result_or.ok() && + !absl_ports::IsDataLoss(optimize_result_or.status())) { // The status now is either ABORTED_ERROR or INTERNAL_ERROR. // If ABORTED_ERROR, Icing should still be working. // If INTERNAL_ERROR, we're having IO errors or other errors that we can't // recover from. - TransformStatus(document_id_old_to_new_or.status(), result_status); + TransformStatus(optimize_result_or.status(), result_status); return result_proto; } // The status is either OK or DATA_LOSS. The optimized document store is // guaranteed to work, so we update index according to the new document store. std::unique_ptr<Timer> optimize_index_timer = clock_->GetNewTimer(); + auto doc_store_optimize_result_status = optimize_result_or.status(); bool should_rebuild_index = - !document_id_old_to_new_or.ok() || + !optimize_result_or.ok() || + optimize_result_or.ValueOrDie().should_rebuild_index || ShouldRebuildIndex(*optimize_stats, options_.optimize_rebuild_index_threshold()); if (!should_rebuild_index) { + // At this point should_rebuild_index is false, so it means + // optimize_result_or.ok() is true and therefore it is safe to call + // ValueOrDie. + DocumentStore::OptimizeResult optimize_result = + std::move(optimize_result_or).ValueOrDie(); + optimize_stats->set_index_restoration_mode( OptimizeStatsProto::INDEX_TRANSLATION); libtextclassifier3::Status index_optimize_status = - index_->Optimize(document_id_old_to_new_or.ValueOrDie(), + index_->Optimize(optimize_result.document_id_old_to_new, document_store_->last_added_document_id()); if (!index_optimize_status.ok()) { ICING_LOG(WARNING) << "Failed to optimize index. Error: " @@ -1587,7 +1657,7 @@ OptimizeResultProto IcingSearchEngine::Optimize() { } libtextclassifier3::Status integer_index_optimize_status = - integer_index_->Optimize(document_id_old_to_new_or.ValueOrDie(), + integer_index_->Optimize(optimize_result.document_id_old_to_new, document_store_->last_added_document_id()); if (!integer_index_optimize_status.ok()) { ICING_LOG(WARNING) << "Failed to optimize integer index. Error: " @@ -1597,7 +1667,8 @@ OptimizeResultProto IcingSearchEngine::Optimize() { libtextclassifier3::Status qualified_id_join_index_optimize_status = qualified_id_join_index_->Optimize( - document_id_old_to_new_or.ValueOrDie(), + optimize_result.document_id_old_to_new, + optimize_result.namespace_id_old_to_new, document_store_->last_added_document_id()); if (!qualified_id_join_index_optimize_status.ok()) { ICING_LOG(WARNING) @@ -1609,6 +1680,7 @@ OptimizeResultProto IcingSearchEngine::Optimize() { // If we received a DATA_LOSS error from OptimizeDocumentStore, we have a // valid document store, but it might be the old one or the new one. So throw // out the index data and rebuild from scratch. + // Also rebuild index if DocumentStore::OptimizeInto hints to do so. // Likewise, if Index::Optimize failed, then attempt to recover the index by // rebuilding from scratch. // If ShouldRebuildIndex() returns true, we will also rebuild the index for @@ -1667,7 +1739,11 @@ OptimizeResultProto IcingSearchEngine::Optimize() { // Update the status for this run and write it. auto optimize_status = std::make_unique<OptimizeStatusProto>(); optimize_status->set_last_successful_optimize_run_time_ms(current_time); - optimize_status_file.Write(std::move(optimize_status)); + auto write_status = optimize_status_file.Write(std::move(optimize_status)); + if (!write_status.ok()) { + ICING_LOG(ERROR) << "Failed to write optimize status:\n" + << write_status.error_message(); + } // Flushes data to disk after doing optimization status = InternalPersistToDisk(PersistType::FULL); @@ -1680,7 +1756,7 @@ OptimizeResultProto IcingSearchEngine::Optimize() { optimize_stats->set_storage_size_after( Filesystem::SanitizeFileSize(after_size)); - TransformStatus(document_id_old_to_new_or.status(), result_status); + TransformStatus(doc_store_optimize_result_status, result_status); return result_proto; } @@ -1887,7 +1963,17 @@ SearchResultProto IcingSearchEngine::InternalSearch( StatusProto* result_status = result_proto.mutable_status(); QueryStatsProto* query_stats = result_proto.mutable_query_stats(); + query_stats->set_is_first_page(true); + query_stats->set_requested_page_size(result_spec.num_per_page()); + + // TODO(b/305098009): deprecate search-related flat fields in query_stats. + query_stats->set_num_namespaces_filtered( + search_spec.namespace_filters_size()); + query_stats->set_num_schema_types_filtered( + search_spec.schema_type_filters_size()); query_stats->set_query_length(search_spec.query().length()); + query_stats->set_ranking_strategy(scoring_spec.rank_by()); + if (!initialized_) { result_status->set_code(StatusProto::FAILED_PRECONDITION); result_status->set_message("IcingSearchEngine has not been initialized!"); @@ -1906,27 +1992,22 @@ SearchResultProto IcingSearchEngine::InternalSearch( return result_proto; } - query_stats->set_num_namespaces_filtered( - search_spec.namespace_filters_size()); - query_stats->set_num_schema_types_filtered( - search_spec.schema_type_filters_size()); - query_stats->set_ranking_strategy(scoring_spec.rank_by()); - query_stats->set_is_first_page(true); - query_stats->set_requested_page_size(result_spec.num_per_page()); - const JoinSpecProto& join_spec = search_spec.join_spec(); std::unique_ptr<JoinChildrenFetcher> join_children_fetcher; std::unique_ptr<ResultAdjustmentInfo> child_result_adjustment_info; int64_t current_time_ms = clock_->GetSystemTimeMilliseconds(); if (!join_spec.parent_property_expression().empty() && !join_spec.child_property_expression().empty()) { + query_stats->set_is_join_query(true); + QueryStatsProto::SearchStats* child_search_stats = + query_stats->mutable_child_search_stats(); + // Process child query QueryScoringResults nested_query_scoring_results = ProcessQueryAndScore( join_spec.nested_spec().search_spec(), join_spec.nested_spec().scoring_spec(), join_spec.nested_spec().result_spec(), - /*join_children_fetcher=*/nullptr, current_time_ms); - // TOOD(b/256022027): set different kinds of latency for 2nd query. + /*join_children_fetcher=*/nullptr, current_time_ms, child_search_stats); if (!nested_query_scoring_results.status.ok()) { TransformStatus(nested_query_scoring_results.status, result_status); return result_proto; @@ -1957,24 +2038,24 @@ SearchResultProto IcingSearchEngine::InternalSearch( } // Process parent query - QueryScoringResults query_scoring_results = - ProcessQueryAndScore(search_spec, scoring_spec, result_spec, - join_children_fetcher.get(), current_time_ms); - int term_count = 0; - for (const auto& section_and_terms : query_scoring_results.query_terms) { - term_count += section_and_terms.second.size(); - } - query_stats->set_num_terms(term_count); + QueryStatsProto::SearchStats* parent_search_stats = + query_stats->mutable_parent_search_stats(); + QueryScoringResults query_scoring_results = ProcessQueryAndScore( + search_spec, scoring_spec, result_spec, join_children_fetcher.get(), + current_time_ms, parent_search_stats); + // TODO(b/305098009): deprecate search-related flat fields in query_stats. + query_stats->set_num_terms(parent_search_stats->num_terms()); query_stats->set_parse_query_latency_ms( - query_scoring_results.parse_query_latency_ms); - query_stats->set_scoring_latency_ms(query_scoring_results.scoring_latency_ms); + parent_search_stats->parse_query_latency_ms()); + query_stats->set_scoring_latency_ms( + parent_search_stats->scoring_latency_ms()); + query_stats->set_num_documents_scored( + parent_search_stats->num_documents_scored()); if (!query_scoring_results.status.ok()) { TransformStatus(query_scoring_results.status, result_status); return result_proto; } - query_stats->set_num_documents_scored( - query_scoring_results.scored_document_hits.size()); // Returns early for empty result if (query_scoring_results.scored_document_hits.empty()) { result_status->set_code(StatusProto::OK); @@ -2088,7 +2169,15 @@ SearchResultProto IcingSearchEngine::InternalSearch( IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore( const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec, const ResultSpecProto& result_spec, - const JoinChildrenFetcher* join_children_fetcher, int64_t current_time_ms) { + const JoinChildrenFetcher* join_children_fetcher, int64_t current_time_ms, + QueryStatsProto::SearchStats* search_stats) { + search_stats->set_num_namespaces_filtered( + search_spec.namespace_filters_size()); + search_stats->set_num_schema_types_filtered( + search_spec.schema_type_filters_size()); + search_stats->set_query_length(search_spec.query().length()); + search_stats->set_ranking_strategy(scoring_spec.rank_by()); + std::unique_ptr<Timer> component_timer = clock_->GetNewTimer(); // Gets unordered results from query processor @@ -2096,11 +2185,11 @@ IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore( index_.get(), integer_index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), schema_store_.get()); if (!query_processor_or.ok()) { - return QueryScoringResults( - std::move(query_processor_or).status(), /*query_terms_in=*/{}, - /*scored_document_hits_in=*/{}, - /*parse_query_latency_ms_in=*/component_timer->GetElapsedMilliseconds(), - /*scoring_latency_ms_in=*/0); + search_stats->set_parse_query_latency_ms( + component_timer->GetElapsedMilliseconds()); + return QueryScoringResults(std::move(query_processor_or).status(), + /*query_terms_in=*/{}, + /*scored_document_hits_in=*/{}); } std::unique_ptr<QueryProcessor> query_processor = std::move(query_processor_or).ValueOrDie(); @@ -2113,15 +2202,25 @@ IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore( } else { query_results_or = ranking_strategy_or.status(); } + search_stats->set_parse_query_latency_ms( + component_timer->GetElapsedMilliseconds()); if (!query_results_or.ok()) { - return QueryScoringResults( - std::move(query_results_or).status(), /*query_terms_in=*/{}, - /*scored_document_hits_in=*/{}, - /*parse_query_latency_ms_in=*/component_timer->GetElapsedMilliseconds(), - /*scoring_latency_ms_in=*/0); + return QueryScoringResults(std::move(query_results_or).status(), + /*query_terms_in=*/{}, + /*scored_document_hits_in=*/{}); } QueryResults query_results = std::move(query_results_or).ValueOrDie(); - int64_t parse_query_latency_ms = component_timer->GetElapsedMilliseconds(); + + // Set SearchStats related to QueryResults. + int term_count = 0; + for (const auto& section_and_terms : query_results.query_terms) { + term_count += section_and_terms.second.size(); + } + search_stats->set_num_terms(term_count); + + if (query_results.features_in_use.count(kNumericSearchFeature)) { + search_stats->set_is_numeric_query(true); + } component_timer = clock_->GetNewTimer(); // Scores but does not rank the results. @@ -2132,22 +2231,20 @@ IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore( if (!scoring_processor_or.ok()) { return QueryScoringResults(std::move(scoring_processor_or).status(), std::move(query_results.query_terms), - /*scored_document_hits_in=*/{}, - parse_query_latency_ms, - /*scoring_latency_ms_in=*/0); + /*scored_document_hits_in=*/{}); } std::unique_ptr<ScoringProcessor> scoring_processor = std::move(scoring_processor_or).ValueOrDie(); std::vector<ScoredDocumentHit> scored_document_hits = - scoring_processor->Score(std::move(query_results.root_iterator), - result_spec.num_to_score(), - &query_results.query_term_iterators); - int64_t scoring_latency_ms = component_timer->GetElapsedMilliseconds(); + scoring_processor->Score( + std::move(query_results.root_iterator), result_spec.num_to_score(), + &query_results.query_term_iterators, search_stats); + search_stats->set_scoring_latency_ms( + component_timer->GetElapsedMilliseconds()); return QueryScoringResults(libtextclassifier3::Status::OK, std::move(query_results.query_terms), - std::move(scored_document_hits), - parse_query_latency_ms, scoring_latency_ms); + std::move(scored_document_hits)); } SearchResultProto IcingSearchEngine::GetNextPage(uint64_t next_page_token) { @@ -2238,7 +2335,7 @@ void IcingSearchEngine::InvalidateNextPageToken(uint64_t next_page_token) { result_state_manager_->InvalidateResultState(next_page_token); } -libtextclassifier3::StatusOr<std::vector<DocumentId>> +libtextclassifier3::StatusOr<DocumentStore::OptimizeResult> IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) { // Gets the current directory path and an empty tmp directory path for // document store optimization. @@ -2255,16 +2352,16 @@ IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) { } // Copies valid document data to tmp directory - libtextclassifier3::StatusOr<std::vector<DocumentId>> - document_id_old_to_new_or = document_store_->OptimizeInto( + libtextclassifier3::StatusOr<DocumentStore::OptimizeResult> + optimize_result_or = document_store_->OptimizeInto( temporary_document_dir, language_segmenter_.get(), optimize_stats); // Handles error if any - if (!document_id_old_to_new_or.ok()) { + if (!optimize_result_or.ok()) { filesystem_->DeleteDirectoryRecursively(temporary_document_dir.c_str()); return absl_ports::Annotate( absl_ports::AbortedError("Failed to optimize document store"), - document_id_old_to_new_or.status().error_message()); + optimize_result_or.status().error_message()); } // result_state_manager_ depends on document_store_. So we need to reset it at @@ -2333,7 +2430,9 @@ IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) { "Document store has been optimized, but a valid document store " "instance can't be created"); } - document_store_ = std::move(create_result_or.ValueOrDie().document_store); + DocumentStore::CreateResult create_result = + std::move(create_result_or).ValueOrDie(); + document_store_ = std::move(create_result.document_store); result_state_manager_ = std::make_unique<ResultStateManager>( performance_configuration_.max_num_total_hits, *document_store_); @@ -2343,7 +2442,19 @@ IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) { ICING_LOG(ERROR) << "Document store has been optimized, but it failed to " "delete temporary file directory"; } - return document_id_old_to_new_or; + + // Since we created new (optimized) document store with correct PersistToDisk + // call, we shouldn't have data loss or regenerate derived files. Therefore, + // if we really encounter any of these situations, then return DataLossError + // to let the caller rebuild index. + if (create_result.data_loss != DataLoss::NONE || + create_result.derived_files_regenerated) { + return absl_ports::DataLossError( + "Unexpected data loss or derived files regenerated for new document " + "store"); + } + + return optimize_result_or; } IcingSearchEngine::IndexRestorationResult @@ -2475,11 +2586,12 @@ IcingSearchEngine::CreateDataIndexingHandlers() { std::vector<std::unique_ptr<DataIndexingHandler>> handlers; // Term index handler - ICING_ASSIGN_OR_RETURN(std::unique_ptr<StringSectionIndexingHandler> - string_section_indexing_handler, - StringSectionIndexingHandler::Create( - clock_.get(), normalizer_.get(), index_.get())); - handlers.push_back(std::move(string_section_indexing_handler)); + ICING_ASSIGN_OR_RETURN( + std::unique_ptr<TermIndexingHandler> term_indexing_handler, + TermIndexingHandler::Create( + clock_.get(), normalizer_.get(), index_.get(), + options_.build_property_existence_metadata_hits())); + handlers.push_back(std::move(term_indexing_handler)); // Integer index handler ICING_ASSIGN_OR_RETURN(std::unique_ptr<IntegerSectionIndexingHandler> @@ -2489,10 +2601,11 @@ IcingSearchEngine::CreateDataIndexingHandlers() { handlers.push_back(std::move(integer_section_indexing_handler)); // Qualified id join index handler - ICING_ASSIGN_OR_RETURN(std::unique_ptr<QualifiedIdJoinIndexingHandler> - qualified_id_join_indexing_handler, - QualifiedIdJoinIndexingHandler::Create( - clock_.get(), qualified_id_join_index_.get())); + ICING_ASSIGN_OR_RETURN( + std::unique_ptr<QualifiedIdJoinIndexingHandler> + qualified_id_join_indexing_handler, + QualifiedIdJoinIndexingHandler::Create( + clock_.get(), document_store_.get(), qualified_id_join_index_.get())); handlers.push_back(std::move(qualified_id_join_indexing_handler)); return handlers; |