aboutsummaryrefslogtreecommitdiff
path: root/icing/icing-search-engine.cc
diff options
context:
space:
mode:
Diffstat (limited to 'icing/icing-search-engine.cc')
-rw-r--r--icing/icing-search-engine.cc321
1 files changed, 217 insertions, 104 deletions
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc
index 467c943..72be4e9 100644
--- a/icing/icing-search-engine.cc
+++ b/icing/icing-search-engine.cc
@@ -40,8 +40,10 @@
#include "icing/index/integer-section-indexing-handler.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/numeric/integer-index.h"
-#include "icing/index/string-section-indexing-handler.h"
+#include "icing/index/term-indexing-handler.h"
#include "icing/join/join-processor.h"
+#include "icing/join/qualified-id-join-index-impl-v1.h"
+#include "icing/join/qualified-id-join-index-impl-v2.h"
#include "icing/join/qualified-id-join-index.h"
#include "icing/join/qualified-id-join-indexing-handler.h"
#include "icing/legacy/index/icing-filesystem.h"
@@ -87,6 +89,7 @@
#include "icing/transform/normalizer.h"
#include "icing/util/clock.h"
#include "icing/util/crc32.h"
+#include "icing/util/data-loss.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
#include "icing/util/tokenized-document.h"
@@ -227,6 +230,29 @@ libtextclassifier3::Status ValidateSuggestionSpec(
return libtextclassifier3::Status::OK;
}
+bool IsV2QualifiedIdJoinIndexEnabled(const IcingSearchEngineOptions& options) {  // True only when both option flags read below are set.
+ return options.use_new_qualified_id_join_index() &&
+ options.document_store_namespace_id_fingerprint();
+}
+
+libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
+CreateQualifiedIdJoinIndex(const Filesystem& filesystem,
+ std::string qualified_id_join_index_dir,
+ const IcingSearchEngineOptions& options) {  // Creates the V2 or V1 join index implementation, chosen from `options`; returns whatever that Create call returns.
+ if (IsV2QualifiedIdJoinIndexEnabled(options)) {
+ // V2: used when IsV2QualifiedIdJoinIndexEnabled(options) is true; configured with pre_mapping_fbv only.
+ return QualifiedIdJoinIndexImplV2::Create(
+ filesystem, std::move(qualified_id_join_index_dir),  // by-value directory string is moved into Create
+ options.pre_mapping_fbv());
+ } else {
+ // V1: used otherwise; additionally passes use_persistent_hash_map to Create.
+ // TODO(b/275121148): deprecate this part after rollout v2.
+ return QualifiedIdJoinIndexImplV1::Create(
+ filesystem, std::move(qualified_id_join_index_dir),
+ options.pre_mapping_fbv(), options.use_persistent_hash_map());
+ }
+}
+
// Version file is a single file under base_dir containing version info of the
// existing data.
std::string MakeVersionFilePath(const std::string& base_dir) {
@@ -639,22 +665,33 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
"Could not delete directories: ", index_dir, ", ", integer_index_dir,
", ", qualified_id_join_index_dir, " and ", doc_store_dir));
}
- ICING_RETURN_IF_ERROR(InitializeDocumentStore(
- /*force_recovery_and_revalidate_documents=*/false, initialize_stats));
- index_init_status = InitializeIndex(initialize_stats);
+ ICING_ASSIGN_OR_RETURN(
+ bool document_store_derived_files_regenerated,
+ InitializeDocumentStore(
+ /*force_recovery_and_revalidate_documents=*/false,
+ initialize_stats));
+ index_init_status = InitializeIndex(
+ document_store_derived_files_regenerated, initialize_stats);
if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) {
return index_init_status;
}
} else if (filesystem_->FileExists(marker_filepath.c_str())) {
// If the marker file is still around then something wonky happened when we
// last tried to set the schema.
+ //
+ // Since we're going to rebuild all indices in this case, the return value
+ // of InitializeDocumentStore (document_store_derived_files_regenerated) is
+ // unused.
ICING_RETURN_IF_ERROR(InitializeDocumentStore(
/*force_recovery_and_revalidate_documents=*/true, initialize_stats));
// We're going to need to build the index from scratch. So just delete its
// directory now.
// Discard index directory and instantiate a new one.
- Index::Options index_options(index_dir, options_.index_merge_size());
+ Index::Options index_options(
+ index_dir, options_.index_merge_size(),
+ options_.lite_index_sort_at_indexing(), options_.lite_index_sort_size(),
+ options_.build_property_existence_metadata_hits());
if (!filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) ||
!filesystem_->CreateDirectoryRecursively(index_dir.c_str())) {
return absl_ports::InternalError(
@@ -682,9 +719,8 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
*filesystem_, qualified_id_join_index_dir));
ICING_ASSIGN_OR_RETURN(
qualified_id_join_index_,
- QualifiedIdJoinIndex::Create(
- *filesystem_, std::move(qualified_id_join_index_dir),
- options_.pre_mapping_fbv(), options_.use_persistent_hash_map()));
+ CreateQualifiedIdJoinIndex(
+ *filesystem_, std::move(qualified_id_join_index_dir), options_));
std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer();
IndexRestorationResult restore_result = RestoreIndexIfNeeded();
@@ -709,9 +745,12 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
initialize_stats->set_qualified_id_join_index_restoration_cause(
InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC);
} else if (version_state_change != version_util::StateChange::kCompatible) {
- ICING_RETURN_IF_ERROR(InitializeDocumentStore(
- /*force_recovery_and_revalidate_documents=*/true, initialize_stats));
- index_init_status = InitializeIndex(initialize_stats);
+ ICING_ASSIGN_OR_RETURN(bool document_store_derived_files_regenerated,
+ InitializeDocumentStore(
+ /*force_recovery_and_revalidate_documents=*/true,
+ initialize_stats));
+ index_init_status = InitializeIndex(
+ document_store_derived_files_regenerated, initialize_stats);
if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) {
return index_init_status;
}
@@ -727,9 +766,13 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
initialize_stats->set_qualified_id_join_index_restoration_cause(
InitializeStatsProto::VERSION_CHANGED);
} else {
- ICING_RETURN_IF_ERROR(InitializeDocumentStore(
- /*force_recovery_and_revalidate_documents=*/false, initialize_stats));
- index_init_status = InitializeIndex(initialize_stats);
+ ICING_ASSIGN_OR_RETURN(
+ bool document_store_derived_files_regenerated,
+ InitializeDocumentStore(
+ /*force_recovery_and_revalidate_documents=*/false,
+ initialize_stats));
+ index_init_status = InitializeIndex(
+ document_store_derived_files_regenerated, initialize_stats);
if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) {
return index_init_status;
}
@@ -763,7 +806,7 @@ libtextclassifier3::Status IcingSearchEngine::InitializeSchemaStore(
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status IcingSearchEngine::InitializeDocumentStore(
+libtextclassifier3::StatusOr<bool> IcingSearchEngine::InitializeDocumentStore(
bool force_recovery_and_revalidate_documents,
InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(initialize_stats);
@@ -785,10 +828,11 @@ libtextclassifier3::Status IcingSearchEngine::InitializeDocumentStore(
options_.compression_level(), initialize_stats));
document_store_ = std::move(create_result.document_store);
- return libtextclassifier3::Status::OK;
+ return create_result.derived_files_regenerated;
}
libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
+ bool document_store_derived_files_regenerated,
InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(initialize_stats);
@@ -798,7 +842,10 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
return absl_ports::InternalError(
absl_ports::StrCat("Could not create directory: ", index_dir));
}
- Index::Options index_options(index_dir, options_.index_merge_size());
+ Index::Options index_options(
+ index_dir, options_.index_merge_size(),
+ options_.lite_index_sort_at_indexing(), options_.lite_index_sort_size(),
+ options_.build_property_existence_metadata_hits());
// Term index
InitializeStatsProto::RecoveryCause index_recovery_cause;
@@ -858,29 +905,44 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
std::string qualified_id_join_index_dir =
MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir());
InitializeStatsProto::RecoveryCause qualified_id_join_index_recovery_cause;
- auto qualified_id_join_index_or = QualifiedIdJoinIndex::Create(
- *filesystem_, qualified_id_join_index_dir, options_.pre_mapping_fbv(),
- options_.use_persistent_hash_map());
- if (!qualified_id_join_index_or.ok()) {
+ if (document_store_derived_files_regenerated &&
+ IsV2QualifiedIdJoinIndexEnabled(options_)) {
+ // V2 qualified id join index depends on document store derived files, so we
+ // have to rebuild it from scratch if
+ // document_store_derived_files_regenerated is true.
ICING_RETURN_IF_ERROR(QualifiedIdJoinIndex::Discard(
*filesystem_, qualified_id_join_index_dir));
- qualified_id_join_index_recovery_cause = InitializeStatsProto::IO_ERROR;
-
- // Try recreating it from scratch and rebuild everything.
ICING_ASSIGN_OR_RETURN(
qualified_id_join_index_,
- QualifiedIdJoinIndex::Create(
- *filesystem_, std::move(qualified_id_join_index_dir),
- options_.pre_mapping_fbv(), options_.use_persistent_hash_map()));
- } else {
- // Qualified id join index was created fine.
- qualified_id_join_index_ =
- std::move(qualified_id_join_index_or).ValueOrDie();
- // If a recover does have to happen, then it must be because the index is
- // out of sync with the document store.
+ CreateQualifiedIdJoinIndex(
+ *filesystem_, std::move(qualified_id_join_index_dir), options_));
+
qualified_id_join_index_recovery_cause =
- InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH;
+ InitializeStatsProto::DEPENDENCIES_CHANGED;
+ } else {
+ auto qualified_id_join_index_or = CreateQualifiedIdJoinIndex(
+ *filesystem_, qualified_id_join_index_dir, options_);
+ if (!qualified_id_join_index_or.ok()) {
+ ICING_RETURN_IF_ERROR(QualifiedIdJoinIndex::Discard(
+ *filesystem_, qualified_id_join_index_dir));
+
+ qualified_id_join_index_recovery_cause = InitializeStatsProto::IO_ERROR;
+
+ // Try recreating it from scratch and rebuild everything.
+ ICING_ASSIGN_OR_RETURN(
+ qualified_id_join_index_,
+ CreateQualifiedIdJoinIndex(
+ *filesystem_, std::move(qualified_id_join_index_dir), options_));
+ } else {
+ // Qualified id join index was created fine.
+ qualified_id_join_index_ =
+ std::move(qualified_id_join_index_or).ValueOrDie();
+ // If a recover does have to happen, then it must be because the index is
+ // out of sync with the document store.
+ qualified_id_join_index_recovery_cause =
+ InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH;
+ }
}
std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer();
@@ -1552,33 +1614,41 @@ OptimizeResultProto IcingSearchEngine::Optimize() {
// TODO(b/143646633): figure out if we need to optimize index and doc store
// at the same time.
std::unique_ptr<Timer> optimize_doc_store_timer = clock_->GetNewTimer();
- libtextclassifier3::StatusOr<std::vector<DocumentId>>
- document_id_old_to_new_or = OptimizeDocumentStore(optimize_stats);
+ libtextclassifier3::StatusOr<DocumentStore::OptimizeResult>
+ optimize_result_or = OptimizeDocumentStore(optimize_stats);
optimize_stats->set_document_store_optimize_latency_ms(
optimize_doc_store_timer->GetElapsedMilliseconds());
- if (!document_id_old_to_new_or.ok() &&
- !absl_ports::IsDataLoss(document_id_old_to_new_or.status())) {
+ if (!optimize_result_or.ok() &&
+ !absl_ports::IsDataLoss(optimize_result_or.status())) {
// The status now is either ABORTED_ERROR or INTERNAL_ERROR.
// If ABORTED_ERROR, Icing should still be working.
// If INTERNAL_ERROR, we're having IO errors or other errors that we can't
// recover from.
- TransformStatus(document_id_old_to_new_or.status(), result_status);
+ TransformStatus(optimize_result_or.status(), result_status);
return result_proto;
}
// The status is either OK or DATA_LOSS. The optimized document store is
// guaranteed to work, so we update index according to the new document store.
std::unique_ptr<Timer> optimize_index_timer = clock_->GetNewTimer();
+ auto doc_store_optimize_result_status = optimize_result_or.status();
bool should_rebuild_index =
- !document_id_old_to_new_or.ok() ||
+ !optimize_result_or.ok() ||
+ optimize_result_or.ValueOrDie().should_rebuild_index ||
ShouldRebuildIndex(*optimize_stats,
options_.optimize_rebuild_index_threshold());
if (!should_rebuild_index) {
+ // At this point should_rebuild_index is false, so it means
+ // optimize_result_or.ok() is true and therefore it is safe to call
+ // ValueOrDie.
+ DocumentStore::OptimizeResult optimize_result =
+ std::move(optimize_result_or).ValueOrDie();
+
optimize_stats->set_index_restoration_mode(
OptimizeStatsProto::INDEX_TRANSLATION);
libtextclassifier3::Status index_optimize_status =
- index_->Optimize(document_id_old_to_new_or.ValueOrDie(),
+ index_->Optimize(optimize_result.document_id_old_to_new,
document_store_->last_added_document_id());
if (!index_optimize_status.ok()) {
ICING_LOG(WARNING) << "Failed to optimize index. Error: "
@@ -1587,7 +1657,7 @@ OptimizeResultProto IcingSearchEngine::Optimize() {
}
libtextclassifier3::Status integer_index_optimize_status =
- integer_index_->Optimize(document_id_old_to_new_or.ValueOrDie(),
+ integer_index_->Optimize(optimize_result.document_id_old_to_new,
document_store_->last_added_document_id());
if (!integer_index_optimize_status.ok()) {
ICING_LOG(WARNING) << "Failed to optimize integer index. Error: "
@@ -1597,7 +1667,8 @@ OptimizeResultProto IcingSearchEngine::Optimize() {
libtextclassifier3::Status qualified_id_join_index_optimize_status =
qualified_id_join_index_->Optimize(
- document_id_old_to_new_or.ValueOrDie(),
+ optimize_result.document_id_old_to_new,
+ optimize_result.namespace_id_old_to_new,
document_store_->last_added_document_id());
if (!qualified_id_join_index_optimize_status.ok()) {
ICING_LOG(WARNING)
@@ -1609,6 +1680,7 @@ OptimizeResultProto IcingSearchEngine::Optimize() {
// If we received a DATA_LOSS error from OptimizeDocumentStore, we have a
// valid document store, but it might be the old one or the new one. So throw
// out the index data and rebuild from scratch.
+ // Also rebuild index if DocumentStore::OptimizeInto hints to do so.
// Likewise, if Index::Optimize failed, then attempt to recover the index by
// rebuilding from scratch.
// If ShouldRebuildIndex() returns true, we will also rebuild the index for
@@ -1667,7 +1739,11 @@ OptimizeResultProto IcingSearchEngine::Optimize() {
// Update the status for this run and write it.
auto optimize_status = std::make_unique<OptimizeStatusProto>();
optimize_status->set_last_successful_optimize_run_time_ms(current_time);
- optimize_status_file.Write(std::move(optimize_status));
+ auto write_status = optimize_status_file.Write(std::move(optimize_status));
+ if (!write_status.ok()) {
+ ICING_LOG(ERROR) << "Failed to write optimize status:\n"
+ << write_status.error_message();
+ }
// Flushes data to disk after doing optimization
status = InternalPersistToDisk(PersistType::FULL);
@@ -1680,7 +1756,7 @@ OptimizeResultProto IcingSearchEngine::Optimize() {
optimize_stats->set_storage_size_after(
Filesystem::SanitizeFileSize(after_size));
- TransformStatus(document_id_old_to_new_or.status(), result_status);
+ TransformStatus(doc_store_optimize_result_status, result_status);
return result_proto;
}
@@ -1887,7 +1963,17 @@ SearchResultProto IcingSearchEngine::InternalSearch(
StatusProto* result_status = result_proto.mutable_status();
QueryStatsProto* query_stats = result_proto.mutable_query_stats();
+ query_stats->set_is_first_page(true);
+ query_stats->set_requested_page_size(result_spec.num_per_page());
+
+ // TODO(b/305098009): deprecate search-related flat fields in query_stats.
+ query_stats->set_num_namespaces_filtered(
+ search_spec.namespace_filters_size());
+ query_stats->set_num_schema_types_filtered(
+ search_spec.schema_type_filters_size());
query_stats->set_query_length(search_spec.query().length());
+ query_stats->set_ranking_strategy(scoring_spec.rank_by());
+
if (!initialized_) {
result_status->set_code(StatusProto::FAILED_PRECONDITION);
result_status->set_message("IcingSearchEngine has not been initialized!");
@@ -1906,27 +1992,22 @@ SearchResultProto IcingSearchEngine::InternalSearch(
return result_proto;
}
- query_stats->set_num_namespaces_filtered(
- search_spec.namespace_filters_size());
- query_stats->set_num_schema_types_filtered(
- search_spec.schema_type_filters_size());
- query_stats->set_ranking_strategy(scoring_spec.rank_by());
- query_stats->set_is_first_page(true);
- query_stats->set_requested_page_size(result_spec.num_per_page());
-
const JoinSpecProto& join_spec = search_spec.join_spec();
std::unique_ptr<JoinChildrenFetcher> join_children_fetcher;
std::unique_ptr<ResultAdjustmentInfo> child_result_adjustment_info;
int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
if (!join_spec.parent_property_expression().empty() &&
!join_spec.child_property_expression().empty()) {
+ query_stats->set_is_join_query(true);
+ QueryStatsProto::SearchStats* child_search_stats =
+ query_stats->mutable_child_search_stats();
+
// Process child query
QueryScoringResults nested_query_scoring_results = ProcessQueryAndScore(
join_spec.nested_spec().search_spec(),
join_spec.nested_spec().scoring_spec(),
join_spec.nested_spec().result_spec(),
- /*join_children_fetcher=*/nullptr, current_time_ms);
- // TOOD(b/256022027): set different kinds of latency for 2nd query.
+ /*join_children_fetcher=*/nullptr, current_time_ms, child_search_stats);
if (!nested_query_scoring_results.status.ok()) {
TransformStatus(nested_query_scoring_results.status, result_status);
return result_proto;
@@ -1957,24 +2038,24 @@ SearchResultProto IcingSearchEngine::InternalSearch(
}
// Process parent query
- QueryScoringResults query_scoring_results =
- ProcessQueryAndScore(search_spec, scoring_spec, result_spec,
- join_children_fetcher.get(), current_time_ms);
- int term_count = 0;
- for (const auto& section_and_terms : query_scoring_results.query_terms) {
- term_count += section_and_terms.second.size();
- }
- query_stats->set_num_terms(term_count);
+ QueryStatsProto::SearchStats* parent_search_stats =
+ query_stats->mutable_parent_search_stats();
+ QueryScoringResults query_scoring_results = ProcessQueryAndScore(
+ search_spec, scoring_spec, result_spec, join_children_fetcher.get(),
+ current_time_ms, parent_search_stats);
+ // TODO(b/305098009): deprecate search-related flat fields in query_stats.
+ query_stats->set_num_terms(parent_search_stats->num_terms());
query_stats->set_parse_query_latency_ms(
- query_scoring_results.parse_query_latency_ms);
- query_stats->set_scoring_latency_ms(query_scoring_results.scoring_latency_ms);
+ parent_search_stats->parse_query_latency_ms());
+ query_stats->set_scoring_latency_ms(
+ parent_search_stats->scoring_latency_ms());
+ query_stats->set_num_documents_scored(
+ parent_search_stats->num_documents_scored());
if (!query_scoring_results.status.ok()) {
TransformStatus(query_scoring_results.status, result_status);
return result_proto;
}
- query_stats->set_num_documents_scored(
- query_scoring_results.scored_document_hits.size());
// Returns early for empty result
if (query_scoring_results.scored_document_hits.empty()) {
result_status->set_code(StatusProto::OK);
@@ -2088,7 +2169,15 @@ SearchResultProto IcingSearchEngine::InternalSearch(
IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore(
const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
const ResultSpecProto& result_spec,
- const JoinChildrenFetcher* join_children_fetcher, int64_t current_time_ms) {
+ const JoinChildrenFetcher* join_children_fetcher, int64_t current_time_ms,
+ QueryStatsProto::SearchStats* search_stats) {
+ search_stats->set_num_namespaces_filtered(
+ search_spec.namespace_filters_size());
+ search_stats->set_num_schema_types_filtered(
+ search_spec.schema_type_filters_size());
+ search_stats->set_query_length(search_spec.query().length());
+ search_stats->set_ranking_strategy(scoring_spec.rank_by());
+
std::unique_ptr<Timer> component_timer = clock_->GetNewTimer();
// Gets unordered results from query processor
@@ -2096,11 +2185,11 @@ IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore(
index_.get(), integer_index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(), schema_store_.get());
if (!query_processor_or.ok()) {
- return QueryScoringResults(
- std::move(query_processor_or).status(), /*query_terms_in=*/{},
- /*scored_document_hits_in=*/{},
- /*parse_query_latency_ms_in=*/component_timer->GetElapsedMilliseconds(),
- /*scoring_latency_ms_in=*/0);
+ search_stats->set_parse_query_latency_ms(
+ component_timer->GetElapsedMilliseconds());
+ return QueryScoringResults(std::move(query_processor_or).status(),
+ /*query_terms_in=*/{},
+ /*scored_document_hits_in=*/{});
}
std::unique_ptr<QueryProcessor> query_processor =
std::move(query_processor_or).ValueOrDie();
@@ -2113,15 +2202,25 @@ IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore(
} else {
query_results_or = ranking_strategy_or.status();
}
+ search_stats->set_parse_query_latency_ms(
+ component_timer->GetElapsedMilliseconds());
if (!query_results_or.ok()) {
- return QueryScoringResults(
- std::move(query_results_or).status(), /*query_terms_in=*/{},
- /*scored_document_hits_in=*/{},
- /*parse_query_latency_ms_in=*/component_timer->GetElapsedMilliseconds(),
- /*scoring_latency_ms_in=*/0);
+ return QueryScoringResults(std::move(query_results_or).status(),
+ /*query_terms_in=*/{},
+ /*scored_document_hits_in=*/{});
}
QueryResults query_results = std::move(query_results_or).ValueOrDie();
- int64_t parse_query_latency_ms = component_timer->GetElapsedMilliseconds();
+
+ // Set SearchStats related to QueryResults.
+ int term_count = 0;
+ for (const auto& section_and_terms : query_results.query_terms) {
+ term_count += section_and_terms.second.size();
+ }
+ search_stats->set_num_terms(term_count);
+
+ if (query_results.features_in_use.count(kNumericSearchFeature)) {
+ search_stats->set_is_numeric_query(true);
+ }
component_timer = clock_->GetNewTimer();
// Scores but does not rank the results.
@@ -2132,22 +2231,20 @@ IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore(
if (!scoring_processor_or.ok()) {
return QueryScoringResults(std::move(scoring_processor_or).status(),
std::move(query_results.query_terms),
- /*scored_document_hits_in=*/{},
- parse_query_latency_ms,
- /*scoring_latency_ms_in=*/0);
+ /*scored_document_hits_in=*/{});
}
std::unique_ptr<ScoringProcessor> scoring_processor =
std::move(scoring_processor_or).ValueOrDie();
std::vector<ScoredDocumentHit> scored_document_hits =
- scoring_processor->Score(std::move(query_results.root_iterator),
- result_spec.num_to_score(),
- &query_results.query_term_iterators);
- int64_t scoring_latency_ms = component_timer->GetElapsedMilliseconds();
+ scoring_processor->Score(
+ std::move(query_results.root_iterator), result_spec.num_to_score(),
+ &query_results.query_term_iterators, search_stats);
+ search_stats->set_scoring_latency_ms(
+ component_timer->GetElapsedMilliseconds());
return QueryScoringResults(libtextclassifier3::Status::OK,
std::move(query_results.query_terms),
- std::move(scored_document_hits),
- parse_query_latency_ms, scoring_latency_ms);
+ std::move(scored_document_hits));
}
SearchResultProto IcingSearchEngine::GetNextPage(uint64_t next_page_token) {
@@ -2238,7 +2335,7 @@ void IcingSearchEngine::InvalidateNextPageToken(uint64_t next_page_token) {
result_state_manager_->InvalidateResultState(next_page_token);
}
-libtextclassifier3::StatusOr<std::vector<DocumentId>>
+libtextclassifier3::StatusOr<DocumentStore::OptimizeResult>
IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) {
// Gets the current directory path and an empty tmp directory path for
// document store optimization.
@@ -2255,16 +2352,16 @@ IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) {
}
// Copies valid document data to tmp directory
- libtextclassifier3::StatusOr<std::vector<DocumentId>>
- document_id_old_to_new_or = document_store_->OptimizeInto(
+ libtextclassifier3::StatusOr<DocumentStore::OptimizeResult>
+ optimize_result_or = document_store_->OptimizeInto(
temporary_document_dir, language_segmenter_.get(), optimize_stats);
// Handles error if any
- if (!document_id_old_to_new_or.ok()) {
+ if (!optimize_result_or.ok()) {
filesystem_->DeleteDirectoryRecursively(temporary_document_dir.c_str());
return absl_ports::Annotate(
absl_ports::AbortedError("Failed to optimize document store"),
- document_id_old_to_new_or.status().error_message());
+ optimize_result_or.status().error_message());
}
// result_state_manager_ depends on document_store_. So we need to reset it at
@@ -2333,7 +2430,9 @@ IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) {
"Document store has been optimized, but a valid document store "
"instance can't be created");
}
- document_store_ = std::move(create_result_or.ValueOrDie().document_store);
+ DocumentStore::CreateResult create_result =
+ std::move(create_result_or).ValueOrDie();
+ document_store_ = std::move(create_result.document_store);
result_state_manager_ = std::make_unique<ResultStateManager>(
performance_configuration_.max_num_total_hits, *document_store_);
@@ -2343,7 +2442,19 @@ IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) {
ICING_LOG(ERROR) << "Document store has been optimized, but it failed to "
"delete temporary file directory";
}
- return document_id_old_to_new_or;
+
+ // Since we created new (optimized) document store with correct PersistToDisk
+ // call, we shouldn't have data loss or regenerate derived files. Therefore,
+ // if we really encounter any of these situations, then return DataLossError
+ // to let the caller rebuild index.
+ if (create_result.data_loss != DataLoss::NONE ||
+ create_result.derived_files_regenerated) {
+ return absl_ports::DataLossError(
+ "Unexpected data loss or derived files regenerated for new document "
+ "store");
+ }
+
+ return optimize_result_or;
}
IcingSearchEngine::IndexRestorationResult
@@ -2475,11 +2586,12 @@ IcingSearchEngine::CreateDataIndexingHandlers() {
std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
// Term index handler
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<StringSectionIndexingHandler>
- string_section_indexing_handler,
- StringSectionIndexingHandler::Create(
- clock_.get(), normalizer_.get(), index_.get()));
- handlers.push_back(std::move(string_section_indexing_handler));
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<TermIndexingHandler> term_indexing_handler,
+ TermIndexingHandler::Create(
+ clock_.get(), normalizer_.get(), index_.get(),
+ options_.build_property_existence_metadata_hits()));
+ handlers.push_back(std::move(term_indexing_handler));
// Integer index handler
ICING_ASSIGN_OR_RETURN(std::unique_ptr<IntegerSectionIndexingHandler>
@@ -2489,10 +2601,11 @@ IcingSearchEngine::CreateDataIndexingHandlers() {
handlers.push_back(std::move(integer_section_indexing_handler));
// Qualified id join index handler
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<QualifiedIdJoinIndexingHandler>
- qualified_id_join_indexing_handler,
- QualifiedIdJoinIndexingHandler::Create(
- clock_.get(), qualified_id_join_index_.get()));
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler>
+ qualified_id_join_indexing_handler,
+ QualifiedIdJoinIndexingHandler::Create(
+ clock_.get(), document_store_.get(), qualified_id_join_index_.get()));
handlers.push_back(std::move(qualified_id_join_indexing_handler));
return handlers;