diff options
author | Xin Li <delphij@google.com> | 2022-02-11 06:57:32 +0000 |
---|---|---|
committer | Xin Li <delphij@google.com> | 2022-02-11 06:57:32 +0000 |
commit | 72ba6d64ec0e63a73b33c83bfbb9d92a84b74490 (patch) | |
tree | 9a5164283e60c4a632018bb8ec89734ab577d307 | |
parent | ac1bdbdf75212de81b3b5b2b413e4696125eb2be (diff) | |
parent | 1d9bda212ace0fc84ba6db2826a67d6dcc3d2b31 (diff) | |
download | icing-sam_222710654.tar.gz |
Merge sc-v2-dev-plus-aosp-without-vendor@8084891sam_222710654
Bug: 214455710
Merged-In: I1f2599847db38e6c283c81b678616b416c47688c
Change-Id: I900e032bc567fc6d2056e213bf4ffb97b87548fb
26 files changed, 829 insertions, 189 deletions
diff --git a/build.gradle b/build.gradle index 882a929..0f60c5e 100644 --- a/build.gradle +++ b/build.gradle @@ -14,8 +14,9 @@ * limitations under the License. */ + +import androidx.build.dependencies.DependenciesKt import static androidx.build.SupportConfig.* -import static androidx.build.dependencies.DependenciesKt.* buildscript { dependencies { @@ -57,14 +58,14 @@ dependencies { implementation('com.google.protobuf:protobuf-javalite:3.10.0') - androidTestImplementation(ANDROIDX_TEST_CORE) - androidTestImplementation(ANDROIDX_TEST_RULES) - androidTestImplementation(TRUTH) + androidTestImplementation(libs.testCore) + androidTestImplementation(libs.testRules) + androidTestImplementation(libs.truth) } protobuf { protoc { - artifact = 'com.google.protobuf:protoc:3.10.0' + artifact = DependenciesKt.getDependencyAsString(libs.protobufCompiler) } generateProtoTasks { @@ -93,7 +94,7 @@ android.libraryVariants.all { variant -> // only renames the java classes. Remove them here since they are unused. // Expand the jar and remove any .proto files. from(zipTree(configurations.detachedConfiguration( - dependencies.create(PROTOBUF_LITE)).getSingleFile())) { + dependencies.create(libs.protobufLite.get())).getSingleFile())) { exclude("**/*.proto") } diff --git a/icing/file/portable-file-backed-proto-log.h b/icing/file/portable-file-backed-proto-log.h index 825b763..5284b6e 100644 --- a/icing/file/portable-file-backed-proto-log.h +++ b/icing/file/portable-file-backed-proto-log.h @@ -183,7 +183,7 @@ class PortableFileBackedProtoLog { void SetCompressFlag(bool compress) { SetFlag(kCompressBit, compress); } - bool GetDirtyFlag() { return GetFlag(kDirtyBit); } + bool GetDirtyFlag() const { return GetFlag(kDirtyBit); } void SetDirtyFlag(bool dirty) { SetFlag(kDirtyBit, dirty); } @@ -1186,21 +1186,7 @@ libtextclassifier3::Status PortableFileBackedProtoLog<ProtoT>::PersistToDisk() { return libtextclassifier3::Status::OK; } - int64_t new_content_size = file_size - header_->GetRewindOffset(); - Crc32 crc; - if (new_content_size < 0) { - // File shrunk, recalculate the entire checksum. - ICING_ASSIGN_OR_RETURN( - crc, - ComputeChecksum(filesystem_, file_path_, Crc32(), - /*start=*/kHeaderReservedBytes, /*end=*/file_size)); - } else { - // Append new changes to the existing checksum. - ICING_ASSIGN_OR_RETURN( - crc, ComputeChecksum(filesystem_, file_path_, - Crc32(header_->GetLogChecksum()), - header_->GetRewindOffset(), file_size)); - } + ICING_ASSIGN_OR_RETURN(Crc32 crc, ComputeChecksum()); header_->SetLogChecksum(crc.Get()); header_->SetRewindOffset(file_size); @@ -1219,9 +1205,26 @@ libtextclassifier3::Status PortableFileBackedProtoLog<ProtoT>::PersistToDisk() { template <typename ProtoT> libtextclassifier3::StatusOr<Crc32> PortableFileBackedProtoLog<ProtoT>::ComputeChecksum() { - return PortableFileBackedProtoLog<ProtoT>::ComputeChecksum( - filesystem_, file_path_, Crc32(), /*start=*/kHeaderReservedBytes, - /*end=*/filesystem_->GetFileSize(file_path_.c_str())); + int64_t file_size = filesystem_->GetFileSize(file_path_.c_str()); + int64_t new_content_size = file_size - header_->GetRewindOffset(); + Crc32 crc; + if (new_content_size == 0) { + // No new protos appended, return cached checksum + return Crc32(header_->GetLogChecksum()); + } else if (new_content_size < 0) { + // File shrunk, recalculate the entire checksum. + ICING_ASSIGN_OR_RETURN( + crc, + ComputeChecksum(filesystem_, file_path_, Crc32(), + /*start=*/kHeaderReservedBytes, /*end=*/file_size)); + } else { + // Append new changes to the existing checksum. + ICING_ASSIGN_OR_RETURN( + crc, ComputeChecksum( + filesystem_, file_path_, Crc32(header_->GetLogChecksum()), + /*start=*/header_->GetRewindOffset(), /*end=*/file_size)); + } + return crc; } } // namespace lib diff --git a/icing/file/portable-file-backed-proto-log_benchmark.cc b/icing/file/portable-file-backed-proto-log_benchmark.cc index 04ccab0..f83ccd6 100644 --- a/icing/file/portable-file-backed-proto-log_benchmark.cc +++ b/icing/file/portable-file-backed-proto-log_benchmark.cc @@ -246,6 +246,98 @@ static void BM_ComputeChecksum(benchmark::State& state) { } BENCHMARK(BM_ComputeChecksum)->Range(1024, 1 << 20); +static void BM_ComputeChecksumWithCachedChecksum(benchmark::State& state) { + const Filesystem filesystem; + const std::string file_path = GetTestTempDir() + "/proto.log"; + int max_proto_size = (1 << 24) - 1; // 16 MiB + bool compress = true; + + // Make sure it doesn't already exist. + filesystem.DeleteFile(file_path.c_str()); + + auto proto_log = PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem, file_path, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress, max_proto_size)) + .ValueOrDie() + .proto_log; + + DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); + + // Make the document 1KiB + int string_length = 1024; + std::default_random_engine random; + const std::string rand_str = + RandomString(kAlNumAlphabet, string_length, &random); + + auto document_properties = document.add_properties(); + document_properties->set_name("string property"); + document_properties->add_string_values(rand_str); + + // Write some content and persist. This should update our cached checksum to + // include the document. + ICING_ASSERT_OK(proto_log->WriteProto(document)); + ICING_ASSERT_OK(proto_log->PersistToDisk()); + + // This ComputeChecksum call shouldn't need to do any computation since we can + // reuse our cached checksum. + for (auto _ : state) { + testing::DoNotOptimize(proto_log->ComputeChecksum()); + } + + // Cleanup after ourselves + filesystem.DeleteFile(file_path.c_str()); +} +BENCHMARK(BM_ComputeChecksumWithCachedChecksum); + +static void BM_ComputeChecksumOnlyForTail(benchmark::State& state) { + const Filesystem filesystem; + const std::string file_path = GetTestTempDir() + "/proto.log"; + int max_proto_size = (1 << 24) - 1; // 16 MiB + bool compress = true; + + // Make sure it doesn't already exist. + filesystem.DeleteFile(file_path.c_str()); + + auto proto_log = PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem, file_path, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress, max_proto_size)) + .ValueOrDie() + .proto_log; + + DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); + + // Make the document 1KiB + int string_length = 1024; + std::default_random_engine random; + const std::string rand_str = + RandomString(kAlNumAlphabet, string_length, &random); + + auto document_properties = document.add_properties(); + document_properties->set_name("string property"); + document_properties->add_string_values(rand_str); + + // Write some content and persist. This should update our cached checksum to + // include the document. + ICING_ASSERT_OK(proto_log->WriteProto(document)); + ICING_ASSERT_OK(proto_log->PersistToDisk()); + + // Write another proto into the tail, but it's not included in our cached + // checksum since we didn't call persist. + ICING_ASSERT_OK(proto_log->WriteProto(document)); + + // ComputeChecksum should be calculating the checksum of the tail and adding + // it to the cached checksum we have. + for (auto _ : state) { + testing::DoNotOptimize(proto_log->ComputeChecksum()); + } + + // Cleanup after ourselves + filesystem.DeleteFile(file_path.c_str()); +} +BENCHMARK(BM_ComputeChecksumOnlyForTail); + } // namespace } // namespace lib } // namespace icing diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc index 20a6bb9..1efad9b 100644 --- a/icing/icing-search-engine.cc +++ b/icing/icing-search-engine.cc @@ -502,15 +502,18 @@ SetSchemaResultProto IcingSearchEngine::SetSchema( StatusProto* result_status = result_proto.mutable_status(); absl_ports::unique_lock l(&mutex_); + std::unique_ptr<Timer> timer = clock_->GetNewTimer(); if (!initialized_) { result_status->set_code(StatusProto::FAILED_PRECONDITION); result_status->set_message("IcingSearchEngine has not been initialized!"); + result_proto.set_latency_ms(timer->GetElapsedMilliseconds()); return result_proto; } auto lost_previous_schema_or = LostPreviousSchema(); if (!lost_previous_schema_or.ok()) { TransformStatus(lost_previous_schema_or.status(), result_status); + result_proto.set_latency_ms(timer->GetElapsedMilliseconds()); return result_proto; } bool lost_previous_schema = lost_previous_schema_or.ValueOrDie(); @@ -528,10 +531,11 @@ SetSchemaResultProto IcingSearchEngine::SetSchema( std::move(new_schema), ignore_errors_and_delete_documents); if (!set_schema_result_or.ok()) { TransformStatus(set_schema_result_or.status(), result_status); + result_proto.set_latency_ms(timer->GetElapsedMilliseconds()); return result_proto; } - const SchemaStore::SetSchemaResult set_schema_result = - set_schema_result_or.ValueOrDie(); + SchemaStore::SetSchemaResult set_schema_result = + std::move(set_schema_result_or).ValueOrDie(); for (const std::string& deleted_type : set_schema_result.schema_types_deleted_by_name) { @@ -543,6 +547,25 @@ SetSchemaResultProto IcingSearchEngine::SetSchema( result_proto.add_incompatible_schema_types(incompatible_type); } + for (const std::string& new_type : + set_schema_result.schema_types_new_by_name) { + result_proto.add_new_schema_types(std::move(new_type)); + } + + for (const std::string& compatible_type : + set_schema_result.schema_types_changed_fully_compatible_by_name) { + result_proto.add_fully_compatible_changed_schema_types( + std::move(compatible_type)); + } + + bool index_incompatible = + !set_schema_result.schema_types_index_incompatible_by_name.empty(); + for (const std::string& index_incompatible_type : + set_schema_result.schema_types_index_incompatible_by_name) { + result_proto.add_index_incompatible_changed_schema_types( + std::move(index_incompatible_type)); + } + libtextclassifier3::Status status; if (set_schema_result.success) { if (lost_previous_schema) { @@ -551,6 +574,7 @@ SetSchemaResultProto IcingSearchEngine::SetSchema( status = document_store_->UpdateSchemaStore(schema_store_.get()); if (!status.ok()) { TransformStatus(status, result_status); + result_proto.set_latency_ms(timer->GetElapsedMilliseconds()); return result_proto; } } else if (!set_schema_result.old_schema_type_ids_changed.empty() || @@ -560,15 +584,17 @@ SetSchemaResultProto IcingSearchEngine::SetSchema( set_schema_result); if (!status.ok()) { TransformStatus(status, result_status); + result_proto.set_latency_ms(timer->GetElapsedMilliseconds()); return result_proto; } } - if (lost_previous_schema || set_schema_result.index_incompatible) { + if (lost_previous_schema || index_incompatible) { // Clears all index files status = index_->Reset(); if (!status.ok()) { TransformStatus(status, result_status); + result_proto.set_latency_ms(timer->GetElapsedMilliseconds()); return result_proto; } @@ -579,6 +605,7 @@ SetSchemaResultProto IcingSearchEngine::SetSchema( if (!restore_result.status.ok() && !absl_ports::IsDataLoss(restore_result.status)) { TransformStatus(status, result_status); + result_proto.set_latency_ms(timer->GetElapsedMilliseconds()); return result_proto; } } @@ -589,6 +616,7 @@ SetSchemaResultProto IcingSearchEngine::SetSchema( result_status->set_message("Schema is incompatible."); } + result_proto.set_latency_ms(timer->GetElapsedMilliseconds()); return result_proto; } @@ -903,9 +931,13 @@ DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery( return result_proto; } - DeleteStatsProto* delete_stats = result_proto.mutable_delete_stats(); - delete_stats->set_delete_type(DeleteStatsProto::DeleteType::QUERY); - + DeleteByQueryStatsProto* delete_stats = + result_proto.mutable_delete_by_query_stats(); + delete_stats->set_query_length(search_spec.query().length()); + delete_stats->set_num_namespaces_filtered( + search_spec.namespace_filters_size()); + delete_stats->set_num_schema_types_filtered( + search_spec.schema_type_filters_size()); std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer(); libtextclassifier3::Status status = @@ -915,6 +947,7 @@ DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery( return result_proto; } + std::unique_ptr<Timer> component_timer = clock_->GetNewTimer(); // Gets unordered results from query processor auto query_processor_or = QueryProcessor::Create( index_.get(), language_segmenter_.get(), normalizer_.get(), @@ -933,10 +966,13 @@ DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery( } QueryProcessor::QueryResults query_results = std::move(query_results_or).ValueOrDie(); + delete_stats->set_parse_query_latency_ms( + component_timer->GetElapsedMilliseconds()); ICING_VLOG(2) << "Deleting the docs that matched the query."; int num_deleted = 0; + component_timer = clock_->GetNewTimer(); while (query_results.root_iterator->Advance().ok()) { ICING_VLOG(3) << "Deleting doc " << query_results.root_iterator->doc_hit_info().document_id(); @@ -948,6 +984,13 @@ DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery( return result_proto; } } + delete_stats->set_document_removal_latency_ms( + component_timer->GetElapsedMilliseconds()); + int term_count = 0; + for (const auto& section_and_terms : query_results.query_terms) { + term_count += section_and_terms.second.size(); + } + delete_stats->set_num_terms(term_count); if (num_deleted > 0) { result_proto.mutable_status()->set_code(StatusProto::OK); diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc index 4c15827..ef4625a 100644 --- a/icing/icing-search-engine_test.cc +++ b/icing/icing-search-engine_test.cc @@ -101,7 +101,10 @@ constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_NONE = StringIndexingConfig_TokenizerType_Code_NONE; +#ifndef ICING_JNI_TEST constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; +#endif // !ICING_JNI_TEST + constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX; constexpr TermMatchType_Code MATCH_NONE = TermMatchType_Code_UNKNOWN; @@ -738,7 +741,13 @@ TEST_F(IcingSearchEngineTest, SetSchemaCompatibleVersionUpdateSucceeds) { property->set_data_type(PropertyConfigProto::DataType::STRING); property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + SetSchemaResultProto set_schema_result = icing.SetSchema(schema); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); + SetSchemaResultProto expected_set_schema_result; + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + expected_set_schema_result.mutable_new_schema_types()->Add("Email"); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1)); } @@ -756,12 +765,20 @@ TEST_F(IcingSearchEngineTest, SetSchemaCompatibleVersionUpdateSucceeds) { property->set_property_name("title"); property->set_data_type(PropertyConfigProto::DataType::STRING); property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + property = type->add_properties(); property->set_property_name("body"); property->set_data_type(PropertyConfigProto::DataType::STRING); property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); // 3. SetSchema should succeed and the version number should be updated. - EXPECT_THAT(icing.SetSchema(schema, true).status(), ProtoIsOk()); + SetSchemaResultProto set_schema_result = icing.SetSchema(schema, true); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); + SetSchemaResultProto expected_set_schema_result; + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + expected_set_schema_result.mutable_fully_compatible_changed_schema_types() + ->Add("Email"); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2)); } @@ -947,7 +964,12 @@ TEST_F(IcingSearchEngineTest, } TEST_F(IcingSearchEngineTest, SetSchema) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + auto fake_clock = std::make_unique<FakeClock>(); + fake_clock->SetTimerElapsedMilliseconds(1000); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique<Filesystem>(), + std::make_unique<IcingFilesystem>(), + std::move(fake_clock), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); auto message_document = CreateMessageDocument("namespace", "uri"); @@ -976,26 +998,31 @@ TEST_F(IcingSearchEngineTest, SetSchema) { empty_type->set_schema_type(""); // Make sure we can't set invalid schemas - EXPECT_THAT(icing.SetSchema(invalid_schema).status(), + SetSchemaResultProto set_schema_result = icing.SetSchema(invalid_schema); + EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); + EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000)); // Can add an document of a set schema - EXPECT_THAT(icing.SetSchema(schema_with_message).status(), ProtoIsOk()); + set_schema_result = icing.SetSchema(schema_with_message); + EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::OK)); + EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000)); EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk()); // Schema with Email doesn't have Message, so would result incompatible // data - EXPECT_THAT(icing.SetSchema(schema_with_email).status(), + set_schema_result = icing.SetSchema(schema_with_email); + EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); + EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000)); // Can expand the set of schema types and add an document of a new // schema type - EXPECT_THAT(icing.SetSchema(SchemaProto(schema_with_email_and_message)) - .status() - .code(), - Eq(StatusProto::OK)); - EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk()); + set_schema_result = icing.SetSchema(schema_with_email_and_message); + EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::OK)); + EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000)); + EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk()); // Can't add an document whose schema isn't set auto photo_document = DocumentBuilder() .SetKey("namespace", "uri") @@ -1009,7 +1036,7 @@ TEST_F(IcingSearchEngineTest, SetSchema) { } TEST_F(IcingSearchEngineTest, - SetSchemaTriggersIndexRestorationAndReturnsOk) { + SetSchemaNewIndexedPropertyTriggersIndexRestorationAndReturnsOk) { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); @@ -1018,8 +1045,15 @@ TEST_F(IcingSearchEngineTest, ->mutable_properties(0) ->clear_string_indexing_config(); - EXPECT_THAT(icing.SetSchema(schema_with_no_indexed_property).status(), - ProtoIsOk()); + SetSchemaResultProto set_schema_result = + icing.SetSchema(schema_with_no_indexed_property); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); + SetSchemaResultProto expected_set_schema_result; + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + expected_set_schema_result.mutable_new_schema_types()->Add("Message"); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + // Nothing will be index and Search() won't return anything. EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), ProtoIsOk()); @@ -1040,8 +1074,14 @@ TEST_F(IcingSearchEngineTest, SchemaProto schema_with_indexed_property = CreateMessageSchema(); // Index restoration should be triggered here because new schema requires more // properties to be indexed. - EXPECT_THAT(icing.SetSchema(schema_with_indexed_property).status(), - ProtoIsOk()); + set_schema_result = icing.SetSchema(schema_with_indexed_property); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); + expected_set_schema_result = SetSchemaResultProto(); + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + expected_set_schema_result.mutable_index_incompatible_changed_schema_types() + ->Add("Message"); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); SearchResultProto expected_search_result_proto; expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); @@ -1085,8 +1125,12 @@ TEST_F(IcingSearchEngineTest, .Build(); SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); SetSchemaResultProto expected_set_schema_result; expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + expected_set_schema_result.mutable_new_schema_types()->Add("Email"); + expected_set_schema_result.mutable_new_schema_types()->Add("Person"); EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); DocumentProto document = @@ -1153,8 +1197,12 @@ TEST_F(IcingSearchEngineTest, .Build(); set_schema_result = icing.SetSchema(no_nested_schema); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); expected_set_schema_result = SetSchemaResultProto(); expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + expected_set_schema_result.mutable_index_incompatible_changed_schema_types() + ->Add("Email"); EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); // document shouldn't match a query for 'Bill' in either 'sender.name' or @@ -1197,7 +1245,10 @@ TEST_F(IcingSearchEngineTest, SetSchemaResultProto set_schema_result = icing.SetSchema(email_with_body_schema); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); SetSchemaResultProto expected_set_schema_result; + expected_set_schema_result.mutable_new_schema_types()->Add("Email"); expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); @@ -1243,8 +1294,12 @@ TEST_F(IcingSearchEngineTest, set_schema_result = icing.SetSchema( email_no_body_schema, /*ignore_errors_and_delete_documents=*/true); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); expected_set_schema_result = SetSchemaResultProto(); expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email"); + expected_set_schema_result.mutable_index_incompatible_changed_schema_types() + ->Add("Email"); expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); @@ -1282,7 +1337,10 @@ TEST_F( SetSchemaResultProto set_schema_result = icing.SetSchema(email_with_body_schema); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); SetSchemaResultProto expected_set_schema_result; + expected_set_schema_result.mutable_new_schema_types()->Add("Email"); expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); @@ -1336,8 +1394,12 @@ TEST_F( set_schema_result = icing.SetSchema( email_no_body_schema, /*ignore_errors_and_delete_documents=*/true); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); expected_set_schema_result = SetSchemaResultProto(); expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email"); + expected_set_schema_result.mutable_index_incompatible_changed_schema_types() + ->Add("Email"); expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); @@ -1385,7 +1447,11 @@ TEST_F(IcingSearchEngineTest, ForceSetSchemaIncompatibleNestedDocsAreDeleted) { .Build(); SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); SetSchemaResultProto expected_set_schema_result; + expected_set_schema_result.mutable_new_schema_types()->Add("Email"); + expected_set_schema_result.mutable_new_schema_types()->Add("Person"); expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); @@ -1438,9 +1504,15 @@ TEST_F(IcingSearchEngineTest, ForceSetSchemaIncompatibleNestedDocsAreDeleted) { set_schema_result = icing.SetSchema( nested_schema, /*ignore_errors_and_delete_documents=*/true); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); expected_set_schema_result = SetSchemaResultProto(); expected_set_schema_result.mutable_incompatible_schema_types()->Add("Person"); expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email"); + expected_set_schema_result.mutable_index_incompatible_changed_schema_types() + ->Add("Email"); + expected_set_schema_result.mutable_index_incompatible_changed_schema_types() + ->Add("Person"); expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); @@ -1499,6 +1571,10 @@ TEST_F(IcingSearchEngineTest, SetSchemaRevalidatesDocumentsAndReturnsOk) { property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); // Can't set the schema since it's incompatible + SetSchemaResultProto set_schema_result = + icing.SetSchema(schema_with_required_subject); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); SetSchemaResultProto expected_set_schema_result_proto; expected_set_schema_result_proto.mutable_status()->set_code( StatusProto::FAILED_PRECONDITION); @@ -1506,15 +1582,17 @@ TEST_F(IcingSearchEngineTest, SetSchemaRevalidatesDocumentsAndReturnsOk) { "Schema is incompatible."); expected_set_schema_result_proto.add_incompatible_schema_types("email"); - EXPECT_THAT(icing.SetSchema(schema_with_required_subject), - EqualsProto(expected_set_schema_result_proto)); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result_proto)); // Force set it + set_schema_result = + icing.SetSchema(schema_with_required_subject, + /*ignore_errors_and_delete_documents=*/true); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); expected_set_schema_result_proto.mutable_status()->set_code(StatusProto::OK); expected_set_schema_result_proto.mutable_status()->clear_message(); - EXPECT_THAT(icing.SetSchema(schema_with_required_subject, - /*ignore_errors_and_delete_documents=*/true), - EqualsProto(expected_set_schema_result_proto)); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result_proto)); GetResultProto expected_get_result_proto; expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); @@ -1571,19 +1649,25 @@ TEST_F(IcingSearchEngineTest, SetSchemaDeletesDocumentsAndReturnsOk) { type->set_schema_type("email"); // Can't set the schema since it's incompatible + SetSchemaResultProto set_schema_result = icing.SetSchema(new_schema); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); SetSchemaResultProto expected_result; expected_result.mutable_status()->set_code(StatusProto::FAILED_PRECONDITION); expected_result.mutable_status()->set_message("Schema is incompatible."); expected_result.add_deleted_schema_types("message"); - EXPECT_THAT(icing.SetSchema(new_schema), EqualsProto(expected_result)); + EXPECT_THAT(set_schema_result, EqualsProto(expected_result)); // Force set it + set_schema_result = + icing.SetSchema(new_schema, + /*ignore_errors_and_delete_documents=*/true); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); expected_result.mutable_status()->set_code(StatusProto::OK); expected_result.mutable_status()->clear_message(); - EXPECT_THAT(icing.SetSchema(new_schema, - /*ignore_errors_and_delete_documents=*/true), - EqualsProto(expected_result)); + EXPECT_THAT(set_schema_result, EqualsProto(expected_result)); // "email" document is still there GetResultProto expected_get_result_proto; @@ -3186,11 +3270,16 @@ TEST_F(IcingSearchEngineTest, DeleteByQuery) { search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); DeleteByQueryResultProto result_proto = icing.DeleteByQuery(search_spec); EXPECT_THAT(result_proto.status(), ProtoIsOk()); - DeleteStatsProto exp_stats; - exp_stats.set_delete_type(DeleteStatsProto::DeleteType::QUERY); + DeleteByQueryStatsProto exp_stats; exp_stats.set_latency_ms(7); exp_stats.set_num_documents_deleted(1); - EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats)); + exp_stats.set_query_length(search_spec.query().length()); + exp_stats.set_num_terms(1); + exp_stats.set_num_namespaces_filtered(0); + exp_stats.set_num_schema_types_filtered(0); + exp_stats.set_parse_query_latency_ms(7); + exp_stats.set_document_removal_latency_ms(7); + EXPECT_THAT(result_proto.delete_by_query_stats(), EqualsProto(exp_stats)); expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); expected_get_result_proto.mutable_status()->set_message( @@ -5431,74 +5520,230 @@ TEST_F(IcingSearchEngineTest, SetSchemaCanDetectPreviousSchemaWasLost) { EqualsSearchResultIgnoreStatsAndScores(empty_result)); } -TEST_F(IcingSearchEngineTest, PersistToDisk) { - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = - CreateMessageDocument("namespace", "uri"); - +TEST_F(IcingSearchEngineTest, ImplicitPersistToDiskFullSavesEverything) { + DocumentProto document = CreateMessageDocument("namespace", "uri"); { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), - ProtoIsOk()); + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + } // Destructing calls a PersistToDisk(FULL) - // Persisting shouldn't affect anything - EXPECT_THAT(icing.PersistToDisk(PersistType::FULL).status(), ProtoIsOk()); + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT( - icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - } // Destructing persists as well + // There should be no recovery since everything should be saved properly. + InitializeResultProto init_result = icing.Initialize(); + EXPECT_THAT(init_result.status(), ProtoIsOk()); + EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); + + // Schema is still intact. + GetSchemaResultProto expected_get_schema_result_proto; + expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema(); + + EXPECT_THAT(icing.GetSchema(), EqualsProto(expected_get_schema_result_proto)); + + // Documents are still intact. + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = document; - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); EXPECT_THAT( icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), EqualsProto(expected_get_result_proto)); + + // Index is still intact. + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); // Content in the Message document. + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document; + + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_F(IcingSearchEngineTest, ExplicitPersistToDiskFullSavesEverything) { + DocumentProto document = CreateMessageDocument("namespace", "uri"); + + // Add schema and documents to our first icing1 instance. + IcingSearchEngine icing1(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing1.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing1.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + EXPECT_THAT(icing1.Put(document).status(), ProtoIsOk()); + EXPECT_THAT(icing1.PersistToDisk(PersistType::FULL).status(), ProtoIsOk()); + + // Initialize a second icing2 instance which should have it's own memory + // space. If data from icing1 isn't being persisted to the files, then icing2 + // won't be able to see those changes. + IcingSearchEngine icing2(GetDefaultIcingOptions(), GetTestJniCache()); + + // There should be no recovery since everything should be saved properly. + InitializeResultProto init_result = icing2.Initialize(); + EXPECT_THAT(init_result.status(), ProtoIsOk()); + EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); + + // Schema is still intact. + GetSchemaResultProto expected_get_schema_result_proto; + expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema(); + + EXPECT_THAT(icing2.GetSchema(), + EqualsProto(expected_get_schema_result_proto)); + + // Documents are still intact. + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = document; + + EXPECT_THAT( + icing2.Get("namespace", "uri", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + // Index is still intact. + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); // Content in the Message document. + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document; + + SearchResultProto actual_results = + icing2.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } -TEST_F(IcingSearchEngineTest, NoPersistToDiskLiteDoesntPersistPut) { +TEST_F(IcingSearchEngineTest, NoPersistToDiskLosesAllDocumentsAndIndex) { IcingSearchEngine icing1(GetDefaultIcingOptions(), GetTestJniCache()); EXPECT_THAT(icing1.Initialize().status(), ProtoIsOk()); EXPECT_THAT(icing1.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - DocumentProto document1 = CreateMessageDocument("namespace", "uri"); - EXPECT_THAT(icing1.Put(document1).status(), ProtoIsOk()); + DocumentProto document = CreateMessageDocument("namespace", "uri"); + EXPECT_THAT(icing1.Put(document).status(), ProtoIsOk()); EXPECT_THAT( icing1.Get("namespace", "uri", GetResultSpecProto::default_instance()) .document(), - EqualsProto(document1)); + EqualsProto(document)); + + // It's intentional that no PersistToDisk call is made before initializing a + // second instance of icing. IcingSearchEngine icing2(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing2.Initialize().status(), ProtoIsOk()); + InitializeResultProto init_result = icing2.Initialize(); + EXPECT_THAT(init_result.status(), ProtoIsOk()); + EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::PARTIAL_LOSS)); + EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), + Eq(InitializeStatsProto::DATA_LOSS)); + EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); + // The document shouldn't be found because we forgot to call // PersistToDisk(LITE)! EXPECT_THAT( icing2.Get("namespace", "uri", GetResultSpecProto::default_instance()) .status(), ProtoStatusIs(StatusProto::NOT_FOUND)); + + // Searching also shouldn't get us anything because the index wasn't + // recovered. + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); // Content in the Message document. + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + + SearchResultProto actual_results = + icing2.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } -TEST_F(IcingSearchEngineTest, PersistToDiskLitePersistsPut) { +TEST_F(IcingSearchEngineTest, PersistToDiskLiteSavesGroundTruth) { + DocumentProto document = CreateMessageDocument("namespace", "uri"); + IcingSearchEngine icing1(GetDefaultIcingOptions(), GetTestJniCache()); EXPECT_THAT(icing1.Initialize().status(), ProtoIsOk()); EXPECT_THAT(icing1.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - DocumentProto document1 = CreateMessageDocument("namespace", "uri"); - EXPECT_THAT(icing1.Put(document1).status(), ProtoIsOk()); + EXPECT_THAT(icing1.Put(document).status(), ProtoIsOk()); EXPECT_THAT(icing1.PersistToDisk(PersistType::LITE).status(), ProtoIsOk()); EXPECT_THAT( icing1.Get("namespace", "uri", GetResultSpecProto::default_instance()) .document(), - EqualsProto(document1)); + EqualsProto(document)); IcingSearchEngine icing2(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing2.Initialize().status(), ProtoIsOk()); + InitializeResultProto init_result = icing2.Initialize(); + EXPECT_THAT(init_result.status(), ProtoIsOk()); + EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + + // A checksum mismatch gets reported as an IO error. The document store and + // index didn't have their derived files included in the checksum previously, + // so reinitializing will trigger a checksum mismatch. + EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), + Eq(InitializeStatsProto::IO_ERROR)); + EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::IO_ERROR)); + + // Schema is still intact. + GetSchemaResultProto expected_get_schema_result_proto; + expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema(); + + EXPECT_THAT(icing2.GetSchema(), + EqualsProto(expected_get_schema_result_proto)); + // The document should be found because we called PersistToDisk(LITE)! EXPECT_THAT( icing2.Get("namespace", "uri", GetResultSpecProto::default_instance()) .document(), - EqualsProto(document1)); + EqualsProto(document)); + + // Recovered index is still intact. + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); // Content in the Message document. + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document; + + SearchResultProto actual_results = + icing2.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, ResetOk) { @@ -7587,6 +7832,7 @@ TEST_F(IcingSearchEngineTest, CJKSnippetTest) { EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2)); } +#ifndef ICING_JNI_TEST // We skip this test case when we're running in a jni_test since the data files // will be stored in the android-instrumented storage location, rather than the // normal cc_library runfiles directory. To get that storage location, it's @@ -7596,12 +7842,6 @@ TEST_F(IcingSearchEngineTest, CJKSnippetTest) { // this native side yet, we're just going to disable this. The functionality is // already well-tested across 4 different emulated OS's so we're not losing much // test coverage here. -#ifndef ICING_JNI_TEST -// Disable backwards compat test. This test is enabled in google3, but disabled -// in jetpack/framework because we didn't want to keep the binary testdata files -// in our repo. -#define DISABLE_BACKWARDS_COMPAT_TEST -#ifndef DISABLE_BACKWARDS_COMPAT_TEST TEST_F(IcingSearchEngineTest, MigrateToPortableFileBackedProtoLog) { // Copy the testdata files into our IcingSearchEngine directory std::string dir_without_portable_log; @@ -7755,7 +7995,6 @@ TEST_F(IcingSearchEngineTest, MigrateToPortableFileBackedProtoLog) { EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(expected_document3)); } -#endif // DISABLE_BACKWARDS_COMPAT_TEST #endif // !ICING_JNI_TEST } // namespace diff --git a/icing/index/iterator/doc-hit-info-iterator-and.cc b/icing/index/iterator/doc-hit-info-iterator-and.cc index 66f87bd..39aa969 100644 --- a/icing/index/iterator/doc-hit-info-iterator-and.cc +++ b/icing/index/iterator/doc-hit-info-iterator-and.cc @@ -162,6 +162,7 @@ libtextclassifier3::Status DocHitInfoIteratorAndNary::Advance() { DocumentId unused; ICING_ASSIGN_OR_RETURN( unused, AdvanceTo(iterator.get(), potential_document_id)); + (void)unused; // Silence unused warning. } if (iterator->doc_hit_info().document_id() == potential_document_id) { diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.cc b/icing/index/lite/doc-hit-info-iterator-term-lite.cc index d535d7f..08df4fc 100644 --- a/icing/index/lite/doc-hit-info-iterator-term-lite.cc +++ b/icing/index/lite/doc-hit-info-iterator-term-lite.cc @@ -45,8 +45,13 @@ libtextclassifier3::Status DocHitInfoIteratorTermLite::Advance() { if (cached_hits_idx_ == -1) { libtextclassifier3::Status status = RetrieveMoreHits(); if (!status.ok()) { - ICING_LOG(ERROR) << "Failed to retrieve more hits " - << status.error_message(); + if (!absl_ports::IsNotFound(status)) { + // NOT_FOUND is expected to happen (not every term will be in the main + // index!). Other errors are worth logging. + ICING_LOG(ERROR) + << "Encountered unexpected failure while retrieving hits " + << status.error_message(); + } return absl_ports::ResourceExhaustedError( "No more DocHitInfos in iterator"); } diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.h b/icing/index/lite/doc-hit-info-iterator-term-lite.h index 8dbe043..179fc93 100644 --- a/icing/index/lite/doc-hit-info-iterator-term-lite.h +++ b/icing/index/lite/doc-hit-info-iterator-term-lite.h @@ -82,6 +82,11 @@ class DocHitInfoIteratorTermLite : public DocHitInfoIterator { protected: // Add DocHitInfos corresponding to term_ to cached_hits_. + // + // Returns: + // - OK, on success + // - NOT_FOUND if no term matching term_ was found in the lexicon. + // - INVALID_ARGUMENT if unable to properly encode the termid virtual libtextclassifier3::Status RetrieveMoreHits() = 0; const std::string term_; diff --git a/icing/index/main/doc-hit-info-iterator-term-main.cc b/icing/index/main/doc-hit-info-iterator-term-main.cc index 5553c1e..98bc18e 100644 --- a/icing/index/main/doc-hit-info-iterator-term-main.cc +++ b/icing/index/main/doc-hit-info-iterator-term-main.cc @@ -57,8 +57,9 @@ libtextclassifier3::Status DocHitInfoIteratorTermMain::Advance() { if (!absl_ports::IsNotFound(status)) { // NOT_FOUND is expected to happen (not every term will be in the main // index!). Other errors are worth logging. - ICING_LOG(ERROR) << "Failed to retrieve more hits " - << status.error_message(); + ICING_LOG(ERROR) + << "Encountered unexpected failure while retrieving hits " + << status.error_message(); } return absl_ports::ResourceExhaustedError( "No more DocHitInfos in iterator"); diff --git a/icing/index/main/posting-list-free.h b/icing/index/main/posting-list-free.h index 4b27401..4f06057 100644 --- a/icing/index/main/posting-list-free.h +++ b/icing/index/main/posting-list-free.h @@ -115,7 +115,7 @@ class PostingListFree { // bytes which will store the next posting list index, the rest are unused and // can be anything. uint8_t *posting_list_buffer_; - uint32_t size_in_bytes_; + [[maybe_unused]] uint32_t size_in_bytes_; static_assert(sizeof(PostingListIndex) <= posting_list_utils::min_posting_list_size(), diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc index e9ba654..3307638 100644 --- a/icing/schema/schema-store.cc +++ b/icing/schema/schema-store.cc @@ -331,6 +331,9 @@ SchemaStore::SetSchema(SchemaProto&& new_schema, if (absl_ports::IsNotFound(schema_proto_or.status())) { // We don't have a pre-existing schema, so anything is valid. result.success = true; + for (const SchemaTypeConfigProto& type_config : new_schema.types()) { + result.schema_types_new_by_name.insert(type_config.schema_type()); + } } else if (!schema_proto_or.ok()) { // Real error return schema_proto_or.status(); @@ -351,8 +354,11 @@ SchemaStore::SetSchema(SchemaProto&& new_schema, SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema, new_dependency_map); - // An incompatible index is fine, we can just reindex - result.index_incompatible = schema_delta.index_incompatible; + result.schema_types_new_by_name = std::move(schema_delta.schema_types_new); + result.schema_types_changed_fully_compatible_by_name = + std::move(schema_delta.schema_types_changed_fully_compatible); + result.schema_types_index_incompatible_by_name = + std::move(schema_delta.schema_types_index_incompatible); for (const auto& schema_type : schema_delta.schema_types_deleted) { // We currently don't support deletions, so mark this as not possible. diff --git a/icing/schema/schema-store.h b/icing/schema/schema-store.h index dd1edb8..b9be6c0 100644 --- a/icing/schema/schema-store.h +++ b/icing/schema/schema-store.h @@ -68,9 +68,6 @@ class SchemaStore { // to file. bool success = false; - // Whether the new schema changes invalidate the index. - bool index_incompatible = false; - // SchemaTypeIds of schema types can be reassigned new SchemaTypeIds if: // 1. Schema types are added in the middle of the SchemaProto // 2. Schema types are removed from the middle of the SchemaProto @@ -100,6 +97,21 @@ class SchemaStore { // SchemaUtil::ComputeCompatibilityDelta. Represented by the SchemaTypeId // assigned to this SchemaTypeConfigProto in the *old* schema. std::unordered_set<SchemaTypeId> schema_types_incompatible_by_id; + + // Schema types that were added in the new schema. Represented by the + // `schema_type` field in the SchemaTypeConfigProto. + std::unordered_set<std::string> schema_types_new_by_name; + + // Schema types that were changed in a way that was backwards compatible and + // didn't invalidate the index. Represented by the `schema_type` field in + // the SchemaTypeConfigProto. + std::unordered_set<std::string> + schema_types_changed_fully_compatible_by_name; + + // Schema types that were changed in a way that was backwards compatible, + // but invalidated the index. Represented by the `schema_type` field in the + // SchemaTypeConfigProto. + std::unordered_set<std::string> schema_types_index_incompatible_by_name; }; // Factory function to create a SchemaStore which does not take ownership diff --git a/icing/schema/schema-store_test.cc b/icing/schema/schema-store_test.cc index 5ef2dea..be7170f 100644 --- a/icing/schema/schema-store_test.cc +++ b/icing/schema/schema-store_test.cc @@ -103,6 +103,7 @@ TEST_F(SchemaStoreTest, CorruptSchemaError) { // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -138,6 +139,7 @@ TEST_F(SchemaStoreTest, RecoverCorruptDerivedFileOk) { // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -175,6 +177,7 @@ TEST_F(SchemaStoreTest, RecoverBadChecksumOk) { // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -246,6 +249,7 @@ TEST_F(SchemaStoreTest, CreateWithPreviousSchemaOk) { SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); @@ -267,6 +271,7 @@ TEST_F(SchemaStoreTest, MultipleCreateOk) { SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); @@ -303,6 +308,7 @@ TEST_F(SchemaStoreTest, SetNewSchemaOk) { // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -318,6 +324,7 @@ TEST_F(SchemaStoreTest, SetSameSchemaOk) { // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -325,6 +332,8 @@ TEST_F(SchemaStoreTest, SetSameSchemaOk) { EXPECT_THAT(*actual_schema, EqualsProto(schema_)); // And one more for fun + result = SchemaStore::SetSchemaResult(); + result.success = true; EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); @@ -339,6 +348,7 @@ TEST_F(SchemaStoreTest, SetIncompatibleSchemaOk) { // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -349,6 +359,7 @@ TEST_F(SchemaStoreTest, SetIncompatibleSchemaOk) { schema_.clear_types(); // Set the incompatible schema + result = SchemaStore::SetSchemaResult(); result.success = false; result.schema_types_deleted_by_name.emplace("email"); result.schema_types_deleted_by_id.emplace(0); @@ -368,6 +379,7 @@ TEST_F(SchemaStoreTest, SetSchemaWithAddedTypeOk) { // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert("email"); EXPECT_THAT(schema_store->SetSchema(schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -380,6 +392,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithAddedTypeOk) { .Build(); // Set the compatible schema + result = SchemaStore::SetSchemaResult(); + result.success = true; + result.schema_types_new_by_name.insert("new_type"); EXPECT_THAT(schema_store->SetSchema(schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); @@ -400,6 +415,8 @@ TEST_F(SchemaStoreTest, SetSchemaWithDeletedTypeOk) { // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert("email"); + result.schema_types_new_by_name.insert("message"); EXPECT_THAT(schema_store->SetSchema(schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -456,6 +473,8 @@ TEST_F(SchemaStoreTest, SetSchemaWithReorderedTypesOk) { // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert("email"); + result.schema_types_new_by_name.insert("message"); EXPECT_THAT(schema_store->SetSchema(schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -470,6 +489,8 @@ TEST_F(SchemaStoreTest, SetSchemaWithReorderedTypesOk) { // Since we assign SchemaTypeIds based on order in the SchemaProto, this will // cause SchemaTypeIds to change + result = SchemaStore::SetSchemaResult(); + result.success = true; result.old_schema_type_ids_changed.emplace(0); // Old SchemaTypeId of "email" result.old_schema_type_ids_changed.emplace( 1); // Old SchemaTypeId of "message" @@ -481,7 +502,7 @@ TEST_F(SchemaStoreTest, SetSchemaWithReorderedTypesOk) { EXPECT_THAT(*actual_schema, EqualsProto(schema)); } -TEST_F(SchemaStoreTest, SetSchemaThatRequiresReindexingOk) { +TEST_F(SchemaStoreTest, IndexedPropertyChangeRequiresReindexingOk) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); @@ -499,6 +520,7 @@ TEST_F(SchemaStoreTest, SetSchemaThatRequiresReindexingOk) { // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert("email"); EXPECT_THAT(schema_store->SetSchema(schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -514,10 +536,10 @@ TEST_F(SchemaStoreTest, SetSchemaThatRequiresReindexingOk) { .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); - // With a new indexed property, we'll need to reindex - result.index_incompatible = true; - // Set the compatible schema + result = SchemaStore::SetSchemaResult(); + result.success = true; + result.schema_types_index_incompatible_by_name.insert("email"); EXPECT_THAT(schema_store->SetSchema(schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); @@ -564,6 +586,8 @@ TEST_F(SchemaStoreTest, IndexNestedDocumentsChangeRequiresReindexingOk) { // Set schema with index_nested_properties=false to start. SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert("email"); + result.schema_types_new_by_name.insert("person"); EXPECT_THAT(schema_store->SetSchema(no_nested_index_schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -574,7 +598,7 @@ TEST_F(SchemaStoreTest, IndexNestedDocumentsChangeRequiresReindexingOk) { // 'person' is index incompatible. result = SchemaStore::SetSchemaResult(); result.success = true; - result.index_incompatible = true; + result.schema_types_index_incompatible_by_name.insert("person"); EXPECT_THAT(schema_store->SetSchema(nested_index_schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); @@ -584,7 +608,7 @@ TEST_F(SchemaStoreTest, IndexNestedDocumentsChangeRequiresReindexingOk) { // to 'person' is index incompatible. result = SchemaStore::SetSchemaResult(); result.success = true; - result.index_incompatible = true; + result.schema_types_index_incompatible_by_name.insert("person"); EXPECT_THAT(schema_store->SetSchema(no_nested_index_schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); @@ -609,6 +633,7 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) { // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert("email"); EXPECT_THAT(schema_store->SetSchema(schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -671,6 +696,8 @@ TEST_F(SchemaStoreTest, GetSchemaTypeId) { // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert(first_type); + result.schema_types_new_by_name.insert(second_type); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); @@ -829,6 +856,8 @@ TEST_F(SchemaStoreTest, SchemaStoreStorageInfoProto) { SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert("email"); + result.schema_types_new_by_name.insert("fullSectionsType"); EXPECT_THAT(schema_store->SetSchema(schema), IsOkAndHolds(EqualsSetSchemaResult(result))); diff --git a/icing/schema/schema-util.cc b/icing/schema/schema-util.cc index cabe76d..22bc3f6 100644 --- a/icing/schema/schema-util.cc +++ b/icing/schema/schema-util.cc @@ -37,6 +37,20 @@ namespace lib { namespace { +bool ArePropertiesEqual(const PropertyConfigProto& old_property, + const PropertyConfigProto& new_property) { + return old_property.property_name() == new_property.property_name() && + old_property.data_type() == new_property.data_type() && + old_property.schema_type() == new_property.schema_type() && + old_property.cardinality() == new_property.cardinality() && + old_property.string_indexing_config().term_match_type() == + new_property.string_indexing_config().term_match_type() && + old_property.string_indexing_config().tokenizer_type() == + new_property.string_indexing_config().tokenizer_type() && + old_property.document_indexing_config().index_nested_properties() == + new_property.document_indexing_config().index_nested_properties(); +} + bool IsCardinalityCompatible(const PropertyConfigProto& old_property, const PropertyConfigProto& new_property) { if (old_property.cardinality() < new_property.cardinality()) { @@ -432,7 +446,6 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta( const SchemaProto& old_schema, const SchemaProto& new_schema, const DependencyMap& new_schema_dependency_map) { SchemaDelta schema_delta; - schema_delta.index_incompatible = false; TypeConfigMap new_type_config_map; BuildTypeConfigMap(new_schema, &new_type_config_map); @@ -463,6 +476,9 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta( // If there is a different number of properties, then there must have been a // change. + bool has_property_changed = + old_type_config.properties_size() != + new_schema_type_and_config->second.properties_size(); bool is_incompatible = false; bool is_index_incompatible = false; for (const auto& old_property_config : old_type_config.properties()) { @@ -498,6 +514,11 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta( const PropertyConfigProto* new_property_config = new_property_name_and_config->second; + if (!has_property_changed && + !ArePropertiesEqual(old_property_config, *new_property_config)) { + // Finally found a property that changed. + has_property_changed = true; + } if (!IsPropertyCompatible(old_property_config, *new_property_config)) { ICING_VLOG(1) << absl_ports::StrCat( @@ -562,9 +583,38 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta( } if (is_index_incompatible) { - schema_delta.index_incompatible = true; + // If this type is index incompatible, then every type that depends on it + // might also be index incompatible. Use the dependency map to mark those + // ones as index incompatible too. + schema_delta.schema_types_index_incompatible.insert( + old_type_config.schema_type()); + auto parent_types_itr = + new_schema_dependency_map.find(old_type_config.schema_type()); + if (parent_types_itr != new_schema_dependency_map.end()) { + schema_delta.schema_types_index_incompatible.reserve( + schema_delta.schema_types_index_incompatible.size() + + parent_types_itr->second.size()); + schema_delta.schema_types_index_incompatible.insert( + parent_types_itr->second.begin(), parent_types_itr->second.end()); + } } + if (!is_incompatible && !is_index_incompatible && has_property_changed) { + schema_delta.schema_types_changed_fully_compatible.insert( + old_type_config.schema_type()); + } + + // Lastly, remove this type from the map. We know that this type can't + // come up in future iterations through the old schema types because the old + // type config has unique types. + new_type_config_map.erase(old_type_config.schema_type()); + } + + // Any types that are still present in the new_type_config_map are newly added + // types. + schema_delta.schema_types_new.reserve(new_type_config_map.size()); + for (auto& kvp : new_type_config_map) { + schema_delta.schema_types_new.insert(std::move(kvp.first)); } return schema_delta; diff --git a/icing/schema/schema-util.h b/icing/schema/schema-util.h index abbc55d..fa80b15 100644 --- a/icing/schema/schema-util.h +++ b/icing/schema/schema-util.h @@ -41,12 +41,6 @@ class SchemaUtil { std::unordered_set<std::string_view>>; struct SchemaDelta { - // Whether an indexing config has changed, requiring the index to be - // regenerated. We don't list out all the types that make the index - // incompatible because our index isn't optimized for that. It's much easier - // to reset the entire index and reindex every document. - bool index_incompatible = false; - // Which schema types were present in the old schema, but were deleted from // the new schema. std::unordered_set<std::string> schema_types_deleted; @@ -55,10 +49,28 @@ class SchemaUtil { // could invalidate existing Documents of that schema type. std::unordered_set<std::string> schema_types_incompatible; + // Schema types that were added in the new schema. Represented by the + // `schema_type` field in the SchemaTypeConfigProto. + std::unordered_set<std::string> schema_types_new; + + // Schema types that were changed in a way that was backwards compatible and + // didn't invalidate the index. Represented by the `schema_type` field in + // the SchemaTypeConfigProto. + std::unordered_set<std::string> schema_types_changed_fully_compatible; + + // Schema types that were changed in a way that was backwards compatible, + // but invalidated the index. Represented by the `schema_type` field in the + // SchemaTypeConfigProto. + std::unordered_set<std::string> schema_types_index_incompatible; + bool operator==(const SchemaDelta& other) const { - return index_incompatible == other.index_incompatible && - schema_types_deleted == other.schema_types_deleted && - schema_types_incompatible == other.schema_types_incompatible; + return schema_types_deleted == other.schema_types_deleted && + schema_types_incompatible == other.schema_types_incompatible && + schema_types_new == other.schema_types_new && + schema_types_changed_fully_compatible == + other.schema_types_changed_fully_compatible && + schema_types_index_incompatible == + other.schema_types_index_incompatible; } }; diff --git a/icing/schema/schema-util_test.cc b/icing/schema/schema-util_test.cc index 049dd79..26ef4c7 100644 --- a/icing/schema/schema-util_test.cc +++ b/icing/schema/schema-util_test.cc @@ -705,6 +705,7 @@ TEST(SchemaUtilTest, NewOptionalPropertyIsCompatible) { .Build(); SchemaUtil::SchemaDelta schema_delta; + schema_delta.schema_types_changed_fully_compatible.insert(kEmailType); SchemaUtil::DependencyMap no_dependencies_map; EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta( old_schema, new_schema_with_optional, no_dependencies_map), @@ -817,6 +818,8 @@ TEST(SchemaUtilTest, CompatibilityOfDifferentCardinalityOk) { // We can have the new schema be less restrictive, OPTIONAL->REPEATED; SchemaUtil::SchemaDelta compatible_schema_delta; + compatible_schema_delta.schema_types_changed_fully_compatible.insert( + kEmailType); EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta( /*old_schema=*/more_restrictive_schema, /*new_schema=*/less_restrictive_schema, no_dependencies_map), @@ -912,7 +915,6 @@ TEST(SchemaUtilTest, DifferentSchemaTypeIsIncompatible) { SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta( old_schema, new_schema, dependencies_map); EXPECT_THAT(actual, Eq(schema_delta)); - EXPECT_THAT(actual.index_incompatible, testing::IsFalse()); EXPECT_THAT(actual.schema_types_incompatible, testing::ElementsAre(kEmailType)); EXPECT_THAT(actual.schema_types_deleted, testing::IsEmpty()); @@ -944,7 +946,7 @@ TEST(SchemaUtilTest, ChangingIndexedPropertiesMakesIndexIncompatible) { .Build(); SchemaUtil::SchemaDelta schema_delta; - schema_delta.index_incompatible = true; + schema_delta.schema_types_index_incompatible.insert(kPersonType); // New schema gained a new indexed property. SchemaUtil::DependencyMap no_dependencies_map; @@ -991,7 +993,7 @@ TEST(SchemaUtilTest, AddingNewIndexedPropertyMakesIndexIncompatible) { .Build(); SchemaUtil::SchemaDelta schema_delta; - schema_delta.index_incompatible = true; + schema_delta.schema_types_index_incompatible.insert(kPersonType); SchemaUtil::DependencyMap no_dependencies_map; EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema, no_dependencies_map), @@ -1031,6 +1033,7 @@ TEST(SchemaUtilTest, AddingTypeIsCompatible) { .Build(); SchemaUtil::SchemaDelta schema_delta; + schema_delta.schema_types_new.insert(kEmailType); SchemaUtil::DependencyMap no_dependencies_map; EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema, no_dependencies_map), @@ -1109,7 +1112,7 @@ TEST(SchemaUtilTest, DeletingPropertyAndChangingProperty) { SchemaUtil::SchemaDelta schema_delta; schema_delta.schema_types_incompatible.emplace(kEmailType); - schema_delta.index_incompatible = true; + schema_delta.schema_types_index_incompatible.emplace(kEmailType); SchemaUtil::DependencyMap no_dependencies_map; SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta( old_schema, new_schema, no_dependencies_map); @@ -1157,7 +1160,7 @@ TEST(SchemaUtilTest, IndexNestedDocumentsIndexIncompatible) { // should make kPersonType index_incompatible. kEmailType should be // unaffected. SchemaUtil::SchemaDelta schema_delta; - schema_delta.index_incompatible = true; + schema_delta.schema_types_index_incompatible.emplace(kPersonType); SchemaUtil::DependencyMap dependencies_map = {{kEmailType, {kPersonType}}}; SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta( no_nested_index_schema, nested_index_schema, dependencies_map); diff --git a/icing/store/document-log-creator.cc b/icing/store/document-log-creator.cc index a035f93..5e0426e 100644 --- a/icing/store/document-log-creator.cc +++ b/icing/store/document-log-creator.cc @@ -69,12 +69,10 @@ DocumentLogCreator::Create(const Filesystem* filesystem, const std::string& base_dir) { bool v0_exists = filesystem->FileExists(MakeDocumentLogFilenameV0(base_dir).c_str()); - bool regen_derived_files = false; - -#ifdef ENABLE_V1_MIGRATION bool v1_exists = filesystem->FileExists(MakeDocumentLogFilenameV1(base_dir).c_str()); + bool regen_derived_files = false; if (v0_exists && !v1_exists) { ICING_RETURN_IF_ERROR(MigrateFromV0ToV1(filesystem, base_dir)); @@ -88,14 +86,6 @@ DocumentLogCreator::Create(const Filesystem* filesystem, // existing derived files. regen_derived_files = true; } -#else // !ENABLE_V1_MIGRATION - if (v0_exists) { - // If migration from v0 to v1 is not enabled, then simply delete the v0 file - // and treat this as if it's our first time initializing a v1 log. - regen_derived_files = true; - filesystem->DeleteFile(MakeDocumentLogFilenameV0(base_dir).c_str()); - } -#endif // ENABLED_V1_MIGRATION ICING_ASSIGN_OR_RETURN( PortableFileBackedProtoLog<DocumentWrapper>::CreateResult diff --git a/icing/store/document-store.h b/icing/store/document-store.h index a60aab1..c85c989 100644 --- a/icing/store/document-store.h +++ b/icing/store/document-store.h @@ -497,28 +497,6 @@ class DocumentStore { bool force_recovery_and_revalidate_documents, InitializeStatsProto* initialize_stats); - // Initializes a new DocumentStore and sets up any underlying files. - // - // Returns: - // Data loss status on success, effectively always DataLoss::NONE - // INTERNAL on I/O error - libtextclassifier3::StatusOr<DataLoss> InitializeNewStore( - InitializeStatsProto* initialize_stats); - - // Initializes a DocumentStore over an existing directory of files. - // - // stats will be set if non-null - // - // Returns: - // Data loss status on success - // INTERNAL on I/O error - libtextclassifier3::StatusOr<DataLoss> InitializeExistingStore( - bool force_recovery_and_revalidate_documents, - InitializeStatsProto* initialize_stats); - - libtextclassifier3::StatusOr<DataLoss> MigrateFromV0ToV1( - InitializeStatsProto* initialize_stats); - // Creates sub-components and verifies the integrity of each sub-component. // This assumes that the the underlying files already exist, and will return // an error if it doesn't find what it's expecting. diff --git a/icing/store/document-store_benchmark.cc b/icing/store/document-store_benchmark.cc index ce608fc..77da928 100644 --- a/icing/store/document-store_benchmark.cc +++ b/icing/store/document-store_benchmark.cc @@ -32,6 +32,7 @@ #include "icing/document-builder.h" #include "icing/file/filesystem.h" #include "icing/proto/document.pb.h" +#include "icing/proto/persist.pb.h" #include "icing/proto/schema.pb.h" #include "icing/schema-builder.h" #include "icing/schema/schema-store.h" @@ -255,6 +256,74 @@ void BM_Delete(benchmark::State& state) { } BENCHMARK(BM_Delete); +void BM_Create(benchmark::State& state) { + Filesystem filesystem; + Clock clock; + + std::string directory = GetTestTempDir() + "/icing"; + std::string document_store_dir = directory + "/store"; + + std::unique_ptr<SchemaStore> schema_store = + CreateSchemaStore(filesystem, directory, &clock); + + // Create an initial document store and put some data in. + { + DestructibleDirectory ddir(filesystem, directory); + + filesystem.CreateDirectoryRecursively(document_store_dir.data()); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem, document_store_dir, &clock, + schema_store.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + + DocumentProto document = CreateDocument("namespace", "uri"); + ICING_ASSERT_OK(document_store->Put(document)); + ICING_ASSERT_OK(document_store->PersistToDisk(PersistType::FULL)); + } + + // Recreating it with some content to checksum over. + DestructibleDirectory ddir(filesystem, directory); + + filesystem.CreateDirectoryRecursively(document_store_dir.data()); + + for (auto s : state) { + benchmark::DoNotOptimize(DocumentStore::Create( + &filesystem, document_store_dir, &clock, schema_store.get())); + } +} +BENCHMARK(BM_Create); + +void BM_ComputeChecksum(benchmark::State& state) { + Filesystem filesystem; + Clock clock; + + std::string directory = GetTestTempDir() + "/icing"; + DestructibleDirectory ddir(filesystem, directory); + + std::string document_store_dir = directory + "/store"; + std::unique_ptr<SchemaStore> schema_store = + CreateSchemaStore(filesystem, directory, &clock); + + filesystem.CreateDirectoryRecursively(document_store_dir.data()); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem, document_store_dir, &clock, + schema_store.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + + DocumentProto document = CreateDocument("namespace", "uri"); + ICING_ASSERT_OK(document_store->Put(document)); + ICING_ASSERT_OK(document_store->PersistToDisk(PersistType::LITE)); + + for (auto s : state) { + benchmark::DoNotOptimize(document_store->ComputeChecksum()); + } +} +BENCHMARK(BM_ComputeChecksum); + } // namespace } // namespace lib diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc index 3ed4c4e..a506eea 100644 --- a/icing/store/document-store_test.cc +++ b/icing/store/document-store_test.cc @@ -3556,7 +3556,6 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) { SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); ASSERT_THAT(schema_store->SetSchema(schema), IsOk()); - DocumentId docid = kInvalidDocumentId; DocumentProto docWithBody = DocumentBuilder() .SetKey("icing", "email/1") @@ -3589,8 +3588,12 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) { std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); + DocumentId docid = kInvalidDocumentId; ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithBody)); + ASSERT_NE(docid, kInvalidDocumentId); + docid = kInvalidDocumentId; ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithoutBody)); + ASSERT_NE(docid, kInvalidDocumentId); ASSERT_THAT(doc_store->Get(docWithBody.namespace_(), docWithBody.uri()), IsOkAndHolds(EqualsProto(docWithBody))); @@ -3658,7 +3661,6 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) { SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); ASSERT_THAT(schema_store->SetSchema(schema), IsOk()); - DocumentId docid = kInvalidDocumentId; DocumentProto docWithBody = DocumentBuilder() .SetKey("icing", "email/1") @@ -3691,8 +3693,12 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) { std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); + DocumentId docid = kInvalidDocumentId; ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithBody)); + ASSERT_NE(docid, kInvalidDocumentId); + docid = kInvalidDocumentId; ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithoutBody)); + ASSERT_NE(docid, kInvalidDocumentId); ASSERT_THAT(doc_store->Get(docWithBody.namespace_(), docWithBody.uri()), IsOkAndHolds(EqualsProto(docWithBody))); diff --git a/icing/testing/common-matchers.h b/icing/testing/common-matchers.h index 8d8bdf2..f83fe0a 100644 --- a/icing/testing/common-matchers.h +++ b/icing/testing/common-matchers.h @@ -121,7 +121,6 @@ MATCHER_P(EqualsSetSchemaResult, expected, "") { const SchemaStore::SetSchemaResult& actual = arg; if (actual.success == expected.success && - actual.index_incompatible == expected.index_incompatible && actual.old_schema_type_ids_changed == expected.old_schema_type_ids_changed && actual.schema_types_deleted_by_name == @@ -131,7 +130,12 @@ MATCHER_P(EqualsSetSchemaResult, expected, "") { actual.schema_types_incompatible_by_name == expected.schema_types_incompatible_by_name && actual.schema_types_incompatible_by_id == - expected.schema_types_incompatible_by_id) { + expected.schema_types_incompatible_by_id && + actual.schema_types_new_by_name == expected.schema_types_new_by_name && + actual.schema_types_changed_fully_compatible_by_name == + expected.schema_types_changed_fully_compatible_by_name && + actual.schema_types_index_incompatible_by_name == + expected.schema_types_index_incompatible_by_name) { return true; } @@ -191,37 +195,82 @@ MATCHER_P(EqualsSetSchemaResult, expected, "") { absl_ports::NumberFormatter()), "]"); + // Format schema_types_new_by_name + std::string actual_schema_types_new_by_name = absl_ports::StrCat( + "[", absl_ports::StrJoin(actual.schema_types_new_by_name, ","), "]"); + + std::string expected_schema_types_new_by_name = absl_ports::StrCat( + "[", absl_ports::StrJoin(expected.schema_types_new_by_name, ","), "]"); + + // Format schema_types_changed_fully_compatible_by_name + std::string actual_schema_types_changed_fully_compatible_by_name = + absl_ports::StrCat( + "[", + absl_ports::StrJoin( + actual.schema_types_changed_fully_compatible_by_name, ","), + "]"); + + std::string expected_schema_types_changed_fully_compatible_by_name = + absl_ports::StrCat( + "[", + absl_ports::StrJoin( + expected.schema_types_changed_fully_compatible_by_name, ","), + "]"); + + // Format schema_types_deleted_by_id + std::string actual_schema_types_index_incompatible_by_name = + absl_ports::StrCat( + "[", + absl_ports::StrJoin(actual.schema_types_index_incompatible_by_name, + ","), + "]"); + + std::string expected_schema_types_index_incompatible_by_name = + absl_ports::StrCat( + "[", + absl_ports::StrJoin(expected.schema_types_index_incompatible_by_name, + ","), + "]"); + *result_listener << IcingStringUtil::StringPrintf( "\nExpected {\n" "\tsuccess=%d,\n" - "\tindex_incompatible=%d,\n" "\told_schema_type_ids_changed=%s,\n" "\tschema_types_deleted_by_name=%s,\n" "\tschema_types_deleted_by_id=%s,\n" "\tschema_types_incompatible_by_name=%s,\n" "\tschema_types_incompatible_by_id=%s\n" + "\tschema_types_new_by_name=%s,\n" + "\tschema_types_index_incompatible_by_name=%s,\n" + "\tschema_types_changed_fully_compatible_by_name=%s\n" "}\n" "Actual {\n" "\tsuccess=%d,\n" - "\tindex_incompatible=%d,\n" "\told_schema_type_ids_changed=%s,\n" "\tschema_types_deleted_by_name=%s,\n" "\tschema_types_deleted_by_id=%s,\n" "\tschema_types_incompatible_by_name=%s,\n" "\tschema_types_incompatible_by_id=%s\n" + "\tschema_types_new_by_name=%s,\n" + "\tschema_types_index_incompatible_by_name=%s,\n" + "\tschema_types_changed_fully_compatible_by_name=%s\n" "}\n", - expected.success, expected.index_incompatible, - expected_old_schema_type_ids_changed.c_str(), + expected.success, expected_old_schema_type_ids_changed.c_str(), expected_schema_types_deleted_by_name.c_str(), expected_schema_types_deleted_by_id.c_str(), expected_schema_types_incompatible_by_name.c_str(), - expected_schema_types_incompatible_by_id.c_str(), actual.success, - actual.index_incompatible, actual_old_schema_type_ids_changed.c_str(), + expected_schema_types_incompatible_by_id.c_str(), + expected_schema_types_new_by_name.c_str(), + expected_schema_types_changed_fully_compatible_by_name.c_str(), + expected_schema_types_index_incompatible_by_name.c_str(), actual.success, + actual_old_schema_type_ids_changed.c_str(), actual_schema_types_deleted_by_name.c_str(), actual_schema_types_deleted_by_id.c_str(), actual_schema_types_incompatible_by_name.c_str(), - actual_schema_types_incompatible_by_id.c_str()); - + actual_schema_types_incompatible_by_id.c_str(), + actual_schema_types_new_by_name.c_str(), + actual_schema_types_changed_fully_compatible_by_name.c_str(), + actual_schema_types_index_incompatible_by_name.c_str()); return false; } diff --git a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java index 64f98f6..0cee80c 100644 --- a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java +++ b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java @@ -59,7 +59,6 @@ import java.util.HashMap; import java.util.Map; import org.junit.After; import org.junit.Before; -import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; @@ -490,7 +489,6 @@ public final class IcingSearchEngineTest { } @Test - @Ignore("b/190845688") public void testCJKTSnippets() throws Exception { assertStatusOk(icingSearchEngine.initialize().getStatus()); @@ -498,12 +496,13 @@ public final class IcingSearchEngineTest { assertStatusOk( icingSearchEngine.setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false).getStatus()); - // String: "我每天走路去上班。" - // ^ ^ ^ ^^ - // UTF16 idx: 0 1 3 5 6 - // Breaks into segments: "我", "每天", "走路", "去", "上班" - String chinese = "我每天走路去上班。"; - assertThat(chinese.length()).isEqualTo(9); + // String: "天是蓝的" + // ^ ^^ ^ + // UTF16 idx: 0 1 2 3 + // Breaks into segments: "天", "是", "蓝", "的" + // "The sky is blue" + String chinese = "天是蓝的"; + assertThat(chinese.length()).isEqualTo(4); DocumentProto emailDocument1 = createEmailDocument("namespace", "uri1").toBuilder() .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues(chinese)) @@ -513,7 +512,7 @@ public final class IcingSearchEngineTest { // Search and request snippet matching but no windowing. SearchSpecProto searchSpec = SearchSpecProto.newBuilder() - .setQuery("每") + .setQuery("是") .setTermMatchType(TermMatchType.Code.PREFIX) .build(); ResultSpecProto resultSpecProto = @@ -552,9 +551,9 @@ public final class IcingSearchEngineTest { int matchStart = matchProto.getExactMatchUtf16Position(); int matchEnd = matchStart + matchProto.getExactMatchUtf16Length(); assertThat(matchStart).isEqualTo(1); - assertThat(matchEnd).isEqualTo(3); + assertThat(matchEnd).isEqualTo(2); String match = content.substring(matchStart, matchEnd); - assertThat(match).isEqualTo("每天"); + assertThat(match).isEqualTo("是"); } @Test diff --git a/proto/icing/proto/document.proto b/proto/icing/proto/document.proto index 9a4e5b9..2e8321b 100644 --- a/proto/icing/proto/document.proto +++ b/proto/icing/proto/document.proto @@ -209,7 +209,7 @@ message DeleteBySchemaTypeResultProto { } // Result of a call to IcingSearchEngine.DeleteByQuery -// Next tag: 3 +// Next tag: 4 message DeleteByQueryResultProto { // Status code can be one of: // OK @@ -224,5 +224,7 @@ message DeleteByQueryResultProto { optional StatusProto status = 1; // Stats for delete execution performance. - optional DeleteStatsProto delete_stats = 2; + optional DeleteByQueryStatsProto delete_by_query_stats = 3; + + reserved 2; } diff --git a/proto/icing/proto/logging.proto b/proto/icing/proto/logging.proto index 29f7f80..4dcfecf 100644 --- a/proto/icing/proto/logging.proto +++ b/proto/icing/proto/logging.proto @@ -181,8 +181,7 @@ message QueryStatsProto { } // Stats of the top-level functions IcingSearchEngine::Delete, -// IcingSearchEngine::DeleteByNamespace, IcingSearchEngine::DeleteBySchemaType, -// IcingSearchEngine::DeleteByQuery. +// IcingSearchEngine::DeleteByNamespace, IcingSearchEngine::DeleteBySchemaType. // Next tag: 4 message DeleteStatsProto { // Overall time used for the function call. @@ -196,8 +195,10 @@ message DeleteStatsProto { // Delete one document. SINGLE = 1; - // Delete by query. - QUERY = 2; + // Delete by query. This value is deprecated. + // IcingSearchEngine::DeleteByQuery will return a DeleteByQueryStatsProto + // rather than a DeleteStatsProto. + DEPRECATED_QUERY = 2 [deprecated = true]; // Delete by namespace. NAMESPACE = 3; @@ -211,3 +212,32 @@ message DeleteStatsProto { // Number of documents deleted by this call. optional int32 num_documents_deleted = 3; } + +// Stats of the top-level functions IcingSearchEngine::DeleteByQuery. +// Next tag: 9 +message DeleteByQueryStatsProto { + // Overall time used for the function call. + optional int32 latency_ms = 1; + + // Number of documents deleted by this call. + optional int32 num_documents_deleted = 2; + + // The UTF-8 length of the query string + optional int32 query_length = 3; + + // Number of terms in the query string. + optional int32 num_terms = 4; + + // Number of namespaces filtered. + optional int32 num_namespaces_filtered = 5; + + // Number of schema types filtered. + optional int32 num_schema_types_filtered = 6; + + // Time used to parse the query, including 2 parts: tokenizing and + // transforming tokens into an iterator tree. + optional int32 parse_query_latency_ms = 7; + + // Time used to delete each document. + optional int32 document_removal_latency_ms = 8; +} diff --git a/proto/icing/proto/schema.proto b/proto/icing/proto/schema.proto index 4188a8c..c611cbf 100644 --- a/proto/icing/proto/schema.proto +++ b/proto/icing/proto/schema.proto @@ -197,7 +197,7 @@ message SchemaProto { } // Result of a call to IcingSearchEngine.SetSchema -// Next tag: 4 +// Next tag: 8 message SetSchemaResultProto { // Status code can be one of: // OK @@ -221,6 +221,21 @@ message SetSchemaResultProto { // documents that fail validation against the new schema types would also be // deleted. repeated string incompatible_schema_types = 3; + + // Schema types that did not exist in the previous schema and were added with + // the new schema type. + repeated string new_schema_types = 4; + + // Schema types that were changed in a way that was backwards compatible and + // didn't invalidate the index. + repeated string fully_compatible_changed_schema_types = 5; + + // Schema types that were changed in a way that was backwards compatible, but + // invalidated the index. + repeated string index_incompatible_changed_schema_types = 6; + + // Overall time used for the function call. + optional int32 latency_ms = 7; } // Result of a call to IcingSearchEngine.GetSchema diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt index 35ad6d9..69dfc00 100644 --- a/synced_AOSP_CL_number.txt +++ b/synced_AOSP_CL_number.txt @@ -1 +1 @@ -set(synced_AOSP_CL_number=378695940) +set(synced_AOSP_CL_number=385604495) |