aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2024-01-02 17:18:00 +0000
committerAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2024-01-02 17:18:00 +0000
commit77312cb0cd77d5c9895e1c657f533e0004e7b3ba (patch)
tree964f0db9dea655b29dc1c73ce95f27b931a5dcd6
parent73b8f0be7d66b284beaf78e6c40c399dddae0d06 (diff)
parent4c22659faf7d19ea0a519667fe626b89d0889d8c (diff)
downloadicing-androidx-wear-compose-release.tar.gz
Snap for 11241554 from 4c22659faf7d19ea0a519667fe626b89d0889d8c to androidx-wear-compose-releaseandroidx-wear-compose-release
Change-Id: I7ca1de83bb0f3adda37495c875981c31ea43b9b9
-rw-r--r--Android.bp2
-rw-r--r--build.gradle85
-rw-r--r--icing/absl_ports/status_test.cc53
-rw-r--r--icing/absl_ports/str_join.h7
-rw-r--r--icing/document-builder.h26
-rw-r--r--icing/file/destructible-directory_test.cc6
-rw-r--r--icing/file/file-backed-proto-log.h6
-rw-r--r--icing/file/file-backed-proto.h48
-rw-r--r--icing/file/file-backed-vector.h44
-rw-r--r--icing/file/file-backed-vector_test.cc99
-rw-r--r--icing/file/filesystem.cc21
-rw-r--r--icing/file/persistent-hash-map.cc483
-rw-r--r--icing/file/persistent-hash-map.h203
-rw-r--r--icing/file/persistent-hash-map_test.cc548
-rw-r--r--icing/file/persistent-storage.cc55
-rw-r--r--icing/file/persistent-storage.h369
-rw-r--r--icing/file/portable-file-backed-proto-log.h65
-rw-r--r--icing/file/portable-file-backed-proto-log_test.cc296
-rw-r--r--icing/file/posting_list/flash-index-storage.cc290
-rw-r--r--icing/file/posting_list/flash-index-storage.h213
-rw-r--r--icing/file/posting_list/flash-index-storage_test.cc52
-rw-r--r--icing/file/posting_list/index-block.cc326
-rw-r--r--icing/file/posting_list/index-block.h346
-rw-r--r--icing/file/posting_list/index-block_test.cc162
-rw-r--r--icing/file/posting_list/posting-list-accessor.cc89
-rw-r--r--icing/file/posting_list/posting-list-accessor.h37
-rw-r--r--icing/file/posting_list/posting-list-free_test.cc14
-rw-r--r--icing/file/posting_list/posting-list-identifier.h2
-rw-r--r--icing/file/posting_list/posting-list-used.cc20
-rw-r--r--icing/file/posting_list/posting-list-used.h67
-rw-r--r--icing/file/version-util.cc146
-rw-r--r--icing/file/version-util.h114
-rw-r--r--icing/file/version-util_test.cc474
-rw-r--r--icing/icing-search-engine.cc1013
-rw-r--r--icing/icing-search-engine.h129
-rw-r--r--icing/icing-search-engine_backwards_compatibility_test.cc202
-rw-r--r--icing/icing-search-engine_benchmark.cc586
-rw-r--r--icing/icing-search-engine_delete_test.cc768
-rw-r--r--icing/icing-search-engine_initialization_test.cc5462
-rw-r--r--icing/icing-search-engine_optimize_test.cc1843
-rw-r--r--icing/icing-search-engine_put_test.cc481
-rw-r--r--icing/icing-search-engine_schema_test.cc3136
-rw-r--r--icing/icing-search-engine_search_test.cc6199
-rw-r--r--icing/icing-search-engine_suggest_test.cc1601
-rw-r--r--icing/icing-search-engine_test.cc10164
-rw-r--r--icing/index/data-indexing-handler.h (renamed from icing/index/section-indexing-handler.h)39
-rw-r--r--icing/index/index-processor.cc43
-rw-r--r--icing/index/index-processor.h35
-rw-r--r--icing/index/index-processor_benchmark.cc199
-rw-r--r--icing/index/index-processor_test.cc505
-rw-r--r--icing/index/index.cc44
-rw-r--r--icing/index/index.h52
-rw-r--r--icing/index/index_test.cc476
-rw-r--r--icing/index/integer-section-indexing-handler.cc54
-rw-r--r--icing/index/integer-section-indexing-handler.h38
-rw-r--r--icing/index/integer-section-indexing-handler_test.cc601
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-all-document-id.cc7
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-all-document-id.h2
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc11
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-and.cc34
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-and.h4
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-and_test.cc117
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-filter.cc30
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-filter.h5
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-filter_test.cc428
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-none.h52
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-not.cc7
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-not.h5
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-not_test.cc11
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-or.cc41
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-or.h4
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-or_test.cc41
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc115
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-property-in-schema.h77
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc270
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-section-restrict.cc204
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-section-restrict.h56
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc298
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-test-util.h16
-rw-r--r--icing/index/iterator/doc-hit-info-iterator.h38
-rw-r--r--icing/index/lite/doc-hit-info-iterator-term-lite.cc13
-rw-r--r--icing/index/lite/doc-hit-info-iterator-term-lite.h16
-rw-r--r--icing/index/lite/lite-index-options.cc10
-rw-r--r--icing/index/lite/lite-index-options.h6
-rw-r--r--icing/index/lite/lite-index.cc284
-rw-r--r--icing/index/lite/lite-index.h202
-rw-r--r--icing/index/lite/lite-index_test.cc642
-rw-r--r--icing/index/lite/lite-index_thread-safety_test.cc399
-rw-r--r--icing/index/lite/term-id-hit-pair.h2
-rw-r--r--icing/index/main/doc-hit-info-iterator-term-main.cc105
-rw-r--r--icing/index/main/doc-hit-info-iterator-term-main.h78
-rw-r--r--icing/index/main/main-index-merger_test.cc4
-rw-r--r--icing/index/main/main-index.cc70
-rw-r--r--icing/index/main/main-index.h17
-rw-r--r--icing/index/main/main-index_test.cc160
-rw-r--r--icing/index/main/posting-list-hit-accessor.cc59
-rw-r--r--icing/index/main/posting-list-hit-accessor.h20
-rw-r--r--icing/index/main/posting-list-hit-accessor_test.cc12
-rw-r--r--icing/index/main/posting-list-hit-serializer.cc (renamed from icing/index/main/posting-list-used-hit-serializer.cc)51
-rw-r--r--icing/index/main/posting-list-hit-serializer.h (renamed from icing/index/main/posting-list-used-hit-serializer.h)12
-rw-r--r--icing/index/main/posting-list-hit-serializer_test.cc (renamed from icing/index/main/posting-list-used-hit-serializer_test.cc)157
-rw-r--r--icing/index/numeric/doc-hit-info-iterator-numeric.h12
-rw-r--r--icing/index/numeric/dummy-numeric-index.h143
-rw-r--r--icing/index/numeric/integer-index-bucket-util.cc205
-rw-r--r--icing/index/numeric/integer-index-bucket-util.h81
-rw-r--r--icing/index/numeric/integer-index-bucket-util_test.cc1112
-rw-r--r--icing/index/numeric/integer-index-storage.cc1147
-rw-r--r--icing/index/numeric/integer-index-storage.h418
-rw-r--r--icing/index/numeric/integer-index-storage_benchmark.cc407
-rw-r--r--icing/index/numeric/integer-index-storage_test.cc2036
-rw-r--r--icing/index/numeric/integer-index.cc651
-rw-r--r--icing/index/numeric/integer-index.h409
-rw-r--r--icing/index/numeric/integer-index_test.cc2465
-rw-r--r--icing/index/numeric/numeric-index.h87
-rw-r--r--icing/index/numeric/numeric-index_test.cc361
-rw-r--r--icing/index/numeric/posting-list-integer-index-accessor.cc (renamed from icing/index/numeric/posting-list-integer-index-data-accessor.cc)144
-rw-r--r--icing/index/numeric/posting-list-integer-index-accessor.h130
-rw-r--r--icing/index/numeric/posting-list-integer-index-accessor_test.cc (renamed from icing/index/numeric/posting-list-integer-index-data-accessor_test.cc)227
-rw-r--r--icing/index/numeric/posting-list-integer-index-data-accessor.h108
-rw-r--r--icing/index/numeric/posting-list-integer-index-serializer.cc (renamed from icing/index/numeric/posting-list-used-integer-index-data-serializer.cc)45
-rw-r--r--icing/index/numeric/posting-list-integer-index-serializer.h (renamed from icing/index/numeric/posting-list-used-integer-index-data-serializer.h)20
-rw-r--r--icing/index/numeric/posting-list-integer-index-serializer_test.cc (renamed from icing/index/numeric/posting-list-used-integer-index-data-serializer_test.cc)110
-rw-r--r--icing/index/string-section-indexing-handler.cc42
-rw-r--r--icing/index/string-section-indexing-handler.h42
-rw-r--r--icing/index/string-section-indexing-handler_test.cc587
-rw-r--r--icing/jni.lds1
-rw-r--r--icing/jni/icing-search-engine-jni.cc471
-rw-r--r--icing/join/doc-join-info.cc49
-rw-r--r--icing/join/doc-join-info.h66
-rw-r--r--icing/join/doc-join-info_test.cc96
-rw-r--r--icing/join/join-children-fetcher.cc39
-rw-r--r--icing/join/join-children-fetcher.h73
-rw-r--r--icing/join/join-children-fetcher_test.cc83
-rw-r--r--icing/join/join-processor.cc138
-rw-r--r--icing/join/join-processor.h49
-rw-r--r--icing/join/join-processor_test.cc305
-rw-r--r--icing/join/qualified-id-join-index.cc467
-rw-r--r--icing/join/qualified-id-join-index.h308
-rw-r--r--icing/join/qualified-id-join-index_test.cc922
-rw-r--r--icing/join/qualified-id-join-indexing-handler.cc108
-rw-r--r--icing/join/qualified-id-join-indexing-handler.h70
-rw-r--r--icing/join/qualified-id-join-indexing-handler_test.cc526
-rw-r--r--icing/join/qualified-id.cc5
-rw-r--r--icing/join/qualified-id_test.cc18
-rw-r--r--icing/monkey_test/icing-monkey-test-runner.cc88
-rw-r--r--icing/performance-configuration.cc12
-rw-r--r--icing/performance-configuration.h7
-rw-r--r--icing/portable/equals-proto.h2
-rw-r--r--icing/portable/gzip_stream.h20
-rw-r--r--icing/query/advanced_query_parser/abstract-syntax-tree.h26
-rw-r--r--icing/query/advanced_query_parser/abstract-syntax-tree_test.cc32
-rw-r--r--icing/query/advanced_query_parser/function.cc77
-rw-r--r--icing/query/advanced_query_parser/function.h66
-rw-r--r--icing/query/advanced_query_parser/function_test.cc332
-rw-r--r--icing/query/advanced_query_parser/lexer.cc100
-rw-r--r--icing/query/advanced_query_parser/lexer.h22
-rw-r--r--icing/query/advanced_query_parser/lexer_test.cc115
-rw-r--r--icing/query/advanced_query_parser/param.h57
-rw-r--r--icing/query/advanced_query_parser/parser.cc77
-rw-r--r--icing/query/advanced_query_parser/parser.h3
-rw-r--r--icing/query/advanced_query_parser/parser_integration_test.cc73
-rw-r--r--icing/query/advanced_query_parser/parser_test.cc464
-rw-r--r--icing/query/advanced_query_parser/pending-value.cc44
-rw-r--r--icing/query/advanced_query_parser/pending-value.h160
-rw-r--r--icing/query/advanced_query_parser/query-visitor.cc804
-rw-r--r--icing/query/advanced_query_parser/query-visitor.h206
-rw-r--r--icing/query/advanced_query_parser/query-visitor_test.cc3125
-rw-r--r--icing/query/advanced_query_parser/util/string-util.cc106
-rw-r--r--icing/query/advanced_query_parser/util/string-util.h49
-rw-r--r--icing/query/advanced_query_parser/util/string-util_test.cc125
-rw-r--r--icing/query/query-features.h11
-rw-r--r--icing/query/query-processor.cc67
-rw-r--r--icing/query/query-processor.h10
-rw-r--r--icing/query/query-processor_benchmark.cc84
-rw-r--r--icing/query/query-processor_test.cc1369
-rw-r--r--icing/query/suggestion-processor.cc270
-rw-r--r--icing/query/suggestion-processor.h20
-rw-r--r--icing/query/suggestion-processor_test.cc622
-rw-r--r--icing/result/projection-tree.cc10
-rw-r--r--icing/result/projection-tree.h6
-rw-r--r--icing/result/projection-tree_test.cc86
-rw-r--r--icing/result/result-adjustment-info.cc64
-rw-r--r--icing/result/result-adjustment-info.h53
-rw-r--r--icing/result/result-adjustment-info_test.cc198
-rw-r--r--icing/result/result-retriever-v2.cc122
-rw-r--r--icing/result/result-retriever-v2.h10
-rw-r--r--icing/result/result-retriever-v2_group-result-limiter_test.cc156
-rw-r--r--icing/result/result-retriever-v2_projection_test.cc813
-rw-r--r--icing/result/result-retriever-v2_snippet_test.cc840
-rw-r--r--icing/result/result-retriever-v2_test.cc352
-rw-r--r--icing/result/result-retriever.cc117
-rw-r--r--icing/result/result-retriever.h94
-rw-r--r--icing/result/result-retriever_test.cc1951
-rw-r--r--icing/result/result-state-manager.cc45
-rw-r--r--icing/result/result-state-manager.h36
-rw-r--r--icing/result/result-state-manager_test.cc567
-rw-r--r--icing/result/result-state-manager_thread-safety_test.cc60
-rw-r--r--icing/result/result-state-v2.cc41
-rw-r--r--icing/result/result-state-v2.h60
-rw-r--r--icing/result/result-state-v2_test.cc202
-rw-r--r--icing/result/snippet-retriever.cc173
-rw-r--r--icing/result/snippet-retriever_benchmark.cc333
-rw-r--r--icing/result/snippet-retriever_test.cc40
-rw-r--r--icing/schema-builder.h30
-rw-r--r--icing/schema/backup-schema-producer.cc164
-rw-r--r--icing/schema/backup-schema-producer.h55
-rw-r--r--icing/schema/backup-schema-producer_test.cc737
-rw-r--r--icing/schema/joinable-property-manager-builder_test.cc446
-rw-r--r--icing/schema/joinable-property-manager.cc203
-rw-r--r--icing/schema/joinable-property-manager.h160
-rw-r--r--icing/schema/joinable-property-manager_test.cc519
-rw-r--r--icing/schema/joinable-property.h132
-rw-r--r--icing/schema/property-util.cc137
-rw-r--r--icing/schema/property-util.h212
-rw-r--r--icing/schema/property-util_test.cc253
-rw-r--r--icing/schema/schema-property-iterator.cc198
-rw-r--r--icing/schema/schema-property-iterator.h222
-rw-r--r--icing/schema/schema-property-iterator_test.cc3905
-rw-r--r--icing/schema/schema-store.cc593
-rw-r--r--icing/schema/schema-store.h267
-rw-r--r--icing/schema/schema-store_test.cc2041
-rw-r--r--icing/schema/schema-type-manager.cc108
-rw-r--r--icing/schema/schema-type-manager.h79
-rw-r--r--icing/schema/schema-type-manager_test.cc356
-rw-r--r--icing/schema/schema-util.cc776
-rw-r--r--icing/schema/schema-util.h194
-rw-r--r--icing/schema/schema-util_test.cc4158
-rw-r--r--icing/schema/section-manager-builder_test.cc341
-rw-r--r--icing/schema/section-manager.cc324
-rw-r--r--icing/schema/section-manager.h106
-rw-r--r--icing/schema/section-manager_test.cc898
-rw-r--r--icing/schema/section.h9
-rw-r--r--icing/scoring/advanced_scoring/advanced-scorer.cc20
-rw-r--r--icing/scoring/advanced_scoring/advanced-scorer.h8
-rw-r--r--icing/scoring/advanced_scoring/advanced-scorer_fuzz_test.cc70
-rw-r--r--icing/scoring/advanced_scoring/advanced-scorer_test.cc590
-rw-r--r--icing/scoring/advanced_scoring/score-expression.cc291
-rw-r--r--icing/scoring/advanced_scoring/score-expression.h187
-rw-r--r--icing/scoring/advanced_scoring/score-expression_test.cc169
-rw-r--r--icing/scoring/advanced_scoring/scoring-visitor.cc30
-rw-r--r--icing/scoring/advanced_scoring/scoring-visitor.h27
-rw-r--r--icing/scoring/bm25f-calculator.cc21
-rw-r--r--icing/scoring/bm25f-calculator.h9
-rw-r--r--icing/scoring/score-and-rank_benchmark.cc86
-rw-r--r--icing/scoring/scorer-factory.cc50
-rw-r--r--icing/scoring/scorer-factory.h5
-rw-r--r--icing/scoring/scorer_test.cc163
-rw-r--r--icing/scoring/scoring-processor.cc7
-rw-r--r--icing/scoring/scoring-processor.h4
-rw-r--r--icing/scoring/scoring-processor_test.cc88
-rw-r--r--icing/scoring/section-weights_test.cc4
-rw-r--r--icing/store/document-log-creator.cc21
-rw-r--r--icing/store/document-log-creator.h6
-rw-r--r--icing/store/document-store.cc382
-rw-r--r--icing/store/document-store.h73
-rw-r--r--icing/store/document-store_benchmark.cc52
-rw-r--r--icing/store/document-store_test.cc1413
-rw-r--r--icing/store/key-mapper_benchmark.cc11
-rw-r--r--icing/store/key-mapper_test.cc116
-rw-r--r--icing/store/persistent-hash-map-key-mapper.h81
-rw-r--r--icing/store/persistent-hash-map-key-mapper_test.cc6
-rw-r--r--icing/store/suggestion-result-checker-impl.h124
-rw-r--r--icing/store/usage-store.h2
-rw-r--r--icing/store/usage-store_test.cc4
-rw-r--r--icing/testing/common-matchers.h58
-rw-r--r--icing/testing/numeric/normal-distribution-number-generator.h42
-rw-r--r--icing/testing/numeric/number-generator.h39
-rw-r--r--icing/testing/numeric/uniform-distribution-integer-generator.h41
-rw-r--r--icing/testing/random-string.h25
-rw-r--r--icing/text_classifier/lib3/utils/base/statusor.h25
-rw-r--r--icing/tokenization/icu/icu-language-segmenter-factory.cc2
-rw-r--r--icing/tokenization/icu/icu-language-segmenter.cc78
-rw-r--r--icing/tokenization/icu/icu-language-segmenter.h35
-rw-r--r--icing/tokenization/icu/icu-language-segmenter_test.cc44
-rw-r--r--icing/tokenization/raw-query-tokenizer.cc46
-rw-r--r--icing/tokenization/rfc822-tokenizer_test.cc85
-rw-r--r--icing/transform/icu/icu-normalizer.cc111
-rw-r--r--icing/transform/icu/icu-normalizer.h26
-rw-r--r--icing/transform/icu/icu-normalizer_test.cc13
-rw-r--r--icing/util/document-validator.cc18
-rw-r--r--icing/util/document-validator_test.cc149
-rw-r--r--icing/util/encode-util.cc50
-rw-r--r--icing/util/encode-util.h45
-rw-r--r--icing/util/encode-util_test.cc91
-rw-r--r--icing/util/snippet-helpers.cc61
-rw-r--r--icing/util/snippet-helpers.h4
-rw-r--r--icing/util/tokenized-document.cc7
-rw-r--r--icing/util/tokenized-document.h13
-rw-r--r--icing/util/tokenized-document_test.cc146
-rw-r--r--java/src/com/google/android/icing/IcingSearchEngine.java1
-rw-r--r--java/src/com/google/android/icing/IcingSearchEngineImpl.java1
-rw-r--r--lint-baseline.xml487
-rw-r--r--proto/icing/index/numeric/wildcard-property-storage.proto22
-rw-r--r--proto/icing/proto/initialize.proto71
-rw-r--r--proto/icing/proto/logging.proto45
-rw-r--r--proto/icing/proto/schema.proto36
-rw-r--r--proto/icing/proto/search.proto46
-rw-r--r--synced_AOSP_CL_number.txt2
298 files changed, 74852 insertions, 21343 deletions
diff --git a/Android.bp b/Android.bp
index 7982c4f..82b7b59 100644
--- a/Android.bp
+++ b/Android.bp
@@ -50,6 +50,8 @@ cc_defaults {
"-funsigned-char",
"-fvisibility=hidden",
+
+ "-Bsymbolic",
],
}
diff --git a/build.gradle b/build.gradle
index ca20eed..d0d1a39 100644
--- a/build.gradle
+++ b/build.gradle
@@ -14,65 +14,42 @@
* limitations under the License.
*/
-import androidx.build.SupportConfig
+import androidx.build.SdkHelperKt
plugins {
- id('com.android.library')
- id('com.google.protobuf')
+ id("AndroidXPlugin")
+ id("java-library")
+ id("com.google.protobuf")
}
-android {
- buildToolsVersion SupportConfig.buildToolsVersion(project)
- compileSdkVersion SupportConfig.COMPILE_SDK_VERSION
- defaultConfig {
- minSdkVersion SupportConfig.DEFAULT_MIN_SDK_VERSION
- targetSdkVersion SupportConfig.TARGET_SDK_VERSION
- testInstrumentationRunner SupportConfig.INSTRUMENTATION_RUNNER
+sourceSets {
+ main {
+ java.srcDir 'java/src/'
+ proto.srcDir 'proto/'
}
- compileOptions {
- sourceCompatibility = JavaVersion.VERSION_1_8
- targetCompatibility = JavaVersion.VERSION_1_8
- }
- sourceSets {
- main {
- java.srcDir 'java/src/'
- proto.srcDir 'proto/'
- }
- // TODO(b/161205849): Re-enable this test once icing nativeLib is no longer being built
- // inside appsearch:appsearch.
- //androidTest.java.srcDir 'java/tests/instrumentation/'
- }
- namespace "com.google.android.icing"
-}
-
-// This project has no device tests, skip building it
-androidComponents {
- beforeVariants(selector().withName("debug"), { variantBuilder ->
- variantBuilder.enableAndroidTest = false
- })
}
dependencies {
- api('androidx.annotation:annotation:1.1.0')
-
- implementation('com.google.protobuf:protobuf-javalite:3.10.0')
+ compileOnly("androidx.annotation:annotation:1.1.0")
+ compileOnly(SdkHelperKt.getSdkDependency(project))
+ implementation(libs.protobufLite)
+}
- androidTestImplementation(libs.testCore)
- androidTestImplementation(libs.testRules)
- androidTestImplementation(libs.truth)
- androidTestImplementation(libs.kotlinBom)
+afterEvaluate {
+ lint {
+ lintOptions {
+ // protobuf generates unannotated methods
+ disable("UnknownNullness")
+ }
+ }
}
protobuf {
protoc {
artifact = libs.protobufCompiler.get()
}
-
generateProtoTasks {
all().each { task ->
- project.tasks.named("extractReleaseAnnotations").configure {
- it.dependsOn(task)
- }
task.builtins {
java {
option 'lite'
@@ -82,26 +59,6 @@ protobuf {
}
}
-// Create export artifact for all variants (debug/release) for JarJaring
-android.libraryVariants.all { variant ->
- def variantName = variant.name
- def suffix = variantName.capitalize()
- def exportJarTask = tasks.register("exportJar${suffix}", Jar) {
- archiveBaseName.set("icing-${variantName}")
-
- // The proto-lite dependency includes .proto files, which are not used by icing. When apps
- // depend on appsearch as well as proto-lite directly, these files conflict since jarjar
- // only renames the java classes. Remove them here since they are unused.
- // Expand the jar and remove any .proto files.
- from(zipTree(configurations.detachedConfiguration(
- dependencies.create(libs.protobufLite.get())).getSingleFile())) {
- exclude("**/*.proto")
- }
-
- from files(variant.javaCompileProvider.get().destinationDir)
- dependsOn variant.javaCompileProvider.get()
- }
-
- def exportConfiguration = configurations.register("export${suffix}")
- artifacts.add(exportConfiguration.name, exportJarTask.flatMap { it.archiveFile })
+androidx {
+ mavenVersion = LibraryVersions.APPSEARCH
}
diff --git a/icing/absl_ports/status_test.cc b/icing/absl_ports/status_test.cc
new file mode 100644
index 0000000..1909302
--- /dev/null
+++ b/icing/absl_ports/status_test.cc
@@ -0,0 +1,53 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/proto/document.pb.h"
+
+namespace icing {
+namespace lib {
+
+TEST(StatusTest, StatusOrOfProtoConstructorTest) {
+ libtextclassifier3::StatusOr<DocumentProto> status_or =
+ absl_ports::InvalidArgumentError("test");
+ libtextclassifier3::StatusOr<DocumentProto> new_status_or = status_or;
+}
+
+TEST(StatusTest, StatusOrOfProtoMoveConstructorTest) {
+ libtextclassifier3::StatusOr<DocumentProto> status_or =
+ absl_ports::InvalidArgumentError("test");
+ libtextclassifier3::StatusOr<DocumentProto> new_status_or =
+ std::move(status_or);
+}
+
+TEST(StatusTest, StatusOrOfProtoAssignmentTest) {
+ libtextclassifier3::StatusOr<DocumentProto> status_or =
+ absl_ports::InvalidArgumentError("test");
+ libtextclassifier3::StatusOr<DocumentProto> new_status_or;
+ new_status_or = status_or;
+}
+
+TEST(StatusTest, StatusOrOfProtoMoveAssignmentTest) {
+ libtextclassifier3::StatusOr<DocumentProto> status_or =
+ absl_ports::InvalidArgumentError("test");
+ libtextclassifier3::StatusOr<DocumentProto> new_status_or;
+ new_status_or = std::move(status_or);
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/absl_ports/str_join.h b/icing/absl_ports/str_join.h
index f66a977..5277bca 100644
--- a/icing/absl_ports/str_join.h
+++ b/icing/absl_ports/str_join.h
@@ -93,6 +93,11 @@ std::string StrJoin(Iterator first, Iterator last, std::string_view sep,
return result;
}
+template <typename Iterator>
+std::string StrJoin(Iterator first, Iterator last, std::string_view sep) {
+ return absl_ports::StrJoin(first, last, sep, DefaultFormatter());
+}
+
template <typename Container, typename Formatter>
std::string StrJoin(const Container& container, std::string_view sep,
Formatter&& formatter) {
@@ -112,4 +117,4 @@ std::vector<std::string_view> StrSplit(std::string_view text,
} // namespace lib
} // namespace icing
-#endif // ICING_ABSL_PORTS_STR_JOIN_H_
+#endif // ICING_ABSL_PORTS_STR_JOIN_H_ \ No newline at end of file
diff --git a/icing/document-builder.h b/icing/document-builder.h
index ba68ec5..44500f9 100644
--- a/icing/document-builder.h
+++ b/icing/document-builder.h
@@ -78,11 +78,25 @@ class DocumentBuilder {
return AddStringProperty(std::move(property_name), {string_values...});
}
+ // Takes a property name and iterator of int64_t values.
+ template <typename InputIt>
+ DocumentBuilder& AddInt64Property(std::string property_name, InputIt first,
+ InputIt last) {
+ auto property = document_.add_properties();
+ property->set_name(std::move(property_name));
+ for (InputIt it = first; it != last; ++it) {
+ property->mutable_int64_values()->Add(*it);
+ }
+ return *this;
+ }
+
// Takes a property name and any number of int64_t values.
template <typename... V>
DocumentBuilder& AddInt64Property(std::string property_name,
V... int64_values) {
- return AddInt64Property(std::move(property_name), {int64_values...});
+ std::initializer_list<int64_t> int64_values_list = {int64_values...};
+ return AddInt64Property(std::move(property_name), int64_values_list.begin(),
+ int64_values_list.end());
}
// Takes a property name and any number of double values.
@@ -128,16 +142,6 @@ class DocumentBuilder {
return *this;
}
- DocumentBuilder& AddInt64Property(
- std::string property_name, std::initializer_list<int64_t> int64_values) {
- auto property = document_.add_properties();
- property->set_name(std::move(property_name));
- for (int64_t int64_value : int64_values) {
- property->mutable_int64_values()->Add(int64_value);
- }
- return *this;
- }
-
DocumentBuilder& AddDoubleProperty(
std::string property_name, std::initializer_list<double> double_values) {
auto property = document_.add_properties();
diff --git a/icing/file/destructible-directory_test.cc b/icing/file/destructible-directory_test.cc
index c62db3b..dae74ff 100644
--- a/icing/file/destructible-directory_test.cc
+++ b/icing/file/destructible-directory_test.cc
@@ -26,7 +26,7 @@ namespace {
using ::testing::Eq;
-TEST(DestructibleFileTest, DeletesDirectoryProperly) {
+TEST(DestructibleDirectoryTest, DeletesDirectoryProperly) {
Filesystem filesystem;
std::string dir_path = GetTestTempDir() + "/dir1";
std::string file_path = dir_path + "/file1";
@@ -52,7 +52,7 @@ TEST(DestructibleFileTest, DeletesDirectoryProperly) {
EXPECT_FALSE(filesystem.DirectoryExists(dir_path.c_str()));
}
-TEST(DestructibleFileTest, MoveAssignDeletesFileProperly) {
+TEST(DestructibleDirectoryTest, MoveAssignDeletesDirectoryProperly) {
Filesystem filesystem;
std::string filepath1 = GetTestTempDir() + "/dir1";
std::string filepath2 = GetTestTempDir() + "/dir2";
@@ -80,7 +80,7 @@ TEST(DestructibleFileTest, MoveAssignDeletesFileProperly) {
EXPECT_TRUE(filesystem.DirectoryExists(filepath2.c_str()));
}
-TEST(DestructibleFileTest, MoveConstructionDeletesFileProperly) {
+TEST(DestructibleDirectoryTest, MoveConstructionDeletesDirectoryProperly) {
Filesystem filesystem;
std::string filepath1 = GetTestTempDir() + "/dir1";
diff --git a/icing/file/file-backed-proto-log.h b/icing/file/file-backed-proto-log.h
index 78236ba..095f832 100644
--- a/icing/file/file-backed-proto-log.h
+++ b/icing/file/file-backed-proto-log.h
@@ -40,7 +40,6 @@
#include <string_view>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/file/filesystem.h"
@@ -53,6 +52,7 @@
#include "icing/util/data-loss.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
+#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
namespace icing {
namespace lib {
@@ -575,8 +575,8 @@ libtextclassifier3::StatusOr<ProtoT> FileBackedProtoLog<ProtoT>::ReadProto(
return absl_ports::NotFoundError("The proto data has been erased.");
}
- google::protobuf::io::ArrayInputStream proto_stream(
- mmapped_file.mutable_region(), stored_size);
+ google::protobuf::io::ArrayInputStream proto_stream(mmapped_file.mutable_region(),
+ stored_size);
// Deserialize proto
ProtoT proto;
diff --git a/icing/file/file-backed-proto.h b/icing/file/file-backed-proto.h
index d7d9bad..8c5743b 100644
--- a/icing/file/file-backed-proto.h
+++ b/icing/file/file-backed-proto.h
@@ -22,6 +22,7 @@
#ifndef ICING_FILE_FILE_BACKED_PROTO_H_
#define ICING_FILE_FILE_BACKED_PROTO_H_
+#include <algorithm>
#include <cstdint>
#include <memory>
#include <string>
@@ -37,6 +38,7 @@
#include "icing/legacy/core/icing-string-util.h"
#include "icing/util/crc32.h"
#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
@@ -66,14 +68,21 @@ class FileBackedProto {
// Reset the internal file_path for the file backed proto.
// Example use:
// auto file_backed_proto1 = *FileBackedProto<Proto>::Create(...);
- // auto file_backed_proto2 = *FileBackedProto<Proto>::Create(...);
+ // auto file_backed_google::protobuf = *FileBackedProto<Proto>::Create(...);
// filesystem.SwapFiles(file1, file2);
// file_backed_proto1.SetSwappedFilepath(file2);
- // file_backed_proto2.SetSwappedFilepath(file1);
+ // file_backed_google::protobuf.SetSwappedFilepath(file1);
void SetSwappedFilepath(std::string_view swapped_to_file_path) {
file_path_ = swapped_to_file_path;
}
+ // Computes the checksum of the proto stored in this file and returns it.
+ // RETURNS:
+ // - the checksum of the proto or 0 if the file is empty/non-existent
+ // - INTERNAL_ERROR if an IO error or a corruption was encountered.
+ libtextclassifier3::StatusOr<Crc32> ComputeChecksum() const
+ ICING_LOCKS_EXCLUDED(mutex_);
+
// Returns a reference to the proto read from the file. It
// internally caches the read proto so that future calls are fast.
//
@@ -103,6 +112,11 @@ class FileBackedProto {
FileBackedProto& operator=(const FileBackedProto&) = delete;
private:
+ // Internal method to handle reading the proto from disk.
+ // Requires the caller to hold an exclusive lock on mutex_.
+ libtextclassifier3::StatusOr<const ProtoT*> ReadInternal() const
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
// Upper bound of file-size that is supported.
static constexpr int32_t kMaxFileSize = 1 * 1024 * 1024; // 1 MiB.
@@ -113,6 +127,8 @@ class FileBackedProto {
std::string file_path_;
mutable std::unique_ptr<ProtoT> cached_proto_ ICING_GUARDED_BY(mutex_);
+
+ mutable std::unique_ptr<Header> cached_header_ ICING_GUARDED_BY(mutex_);
};
template <typename ProtoT>
@@ -124,12 +140,35 @@ FileBackedProto<ProtoT>::FileBackedProto(const Filesystem& filesystem,
: filesystem_(&filesystem), file_path_(file_path) {}
template <typename ProtoT>
+libtextclassifier3::StatusOr<Crc32> FileBackedProto<ProtoT>::ComputeChecksum()
+ const {
+ absl_ports::unique_lock l(&mutex_);
+ if (cached_proto_ == nullptr) {
+ auto read_status = ReadInternal();
+ if (!read_status.ok()) {
+ if (absl_ports::IsNotFound(read_status.status())) {
+ // File doesn't exist. So simply return 0.
+ return Crc32();
+ }
+ return read_status.status();
+ }
+ }
+ return Crc32(cached_header_->proto_checksum);
+}
+
+template <typename ProtoT>
libtextclassifier3::StatusOr<const ProtoT*> FileBackedProto<ProtoT>::Read()
const {
ICING_VLOG(1) << "Reading proto from file: " << file_path_;
absl_ports::unique_lock l(&mutex_);
+ return ReadInternal();
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<const ProtoT*>
+FileBackedProto<ProtoT>::ReadInternal() const {
// Return cached proto if we've already read from disk.
if (cached_proto_ != nullptr) {
ICING_VLOG(1) << "Reusing cached proto for file: " << file_path_;
@@ -157,8 +196,7 @@ libtextclassifier3::StatusOr<const ProtoT*> FileBackedProto<ProtoT>::Read()
<< " of size: " << file_size;
Header header;
- if (!filesystem_->PRead(fd.get(), &header, sizeof(Header),
- /*offset=*/0)) {
+ if (!filesystem_->PRead(fd.get(), &header, sizeof(Header), /*offset=*/0)) {
return absl_ports::InternalError(
absl_ports::StrCat("Unable to read header of: ", file_path_));
}
@@ -193,6 +231,7 @@ libtextclassifier3::StatusOr<const ProtoT*> FileBackedProto<ProtoT>::Read()
ICING_VLOG(1) << "Successfully read proto from file: " << file_path_;
cached_proto_ = std::move(proto);
+ cached_header_ = std::make_unique<Header>(std::move(header));
return cached_proto_.get();
}
@@ -253,6 +292,7 @@ libtextclassifier3::Status FileBackedProto<ProtoT>::Write(
ICING_VLOG(1) << "Successfully wrote proto to file: " << file_path_;
cached_proto_ = std::move(new_proto);
+ cached_header_ = std::make_unique<Header>(std::move(header));
return libtextclassifier3::Status::OK;
}
diff --git a/icing/file/file-backed-vector.h b/icing/file/file-backed-vector.h
index 1d99e24..7408e8b 100644
--- a/icing/file/file-backed-vector.h
+++ b/icing/file/file-backed-vector.h
@@ -57,6 +57,7 @@
#define ICING_FILE_FILE_BACKED_VECTOR_H_
#include <sys/mman.h>
+#include <unistd.h>
#include <algorithm>
#include <cinttypes>
@@ -166,6 +167,13 @@ class FileBackedVector {
static constexpr int32_t kElementTypeSize = static_cast<int32_t>(sizeof(T));
static_assert(sizeof(T) <= (1 << 10));
+ // Absolute max # of elements allowed. Since we are using int32_t to store
+ // num_elements, max value is 2^31-1. Still the actual max # of elements are
+ // determined by max_file_size, kMaxFileSize, kElementTypeSize, and
+ // Header::kHeaderSize.
+ static constexpr int32_t kMaxNumElements =
+ std::numeric_limits<int32_t>::max();
+
// Creates a new FileBackedVector to read/write content to.
//
// filesystem: Object to make system level calls
@@ -350,6 +358,14 @@ class FileBackedVector {
// OUT_OF_RANGE_ERROR if len < 0 or len >= num_elements()
libtextclassifier3::Status TruncateTo(int32_t new_num_elements);
+ // Sorts the vector within range [begin_idx, end_idx).
+ // It handles SetDirty properly for the file-backed-vector.
+ //
+ // Returns:
+ // OUT_OF_RANGE_ERROR if (0 <= begin_idx < end_idx <= num_elements()) does
+ // not hold
+ libtextclassifier3::Status Sort(int32_t begin_idx, int32_t end_idx);
+
// Mark idx as changed iff idx < changes_end_, so later ComputeChecksum() can
// update checksum by the cached changes without going over [0, changes_end_).
//
@@ -467,13 +483,6 @@ class FileBackedVector {
// Grow file by at least this many elements if array is growable.
static constexpr int64_t kGrowElements = 1u << 14; // 16K
- // Absolute max # of elements allowed. Since we are using int32_t to store
- // num_elements, max value is 2^31-1. Still the actual max # of elements are
- // determined by max_file_size, kMaxFileSize, kElementTypeSize, and
- // Header::kHeaderSize.
- static constexpr int32_t kMaxNumElements =
- std::numeric_limits<int32_t>::max();
-
// Absolute max index allowed.
static constexpr int32_t kMaxIndex = kMaxNumElements - 1;
@@ -532,13 +541,13 @@ template <typename T>
constexpr int32_t FileBackedVector<T>::kElementTypeSize;
template <typename T>
-constexpr int32_t FileBackedVector<T>::kPartialCrcLimitDiv;
+constexpr int32_t FileBackedVector<T>::kMaxNumElements;
template <typename T>
-constexpr int64_t FileBackedVector<T>::kGrowElements;
+constexpr int32_t FileBackedVector<T>::kPartialCrcLimitDiv;
template <typename T>
-constexpr int32_t FileBackedVector<T>::kMaxNumElements;
+constexpr int64_t FileBackedVector<T>::kGrowElements;
template <typename T>
constexpr int32_t FileBackedVector<T>::kMaxIndex;
@@ -951,6 +960,21 @@ libtextclassifier3::Status FileBackedVector<T>::TruncateTo(
}
template <typename T>
+libtextclassifier3::Status FileBackedVector<T>::Sort(int32_t begin_idx,
+ int32_t end_idx) {
+ if (begin_idx < 0 || begin_idx >= end_idx ||
+ end_idx > header_->num_elements) {
+ return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+ "Invalid sort index, %d, %d", begin_idx, end_idx));
+ }
+ for (int32_t i = begin_idx; i < end_idx; ++i) {
+ SetDirty(i);
+ }
+ std::sort(mutable_array() + begin_idx, mutable_array() + end_idx);
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename T>
void FileBackedVector<T>::SetDirty(int32_t idx) {
// Cache original value to update crcs.
if (idx >= 0 && idx < changes_end_) {
diff --git a/icing/file/file-backed-vector_test.cc b/icing/file/file-backed-vector_test.cc
index c526dec..524bbc1 100644
--- a/icing/file/file-backed-vector_test.cc
+++ b/icing/file/file-backed-vector_test.cc
@@ -1019,6 +1019,105 @@ TEST_F(FileBackedVectorTest, TruncateAndReReadFile) {
}
}
+TEST_F(FileBackedVectorTest, Sort) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> vector,
+ FileBackedVector<int>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK(vector->Set(0, 5));
+ ICING_ASSERT_OK(vector->Set(1, 4));
+ ICING_ASSERT_OK(vector->Set(2, 2));
+ ICING_ASSERT_OK(vector->Set(3, 3));
+ ICING_ASSERT_OK(vector->Set(4, 1));
+
+ // Sort vector range [1, 4) (excluding 4).
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/1, /*end_idx=*/4), IsOk());
+ // Verify sorted range should be sorted and others should remain unchanged.
+ EXPECT_THAT(vector->Get(0), IsOkAndHolds(Pointee(5)));
+ EXPECT_THAT(vector->Get(1), IsOkAndHolds(Pointee(2)));
+ EXPECT_THAT(vector->Get(2), IsOkAndHolds(Pointee(3)));
+ EXPECT_THAT(vector->Get(3), IsOkAndHolds(Pointee(4)));
+ EXPECT_THAT(vector->Get(4), IsOkAndHolds(Pointee(1)));
+
+ // Sort again by end_idx = num_elements().
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/0, /*end_idx=*/vector->num_elements()),
+ IsOk());
+ EXPECT_THAT(vector->Get(0), IsOkAndHolds(Pointee(1)));
+ EXPECT_THAT(vector->Get(1), IsOkAndHolds(Pointee(2)));
+ EXPECT_THAT(vector->Get(2), IsOkAndHolds(Pointee(3)));
+ EXPECT_THAT(vector->Get(3), IsOkAndHolds(Pointee(4)));
+ EXPECT_THAT(vector->Get(4), IsOkAndHolds(Pointee(5)));
+}
+
+TEST_F(FileBackedVectorTest, SortByInvalidIndexShouldReturnOutOfRangeError) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> vector,
+ FileBackedVector<int>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK(vector->Set(0, 5));
+ ICING_ASSERT_OK(vector->Set(1, 4));
+ ICING_ASSERT_OK(vector->Set(2, 2));
+ ICING_ASSERT_OK(vector->Set(3, 3));
+ ICING_ASSERT_OK(vector->Set(4, 1));
+
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/-1, /*end_idx=*/4),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/0, /*end_idx=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/3, /*end_idx=*/3),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/3, /*end_idx=*/1),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/5, /*end_idx=*/5),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/3, /*end_idx=*/6),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST_F(FileBackedVectorTest, SortShouldSetDirtyCorrectly) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> vector,
+ FileBackedVector<int>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK(vector->Set(0, 5));
+ ICING_ASSERT_OK(vector->Set(1, 4));
+ ICING_ASSERT_OK(vector->Set(2, 2));
+ ICING_ASSERT_OK(vector->Set(3, 3));
+ ICING_ASSERT_OK(vector->Set(4, 1));
+ } // Destroying the vector should trigger a checksum of the 5 elements
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> vector,
+ FileBackedVector<int>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ // Sort vector range [1, 4) (excluding 4).
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/1, /*end_idx=*/4), IsOk());
+ } // Destroying the vector should update the checksum
+
+ // Creating again should check that the checksum after sorting matches what
+ // was previously saved. This tests the correctness of SetDirty() for sorted
+ // elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> vector,
+ FileBackedVector<int>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ // Verify sorted range should be sorted and others should remain unchanged.
+ EXPECT_THAT(vector->Get(0), IsOkAndHolds(Pointee(5)));
+ EXPECT_THAT(vector->Get(1), IsOkAndHolds(Pointee(2)));
+ EXPECT_THAT(vector->Get(2), IsOkAndHolds(Pointee(3)));
+ EXPECT_THAT(vector->Get(3), IsOkAndHolds(Pointee(4)));
+ EXPECT_THAT(vector->Get(4), IsOkAndHolds(Pointee(1)));
+}
+
TEST_F(FileBackedVectorTest, SetDirty) {
// 1. Create a vector and add some data.
ICING_ASSERT_OK_AND_ASSIGN(
diff --git a/icing/file/filesystem.cc b/icing/file/filesystem.cc
index 10b77db..cd905e7 100644
--- a/icing/file/filesystem.cc
+++ b/icing/file/filesystem.cc
@@ -72,7 +72,7 @@ void LogOpenFileDescriptors() {
<< ") too large.";
fd_lim = kMaxFileDescriptorsToStat;
}
- ICING_LOG(ERROR) << "Listing up to " << fd_lim << " file descriptors.";
+ ICING_LOG(INFO) << "Listing up to " << fd_lim << " file descriptors.";
// Verify that /proc/self/fd is a directory. If not, procfs is not mounted or
// inaccessible for some other reason. In that case, there's no point trying
@@ -94,12 +94,12 @@ void LogOpenFileDescriptors() {
if (len >= 0) {
// Zero-terminate the buffer, because readlink() won't.
target[len < target_size ? len : target_size - 1] = '\0';
- ICING_LOG(ERROR) << "fd " << fd << " -> \"" << target << "\"";
+ ICING_LOG(INFO) << "fd " << fd << " -> \"" << target << "\"";
} else if (errno != ENOENT) {
ICING_LOG(ERROR) << "fd " << fd << " -> ? (errno=" << errno << ")";
}
}
- ICING_LOG(ERROR) << "File descriptor list complete.";
+ ICING_LOG(INFO) << "File descriptor list complete.";
}
// Logs an error formatted as: desc1 + file_name + desc2 + strerror(errnum).
@@ -108,7 +108,11 @@ void LogOpenFileDescriptors() {
// file descriptors (see LogOpenFileDescriptors() above).
void LogOpenError(const char* desc1, const char* file_name, const char* desc2,
int errnum) {
- ICING_LOG(ERROR) << desc1 << file_name << desc2 << strerror(errnum);
+ if (errnum == ENOENT) {
+ ICING_VLOG(1) << desc1 << file_name << desc2 << strerror(errnum);
+ } else {
+ ICING_LOG(ERROR) << desc1 << file_name << desc2 << strerror(errnum);
+ }
if (errnum == EMFILE) {
LogOpenFileDescriptors();
}
@@ -127,6 +131,9 @@ bool ListDirectoryInternal(const char* dir_name,
return false;
}
+ // According to linux man page
+ // (https://man7.org/linux/man-pages/man3/readdir.3.html#RETURN_VALUE), dirent
+ // may be statically allocated, so don't free it.
dirent* p;
// readdir's implementation seems to be thread safe.
while ((p = readdir(dir)) != nullptr) {
@@ -358,7 +365,11 @@ int Filesystem::OpenForRead(const char* file_name) const {
int64_t Filesystem::GetFileSize(int fd) const {
struct stat st;
if (fstat(fd, &st) < 0) {
- ICING_LOG(ERROR) << "Unable to stat file: " << strerror(errno);
+ if (errno == ENOENT) {
+ ICING_VLOG(1) << "Unable to stat file: " << strerror(errno);
+ } else {
+ ICING_LOG(WARNING) << "Unable to stat file: " << strerror(errno);
+ }
return kBadFileSize;
}
return st.st_size;
diff --git a/icing/file/persistent-hash-map.cc b/icing/file/persistent-hash-map.cc
index 0af5e2f..558c242 100644
--- a/icing/file/persistent-hash-map.cc
+++ b/icing/file/persistent-hash-map.cc
@@ -27,6 +27,7 @@
#include "icing/absl_ports/str_cat.h"
#include "icing/file/file-backed-vector.h"
#include "icing/file/memory-mapped-file.h"
+#include "icing/file/persistent-storage.h"
#include "icing/util/crc32.h"
#include "icing/util/status-macros.h"
@@ -58,125 +59,26 @@ libtextclassifier3::StatusOr<int32_t> HashKeyToBucketIndex(
return static_cast<int32_t>(std::hash<std::string_view>()(key) % num_buckets);
}
-// Helper function to PWrite crcs and info to metadata_file_path. Note that
-// metadata_file_path will be the normal or temporary (for branching use when
-// rehashing) metadata file path.
-libtextclassifier3::Status WriteMetadata(const Filesystem& filesystem,
- const char* metadata_file_path,
- const PersistentHashMap::Crcs* crcs,
- const PersistentHashMap::Info* info) {
- ScopedFd sfd(filesystem.OpenForWrite(metadata_file_path));
- if (!sfd.is_valid()) {
- return absl_ports::InternalError("Failed to create metadata file");
- }
-
- // Write crcs and info. File layout: <Crcs><Info>
- if (!filesystem.PWrite(sfd.get(), PersistentHashMap::Crcs::kFileOffset, crcs,
- sizeof(PersistentHashMap::Crcs))) {
- return absl_ports::InternalError("Failed to write crcs into metadata file");
- }
- // Note that PWrite won't change the file offset, so we need to specify
- // the correct offset when writing Info.
- if (!filesystem.PWrite(sfd.get(), PersistentHashMap::Info::kFileOffset, info,
- sizeof(PersistentHashMap::Info))) {
- return absl_ports::InternalError("Failed to write info into metadata file");
- }
-
- return libtextclassifier3::Status::OK;
-}
-
-// Helper function to update checksums from info and storages to a Crcs
-// instance. Note that storages will be the normal instances used by
-// PersistentHashMap, or the temporary instances (for branching use when
-// rehashing).
-libtextclassifier3::Status UpdateChecksums(
- PersistentHashMap::Crcs* crcs, PersistentHashMap::Info* info,
- FileBackedVector<PersistentHashMap::Bucket>* bucket_storage,
- FileBackedVector<PersistentHashMap::Entry>* entry_storage,
- FileBackedVector<char>* kv_storage) {
- // Compute crcs
- ICING_ASSIGN_OR_RETURN(Crc32 bucket_storage_crc,
- bucket_storage->ComputeChecksum());
- ICING_ASSIGN_OR_RETURN(Crc32 entry_storage_crc,
- entry_storage->ComputeChecksum());
- ICING_ASSIGN_OR_RETURN(Crc32 kv_storage_crc, kv_storage->ComputeChecksum());
-
- crcs->component_crcs.info_crc = info->ComputeChecksum().Get();
- crcs->component_crcs.bucket_storage_crc = bucket_storage_crc.Get();
- crcs->component_crcs.entry_storage_crc = entry_storage_crc.Get();
- crcs->component_crcs.kv_storage_crc = kv_storage_crc.Get();
- crcs->all_crc = crcs->component_crcs.ComputeChecksum().Get();
-
- return libtextclassifier3::Status::OK;
-}
-
-// Helper function to validate checksums.
-libtextclassifier3::Status ValidateChecksums(
- const PersistentHashMap::Crcs* crcs, const PersistentHashMap::Info* info,
- FileBackedVector<PersistentHashMap::Bucket>* bucket_storage,
- FileBackedVector<PersistentHashMap::Entry>* entry_storage,
- FileBackedVector<char>* kv_storage) {
- if (crcs->all_crc != crcs->component_crcs.ComputeChecksum().Get()) {
- return absl_ports::FailedPreconditionError(
- "Invalid all crc for PersistentHashMap");
- }
-
- if (crcs->component_crcs.info_crc != info->ComputeChecksum().Get()) {
- return absl_ports::FailedPreconditionError(
- "Invalid info crc for PersistentHashMap");
- }
-
- ICING_ASSIGN_OR_RETURN(Crc32 bucket_storage_crc,
- bucket_storage->ComputeChecksum());
- if (crcs->component_crcs.bucket_storage_crc != bucket_storage_crc.Get()) {
- return absl_ports::FailedPreconditionError(
- "Mismatch crc with PersistentHashMap bucket storage");
- }
-
- ICING_ASSIGN_OR_RETURN(Crc32 entry_storage_crc,
- entry_storage->ComputeChecksum());
- if (crcs->component_crcs.entry_storage_crc != entry_storage_crc.Get()) {
- return absl_ports::FailedPreconditionError(
- "Mismatch crc with PersistentHashMap entry storage");
- }
-
- ICING_ASSIGN_OR_RETURN(Crc32 kv_storage_crc, kv_storage->ComputeChecksum());
- if (crcs->component_crcs.kv_storage_crc != kv_storage_crc.Get()) {
- return absl_ports::FailedPreconditionError(
- "Mismatch crc with PersistentHashMap key value storage");
- }
-
- return libtextclassifier3::Status::OK;
-}
-
-// Since metadata/bucket/entry storages should be branched when rehashing, we
-// have to store them together under the same sub directory
-// ("<base_dir>/<sub_dir>"). On the other hand, key-value storage won't be
-// branched and it will be stored under <base_dir>.
-//
// The following 4 methods are helper functions to get the correct path of
-// metadata/bucket/entry/key-value storages, according to the given base
-// directory and sub directory.
-std::string GetMetadataFilePath(std::string_view base_dir,
- std::string_view sub_dir) {
- return absl_ports::StrCat(base_dir, "/", sub_dir, "/",
- PersistentHashMap::kFilePrefix, ".m");
+// metadata/bucket/entry/key-value storages, according to the given working
+// directory path.
+std::string GetMetadataFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", PersistentHashMap::kFilePrefix,
+ ".m");
}
-std::string GetBucketStorageFilePath(std::string_view base_dir,
- std::string_view sub_dir) {
- return absl_ports::StrCat(base_dir, "/", sub_dir, "/",
- PersistentHashMap::kFilePrefix, ".b");
+std::string GetBucketStorageFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", PersistentHashMap::kFilePrefix,
+ ".b");
}
-std::string GetEntryStorageFilePath(std::string_view base_dir,
- std::string_view sub_dir) {
- return absl_ports::StrCat(base_dir, "/", sub_dir, "/",
- PersistentHashMap::kFilePrefix, ".e");
+std::string GetEntryStorageFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", PersistentHashMap::kFilePrefix,
+ ".e");
}
-std::string GetKeyValueStorageFilePath(std::string_view base_dir) {
- return absl_ports::StrCat(base_dir, "/", PersistentHashMap::kFilePrefix,
+std::string GetKeyValueStorageFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", PersistentHashMap::kFilePrefix,
".k");
}
@@ -234,34 +136,40 @@ bool PersistentHashMap::Options::IsValid() const {
/* static */ libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
PersistentHashMap::Create(const Filesystem& filesystem,
- std::string_view base_dir, const Options& options) {
+ std::string working_path, Options options) {
if (!options.IsValid()) {
return absl_ports::InvalidArgumentError(
"Invalid PersistentHashMap options");
}
- if (!filesystem.FileExists(
- GetMetadataFilePath(base_dir, kSubDirectory).c_str()) ||
+ if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str()) ||
+ !filesystem.FileExists(GetBucketStorageFilePath(working_path).c_str()) ||
+ !filesystem.FileExists(GetEntryStorageFilePath(working_path).c_str()) ||
!filesystem.FileExists(
- GetBucketStorageFilePath(base_dir, kSubDirectory).c_str()) ||
- !filesystem.FileExists(
- GetEntryStorageFilePath(base_dir, kSubDirectory).c_str()) ||
- !filesystem.FileExists(GetKeyValueStorageFilePath(base_dir).c_str())) {
- // TODO: erase all files if missing any.
- return InitializeNewFiles(filesystem, base_dir, options);
+ GetKeyValueStorageFilePath(working_path).c_str())) {
+ // Discard working_path if any of them is missing, and reinitialize.
+ if (filesystem.DirectoryExists(working_path.c_str())) {
+ ICING_RETURN_IF_ERROR(Discard(filesystem, working_path));
+ }
+ return InitializeNewFiles(filesystem, std::move(working_path),
+ std::move(options));
}
- return InitializeExistingFiles(filesystem, base_dir, options);
+ return InitializeExistingFiles(filesystem, std::move(working_path),
+ std::move(options));
}
PersistentHashMap::~PersistentHashMap() {
if (!PersistToDisk().ok()) {
ICING_LOG(WARNING)
- << "Failed to persist hash map to disk while destructing " << base_dir_;
+ << "Failed to persist hash map to disk while destructing "
+ << working_path_;
}
}
libtextclassifier3::Status PersistentHashMap::Put(std::string_view key,
const void* value) {
+ SetDirty();
+
ICING_RETURN_IF_ERROR(ValidateKey(key));
ICING_ASSIGN_OR_RETURN(
int32_t bucket_idx,
@@ -278,7 +186,7 @@ libtextclassifier3::Status PersistentHashMap::Put(std::string_view key,
ICING_ASSIGN_OR_RETURN(const Entry* entry,
entry_storage_->Get(idx_pair.target_entry_index));
- int32_t kv_len = key.length() + 1 + info()->value_type_size;
+ int32_t kv_len = key.length() + 1 + info().value_type_size;
int32_t value_offset = key.length() + 1;
ICING_ASSIGN_OR_RETURN(
typename FileBackedVector<char>::MutableArrayView mutable_kv_arr,
@@ -286,7 +194,7 @@ libtextclassifier3::Status PersistentHashMap::Put(std::string_view key,
// It is the same key and value_size is fixed, so we can directly overwrite
// serialized value.
mutable_kv_arr.SetArray(value_offset, reinterpret_cast<const char*>(value),
- info()->value_type_size);
+ info().value_type_size);
return libtextclassifier3::Status::OK;
}
@@ -302,6 +210,7 @@ libtextclassifier3::Status PersistentHashMap::GetOrPut(std::string_view key,
FindEntryIndexByKey(bucket_idx, key));
if (idx_pair.target_entry_index == Entry::kInvalidIndex) {
// If not found, then insert new key value pair.
+ SetDirty();
return Insert(bucket_idx, key, next_value);
}
@@ -319,14 +228,16 @@ libtextclassifier3::Status PersistentHashMap::Get(std::string_view key,
ICING_ASSIGN_OR_RETURN(EntryIndexPair idx_pair,
FindEntryIndexByKey(bucket_idx, key));
if (idx_pair.target_entry_index == Entry::kInvalidIndex) {
- return absl_ports::NotFoundError(
- absl_ports::StrCat("Key not found in PersistentHashMap ", base_dir_));
+ return absl_ports::NotFoundError(absl_ports::StrCat(
+ "Key not found in PersistentHashMap ", working_path_));
}
return CopyEntryValue(idx_pair.target_entry_index, value);
}
libtextclassifier3::Status PersistentHashMap::Delete(std::string_view key) {
+ SetDirty();
+
ICING_RETURN_IF_ERROR(ValidateKey(key));
ICING_ASSIGN_OR_RETURN(
int32_t bucket_idx,
@@ -335,8 +246,8 @@ libtextclassifier3::Status PersistentHashMap::Delete(std::string_view key) {
ICING_ASSIGN_OR_RETURN(EntryIndexPair idx_pair,
FindEntryIndexByKey(bucket_idx, key));
if (idx_pair.target_entry_index == Entry::kInvalidIndex) {
- return absl_ports::NotFoundError(
- absl_ports::StrCat("Key not found in PersistentHashMap ", base_dir_));
+ return absl_ports::NotFoundError(absl_ports::StrCat(
+ "Key not found in PersistentHashMap ", working_path_));
}
ICING_ASSIGN_OR_RETURN(
@@ -375,7 +286,7 @@ libtextclassifier3::Status PersistentHashMap::Delete(std::string_view key) {
// Zero out the key value bytes. It is necessary for iterator to iterate
// through kv_storage and handle deleted keys properly.
- int32_t kv_len = key.length() + 1 + info()->value_type_size;
+ int32_t kv_len = key.length() + 1 + info().value_type_size;
ICING_RETURN_IF_ERROR(kv_storage_->Set(
mutable_target_entry.Get().key_value_index(), kv_len, '\0'));
@@ -383,23 +294,7 @@ libtextclassifier3::Status PersistentHashMap::Delete(std::string_view key) {
mutable_target_entry.Get().set_key_value_index(kInvalidKVIndex);
mutable_target_entry.Get().set_next_entry_index(Entry::kInvalidIndex);
- ++(info()->num_deleted_entries);
-
- return libtextclassifier3::Status::OK;
-}
-
-libtextclassifier3::Status PersistentHashMap::PersistToDisk() {
- ICING_RETURN_IF_ERROR(bucket_storage_->PersistToDisk());
- ICING_RETURN_IF_ERROR(entry_storage_->PersistToDisk());
- ICING_RETURN_IF_ERROR(kv_storage_->PersistToDisk());
-
- ICING_RETURN_IF_ERROR(UpdateChecksums(crcs(), info(), bucket_storage_.get(),
- entry_storage_.get(),
- kv_storage_.get()));
- // Changes should have been applied to the underlying file when using
- // MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, but call msync() as an
- // extra safety step to ensure they are written out.
- ICING_RETURN_IF_ERROR(metadata_mmapped_file_->PersistToDisk());
+ ++(info().num_deleted_entries);
return libtextclassifier3::Status::OK;
}
@@ -415,8 +310,7 @@ libtextclassifier3::StatusOr<int64_t> PersistentHashMap::GetDiskUsage() const {
int64_t total = bucket_storage_disk_usage + entry_storage_disk_usage +
kv_storage_disk_usage;
Filesystem::IncrementByOrSetInvalid(
- filesystem_->GetDiskUsage(
- GetMetadataFilePath(base_dir_, kSubDirectory).c_str()),
+ filesystem_.GetDiskUsage(GetMetadataFilePath(working_path_).c_str()),
&total);
if (total < 0 || total == Filesystem::kBadFileSize) {
@@ -438,23 +332,15 @@ libtextclassifier3::StatusOr<int64_t> PersistentHashMap::GetElementsSize()
kv_storage_elements_size;
}
-libtextclassifier3::StatusOr<Crc32> PersistentHashMap::ComputeChecksum() {
- Crcs* crcs_ptr = crcs();
- ICING_RETURN_IF_ERROR(UpdateChecksums(crcs_ptr, info(), bucket_storage_.get(),
- entry_storage_.get(),
- kv_storage_.get()));
- return Crc32(crcs_ptr->all_crc);
-}
-
/* static */ libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
PersistentHashMap::InitializeNewFiles(const Filesystem& filesystem,
- std::string_view base_dir,
- const Options& options) {
- // Create directory.
- const std::string dir_path = absl_ports::StrCat(base_dir, "/", kSubDirectory);
- if (!filesystem.CreateDirectoryRecursively(dir_path.c_str())) {
+ std::string&& working_path,
+ Options&& options) {
+ // PersistentHashMap uses working_path as working directory path.
+ // Create working directory.
+ if (!filesystem.CreateDirectory(working_path.c_str())) {
return absl_ports::InternalError(
- absl_ports::StrCat("Failed to create directory: ", dir_path));
+ absl_ports::StrCat("Failed to create directory: ", working_path));
}
int32_t max_num_buckets_required =
@@ -469,9 +355,9 @@ PersistentHashMap::InitializeNewFiles(const Filesystem& filesystem,
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<FileBackedVector<Bucket>> bucket_storage,
FileBackedVector<Bucket>::Create(
- filesystem, GetBucketStorageFilePath(base_dir, kSubDirectory),
+ filesystem, GetBucketStorageFilePath(working_path),
MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
- pre_mapping_mmap_size));
+ options.pre_mapping_fbv ? pre_mapping_mmap_size : 0));
// Initialize entry_storage
pre_mapping_mmap_size = sizeof(Entry) * options.max_num_entries;
@@ -480,72 +366,75 @@ PersistentHashMap::InitializeNewFiles(const Filesystem& filesystem,
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<FileBackedVector<Entry>> entry_storage,
FileBackedVector<Entry>::Create(
- filesystem, GetEntryStorageFilePath(base_dir, kSubDirectory),
+ filesystem, GetEntryStorageFilePath(working_path),
MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
- pre_mapping_mmap_size));
+ options.pre_mapping_fbv ? pre_mapping_mmap_size : 0));
// Initialize kv_storage
pre_mapping_mmap_size =
options.average_kv_byte_size * options.max_num_entries;
max_file_size =
pre_mapping_mmap_size + FileBackedVector<char>::Header::kHeaderSize;
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<FileBackedVector<char>> kv_storage,
- FileBackedVector<char>::Create(
- filesystem, GetKeyValueStorageFilePath(base_dir),
- MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
- max_file_size, pre_mapping_mmap_size));
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<FileBackedVector<char>> kv_storage,
+ FileBackedVector<char>::Create(
+ filesystem, GetKeyValueStorageFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
+ options.pre_mapping_fbv ? pre_mapping_mmap_size : 0));
// Initialize buckets.
ICING_RETURN_IF_ERROR(bucket_storage->Set(
/*idx=*/0, /*len=*/options.init_num_buckets, Bucket()));
ICING_RETURN_IF_ERROR(bucket_storage->PersistToDisk());
- // Create and initialize new info
- Info new_info;
- new_info.version = kVersion;
- new_info.value_type_size = options.value_type_size;
- new_info.max_load_factor_percent = options.max_load_factor_percent;
- new_info.num_deleted_entries = 0;
- new_info.num_deleted_key_value_bytes = 0;
-
- // Compute checksums
- Crcs new_crcs;
- ICING_RETURN_IF_ERROR(UpdateChecksums(&new_crcs, &new_info,
- bucket_storage.get(),
- entry_storage.get(), kv_storage.get()));
-
- const std::string metadata_file_path =
- GetMetadataFilePath(base_dir, kSubDirectory);
- // Write new metadata file
- ICING_RETURN_IF_ERROR(WriteMetadata(filesystem, metadata_file_path.c_str(),
- &new_crcs, &new_info));
-
- // Mmap the content of the crcs and info.
- ICING_ASSIGN_OR_RETURN(MemoryMappedFile metadata_mmapped_file,
- MemoryMappedFile::Create(
- filesystem, metadata_file_path,
- MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
- ICING_RETURN_IF_ERROR(metadata_mmapped_file.Remap(
- /*file_offset=*/0, /*mmap_size=*/sizeof(Crcs) + sizeof(Info)));
-
- return std::unique_ptr<PersistentHashMap>(new PersistentHashMap(
- filesystem, base_dir, options, std::move(metadata_mmapped_file),
- std::move(bucket_storage), std::move(entry_storage),
- std::move(kv_storage)));
+ // Initialize metadata file. Create MemoryMappedFile with pre-mapping, and
+ // call GrowAndRemapIfNecessary to grow the underlying file.
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile metadata_mmapped_file,
+ MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/kMetadataFileSize,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/kMetadataFileSize));
+ ICING_RETURN_IF_ERROR(metadata_mmapped_file.GrowAndRemapIfNecessary(
+ /*file_offset=*/0, /*mmap_size=*/kMetadataFileSize));
+
+ // Create instance.
+ auto new_persistent_hash_map =
+ std::unique_ptr<PersistentHashMap>(new PersistentHashMap(
+ filesystem, std::move(working_path), std::move(options),
+ std::move(metadata_mmapped_file), std::move(bucket_storage),
+ std::move(entry_storage), std::move(kv_storage)));
+ // Initialize info content by writing mapped memory directly.
+ Info& info_ref = new_persistent_hash_map->info();
+ info_ref.magic = Info::kMagic;
+ info_ref.value_type_size = new_persistent_hash_map->options_.value_type_size;
+ info_ref.max_load_factor_percent =
+ new_persistent_hash_map->options_.max_load_factor_percent;
+ info_ref.num_deleted_entries = 0;
+ info_ref.num_deleted_key_value_bytes = 0;
+ // Initialize new PersistentStorage. The initial checksums will be computed
+ // and set via InitializeNewStorage.
+ ICING_RETURN_IF_ERROR(new_persistent_hash_map->InitializeNewStorage());
+
+ return new_persistent_hash_map;
}
/* static */ libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem,
- std::string_view base_dir,
- const Options& options) {
- // Mmap the content of the crcs and info.
+ std::string&& working_path,
+ Options&& options) {
+ // Initialize metadata file
ICING_ASSIGN_OR_RETURN(
MemoryMappedFile metadata_mmapped_file,
- MemoryMappedFile::Create(
- filesystem, GetMetadataFilePath(base_dir, kSubDirectory),
- MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
- ICING_RETURN_IF_ERROR(metadata_mmapped_file.Remap(
- /*file_offset=*/0, /*mmap_size=*/sizeof(Crcs) + sizeof(Info)));
+ MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/kMetadataFileSize,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/kMetadataFileSize));
+ if (metadata_mmapped_file.available_size() != kMetadataFileSize) {
+ return absl_ports::FailedPreconditionError("Incorrect metadata file size");
+ }
int32_t max_num_buckets_required = CalculateNumBucketsRequired(
options.max_num_entries, options.max_load_factor_percent);
@@ -557,9 +446,9 @@ PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem,
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<FileBackedVector<Bucket>> bucket_storage,
FileBackedVector<Bucket>::Create(
- filesystem, GetBucketStorageFilePath(base_dir, kSubDirectory),
+ filesystem, GetBucketStorageFilePath(working_path),
MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
- pre_mapping_mmap_size));
+ options.pre_mapping_fbv ? pre_mapping_mmap_size : 0));
// Initialize entry_storage
pre_mapping_mmap_size = sizeof(Entry) * options.max_num_entries;
@@ -568,74 +457,136 @@ PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem,
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<FileBackedVector<Entry>> entry_storage,
FileBackedVector<Entry>::Create(
- filesystem, GetEntryStorageFilePath(base_dir, kSubDirectory),
+ filesystem, GetEntryStorageFilePath(working_path),
MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
- pre_mapping_mmap_size));
+ options.pre_mapping_fbv ? pre_mapping_mmap_size : 0));
// Initialize kv_storage
pre_mapping_mmap_size =
options.average_kv_byte_size * options.max_num_entries;
max_file_size =
pre_mapping_mmap_size + FileBackedVector<char>::Header::kHeaderSize;
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<FileBackedVector<char>> kv_storage,
- FileBackedVector<char>::Create(
- filesystem, GetKeyValueStorageFilePath(base_dir),
- MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
- max_file_size, pre_mapping_mmap_size));
-
- Crcs* crcs_ptr = reinterpret_cast<Crcs*>(
- metadata_mmapped_file.mutable_region() + Crcs::kFileOffset);
- Info* info_ptr = reinterpret_cast<Info*>(
- metadata_mmapped_file.mutable_region() + Info::kFileOffset);
-
- // Value type size should be consistent.
- if (options.value_type_size != info_ptr->value_type_size) {
- return absl_ports::FailedPreconditionError("Incorrect value type size");
- }
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<FileBackedVector<char>> kv_storage,
+ FileBackedVector<char>::Create(
+ filesystem, GetKeyValueStorageFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
+ options.pre_mapping_fbv ? pre_mapping_mmap_size : 0));
- // Current # of entries should not exceed options.max_num_entries
- // We compute max_file_size of 3 storages by options.max_num_entries. Since we
- // won't recycle space of deleted entries (and key-value bytes), they're still
- // occupying space in storages. Even if # of "active" entries doesn't exceed
- // options.max_num_entries, the new kvp to be inserted still potentially
- // exceeds max_file_size.
- // Therefore, we should use entry_storage->num_elements() instead of # of
+ // Create instance.
+ auto persistent_hash_map =
+ std::unique_ptr<PersistentHashMap>(new PersistentHashMap(
+ filesystem, std::move(working_path), std::move(options),
+ std::move(metadata_mmapped_file), std::move(bucket_storage),
+ std::move(entry_storage), std::move(kv_storage)));
+ // Initialize existing PersistentStorage. Checksums will be validated.
+ ICING_RETURN_IF_ERROR(persistent_hash_map->InitializeExistingStorage());
+
+ // Validate other values of info and options.
+ // Current # of entries should not exceed options_.max_num_entries
+ // We compute max_file_size of 3 storages by options_.max_num_entries. Since
+ // we won't recycle space of deleted entries (and key-value bytes), they're
+ // still occupying space in storages. Even if # of "active" entries doesn't
+ // exceed options_.max_num_entries, the new kvp to be inserted still
+ // potentially exceeds max_file_size.
+ // Therefore, we should use entry_storage_->num_elements() instead of # of
// "active" entries
- // (i.e. entry_storage->num_elements() - info_ptr->num_deleted_entries) to
+ // (i.e. entry_storage_->num_elements() - info_ptr->num_deleted_entries) to
// check. This feature avoids storages being grown extremely large when there
// are many Delete() and Put() operations.
- if (entry_storage->num_elements() > options.max_num_entries) {
+ if (persistent_hash_map->entry_storage_->num_elements() >
+ persistent_hash_map->options_.max_num_entries) {
return absl_ports::FailedPreconditionError(
"Current # of entries exceeds max num entries");
}
- // Validate checksums of info and 3 storages.
- ICING_RETURN_IF_ERROR(
- ValidateChecksums(crcs_ptr, info_ptr, bucket_storage.get(),
- entry_storage.get(), kv_storage.get()));
+ // Magic should be the same.
+ if (persistent_hash_map->info().magic != Info::kMagic) {
+ return absl_ports::FailedPreconditionError(
+ "PersistentHashMap header magic mismatch");
+ }
+
+ // Value type size should be consistent.
+ if (persistent_hash_map->options_.value_type_size !=
+ persistent_hash_map->info().value_type_size) {
+ return absl_ports::FailedPreconditionError("Incorrect value type size");
+ }
// Allow max_load_factor_percent_ change.
- if (options.max_load_factor_percent != info_ptr->max_load_factor_percent) {
+ if (persistent_hash_map->options_.max_load_factor_percent !=
+ persistent_hash_map->info().max_load_factor_percent) {
ICING_VLOG(2) << "Changing max_load_factor_percent from "
- << info_ptr->max_load_factor_percent << " to "
- << options.max_load_factor_percent;
+ << persistent_hash_map->info().max_load_factor_percent
+ << " to "
+ << persistent_hash_map->options_.max_load_factor_percent;
+
+ persistent_hash_map->SetInfoDirty();
+ persistent_hash_map->info().max_load_factor_percent =
+ persistent_hash_map->options_.max_load_factor_percent;
+ ICING_RETURN_IF_ERROR(
+ persistent_hash_map->RehashIfNecessary(/*force_rehash=*/false));
- info_ptr->max_load_factor_percent = options.max_load_factor_percent;
- crcs_ptr->component_crcs.info_crc = info_ptr->ComputeChecksum().Get();
- crcs_ptr->all_crc = crcs_ptr->component_crcs.ComputeChecksum().Get();
- ICING_RETURN_IF_ERROR(metadata_mmapped_file.PersistToDisk());
+ ICING_RETURN_IF_ERROR(persistent_hash_map->PersistToDisk());
}
- auto persistent_hash_map =
- std::unique_ptr<PersistentHashMap>(new PersistentHashMap(
- filesystem, base_dir, options, std::move(metadata_mmapped_file),
- std::move(bucket_storage), std::move(entry_storage),
- std::move(kv_storage)));
- ICING_RETURN_IF_ERROR(
- persistent_hash_map->RehashIfNecessary(/*force_rehash=*/false));
return persistent_hash_map;
}
+libtextclassifier3::Status PersistentHashMap::PersistStoragesToDisk(
+ bool force) {
+ if (!force && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ ICING_RETURN_IF_ERROR(bucket_storage_->PersistToDisk());
+ ICING_RETURN_IF_ERROR(entry_storage_->PersistToDisk());
+ ICING_RETURN_IF_ERROR(kv_storage_->PersistToDisk());
+ is_storage_dirty_ = false;
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status PersistentHashMap::PersistMetadataToDisk(
+ bool force) {
+ // We can skip persisting metadata to disk only if both info and storage are
+ // clean.
+ if (!force && !is_info_dirty() && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ // Changes should have been applied to the underlying file when using
+ // MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, but call msync() as an
+ // extra safety step to ensure they are written out.
+ ICING_RETURN_IF_ERROR(metadata_mmapped_file_->PersistToDisk());
+ is_info_dirty_ = false;
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<Crc32> PersistentHashMap::ComputeInfoChecksum(
+ bool force) {
+ if (!force && !is_info_dirty()) {
+ return Crc32(crcs().component_crcs.info_crc);
+ }
+
+ return info().ComputeChecksum();
+}
+
+libtextclassifier3::StatusOr<Crc32> PersistentHashMap::ComputeStoragesChecksum(
+ bool force) {
+ if (!force && !is_storage_dirty()) {
+ return Crc32(crcs().component_crcs.storages_crc);
+ }
+
+ // Compute crcs
+ ICING_ASSIGN_OR_RETURN(Crc32 bucket_storage_crc,
+ bucket_storage_->ComputeChecksum());
+ ICING_ASSIGN_OR_RETURN(Crc32 entry_storage_crc,
+ entry_storage_->ComputeChecksum());
+ ICING_ASSIGN_OR_RETURN(Crc32 kv_storage_crc, kv_storage_->ComputeChecksum());
+
+ return Crc32(bucket_storage_crc.Get() ^ entry_storage_crc.Get() ^
+ kv_storage_crc.Get());
+}
+
libtextclassifier3::StatusOr<PersistentHashMap::EntryIndexPair>
PersistentHashMap::FindEntryIndexByKey(int32_t bucket_idx,
std::string_view key) const {
@@ -674,7 +625,7 @@ libtextclassifier3::Status PersistentHashMap::CopyEntryValue(
ICING_ASSIGN_OR_RETURN(const char* kv_arr,
kv_storage_->Get(entry->key_value_index()));
int32_t value_offset = strlen(kv_arr) + 1;
- memcpy(value, kv_arr + value_offset, info()->value_type_size);
+ memcpy(value, kv_arr + value_offset, info().value_type_size);
return libtextclassifier3::Status::OK;
}
@@ -682,6 +633,8 @@ libtextclassifier3::Status PersistentHashMap::CopyEntryValue(
libtextclassifier3::Status PersistentHashMap::Insert(int32_t bucket_idx,
std::string_view key,
const void* value) {
+ SetDirty();
+
// If entry_storage_->num_elements() + 1 exceeds options_.max_num_entries,
// then return error.
// We compute max_file_size of 3 storages by options_.max_num_entries. Since
@@ -702,7 +655,7 @@ libtextclassifier3::Status PersistentHashMap::Insert(int32_t bucket_idx,
// Append new key value.
int32_t new_kv_idx = kv_storage_->num_elements();
- int32_t kv_len = key.size() + 1 + info()->value_type_size;
+ int32_t kv_len = key.size() + 1 + info().value_type_size;
int32_t value_offset = key.size() + 1;
ICING_ASSIGN_OR_RETURN(
typename FileBackedVector<char>::MutableArrayView mutable_new_kv_arr,
@@ -711,7 +664,7 @@ libtextclassifier3::Status PersistentHashMap::Insert(int32_t bucket_idx,
mutable_new_kv_arr.SetArray(/*idx=*/key.size(), "\0", 1);
mutable_new_kv_arr.SetArray(/*idx=*/value_offset,
reinterpret_cast<const char*>(value),
- info()->value_type_size);
+ info().value_type_size);
// Append new entry.
int32_t new_entry_idx = entry_storage_->num_elements();
@@ -727,7 +680,7 @@ libtextclassifier3::Status PersistentHashMap::RehashIfNecessary(
int32_t new_num_bucket = bucket_storage_->num_elements();
while (new_num_bucket <= Bucket::kMaxNumBuckets / 2 &&
size() > static_cast<int64_t>(new_num_bucket) *
- info()->max_load_factor_percent / 100) {
+ info().max_load_factor_percent / 100) {
new_num_bucket *= 2;
}
@@ -735,6 +688,8 @@ libtextclassifier3::Status PersistentHashMap::RehashIfNecessary(
return libtextclassifier3::Status::OK;
}
+ SetDirty();
+
// Resize and reset buckets.
ICING_RETURN_IF_ERROR(
bucket_storage_->Set(0, new_num_bucket, Bucket(Entry::kInvalidIndex)));
@@ -764,7 +719,7 @@ libtextclassifier3::Status PersistentHashMap::RehashIfNecessary(
entry_storage_->TruncateTo(entry_idx);
}
- info()->num_deleted_entries = 0;
+ info().num_deleted_entries = 0;
return libtextclassifier3::Status::OK;
}
@@ -774,7 +729,7 @@ bool PersistentHashMap::Iterator::Advance() {
// key value pair. In the first round (after construction), curr_key_len_
// is 0, so don't jump over anything.
if (curr_key_len_ != 0) {
- curr_kv_idx_ += curr_key_len_ + 1 + map_->info()->value_type_size;
+ curr_kv_idx_ += curr_key_len_ + 1 + map_->info().value_type_size;
curr_key_len_ = 0;
}
diff --git a/icing/file/persistent-hash-map.h b/icing/file/persistent-hash-map.h
index 57fa070..5f7999d 100644
--- a/icing/file/persistent-hash-map.h
+++ b/icing/file/persistent-hash-map.h
@@ -24,6 +24,7 @@
#include "icing/file/file-backed-vector.h"
#include "icing/file/filesystem.h"
#include "icing/file/memory-mapped-file.h"
+#include "icing/file/persistent-storage.h"
#include "icing/util/crc32.h"
namespace icing {
@@ -34,7 +35,7 @@ namespace lib {
// Key and value can be any type, but callers should serialize key/value by
// themselves and pass raw bytes into the hash map, and the serialized key
// should not contain termination character '\0'.
-class PersistentHashMap {
+class PersistentHashMap : public PersistentStorage {
public:
// For iterating through persistent hash map. The order is not guaranteed.
//
@@ -80,45 +81,15 @@ class PersistentHashMap {
friend class PersistentHashMap;
};
- // Crcs and Info will be written into the metadata file.
- // File layout: <Crcs><Info>
- // Crcs
- struct Crcs {
- static constexpr int32_t kFileOffset = 0;
-
- struct ComponentCrcs {
- uint32_t info_crc;
- uint32_t bucket_storage_crc;
- uint32_t entry_storage_crc;
- uint32_t kv_storage_crc;
-
- bool operator==(const ComponentCrcs& other) const {
- return info_crc == other.info_crc &&
- bucket_storage_crc == other.bucket_storage_crc &&
- entry_storage_crc == other.entry_storage_crc &&
- kv_storage_crc == other.kv_storage_crc;
- }
-
- Crc32 ComputeChecksum() const {
- return Crc32(std::string_view(reinterpret_cast<const char*>(this),
- sizeof(ComponentCrcs)));
- }
- } __attribute__((packed));
-
- bool operator==(const Crcs& other) const {
- return all_crc == other.all_crc && component_crcs == other.component_crcs;
- }
-
- uint32_t all_crc;
- ComponentCrcs component_crcs;
- } __attribute__((packed));
- static_assert(sizeof(Crcs) == 20, "");
+ // Metadata file layout: <Crcs><Info>
+ static constexpr int32_t kCrcsMetadataFileOffset = 0;
+ static constexpr int32_t kInfoMetadataFileOffset =
+ static_cast<int32_t>(sizeof(Crcs));
- // Info
struct Info {
- static constexpr int32_t kFileOffset = static_cast<int32_t>(sizeof(Crcs));
+ static constexpr int32_t kMagic = 0x653afd7b;
- int32_t version;
+ int32_t magic;
int32_t value_type_size;
int32_t max_load_factor_percent;
int32_t num_deleted_entries;
@@ -131,6 +102,9 @@ class PersistentHashMap {
} __attribute__((packed));
static_assert(sizeof(Info) == 20, "");
+ static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info);
+ static_assert(kMetadataFileSize == 32, "");
+
// Bucket
class Bucket {
public:
@@ -237,12 +211,14 @@ class PersistentHashMap {
int32_t max_num_entries_in = Entry::kMaxNumEntries,
int32_t max_load_factor_percent_in = kDefaultMaxLoadFactorPercent,
int32_t average_kv_byte_size_in = kDefaultAverageKVByteSize,
- int32_t init_num_buckets_in = kDefaultInitNumBuckets)
+ int32_t init_num_buckets_in = kDefaultInitNumBuckets,
+ bool pre_mapping_fbv_in = false)
: value_type_size(value_type_size_in),
max_num_entries(max_num_entries_in),
max_load_factor_percent(max_load_factor_percent_in),
average_kv_byte_size(average_kv_byte_size_in),
- init_num_buckets(init_num_buckets_in) {}
+ init_num_buckets(init_num_buckets_in),
+ pre_mapping_fbv(pre_mapping_fbv_in) {}
bool IsValid() const;
@@ -268,35 +244,54 @@ class PersistentHashMap {
// It is used when creating new persistent hash map and ignored when
// creating the instance from existing files.
int32_t init_num_buckets;
- };
- static constexpr int32_t kVersion = 1;
+ // Flag indicating whether memory map max possible file size for underlying
+ // FileBackedVector before growing the actual file size.
+ bool pre_mapping_fbv;
+ };
+ static constexpr WorkingPathType kWorkingPathType =
+ WorkingPathType::kDirectory;
static constexpr std::string_view kFilePrefix = "persistent_hash_map";
- // Only metadata, bucket, entry files are stored under this sub-directory, for
- // rehashing branching use.
- static constexpr std::string_view kSubDirectory = "dynamic";
// Creates a new PersistentHashMap to read/write/delete key value pairs.
//
// filesystem: Object to make system level calls
- // base_dir: Specifies the directory for all persistent hash map related
- // sub-directory and files to be stored. If base_dir doesn't exist,
- // then PersistentHashMap will automatically create it. If files
- // exist, then it will initialize the hash map from existing files.
+ // working_path: Specifies the working path for PersistentStorage.
+ // PersistentHashMap uses working path as working directory and
+ // all related files will be stored under this directory. It
+ // takes full ownership and of working_path_, including
+ // creation/deletion. It is the caller's responsibility to
+ // specify correct working path and avoid mixing different
+ // persistent storages together under the same path. Also the
+ // caller has the ownership for the parent directory of
+ // working_path_, and it is responsible for parent directory
+ // creation/deletion. See PersistentStorage for more details
+ // about the concept of working_path.
// options: Options instance.
//
// Returns:
// INVALID_ARGUMENT_ERROR if any value in options is invalid.
// FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
- // checksum.
+ // checksum or any other inconsistency.
// INTERNAL_ERROR on I/O errors.
// Any FileBackedVector errors.
static libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
- Create(const Filesystem& filesystem, std::string_view base_dir,
- const Options& options);
+ Create(const Filesystem& filesystem, std::string working_path,
+ Options options);
+
+ // Deletes PersistentHashMap under working_path.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ static libtextclassifier3::Status Discard(const Filesystem& filesystem,
+ std::string working_path) {
+ return PersistentStorage::Discard(filesystem, working_path,
+ kWorkingPathType);
+ }
- ~PersistentHashMap();
+ ~PersistentHashMap() override;
// Update a key value pair. If key does not exist, then insert (key, value)
// into the storage. Otherwise overwrite the value into the storage.
@@ -349,13 +344,6 @@ class PersistentHashMap {
Iterator GetIterator() const { return Iterator(this); }
- // Flushes content to underlying files.
- //
- // Returns:
- // OK on success
- // INTERNAL_ERROR on I/O error
- libtextclassifier3::Status PersistToDisk();
-
// Calculates and returns the disk usage (metadata + 3 storages total file
// size) in bytes.
//
@@ -374,16 +362,8 @@ class PersistentHashMap {
// INTERNAL_ERROR on I/O error
libtextclassifier3::StatusOr<int64_t> GetElementsSize() const;
- // Updates all checksums of the persistent hash map components and returns
- // all_crc.
- //
- // Returns:
- // Crc of all components (all_crc) on success
- // INTERNAL_ERROR if any data inconsistency
- libtextclassifier3::StatusOr<Crc32> ComputeChecksum();
-
int32_t size() const {
- return entry_storage_->num_elements() - info()->num_deleted_entries;
+ return entry_storage_->num_elements() - info().num_deleted_entries;
}
bool empty() const { return size() == 0; }
@@ -402,27 +382,58 @@ class PersistentHashMap {
};
explicit PersistentHashMap(
- const Filesystem& filesystem, std::string_view base_dir,
- const Options& options, MemoryMappedFile&& metadata_mmapped_file,
+ const Filesystem& filesystem, std::string&& working_path,
+ Options&& options, MemoryMappedFile&& metadata_mmapped_file,
std::unique_ptr<FileBackedVector<Bucket>> bucket_storage,
std::unique_ptr<FileBackedVector<Entry>> entry_storage,
std::unique_ptr<FileBackedVector<char>> kv_storage)
- : filesystem_(&filesystem),
- base_dir_(base_dir),
- options_(options),
+ : PersistentStorage(filesystem, std::move(working_path),
+ kWorkingPathType),
+ options_(std::move(options)),
metadata_mmapped_file_(std::make_unique<MemoryMappedFile>(
std::move(metadata_mmapped_file))),
bucket_storage_(std::move(bucket_storage)),
entry_storage_(std::move(entry_storage)),
- kv_storage_(std::move(kv_storage)) {}
+ kv_storage_(std::move(kv_storage)),
+ is_info_dirty_(false),
+ is_storage_dirty_(false) {}
static libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
- InitializeNewFiles(const Filesystem& filesystem, std::string_view base_dir,
- const Options& options);
+ InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path,
+ Options&& options);
static libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
InitializeExistingFiles(const Filesystem& filesystem,
- std::string_view base_dir, const Options& options);
+ std::string&& working_path, Options&& options);
+
+ // Flushes contents of all storages to underlying files.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistStoragesToDisk(bool force) override;
+
+ // Flushes contents of metadata file.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistMetadataToDisk(bool force) override;
+
+ // Computes and returns Info checksum.
+ //
+ // Returns:
+ // - Crc of the Info on success
+ libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(bool force) override;
+
+ // Computes and returns all storages checksum. Checksums of bucket_storage_,
+ // entry_storage_ and kv_storage_ will be combined together by XOR.
+ //
+ // Returns:
+ // - Crc of all storages on success
+ // - INTERNAL_ERROR if any data inconsistency
+ libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) override;
// Find the index of the target entry (that contains the key) from a bucket
// (specified by bucket index). Also return the previous entry index, since
@@ -468,23 +479,36 @@ class PersistentHashMap {
// Any FileBackedVector errors
libtextclassifier3::Status RehashIfNecessary(bool force_rehash);
- Crcs* crcs() {
- return reinterpret_cast<Crcs*>(metadata_mmapped_file_->mutable_region() +
- Crcs::kFileOffset);
+ Crcs& crcs() override {
+ return *reinterpret_cast<Crcs*>(metadata_mmapped_file_->mutable_region() +
+ kCrcsMetadataFileOffset);
+ }
+
+ const Crcs& crcs() const override {
+ return *reinterpret_cast<const Crcs*>(metadata_mmapped_file_->region() +
+ kCrcsMetadataFileOffset);
+ }
+
+ Info& info() {
+ return *reinterpret_cast<Info*>(metadata_mmapped_file_->mutable_region() +
+ kInfoMetadataFileOffset);
}
- Info* info() {
- return reinterpret_cast<Info*>(metadata_mmapped_file_->mutable_region() +
- Info::kFileOffset);
+ const Info& info() const {
+ return *reinterpret_cast<const Info*>(metadata_mmapped_file_->region() +
+ kInfoMetadataFileOffset);
}
- const Info* info() const {
- return reinterpret_cast<const Info*>(metadata_mmapped_file_->region() +
- Info::kFileOffset);
+ void SetInfoDirty() { is_info_dirty_ = true; }
+ // When storage is dirty, we have to set info dirty as well. So just expose
+ // SetDirty to set both.
+ void SetDirty() {
+ is_info_dirty_ = true;
+ is_storage_dirty_ = true;
}
- const Filesystem* filesystem_;
- std::string base_dir_;
+ bool is_info_dirty() const { return is_info_dirty_; }
+ bool is_storage_dirty() const { return is_storage_dirty_; }
Options options_;
@@ -494,6 +518,9 @@ class PersistentHashMap {
std::unique_ptr<FileBackedVector<Bucket>> bucket_storage_;
std::unique_ptr<FileBackedVector<Entry>> entry_storage_;
std::unique_ptr<FileBackedVector<char>> kv_storage_;
+
+ bool is_info_dirty_;
+ bool is_storage_dirty_;
};
} // namespace lib
diff --git a/icing/file/persistent-hash-map_test.cc b/icing/file/persistent-hash-map_test.cc
index 8fde4a8..5535629 100644
--- a/icing/file/persistent-hash-map_test.cc
+++ b/icing/file/persistent-hash-map_test.cc
@@ -24,7 +24,9 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "icing/file/file-backed-vector.h"
#include "icing/file/filesystem.h"
+#include "icing/file/persistent-storage.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
#include "icing/util/crc32.h"
@@ -34,8 +36,10 @@ using ::testing::Eq;
using ::testing::Gt;
using ::testing::HasSubstr;
using ::testing::IsEmpty;
+using ::testing::IsTrue;
using ::testing::Key;
using ::testing::Lt;
+using ::testing::Ne;
using ::testing::Not;
using ::testing::Pair;
using ::testing::Pointee;
@@ -48,7 +52,7 @@ namespace lib {
namespace {
using Bucket = PersistentHashMap::Bucket;
-using Crcs = PersistentHashMap::Crcs;
+using Crcs = PersistentStorage::Crcs;
using Entry = PersistentHashMap::Entry;
using Info = PersistentHashMap::Info;
using Options = PersistentHashMap::Options;
@@ -56,10 +60,14 @@ using Options = PersistentHashMap::Options;
static constexpr int32_t kCorruptedValueOffset = 3;
static constexpr int32_t kTestInitNumBuckets = 1;
-class PersistentHashMapTest : public ::testing::Test {
+class PersistentHashMapTest : public ::testing::TestWithParam<bool> {
protected:
void SetUp() override {
- base_dir_ = GetTestTempDir() + "/persistent_hash_map_test";
+ base_dir_ = GetTestTempDir() + "/icing";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ working_path_ = base_dir_ + "/persistent_hash_map_test";
}
void TearDown() override {
@@ -93,9 +101,10 @@ class PersistentHashMapTest : public ::testing::Test {
Filesystem filesystem_;
std::string base_dir_;
+ std::string working_path_;
};
-TEST_F(PersistentHashMapTest, OptionsInvalidValueTypeSize) {
+TEST_P(PersistentHashMapTest, OptionsInvalidValueTypeSize) {
Options options(/*value_type_size_in=*/sizeof(int));
ASSERT_TRUE(options.IsValid());
@@ -109,7 +118,7 @@ TEST_F(PersistentHashMapTest, OptionsInvalidValueTypeSize) {
EXPECT_FALSE(options.IsValid());
}
-TEST_F(PersistentHashMapTest, OptionsInvalidMaxNumEntries) {
+TEST_P(PersistentHashMapTest, OptionsInvalidMaxNumEntries) {
Options options(/*value_type_size_in=*/sizeof(int));
ASSERT_TRUE(options.IsValid());
@@ -123,7 +132,7 @@ TEST_F(PersistentHashMapTest, OptionsInvalidMaxNumEntries) {
EXPECT_FALSE(options.IsValid());
}
-TEST_F(PersistentHashMapTest, OptionsInvalidMaxLoadFactorPercent) {
+TEST_P(PersistentHashMapTest, OptionsInvalidMaxLoadFactorPercent) {
Options options(/*value_type_size_in=*/sizeof(int));
ASSERT_TRUE(options.IsValid());
@@ -134,7 +143,7 @@ TEST_F(PersistentHashMapTest, OptionsInvalidMaxLoadFactorPercent) {
EXPECT_FALSE(options.IsValid());
}
-TEST_F(PersistentHashMapTest, OptionsInvalidAverageKVByteSize) {
+TEST_P(PersistentHashMapTest, OptionsInvalidAverageKVByteSize) {
Options options(/*value_type_size_in=*/sizeof(int));
ASSERT_TRUE(options.IsValid());
@@ -145,7 +154,7 @@ TEST_F(PersistentHashMapTest, OptionsInvalidAverageKVByteSize) {
EXPECT_FALSE(options.IsValid());
}
-TEST_F(PersistentHashMapTest, OptionsInvalidInitNumBuckets) {
+TEST_P(PersistentHashMapTest, OptionsInvalidInitNumBuckets) {
Options options(/*value_type_size_in=*/sizeof(int));
ASSERT_TRUE(options.IsValid());
@@ -163,7 +172,7 @@ TEST_F(PersistentHashMapTest, OptionsInvalidInitNumBuckets) {
EXPECT_FALSE(options.IsValid());
}
-TEST_F(PersistentHashMapTest, OptionsNumBucketsRequiredExceedsMaxNumBuckets) {
+TEST_P(PersistentHashMapTest, OptionsNumBucketsRequiredExceedsMaxNumBuckets) {
Options options(/*value_type_size_in=*/sizeof(int));
ASSERT_TRUE(options.IsValid());
@@ -172,7 +181,7 @@ TEST_F(PersistentHashMapTest, OptionsNumBucketsRequiredExceedsMaxNumBuckets) {
EXPECT_FALSE(options.IsValid());
}
-TEST_F(PersistentHashMapTest,
+TEST_P(PersistentHashMapTest,
OptionsEstimatedNumKeyValuePairExceedsStorageMaxSize) {
Options options(/*value_type_size_in=*/sizeof(int));
ASSERT_TRUE(options.IsValid());
@@ -185,29 +194,33 @@ TEST_F(PersistentHashMapTest,
EXPECT_FALSE(options.IsValid());
}
-TEST_F(PersistentHashMapTest, InvalidBaseDir) {
- EXPECT_THAT(
- PersistentHashMap::Create(filesystem_, "/dev/null",
- Options(/*value_type_size_in=*/sizeof(int))),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+TEST_P(PersistentHashMapTest, InvalidWorkingPath) {
+ EXPECT_THAT(PersistentHashMap::Create(
+ filesystem_, "/dev/null/persistent_hash_map_test",
+ Options(/*value_type_size_in=*/sizeof(int))),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
-TEST_F(PersistentHashMapTest, CreateWithInvalidOptionsShouldFail) {
+TEST_P(PersistentHashMapTest, CreateWithInvalidOptionsShouldFail) {
Options invalid_options(/*value_type_size_in=*/-1);
+ invalid_options.pre_mapping_fbv = GetParam();
ASSERT_FALSE(invalid_options.IsValid());
EXPECT_THAT(
- PersistentHashMap::Create(filesystem_, base_dir_, invalid_options),
+ PersistentHashMap::Create(filesystem_, working_path_, invalid_options),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(PersistentHashMapTest, InitializeNewFiles) {
+TEST_P(PersistentHashMapTest, InitializeNewFiles) {
{
- ASSERT_FALSE(filesystem_.DirectoryExists(base_dir_.c_str()));
+ ASSERT_FALSE(filesystem_.DirectoryExists(working_path_.c_str()));
+
+ Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_,
- Options(/*value_type_size_in=*/sizeof(int))));
+ PersistentHashMap::Create(filesystem_, working_path_,
+ std::move(options)));
EXPECT_THAT(persistent_hash_map, Pointee(IsEmpty()));
ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
@@ -215,17 +228,16 @@ TEST_F(PersistentHashMapTest, InitializeNewFiles) {
// Metadata file should be initialized correctly for both info and crcs
// sections.
- const std::string metadata_file_path =
- absl_ports::StrCat(base_dir_, "/", PersistentHashMap::kSubDirectory, "/",
- PersistentHashMap::kFilePrefix, ".m");
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", PersistentHashMap::kFilePrefix, ".m");
ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
ASSERT_TRUE(metadata_sfd.is_valid());
// Check info section
Info info;
ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
- Info::kFileOffset));
- EXPECT_THAT(info.version, Eq(PersistentHashMap::kVersion));
+ PersistentHashMap::kInfoMetadataFileOffset));
+ EXPECT_THAT(info.magic, Eq(Info::kMagic));
EXPECT_THAT(info.value_type_size, Eq(sizeof(int)));
EXPECT_THAT(info.max_load_factor_percent,
Eq(Options::kDefaultMaxLoadFactorPercent));
@@ -235,13 +247,10 @@ TEST_F(PersistentHashMapTest, InitializeNewFiles) {
// Check crcs section
Crcs crcs;
ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
- Crcs::kFileOffset));
+ PersistentHashMap::kCrcsMetadataFileOffset));
// # of elements in bucket_storage should be 1, so it should have non-zero
- // crc value.
- EXPECT_THAT(crcs.component_crcs.bucket_storage_crc, Not(Eq(0)));
- // Other empty file backed vectors should have 0 crc value.
- EXPECT_THAT(crcs.component_crcs.entry_storage_crc, Eq(0));
- EXPECT_THAT(crcs.component_crcs.kv_storage_crc, Eq(0));
+ // all storages crc value.
+ EXPECT_THAT(crcs.component_crcs.storages_crc, Ne(0));
EXPECT_THAT(crcs.component_crcs.info_crc,
Eq(Crc32(std::string_view(reinterpret_cast<const char*>(&info),
sizeof(Info)))
@@ -253,25 +262,25 @@ TEST_F(PersistentHashMapTest, InitializeNewFiles) {
.Get()));
}
-TEST_F(PersistentHashMapTest, InitializeNewFilesWithCustomInitNumBuckets) {
+TEST_P(PersistentHashMapTest, InitializeNewFilesWithCustomInitNumBuckets) {
int custom_init_num_buckets = 128;
- // Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
/*max_load_factor_percent_in=*/
Options::kDefaultMaxLoadFactorPercent,
/*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
- /*init_num_buckets_in=*/custom_init_num_buckets)));
+ /*init_num_buckets_in=*/custom_init_num_buckets,
+ /*pre_mapping_fbv=*/GetParam())));
EXPECT_THAT(persistent_hash_map->num_buckets(), Eq(custom_init_num_buckets));
}
-TEST_F(PersistentHashMapTest,
+TEST_P(PersistentHashMapTest,
InitializeNewFilesWithInitNumBucketsSmallerThanNumBucketsRequired) {
int init_num_buckets = 65536;
@@ -279,19 +288,21 @@ TEST_F(PersistentHashMapTest,
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/1,
/*max_load_factor_percent_in=*/
Options::kDefaultMaxLoadFactorPercent,
/*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
- /*init_num_buckets_in=*/init_num_buckets)));
+ /*init_num_buckets_in=*/init_num_buckets,
+ /*pre_mapping_fbv=*/GetParam())));
EXPECT_THAT(persistent_hash_map->num_buckets(), Eq(init_num_buckets));
}
-TEST_F(PersistentHashMapTest, InitNumBucketsShouldNotAffectExistingFiles) {
+TEST_P(PersistentHashMapTest, InitNumBucketsShouldNotAffectExistingFiles) {
Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
int original_init_num_buckets = 4;
{
@@ -301,7 +312,7 @@ TEST_F(PersistentHashMapTest, InitNumBucketsShouldNotAffectExistingFiles) {
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
EXPECT_THAT(persistent_hash_map->num_buckets(),
Eq(original_init_num_buckets));
@@ -314,20 +325,21 @@ TEST_F(PersistentHashMapTest, InitNumBucketsShouldNotAffectExistingFiles) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
// # of buckets should still be the original value.
EXPECT_THAT(persistent_hash_map->num_buckets(),
Eq(original_init_num_buckets));
}
-TEST_F(PersistentHashMapTest,
+TEST_P(PersistentHashMapTest,
InitializationShouldFailWithoutPersistToDiskOrDestruction) {
Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
// Put some key value pairs.
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
@@ -343,17 +355,18 @@ TEST_F(PersistentHashMapTest,
// Without calling PersistToDisk, checksums will not be recomputed or synced
// to disk, so initializing another instance on the same files should fail.
- EXPECT_THAT(PersistentHashMap::Create(filesystem_, base_dir_, options),
+ EXPECT_THAT(PersistentHashMap::Create(filesystem_, working_path_, options),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
-TEST_F(PersistentHashMapTest, InitializationShouldSucceedWithPersistToDisk) {
+TEST_P(PersistentHashMapTest, InitializationShouldSucceedWithPersistToDisk) {
Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map1,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
// Put some key value pairs.
ICING_ASSERT_OK(persistent_hash_map1->Put("a", Serialize(1).data()));
@@ -374,20 +387,21 @@ TEST_F(PersistentHashMapTest, InitializationShouldSucceedWithPersistToDisk) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map2,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
EXPECT_THAT(persistent_hash_map2, Pointee(SizeIs(2)));
EXPECT_THAT(GetValueByKey(persistent_hash_map2.get(), "a"), IsOkAndHolds(1));
EXPECT_THAT(GetValueByKey(persistent_hash_map2.get(), "b"), IsOkAndHolds(2));
}
-TEST_F(PersistentHashMapTest, InitializationShouldSucceedAfterDestruction) {
+TEST_P(PersistentHashMapTest, InitializationShouldSucceedAfterDestruction) {
Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
{
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data()));
ICING_ASSERT_OK(persistent_hash_map->Put("c", Serialize(3).data()));
@@ -407,21 +421,77 @@ TEST_F(PersistentHashMapTest, InitializationShouldSucceedAfterDestruction) {
// we should be able to get the same contents.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
EXPECT_THAT(persistent_hash_map, Pointee(SizeIs(2)));
EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "a"), IsOkAndHolds(1));
EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "b"), IsOkAndHolds(2));
}
}
-TEST_F(PersistentHashMapTest,
+TEST_P(PersistentHashMapTest,
+ InitializeExistingFilesWithDifferentMagicShouldFail) {
+ Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
+
+ {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, working_path_, options));
+ ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
+
+ ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
+ }
+
+ {
+ // Manually change kMagic and update checksum
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", PersistentHashMap::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_TRUE(metadata_sfd.is_valid());
+
+ Crcs crcs;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
+ PersistentHashMap::kCrcsMetadataFileOffset));
+
+ Info info;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
+ PersistentHashMap::kInfoMetadataFileOffset));
+
+ // Manually change magic and update checksums.
+ info.magic += kCorruptedValueOffset;
+ crcs.component_crcs.info_crc = info.ComputeChecksum().Get();
+ crcs.all_crc = crcs.component_crcs.ComputeChecksum().Get();
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(),
+ PersistentHashMap::kCrcsMetadataFileOffset,
+ &crcs, sizeof(Crcs)));
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(),
+ PersistentHashMap::kInfoMetadataFileOffset,
+ &info, sizeof(Info)));
+ }
+
+ {
+ // Attempt to create the persistent hash map with different magic. This
+ // should fail.
+ libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
+ persistent_hash_map_or =
+ PersistentHashMap::Create(filesystem_, working_path_, options);
+ EXPECT_THAT(persistent_hash_map_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(persistent_hash_map_or.status().error_message(),
+ HasSubstr("PersistentHashMap header magic mismatch"));
+ }
+}
+
+TEST_P(PersistentHashMapTest,
InitializeExistingFilesWithDifferentValueTypeSizeShouldFail) {
{
// Create new persistent hash map
+ Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_,
- Options(/*value_type_size_in=*/sizeof(int))));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
@@ -430,11 +500,13 @@ TEST_F(PersistentHashMapTest,
{
// Attempt to create the persistent hash map with different value type size.
// This should fail.
- ASSERT_THAT(sizeof(char), Not(Eq(sizeof(int))));
+ ASSERT_THAT(sizeof(char), Ne(sizeof(int)));
+
+ Options options(/*value_type_size_in=*/sizeof(char));
+ options.pre_mapping_fbv = GetParam();
libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
- persistent_hash_map_or = PersistentHashMap::Create(
- filesystem_, base_dir_,
- Options(/*value_type_size_in=*/sizeof(char)));
+ persistent_hash_map_or =
+ PersistentHashMap::Create(filesystem_, working_path_, options);
EXPECT_THAT(persistent_hash_map_or,
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(persistent_hash_map_or.status().error_message(),
@@ -442,14 +514,15 @@ TEST_F(PersistentHashMapTest,
}
}
-TEST_F(PersistentHashMapTest,
+TEST_P(PersistentHashMapTest,
InitializeExistingFilesWithMaxNumEntriesSmallerThanSizeShouldFail) {
Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data()));
@@ -461,7 +534,7 @@ TEST_F(PersistentHashMapTest,
options.max_num_entries = 1;
ASSERT_TRUE(options.IsValid());
- EXPECT_THAT(PersistentHashMap::Create(filesystem_, base_dir_, options),
+ EXPECT_THAT(PersistentHashMap::Create(filesystem_, working_path_, options),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
@@ -478,38 +551,39 @@ TEST_F(PersistentHashMapTest,
options.max_num_entries = 1;
ASSERT_TRUE(options.IsValid());
- EXPECT_THAT(PersistentHashMap::Create(filesystem_, base_dir_, options),
+ EXPECT_THAT(PersistentHashMap::Create(filesystem_, working_path_, options),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
}
-TEST_F(PersistentHashMapTest, InitializeExistingFilesWithWrongAllCrc) {
+TEST_P(PersistentHashMapTest, InitializeExistingFilesWithWrongAllCrc) {
Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
{
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
}
- const std::string metadata_file_path =
- absl_ports::StrCat(base_dir_, "/", PersistentHashMap::kSubDirectory, "/",
- PersistentHashMap::kFilePrefix, ".m");
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", PersistentHashMap::kFilePrefix, ".m");
ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
ASSERT_TRUE(metadata_sfd.is_valid());
Crcs crcs;
ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
- Crcs::kFileOffset));
+ PersistentHashMap::kCrcsMetadataFileOffset));
// Manually corrupt all_crc
crcs.all_crc += kCorruptedValueOffset;
- ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Crcs::kFileOffset, &crcs,
- sizeof(Crcs)));
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(),
+ PersistentHashMap::kCrcsMetadataFileOffset,
+ &crcs, sizeof(Crcs)));
metadata_sfd.reset();
{
@@ -517,206 +591,207 @@ TEST_F(PersistentHashMapTest, InitializeExistingFilesWithWrongAllCrc) {
// corrupted all_crc. This should fail.
libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
persistent_hash_map_or =
- PersistentHashMap::Create(filesystem_, base_dir_, options);
+ PersistentHashMap::Create(filesystem_, working_path_, options);
EXPECT_THAT(persistent_hash_map_or,
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(persistent_hash_map_or.status().error_message(),
- HasSubstr("Invalid all crc for PersistentHashMap"));
+ HasSubstr("Invalid all crc"));
}
}
-TEST_F(PersistentHashMapTest,
+TEST_P(PersistentHashMapTest,
InitializeExistingFilesWithCorruptedInfoShouldFail) {
Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
{
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
}
- const std::string metadata_file_path =
- absl_ports::StrCat(base_dir_, "/", PersistentHashMap::kSubDirectory, "/",
- PersistentHashMap::kFilePrefix, ".m");
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", PersistentHashMap::kFilePrefix, ".m");
ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
ASSERT_TRUE(metadata_sfd.is_valid());
Info info;
ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
- Info::kFileOffset));
+ PersistentHashMap::kInfoMetadataFileOffset));
// Modify info, but don't update the checksum. This would be similar to
// corruption of info.
info.num_deleted_entries += kCorruptedValueOffset;
- ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Info::kFileOffset, &info,
- sizeof(Info)));
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(),
+ PersistentHashMap::kInfoMetadataFileOffset,
+ &info, sizeof(Info)));
{
// Attempt to create the persistent hash map with info that doesn't match
// its checksum and confirm that it fails.
libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
persistent_hash_map_or =
- PersistentHashMap::Create(filesystem_, base_dir_, options);
+ PersistentHashMap::Create(filesystem_, working_path_, options);
EXPECT_THAT(persistent_hash_map_or,
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(persistent_hash_map_or.status().error_message(),
- HasSubstr("Invalid info crc for PersistentHashMap"));
+ HasSubstr("Invalid info crc"));
}
}
-TEST_F(PersistentHashMapTest,
- InitializeExistingFilesWithWrongBucketStorageCrc) {
+TEST_P(PersistentHashMapTest,
+ InitializeExistingFilesWithCorruptedBucketStorage) {
Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
{
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
}
- const std::string metadata_file_path =
- absl_ports::StrCat(base_dir_, "/", PersistentHashMap::kSubDirectory, "/",
- PersistentHashMap::kFilePrefix, ".m");
- ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
- ASSERT_TRUE(metadata_sfd.is_valid());
-
- Crcs crcs;
- ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
- Crcs::kFileOffset));
+ {
+ // Update bucket storage manually.
+ const std::string bucket_storage_file_path = absl_ports::StrCat(
+ working_path_, "/", PersistentHashMap::kFilePrefix, ".b");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> bucket_storage,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, bucket_storage_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc,
+ bucket_storage->ComputeChecksum());
+ ICING_ASSERT_OK(bucket_storage->Append(Bucket()));
+ ICING_ASSERT_OK(bucket_storage->PersistToDisk());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc,
+ bucket_storage->ComputeChecksum());
+ ASSERT_THAT(old_crc, Not(Eq(new_crc)));
+ }
- // Manually corrupt bucket_storage_crc
- crcs.component_crcs.bucket_storage_crc += kCorruptedValueOffset;
- crcs.all_crc = Crc32(std::string_view(
- reinterpret_cast<const char*>(&crcs.component_crcs),
- sizeof(Crcs::ComponentCrcs)))
- .Get();
- ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Crcs::kFileOffset, &crcs,
- sizeof(Crcs)));
{
// Attempt to create the persistent hash map with metadata containing
// corrupted bucket_storage_crc. This should fail.
libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
persistent_hash_map_or =
- PersistentHashMap::Create(filesystem_, base_dir_, options);
+ PersistentHashMap::Create(filesystem_, working_path_, options);
EXPECT_THAT(persistent_hash_map_or,
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(
- persistent_hash_map_or.status().error_message(),
- HasSubstr("Mismatch crc with PersistentHashMap bucket storage"));
+ EXPECT_THAT(persistent_hash_map_or.status().error_message(),
+ HasSubstr("Invalid storages crc"));
}
}
-TEST_F(PersistentHashMapTest, InitializeExistingFilesWithWrongEntryStorageCrc) {
+TEST_P(PersistentHashMapTest,
+ InitializeExistingFilesWithCorruptedEntryStorage) {
Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
{
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
}
- const std::string metadata_file_path =
- absl_ports::StrCat(base_dir_, "/", PersistentHashMap::kSubDirectory, "/",
- PersistentHashMap::kFilePrefix, ".m");
- ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
- ASSERT_TRUE(metadata_sfd.is_valid());
-
- Crcs crcs;
- ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
- Crcs::kFileOffset));
+ {
+ // Update entry storage manually.
+ const std::string entry_storage_file_path = absl_ports::StrCat(
+ working_path_, "/", PersistentHashMap::kFilePrefix, ".e");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Entry>> entry_storage,
+ FileBackedVector<Entry>::Create(
+ filesystem_, entry_storage_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc, entry_storage->ComputeChecksum());
+ ICING_ASSERT_OK(entry_storage->Append(
+ Entry(/*key_value_index=*/-1, /*next_entry_index=*/-1)));
+ ICING_ASSERT_OK(entry_storage->PersistToDisk());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc, entry_storage->ComputeChecksum());
+ ASSERT_THAT(old_crc, Not(Eq(new_crc)));
+ }
- // Manually corrupt entry_storage_crc
- crcs.component_crcs.entry_storage_crc += kCorruptedValueOffset;
- crcs.all_crc = Crc32(std::string_view(
- reinterpret_cast<const char*>(&crcs.component_crcs),
- sizeof(Crcs::ComponentCrcs)))
- .Get();
- ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Crcs::kFileOffset, &crcs,
- sizeof(Crcs)));
{
// Attempt to create the persistent hash map with metadata containing
// corrupted entry_storage_crc. This should fail.
libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
persistent_hash_map_or =
- PersistentHashMap::Create(filesystem_, base_dir_, options);
+ PersistentHashMap::Create(filesystem_, working_path_, options);
EXPECT_THAT(persistent_hash_map_or,
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(persistent_hash_map_or.status().error_message(),
- HasSubstr("Mismatch crc with PersistentHashMap entry storage"));
+ HasSubstr("Invalid storages crc"));
}
}
-TEST_F(PersistentHashMapTest,
- InitializeExistingFilesWithWrongKeyValueStorageCrc) {
+TEST_P(PersistentHashMapTest,
+ InitializeExistingFilesWithCorruptedKeyValueStorage) {
Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
{
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
}
- const std::string metadata_file_path =
- absl_ports::StrCat(base_dir_, "/", PersistentHashMap::kSubDirectory, "/",
- PersistentHashMap::kFilePrefix, ".m");
- ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
- ASSERT_TRUE(metadata_sfd.is_valid());
-
- Crcs crcs;
- ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
- Crcs::kFileOffset));
+ {
+ // Update kv storage manually.
+ const std::string kv_storage_file_path = absl_ports::StrCat(
+ working_path_, "/", PersistentHashMap::kFilePrefix, ".k");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> kv_storage,
+ FileBackedVector<char>::Create(
+ filesystem_, kv_storage_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc, kv_storage->ComputeChecksum());
+ ICING_ASSERT_OK(kv_storage->Append('z'));
+ ICING_ASSERT_OK(kv_storage->PersistToDisk());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc, kv_storage->ComputeChecksum());
+ ASSERT_THAT(old_crc, Not(Eq(new_crc)));
+ }
- // Manually corrupt kv_storage_crc
- crcs.component_crcs.kv_storage_crc += kCorruptedValueOffset;
- crcs.all_crc = Crc32(std::string_view(
- reinterpret_cast<const char*>(&crcs.component_crcs),
- sizeof(Crcs::ComponentCrcs)))
- .Get();
- ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Crcs::kFileOffset, &crcs,
- sizeof(Crcs)));
{
// Attempt to create the persistent hash map with metadata containing
// corrupted kv_storage_crc. This should fail.
libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
persistent_hash_map_or =
- PersistentHashMap::Create(filesystem_, base_dir_, options);
+ PersistentHashMap::Create(filesystem_, working_path_, options);
EXPECT_THAT(persistent_hash_map_or,
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(
- persistent_hash_map_or.status().error_message(),
- HasSubstr("Mismatch crc with PersistentHashMap key value storage"));
+ EXPECT_THAT(persistent_hash_map_or.status().error_message(),
+ HasSubstr("Invalid storages crc"));
}
}
-TEST_F(PersistentHashMapTest,
+TEST_P(PersistentHashMapTest,
InitializeExistingFilesAllowDifferentMaxLoadFactorPercent) {
Options options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
/*max_load_factor_percent_in=*/Options::kDefaultMaxLoadFactorPercent,
/*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
- /*init_num_buckets_in=*/kTestInitNumBuckets);
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv=*/GetParam());
{
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data()));
@@ -732,14 +807,14 @@ TEST_F(PersistentHashMapTest,
options.max_load_factor_percent = 200;
ASSERT_TRUE(options.IsValid());
ASSERT_THAT(options.max_load_factor_percent,
- Not(Eq(Options::kDefaultMaxLoadFactorPercent)));
+ Ne(Options::kDefaultMaxLoadFactorPercent));
// Attempt to create the persistent hash map with different max load factor
// percent. This should succeed and metadata should be modified correctly.
// Also verify all entries should remain unchanged.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
EXPECT_THAT(persistent_hash_map, Pointee(SizeIs(2)));
EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "a"), IsOkAndHolds(1));
@@ -748,15 +823,14 @@ TEST_F(PersistentHashMapTest,
ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
}
- const std::string metadata_file_path =
- absl_ports::StrCat(base_dir_, "/", PersistentHashMap::kSubDirectory, "/",
- PersistentHashMap::kFilePrefix, ".m");
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", PersistentHashMap::kFilePrefix, ".m");
ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
ASSERT_TRUE(metadata_sfd.is_valid());
Info info;
ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
- Info::kFileOffset));
+ PersistentHashMap::kInfoMetadataFileOffset));
EXPECT_THAT(info.max_load_factor_percent,
Eq(options.max_load_factor_percent));
@@ -765,20 +839,21 @@ TEST_F(PersistentHashMapTest,
{
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
}
}
-TEST_F(PersistentHashMapTest,
+TEST_P(PersistentHashMapTest,
InitializeExistingFilesWithDifferentMaxLoadFactorPercentShouldRehash) {
Options options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
/*max_load_factor_percent_in=*/Options::kDefaultMaxLoadFactorPercent,
/*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
- /*init_num_buckets_in=*/kTestInitNumBuckets);
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv=*/GetParam());
double prev_loading_percent;
int prev_num_buckets;
@@ -786,7 +861,7 @@ TEST_F(PersistentHashMapTest,
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data()));
ICING_ASSERT_OK(persistent_hash_map->Put("c", Serialize(3).data()));
@@ -816,7 +891,7 @@ TEST_F(PersistentHashMapTest,
// should remain the same.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
EXPECT_THAT(persistent_hash_map->num_buckets(), Eq(prev_num_buckets));
@@ -834,14 +909,14 @@ TEST_F(PersistentHashMapTest,
// exceeds the limit.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
// After changing max_load_factor_percent, there should be rehashing and the
// new loading should not be greater than the new max load factor.
EXPECT_THAT(persistent_hash_map->size() * 100.0 /
persistent_hash_map->num_buckets(),
Not(Gt(options.max_load_factor_percent)));
- EXPECT_THAT(persistent_hash_map->num_buckets(), Not(Eq(prev_num_buckets)));
+ EXPECT_THAT(persistent_hash_map->num_buckets(), Ne(prev_num_buckets));
EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "a"), IsOkAndHolds(1));
EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "b"), IsOkAndHolds(2));
@@ -851,19 +926,20 @@ TEST_F(PersistentHashMapTest,
}
}
-TEST_F(PersistentHashMapTest, PutAndGet) {
+TEST_P(PersistentHashMapTest, PutAndGet) {
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
/*max_load_factor_percent_in=*/
Options::kDefaultMaxLoadFactorPercent,
/*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
- /*init_num_buckets_in=*/kTestInitNumBuckets)));
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv=*/GetParam())));
EXPECT_THAT(persistent_hash_map, Pointee(IsEmpty()));
EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"),
@@ -887,19 +963,20 @@ TEST_F(PersistentHashMapTest, PutAndGet) {
ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
}
-TEST_F(PersistentHashMapTest, PutShouldOverwriteValueIfKeyExists) {
+TEST_P(PersistentHashMapTest, PutShouldOverwriteValueIfKeyExists) {
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
/*max_load_factor_percent_in=*/
Options::kDefaultMaxLoadFactorPercent,
/*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
- /*init_num_buckets_in=*/kTestInitNumBuckets)));
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv=*/GetParam())));
ICING_ASSERT_OK(
persistent_hash_map->Put("default-google.com", Serialize(100).data()));
@@ -920,19 +997,20 @@ TEST_F(PersistentHashMapTest, PutShouldOverwriteValueIfKeyExists) {
IsOkAndHolds(300));
}
-TEST_F(PersistentHashMapTest, ShouldRehash) {
+TEST_P(PersistentHashMapTest, ShouldRehash) {
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
/*max_load_factor_percent_in=*/
Options::kDefaultMaxLoadFactorPercent,
/*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
- /*init_num_buckets_in=*/kTestInitNumBuckets)));
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv=*/GetParam())));
int original_num_buckets = persistent_hash_map->num_buckets();
// Insert 100 key value pairs. There should be rehashing so the loading of
@@ -946,8 +1024,7 @@ TEST_F(PersistentHashMapTest, ShouldRehash) {
persistent_hash_map->num_buckets(),
Not(Gt(Options::kDefaultMaxLoadFactorPercent)));
}
- EXPECT_THAT(persistent_hash_map->num_buckets(),
- Not(Eq(original_num_buckets)));
+ EXPECT_THAT(persistent_hash_map->num_buckets(), Ne(original_num_buckets));
// After rehashing, we should still be able to get all inserted entries.
for (int i = 0; i < 100; ++i) {
@@ -956,19 +1033,20 @@ TEST_F(PersistentHashMapTest, ShouldRehash) {
}
}
-TEST_F(PersistentHashMapTest, GetOrPutShouldPutIfKeyDoesNotExist) {
+TEST_P(PersistentHashMapTest, GetOrPutShouldPutIfKeyDoesNotExist) {
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
/*max_load_factor_percent_in=*/
Options::kDefaultMaxLoadFactorPercent,
/*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
- /*init_num_buckets_in=*/kTestInitNumBuckets)));
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv=*/GetParam())));
ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -982,19 +1060,20 @@ TEST_F(PersistentHashMapTest, GetOrPutShouldPutIfKeyDoesNotExist) {
IsOkAndHolds(1));
}
-TEST_F(PersistentHashMapTest, GetOrPutShouldGetIfKeyExists) {
+TEST_P(PersistentHashMapTest, GetOrPutShouldGetIfKeyExists) {
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
/*max_load_factor_percent_in=*/
Options::kDefaultMaxLoadFactorPercent,
/*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
- /*init_num_buckets_in=*/kTestInitNumBuckets)));
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
ASSERT_THAT(
persistent_hash_map->Put("default-google.com", Serialize(1).data()),
@@ -1011,19 +1090,20 @@ TEST_F(PersistentHashMapTest, GetOrPutShouldGetIfKeyExists) {
IsOkAndHolds(1));
}
-TEST_F(PersistentHashMapTest, Delete) {
+TEST_P(PersistentHashMapTest, Delete) {
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
/*max_load_factor_percent_in=*/
Options::kDefaultMaxLoadFactorPercent,
/*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
- /*init_num_buckets_in=*/kTestInitNumBuckets)));
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
// Delete a non-existing key should get NOT_FOUND error
EXPECT_THAT(persistent_hash_map->Delete("default-google.com"),
@@ -1062,19 +1142,20 @@ TEST_F(PersistentHashMapTest, Delete) {
IsOkAndHolds(50));
}
-TEST_F(PersistentHashMapTest, DeleteMultiple) {
+TEST_P(PersistentHashMapTest, DeleteMultiple) {
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
/*max_load_factor_percent_in=*/
Options::kDefaultMaxLoadFactorPercent,
/*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
- /*init_num_buckets_in=*/kTestInitNumBuckets)));
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
std::unordered_map<std::string, int> existing_keys;
std::unordered_set<std::string> deleted_keys;
@@ -1115,7 +1196,7 @@ TEST_F(PersistentHashMapTest, DeleteMultiple) {
Eq(existing_keys));
}
-TEST_F(PersistentHashMapTest, DeleteBucketHeadElement) {
+TEST_P(PersistentHashMapTest, DeleteBucketHeadElement) {
// Create new persistent hash map
// Set max_load_factor_percent as 1000. Load factor percent is calculated as
// 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of
@@ -1125,13 +1206,14 @@ TEST_F(PersistentHashMapTest, DeleteBucketHeadElement) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
/*max_load_factor_percent_in=*/1000,
/*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
- /*init_num_buckets_in=*/kTestInitNumBuckets)));
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
ICING_ASSERT_OK(
persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
@@ -1153,7 +1235,7 @@ TEST_F(PersistentHashMapTest, DeleteBucketHeadElement) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(PersistentHashMapTest, DeleteBucketIntermediateElement) {
+TEST_P(PersistentHashMapTest, DeleteBucketIntermediateElement) {
// Create new persistent hash map
// Set max_load_factor_percent as 1000. Load factor percent is calculated as
// 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of
@@ -1163,13 +1245,14 @@ TEST_F(PersistentHashMapTest, DeleteBucketIntermediateElement) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
/*max_load_factor_percent_in=*/1000,
/*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
- /*init_num_buckets_in=*/kTestInitNumBuckets)));
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
ICING_ASSERT_OK(
persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
@@ -1190,7 +1273,7 @@ TEST_F(PersistentHashMapTest, DeleteBucketIntermediateElement) {
IsOkAndHolds(2));
}
-TEST_F(PersistentHashMapTest, DeleteBucketTailElement) {
+TEST_P(PersistentHashMapTest, DeleteBucketTailElement) {
// Create new persistent hash map
// Set max_load_factor_percent as 1000. Load factor percent is calculated as
// 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of
@@ -1200,13 +1283,14 @@ TEST_F(PersistentHashMapTest, DeleteBucketTailElement) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
/*max_load_factor_percent_in=*/1000,
/*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
- /*init_num_buckets_in=*/kTestInitNumBuckets)));
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
ICING_ASSERT_OK(
persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
@@ -1228,7 +1312,7 @@ TEST_F(PersistentHashMapTest, DeleteBucketTailElement) {
IsOkAndHolds(2));
}
-TEST_F(PersistentHashMapTest, DeleteBucketOnlySingleElement) {
+TEST_P(PersistentHashMapTest, DeleteBucketOnlySingleElement) {
// Create new persistent hash map
// Set max_load_factor_percent as 1000. Load factor percent is calculated as
// 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of
@@ -1238,13 +1322,14 @@ TEST_F(PersistentHashMapTest, DeleteBucketOnlySingleElement) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
/*max_load_factor_percent_in=*/1000,
/*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
- /*init_num_buckets_in=*/kTestInitNumBuckets)));
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
ICING_ASSERT_OK(
persistent_hash_map->Put("default-google.com", Serialize(100).data()));
@@ -1257,19 +1342,20 @@ TEST_F(PersistentHashMapTest, DeleteBucketOnlySingleElement) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(PersistentHashMapTest, OperationsWhenReachingMaxNumEntries) {
+TEST_P(PersistentHashMapTest, OperationsWhenReachingMaxNumEntries) {
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/1,
/*max_load_factor_percent_in=*/
Options::kDefaultMaxLoadFactorPercent,
/*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
- /*init_num_buckets_in=*/1)));
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
ICING_ASSERT_OK(
persistent_hash_map->Put("default-google.com", Serialize(100).data()));
@@ -1293,12 +1379,13 @@ TEST_F(PersistentHashMapTest, OperationsWhenReachingMaxNumEntries) {
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
-TEST_F(PersistentHashMapTest, ShouldFailIfKeyContainsTerminationCharacter) {
+TEST_P(PersistentHashMapTest, ShouldFailIfKeyContainsTerminationCharacter) {
// Create new persistent hash map
+ Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_,
- Options(/*value_type_size_in=*/sizeof(int))));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
const char invalid_key[] = "a\0bc";
std::string_view invalid_key_view(invalid_key, 4);
@@ -1314,36 +1401,38 @@ TEST_F(PersistentHashMapTest, ShouldFailIfKeyContainsTerminationCharacter) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(PersistentHashMapTest, EmptyHashMapIterator) {
+TEST_P(PersistentHashMapTest, EmptyHashMapIterator) {
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
/*max_load_factor_percent_in=*/
Options::kDefaultMaxLoadFactorPercent,
/*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
- /*init_num_buckets_in=*/kTestInitNumBuckets)));
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
EXPECT_FALSE(persistent_hash_map->GetIterator().Advance());
}
-TEST_F(PersistentHashMapTest, Iterator) {
+TEST_P(PersistentHashMapTest, Iterator) {
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
/*max_load_factor_percent_in=*/
Options::kDefaultMaxLoadFactorPercent,
/*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
- /*init_num_buckets_in=*/kTestInitNumBuckets)));
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
std::unordered_map<std::string, int> kvps;
// Insert 100 key value pairs
@@ -1358,19 +1447,20 @@ TEST_F(PersistentHashMapTest, Iterator) {
Eq(kvps));
}
-TEST_F(PersistentHashMapTest, IteratorAfterDeletingFirstKeyValuePair) {
+TEST_P(PersistentHashMapTest, IteratorAfterDeletingFirstKeyValuePair) {
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
/*max_load_factor_percent_in=*/
Options::kDefaultMaxLoadFactorPercent,
/*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
- /*init_num_buckets_in=*/kTestInitNumBuckets)));
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
ICING_ASSERT_OK(
persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
@@ -1387,19 +1477,20 @@ TEST_F(PersistentHashMapTest, IteratorAfterDeletingFirstKeyValuePair) {
Pair("default-google.com-2", 2)));
}
-TEST_F(PersistentHashMapTest, IteratorAfterDeletingIntermediateKeyValuePair) {
+TEST_P(PersistentHashMapTest, IteratorAfterDeletingIntermediateKeyValuePair) {
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
/*max_load_factor_percent_in=*/
Options::kDefaultMaxLoadFactorPercent,
/*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
- /*init_num_buckets_in=*/kTestInitNumBuckets)));
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
ICING_ASSERT_OK(
persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
@@ -1416,19 +1507,20 @@ TEST_F(PersistentHashMapTest, IteratorAfterDeletingIntermediateKeyValuePair) {
Pair("default-google.com-2", 2)));
}
-TEST_F(PersistentHashMapTest, IteratorAfterDeletingLastKeyValuePair) {
+TEST_P(PersistentHashMapTest, IteratorAfterDeletingLastKeyValuePair) {
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
/*max_load_factor_percent_in=*/
Options::kDefaultMaxLoadFactorPercent,
/*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
- /*init_num_buckets_in=*/kTestInitNumBuckets)));
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
ICING_ASSERT_OK(
persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
@@ -1445,19 +1537,20 @@ TEST_F(PersistentHashMapTest, IteratorAfterDeletingLastKeyValuePair) {
Pair("default-google.com-1", 1)));
}
-TEST_F(PersistentHashMapTest, IteratorAfterDeletingAllKeyValuePairs) {
+TEST_P(PersistentHashMapTest, IteratorAfterDeletingAllKeyValuePairs) {
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
/*max_load_factor_percent_in=*/
Options::kDefaultMaxLoadFactorPercent,
/*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
- /*init_num_buckets_in=*/kTestInitNumBuckets)));
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
ICING_ASSERT_OK(
persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
@@ -1475,6 +1568,9 @@ TEST_F(PersistentHashMapTest, IteratorAfterDeletingAllKeyValuePairs) {
EXPECT_FALSE(persistent_hash_map->GetIterator().Advance());
}
+INSTANTIATE_TEST_SUITE_P(PersistentHashMapTest, PersistentHashMapTest,
+ testing::Values(true, false));
+
} // namespace
} // namespace lib
diff --git a/icing/file/persistent-storage.cc b/icing/file/persistent-storage.cc
new file mode 100644
index 0000000..9a595ef
--- /dev/null
+++ b/icing/file/persistent-storage.cc
@@ -0,0 +1,55 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/persistent-storage.h"
+
+#include <string>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/legacy/core/icing-string-util.h"
+
+namespace icing {
+namespace lib {
+
+/* static */ libtextclassifier3::Status PersistentStorage::Discard(
+ const Filesystem& filesystem, const std::string& working_path,
+ WorkingPathType working_path_type) {
+ switch (working_path_type) {
+ case WorkingPathType::kSingleFile: {
+ if (!filesystem.DeleteFile(working_path.c_str())) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Failed to delete PersistentStorage file: ", working_path));
+ }
+ return libtextclassifier3::Status::OK;
+ }
+ case WorkingPathType::kDirectory: {
+ if (!filesystem.DeleteDirectoryRecursively(working_path.c_str())) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Failed to delete PersistentStorage directory: ", working_path));
+ }
+ return libtextclassifier3::Status::OK;
+ }
+ case WorkingPathType::kDummy:
+ return libtextclassifier3::Status::OK;
+ }
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Unknown working path type %d for PersistentStorage %s",
+ static_cast<int>(working_path_type), working_path.c_str()));
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/persistent-storage.h b/icing/file/persistent-storage.h
new file mode 100644
index 0000000..9cb5e4d
--- /dev/null
+++ b/icing/file/persistent-storage.h
@@ -0,0 +1,369 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_FILE_PERSISTENT_STORAGE_H_
+#define ICING_FILE_PERSISTENT_STORAGE_H_
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/util/crc32.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+// PersistentStorage: an abstract class for all persistent data structures.
+// - It provides some common persistent file methods, e.g. PersistToDisk.
+// - It encapsulates most of the checksum handling logics (including update and
+// validation).
+//
+// Terminology:
+// - Crcs: checksum section
+// - Info: (custom) information for derived class
+// - Metadata: Crcs + Info
+//
+// Usually a persistent data structure will have its own custom Info and
+// storages (single or composite storages) definition. To create a new
+// persistent data structure via PersistentStorage:
+// - Decide what type the working path is (single file or directory). See
+// working_path_ and WorkingPathType for more details.
+// - Create a new class that inherits PersistentStorage:
+// - Declare custom Info and design the metadata section layout.
+// Usually the layout is <Crcs><Info>, and there are 2 common ways to
+// manage metadata section:
+// - Have a separate file for metadata. In this case, the new persistent
+// data structure contains multiple files, so working path should be used
+// as directory path and multiple files will be stored under it. Example:
+// PersistentHashMap.
+// - Have a single file for both metadata and storage data. In this case,
+// the file layout should be <Crcs><Info><Storage Data>, and
+// working path should be used as file path. Example: FileBackedVector.
+// - Handle working path file/directory creation and deletion.
+// PersistentStorage only provides static Discard() method to use. The
+// derived class should implement other logics, e.g. working path (file
+// /directory) creation, check condition to discard working path and start
+// over new file(s).
+// - Implement all pure virtual methods:
+// - PersistStoragesToDisk: persist all (composite) storages. In general,
+// the implementation will be calling PersistToDisk for all composite
+// storages.
+// - PersistMetadataToDisk: persist metadata, including Crcs and Info.
+// - If the derived class maintains a concrete Crc and (custom) Info
+// instance, then it should perform write/pwrite into the metadata
+// section.
+// - If the derived class uses memory-mapped region directly for metadata,
+// then it should call MemoryMappedFile::PersistToDisk.
+// - See crcs() for more details.
+// - ComputeInfoChecksum: compute the checksum for custom Info.
+// - ComputeStoragesChecksum: compute the (combined) checksum for all
+// (composite) storages. In general, the implementation will be calling
+// UpdateChecksums for all composite storages and XOR all checksums.
+// - crcs(): provide the reference for PersistentStorage to write checksums.
+// The derived class can either maintain a concrete Crcs instance, or
+// reinterpret_cast the memory-mapped region to Crcs reference. Either
+// choice is fine as long as PersistMetadataToDisk flushes it to disk
+// correctly.
+// - Call either InitializeNewStorage or InitializeExistingStorage when creating
+// and initializing an instance, depending on initializing new storage or from
+// existing file(s).
+class PersistentStorage {
+ public:
+ enum class WorkingPathType {
+ kSingleFile,
+ kDirectory,
+ kDummy,
+ };
+
+ // Crcs and Info will be written into the metadata section. Info is defined by
+ // the actual implementation of each persistent storage. Usually the Metadata
+ // layout is: <Crcs><Info>
+ struct Crcs {
+ struct ComponentCrcs {
+ uint32_t info_crc;
+ uint32_t storages_crc;
+
+ bool operator==(const ComponentCrcs& other) const {
+ return info_crc == other.info_crc && storages_crc == other.storages_crc;
+ }
+
+ Crc32 ComputeChecksum() const {
+ return Crc32(std::string_view(reinterpret_cast<const char*>(this),
+ sizeof(ComponentCrcs)));
+ }
+ } __attribute__((packed));
+
+ bool operator==(const Crcs& other) const {
+ return all_crc == other.all_crc && component_crcs == other.component_crcs;
+ }
+
+ uint32_t all_crc;
+ ComponentCrcs component_crcs;
+ } __attribute__((packed));
+ static_assert(sizeof(Crcs) == 12, "");
+
+ // Deletes working_path according to its type.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ // - INVALID_ARGUMENT_ERROR if working_path_type is unknown type
+ static libtextclassifier3::Status Discard(const Filesystem& filesystem,
+ const std::string& working_path,
+ WorkingPathType working_path_type);
+
+ virtual ~PersistentStorage() = default;
+
+ // Initializes new persistent storage. It computes the initial checksums and
+ // writes into the metadata file.
+ //
+ // Note: either InitializeNewStorage or InitializeExistingStorage should be
+ // invoked after creating a PersistentStorage instance before using, otherwise
+ // an uninitialized instance will fail to use persistent storage features,
+ // e.g. PersistToDisk, UpdateChecksums.
+ //
+ // Returns:
+ // - OK on success or already initialized
+ // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
+ // on actual implementation
+ libtextclassifier3::Status InitializeNewStorage() {
+ if (is_initialized_) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ ICING_RETURN_IF_ERROR(UpdateChecksumsInternal(/*force=*/true));
+ ICING_RETURN_IF_ERROR(PersistStoragesToDisk(/*force=*/true));
+ ICING_RETURN_IF_ERROR(PersistMetadataToDisk(/*force=*/true));
+
+ is_initialized_ = true;
+ return libtextclassifier3::Status::OK;
+ }
+
+ // Initializes persistent storage from existing file(s).
+ //
+ // It enforces the following check(s):
+ // - Validate checksums.
+ //
+ // Note: either InitializeNewStorage or InitializeExistingStorage should be
+ // invoked after creating a PersistentStorage instance before using.
+ //
+ // Returns:
+ // - OK on success or already initialized
+ // - FAILED_PRECONDITION_ERROR if checksum validation fails.
+ // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
+ // on actual implementation
+ libtextclassifier3::Status InitializeExistingStorage() {
+ if (is_initialized_) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ ICING_RETURN_IF_ERROR(ValidateChecksums());
+
+ is_initialized_ = true;
+ return libtextclassifier3::Status::OK;
+ }
+
+ // Flushes contents to underlying files.
+ // 1) Flushes storages.
+ // 2) Updates all checksums by new data.
+ // 3) Flushes metadata.
+ //
+ // Force flag will be passed down to PersistMetadataToDisk,
+ // PersistStoragesToDisk, ComputeInfoChecksum, ComputeStoragesChecksum.
+ // - If force == true, then performs actual persisting operations/recomputes
+ // the checksum.
+ // - Otherwise, the derived class can decide itself whether skipping
+ // persisting operations/doing lazy checksum recomputing if the storage is
+ // not dirty.
+ //
+ // Returns:
+ // - OK on success
+ // - FAILED_PRECONDITION_ERROR if PersistentStorage is uninitialized
+ // - Any errors from PersistStoragesToDisk, UpdateChecksums,
+ // PersistMetadataToDisk, depending on actual implementation
+ libtextclassifier3::Status PersistToDisk(bool force = false) {
+ if (!is_initialized_) {
+ return absl_ports::FailedPreconditionError(absl_ports::StrCat(
+ "PersistentStorage ", working_path_, " not initialized"));
+ }
+
+ ICING_RETURN_IF_ERROR(UpdateChecksumsInternal(force));
+ ICING_RETURN_IF_ERROR(PersistStoragesToDisk(force));
+ ICING_RETURN_IF_ERROR(PersistMetadataToDisk(force));
+ return libtextclassifier3::Status::OK;
+ }
+
+ // Updates checksums of all components and returns the overall crc (all_crc)
+ // of the persistent storage.
+ //
+ // Force flag will be passed down ComputeInfoChecksum,
+ // ComputeStoragesChecksum.
+ // - If force == true, then recomputes the checksum.
+ // - Otherwise, the derived class can decide itself whether doing lazy
+ // checksum recomputing if the storage is not dirty.
+ //
+ // Returns:
+ // - Overall crc of the persistent storage on success
+ // - FAILED_PRECONDITION_ERROR if PersistentStorage is uninitialized
+ // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
+ // on actual implementation
+ libtextclassifier3::StatusOr<Crc32> UpdateChecksums(bool force = false) {
+ if (!is_initialized_) {
+ return absl_ports::FailedPreconditionError(absl_ports::StrCat(
+ "PersistentStorage ", working_path_, " not initialized"));
+ }
+
+ return UpdateChecksumsInternal(force);
+ }
+
+ protected:
+ explicit PersistentStorage(const Filesystem& filesystem,
+ std::string working_path,
+ WorkingPathType working_path_type)
+ : filesystem_(filesystem),
+ working_path_(std::move(working_path)),
+ working_path_type_(working_path_type),
+ is_initialized_(false) {}
+
+ // Flushes contents of metadata. The implementation should flush Crcs and Info
+ // correctly, depending on whether they're using memory-mapped regions or
+ // concrete instances in the derived class.
+ //
+ // Returns:
+ // - OK on success
+ // - Any other errors, depending on actual implementation
+ virtual libtextclassifier3::Status PersistMetadataToDisk(bool force) = 0;
+
+ // Flushes contents of all storages to underlying files.
+ //
+ // Returns:
+ // - OK on success
+ // - Any other errors, depending on actual implementation
+ virtual libtextclassifier3::Status PersistStoragesToDisk(bool force) = 0;
+
+ // Computes and returns Info checksum.
+ // - If force = true, then recompute the entire checksum.
+ // - Otherwise, the derived class can decide itself whether doing lazy
+ // checksum computing if the storage is not dirty.
+ //
+ // This function will be mainly called by UpdateChecksums.
+ //
+ // Returns:
+ // - Crc of the Info on success
+ // - Any other errors, depending on actual implementation
+ virtual libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(
+ bool force) = 0;
+
+ // Computes and returns all storages checksum. If there are multiple storages,
+ // usually we XOR their checksums together to a single checksum.
+ // - If force = true, then recompute the entire checksum.
+ // - Otherwise, the derived class can decide itself whether doing lazy
+ // checksum computing if the storage is not dirty.
+ //
+ // This function will be mainly called by UpdateChecksums.
+ //
+ // Returns:
+ // - Crc of all storages on success
+ // - Any other errors from depending on actual implementation
+ virtual libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) = 0;
+
+ // Returns the Crcs instance reference. The derived class can either own a
+ // concrete Crcs instance, or reinterpret_cast the memory-mapped region to
+ // Crcs reference. PersistMetadataToDisk should flush it to disk correctly.
+ virtual Crcs& crcs() = 0;
+ virtual const Crcs& crcs() const = 0;
+
+ const Filesystem& filesystem_; // Does not own
+ // Path to the storage. It can be a single file path or a directory path
+ // depending on the implementation of the derived class.
+ //
+ // Note that the derived storage class will take full ownership and of
+ // working_path_, including creation/deletion. It is the caller's
+ // responsibility to specify correct working path and avoid mixing different
+ // persistent storages together under the same path. Also the caller has the
+ // ownership for the parent directory of working_path_, and it is responsible
+ // for parent directory creation/deletion.
+ std::string working_path_;
+ WorkingPathType working_path_type_;
+
+ bool is_initialized_;
+
+ private:
+ // Updates checksums of all components and returns the overall crc (all_crc)
+ // of the persistent storage. Different from UpdateChecksums, it won't check
+ // if PersistentStorage is initialized or not.
+ //
+ // Returns:
+ // - Overall crc of the persistent storage on success
+ // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
+ // on actual implementation
+ libtextclassifier3::StatusOr<Crc32> UpdateChecksumsInternal(bool force) {
+ Crcs& crcs_ref = crcs();
+ // Compute and update storages + info checksums.
+ ICING_ASSIGN_OR_RETURN(Crc32 info_crc, ComputeInfoChecksum(force));
+ ICING_ASSIGN_OR_RETURN(Crc32 storages_crc, ComputeStoragesChecksum(force));
+ if (crcs_ref.component_crcs.info_crc == info_crc.Get() &&
+ crcs_ref.component_crcs.storages_crc == storages_crc.Get()) {
+ // If info and storages crc haven't changed, then we don't have to update
+ // checksums.
+ return Crc32(crcs_ref.all_crc);
+ }
+
+ crcs_ref.component_crcs.info_crc = info_crc.Get();
+ crcs_ref.component_crcs.storages_crc = storages_crc.Get();
+
+ // Finally compute and update overall checksum.
+ crcs_ref.all_crc = crcs_ref.component_crcs.ComputeChecksum().Get();
+ return Crc32(crcs_ref.all_crc);
+ }
+
+ // Validates all checksums of the persistent storage.
+ //
+ // Returns:
+ // - OK on success
+ // - FAILED_PRECONDITION_ERROR if any checksum is incorrect.
+ // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
+ // on actual implementation
+ libtextclassifier3::Status ValidateChecksums() {
+ const Crcs& crcs_ref = crcs();
+ if (crcs_ref.all_crc != crcs_ref.component_crcs.ComputeChecksum().Get()) {
+ return absl_ports::FailedPreconditionError("Invalid all crc");
+ }
+
+ ICING_ASSIGN_OR_RETURN(Crc32 info_crc, ComputeInfoChecksum(/*force=*/true));
+ if (crcs_ref.component_crcs.info_crc != info_crc.Get()) {
+ return absl_ports::FailedPreconditionError("Invalid info crc");
+ }
+
+ ICING_ASSIGN_OR_RETURN(Crc32 storages_crc,
+ ComputeStoragesChecksum(/*force=*/true));
+ if (crcs_ref.component_crcs.storages_crc != storages_crc.Get()) {
+ return absl_ports::FailedPreconditionError("Invalid storages crc");
+ }
+
+ return libtextclassifier3::Status::OK;
+ }
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_PERSISTENT_STORAGE_H_
diff --git a/icing/file/portable-file-backed-proto-log.h b/icing/file/portable-file-backed-proto-log.h
index e48e6e0..a36bd9e 100644
--- a/icing/file/portable-file-backed-proto-log.h
+++ b/icing/file/portable-file-backed-proto-log.h
@@ -64,7 +64,6 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/file/filesystem.h"
@@ -79,6 +78,7 @@
#include "icing/util/data-loss.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
+#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
namespace icing {
namespace lib {
@@ -106,13 +106,32 @@ class PortableFileBackedProtoLog {
// compressed size larger than max_proto_size are also not accepted.
const int32_t max_proto_size;
+ // Level of compression if enabled, NO_COMPRESSION = 0, BEST_SPEED = 1,
+ // BEST_COMPRESSION = 9
+ const int32_t compression_level;
+
// Must specify values for options.
Options() = delete;
- explicit Options(bool compress_in,
- const int32_t max_proto_size_in = kMaxProtoSize)
- : compress(compress_in), max_proto_size(max_proto_size_in) {}
+ explicit Options(
+ bool compress_in, const int32_t max_proto_size_in = kMaxProtoSize,
+ const int32_t compression_level_in = kDeflateCompressionLevel)
+ : compress(compress_in),
+ max_proto_size(max_proto_size_in),
+ compression_level(compression_level_in) {}
};
+ // Our internal max for protos.
+ //
+ // WARNING: Changing this to a larger number may invalidate our assumption
+ // that that proto size can safely be stored in the last 3 bytes of the proto
+ // header.
+ static constexpr int kMaxProtoSize = (1 << 24) - 1; // 16MiB
+ static_assert(kMaxProtoSize <= 0x00FFFFFF,
+ "kMaxProtoSize doesn't fit in 3 bytes");
+
+ // Level of compression, BEST_SPEED = 1, BEST_COMPRESSION = 9
+ static constexpr int kDeflateCompressionLevel = 3;
+
// Number of bytes we reserve for the heading at the beginning of the proto
// log. We reserve this so the header can grow without running into the
// contents of the proto log, triggering an unnecessary migration of the data.
@@ -480,7 +499,8 @@ class PortableFileBackedProtoLog {
// Object can only be instantiated via the ::Create factory.
PortableFileBackedProtoLog(const Filesystem* filesystem,
const std::string& file_path,
- std::unique_ptr<Header> header);
+ std::unique_ptr<Header> header,
+ int32_t compression_level);
// Initializes a new proto log.
//
@@ -556,18 +576,6 @@ class PortableFileBackedProtoLog {
// protos we support.
static constexpr uint8_t kProtoMagic = 0x5C;
- // Our internal max for protos.
- //
- // WARNING: Changing this to a larger number may invalidate our assumption
- // that that proto size can safely be stored in the last 3 bytes of the proto
- // header.
- static constexpr int kMaxProtoSize = (1 << 24) - 1; // 16MiB
- static_assert(kMaxProtoSize <= 0x00FFFFFF,
- "kMaxProtoSize doesn't fit in 3 bytes");
-
- // Level of compression, BEST_SPEED = 1, BEST_COMPRESSION = 9
- static constexpr int kDeflateCompressionLevel = 3;
-
// Chunks of the file to mmap at a time, so we don't mmap the entire file.
// Only used on 32-bit devices
static constexpr int kMmapChunkSize = 4 * 1024 * 1024; // 4MiB
@@ -576,15 +584,17 @@ class PortableFileBackedProtoLog {
const Filesystem* const filesystem_;
const std::string file_path_;
std::unique_ptr<Header> header_;
+ const int32_t compression_level_;
};
template <typename ProtoT>
PortableFileBackedProtoLog<ProtoT>::PortableFileBackedProtoLog(
const Filesystem* filesystem, const std::string& file_path,
- std::unique_ptr<Header> header)
+ std::unique_ptr<Header> header, int32_t compression_level)
: filesystem_(filesystem),
file_path_(file_path),
- header_(std::move(header)) {
+ header_(std::move(header)),
+ compression_level_(compression_level) {
fd_.reset(filesystem_->OpenForAppend(file_path.c_str()));
}
@@ -617,6 +627,12 @@ PortableFileBackedProtoLog<ProtoT>::Create(const Filesystem* filesystem,
options.max_proto_size));
}
+ if (options.compression_level < 0 || options.compression_level > 9) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "options.compression_level must be between 0 and 9 inclusive, was %d",
+ options.compression_level));
+ }
+
if (!filesystem->FileExists(file_path.c_str())) {
return InitializeNewFile(filesystem, file_path, options);
}
@@ -660,7 +676,8 @@ PortableFileBackedProtoLog<ProtoT>::InitializeNewFile(
CreateResult create_result = {
std::unique_ptr<PortableFileBackedProtoLog<ProtoT>>(
new PortableFileBackedProtoLog<ProtoT>(filesystem, file_path,
- std::move(header))),
+ std::move(header),
+ options.compression_level)),
/*data_loss=*/DataLoss::NONE, /*recalculated_checksum=*/false};
return create_result;
@@ -788,7 +805,8 @@ PortableFileBackedProtoLog<ProtoT>::InitializeExistingFile(
CreateResult create_result = {
std::unique_ptr<PortableFileBackedProtoLog<ProtoT>>(
new PortableFileBackedProtoLog<ProtoT>(filesystem, file_path,
- std::move(header))),
+ std::move(header),
+ options.compression_level)),
data_loss, recalculated_checksum};
return create_result;
@@ -893,7 +911,7 @@ PortableFileBackedProtoLog<ProtoT>::WriteProto(const ProtoT& proto) {
if (header_->GetCompressFlag()) {
protobuf_ports::GzipOutputStream::Options options;
options.format = protobuf_ports::GzipOutputStream::ZLIB;
- options.compression_level = kDeflateCompressionLevel;
+ options.compression_level = compression_level_;
protobuf_ports::GzipOutputStream compressing_stream(&proto_stream, options);
@@ -971,8 +989,7 @@ PortableFileBackedProtoLog<ProtoT>::ReadProto(int64_t file_offset) const {
return absl_ports::NotFoundError("The proto data has been erased.");
}
- google::protobuf::io::ArrayInputStream proto_stream(buf.get(),
- stored_size);
+ google::protobuf::io::ArrayInputStream proto_stream(buf.get(), stored_size);
// Deserialize proto
ProtoT proto;
diff --git a/icing/file/portable-file-backed-proto-log_test.cc b/icing/file/portable-file-backed-proto-log_test.cc
index af09d18..bf5e604 100644
--- a/icing/file/portable-file-backed-proto-log_test.cc
+++ b/icing/file/portable-file-backed-proto-log_test.cc
@@ -73,24 +73,18 @@ class PortableFileBackedProtoLogTest : public ::testing::Test {
const Filesystem filesystem_;
std::string file_path_;
bool compress_ = true;
+ int32_t compression_level_ =
+ PortableFileBackedProtoLog<DocumentProto>::kDeflateCompressionLevel;
int64_t max_proto_size_ = 256 * 1024; // 256 KiB
};
TEST_F(PortableFileBackedProtoLogTest, Initialize) {
- // max_proto_size must be greater than 0
- int invalid_max_proto_size = 0;
- ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
- &filesystem_, file_path_,
- PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, invalid_max_proto_size)),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-
ICING_ASSERT_OK_AND_ASSIGN(
PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
- PortableFileBackedProtoLog<DocumentProto>::Options(compress_,
- max_proto_size_)));
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
EXPECT_THAT(create_result.proto_log, NotNull());
EXPECT_FALSE(create_result.has_data_loss());
EXPECT_FALSE(create_result.recalculated_checksum);
@@ -99,7 +93,41 @@ TEST_F(PortableFileBackedProtoLogTest, Initialize) {
ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- !compress_, max_proto_size_)),
+ !compress_, max_proto_size_, compression_level_)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(PortableFileBackedProtoLogTest, InitializeValidatesOptions) {
+ // max_proto_size must be greater than 0
+ int invalid_max_proto_size = 0;
+ ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, invalid_max_proto_size, compression_level_)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // max_proto_size must be under 16 MiB
+ invalid_max_proto_size = 16 * 1024 * 1024;
+ ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, invalid_max_proto_size, compression_level_)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // compression_level must be between 0 and 9 inclusive
+ int invalid_compression_level = -1;
+ ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, invalid_compression_level)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // compression_level must be between 0 and 9 inclusive
+ invalid_compression_level = 10;
+ ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, invalid_compression_level)),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
@@ -108,8 +136,8 @@ TEST_F(PortableFileBackedProtoLogTest, ReservedSpaceForHeader) {
PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
- PortableFileBackedProtoLog<DocumentProto>::Options(compress_,
- max_proto_size_)));
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
// With no protos written yet, the log should be minimum the size of the
// reserved header space.
@@ -123,8 +151,8 @@ TEST_F(PortableFileBackedProtoLogTest, WriteProtoTooLarge) {
PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
- PortableFileBackedProtoLog<DocumentProto>::Options(compress_,
- max_proto_size)));
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -140,8 +168,8 @@ TEST_F(PortableFileBackedProtoLogTest, ReadProtoWrongKProtoMagic) {
PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
- PortableFileBackedProtoLog<DocumentProto>::Options(compress_,
- max_proto_size_)));
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -175,7 +203,7 @@ TEST_F(PortableFileBackedProtoLogTest, ReadWriteUncompressedProto) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- /*compress_in=*/false, max_proto_size_)));
+ /*compress_in=*/false, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -222,7 +250,7 @@ TEST_F(PortableFileBackedProtoLogTest, ReadWriteUncompressedProto) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- /*compress_in=*/false, max_proto_size_)));
+ /*compress_in=*/false, max_proto_size_, compression_level_)));
auto recreated_proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -244,7 +272,7 @@ TEST_F(PortableFileBackedProtoLogTest, ReadWriteCompressedProto) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- /*compress_in=*/true, max_proto_size_)));
+ /*compress_in=*/true, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -291,7 +319,7 @@ TEST_F(PortableFileBackedProtoLogTest, ReadWriteCompressedProto) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- /*compress_in=*/true, max_proto_size_)));
+ /*compress_in=*/true, max_proto_size_, compression_level_)));
auto recreated_proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -304,6 +332,168 @@ TEST_F(PortableFileBackedProtoLogTest, ReadWriteCompressedProto) {
}
}
+TEST_F(PortableFileBackedProtoLogTest, ReadWriteDifferentCompressionLevel) {
+ int document1_offset;
+ int document2_offset;
+ int document3_offset;
+
+ // The first proto to write that's close to the max size. Leave some room for
+ // the rest of the proto properties.
+ std::string long_str(max_proto_size_ - 1024, 'a');
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .AddStringProperty("long_str", long_str)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder().SetKey("namespace2", "uri2").Build();
+ DocumentProto document3 =
+ DocumentBuilder().SetKey("namespace3", "uri3").Build();
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ /*compress_in=*/true, max_proto_size_,
+ /*compression_level_in=*/3)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Write the first proto
+ ICING_ASSERT_OK_AND_ASSIGN(document1_offset,
+ proto_log->WriteProto(document1));
+
+ // Check that what we read is what we wrote
+ ASSERT_THAT(proto_log->ReadProto(document1_offset),
+ IsOkAndHolds(EqualsProto(document1)));
+
+ ICING_ASSERT_OK(proto_log->PersistToDisk());
+ }
+
+ // Make a new proto_log with the same file_path but different compression
+ // level, and make sure we can still read from and write to the same
+ // underlying file.
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ /*compress_in=*/true, max_proto_size_,
+ /*compression_level_in=*/9)));
+ auto recreated_proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Check the first proto
+ ASSERT_THAT(recreated_proto_log->ReadProto(document1_offset),
+ IsOkAndHolds(EqualsProto(document1)));
+
+ // Write a second proto
+ ICING_ASSERT_OK_AND_ASSIGN(document2_offset,
+ recreated_proto_log->WriteProto(document2));
+
+ ASSERT_GT(document2_offset, document1_offset);
+
+ // Check the second proto
+ ASSERT_THAT(recreated_proto_log->ReadProto(document2_offset),
+ IsOkAndHolds(EqualsProto(document2)));
+
+ ICING_ASSERT_OK(recreated_proto_log->PersistToDisk());
+ }
+
+ // One more time but with 0 compression level
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ /*compress_in=*/true, max_proto_size_,
+ /*compression_level=*/0)));
+ auto recreated_proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Check the first proto
+ ASSERT_THAT(recreated_proto_log->ReadProto(document1_offset),
+ IsOkAndHolds(EqualsProto(document1)));
+
+ // Check the second proto
+ ASSERT_THAT(recreated_proto_log->ReadProto(document2_offset),
+ IsOkAndHolds(EqualsProto(document2)));
+
+ // Write a third proto
+ ICING_ASSERT_OK_AND_ASSIGN(document3_offset,
+ recreated_proto_log->WriteProto(document3));
+
+ ASSERT_GT(document3_offset, document2_offset);
+
+ // Check the third proto
+ ASSERT_THAT(recreated_proto_log->ReadProto(document3_offset),
+ IsOkAndHolds(EqualsProto(document3)));
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest,
+ WriteDifferentCompressionLevelDifferentSizes) {
+ int document_log_size_with_compression_3;
+ int document_log_size_with_no_compression;
+
+ // The first proto to write that's close to the max size. Leave some room for
+ // the rest of the proto properties.
+ std::string long_str(max_proto_size_ - 1024, 'a');
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .AddStringProperty("long_str", long_str)
+ .Build();
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ /*compress_in=*/true, max_proto_size_,
+ /*compression_level_in=*/3)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Write the proto
+ ICING_ASSERT_OK(proto_log->WriteProto(document1));
+ ICING_ASSERT_OK(proto_log->PersistToDisk());
+
+ document_log_size_with_compression_3 =
+ filesystem_.GetFileSize(file_path_.c_str());
+ }
+
+ // Delete the proto_log so we can reuse the file_path
+ filesystem_.DeleteFile(file_path_.c_str());
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ /*compress_in=*/true, max_proto_size_,
+ /*compression_level_in=*/0)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Write the proto
+ ICING_ASSERT_OK(proto_log->WriteProto(document1));
+ ICING_ASSERT_OK(proto_log->PersistToDisk());
+
+ document_log_size_with_no_compression =
+ filesystem_.GetFileSize(file_path_.c_str());
+
+ // Uncompressed document file size should be larger than original compressed
+ // document file size
+ ASSERT_GT(document_log_size_with_no_compression,
+ document_log_size_with_compression_3);
+ }
+}
+
TEST_F(PortableFileBackedProtoLogTest, CorruptHeader) {
{
ICING_ASSERT_OK_AND_ASSIGN(
@@ -311,7 +501,7 @@ TEST_F(PortableFileBackedProtoLogTest, CorruptHeader) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto recreated_proto_log = std::move(create_result.proto_log);
EXPECT_FALSE(create_result.has_data_loss());
}
@@ -328,7 +518,7 @@ TEST_F(PortableFileBackedProtoLogTest, CorruptHeader) {
ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)),
+ compress_, max_proto_size_, compression_level_)),
StatusIs(libtextclassifier3::StatusCode::INTERNAL,
HasSubstr("Invalid header checksum")));
}
@@ -341,7 +531,7 @@ TEST_F(PortableFileBackedProtoLogTest, DifferentMagic) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto recreated_proto_log = std::move(create_result.proto_log);
EXPECT_FALSE(create_result.has_data_loss());
@@ -360,7 +550,7 @@ TEST_F(PortableFileBackedProtoLogTest, DifferentMagic) {
ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)),
+ compress_, max_proto_size_, compression_level_)),
StatusIs(libtextclassifier3::StatusCode::INTERNAL,
HasSubstr("Invalid header kMagic")));
}
@@ -383,7 +573,7 @@ TEST_F(PortableFileBackedProtoLogTest,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
EXPECT_FALSE(create_result.has_data_loss());
@@ -410,7 +600,7 @@ TEST_F(PortableFileBackedProtoLogTest,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
EXPECT_FALSE(create_result.has_data_loss());
EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
@@ -432,7 +622,7 @@ TEST_F(PortableFileBackedProtoLogTest,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -478,7 +668,7 @@ TEST_F(PortableFileBackedProtoLogTest,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
EXPECT_TRUE(create_result.has_data_loss());
EXPECT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE));
@@ -507,7 +697,7 @@ TEST_F(PortableFileBackedProtoLogTest, DirtyBitFalseAlarmKeepsData) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -537,7 +727,7 @@ TEST_F(PortableFileBackedProtoLogTest, DirtyBitFalseAlarmKeepsData) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
EXPECT_FALSE(create_result.has_data_loss());
@@ -569,7 +759,7 @@ TEST_F(PortableFileBackedProtoLogTest,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -615,7 +805,7 @@ TEST_F(PortableFileBackedProtoLogTest,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_TRUE(create_result.has_data_loss());
ASSERT_THAT(create_result.data_loss, Eq(DataLoss::PARTIAL));
@@ -640,7 +830,7 @@ TEST_F(PortableFileBackedProtoLogTest,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -663,7 +853,7 @@ TEST_F(PortableFileBackedProtoLogTest,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
// We previously persisted to disk so everything should be in a perfect
// state.
@@ -683,7 +873,7 @@ TEST_F(PortableFileBackedProtoLogTest,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -707,7 +897,7 @@ TEST_F(PortableFileBackedProtoLogTest,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
// We previously persisted to disk so everything should be in a perfect
// state.
@@ -726,7 +916,7 @@ TEST_F(PortableFileBackedProtoLogTest, DirtyBitIsFalseAfterPutAndDestructor) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -751,7 +941,7 @@ TEST_F(PortableFileBackedProtoLogTest, DirtyBitIsFalseAfterPutAndDestructor) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
// We previously persisted to disk so everything should be in a perfect
// state.
@@ -771,7 +961,7 @@ TEST_F(PortableFileBackedProtoLogTest,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -797,7 +987,7 @@ TEST_F(PortableFileBackedProtoLogTest,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
// We previously persisted to disk so everything should be in a perfect
// state.
@@ -819,8 +1009,8 @@ TEST_F(PortableFileBackedProtoLogTest, Iterator) {
PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
- PortableFileBackedProtoLog<DocumentProto>::Options(compress_,
- max_proto_size_)));
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -872,7 +1062,7 @@ TEST_F(PortableFileBackedProtoLogTest, ComputeChecksum) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -890,7 +1080,7 @@ TEST_F(PortableFileBackedProtoLogTest, ComputeChecksum) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -915,8 +1105,8 @@ TEST_F(PortableFileBackedProtoLogTest, EraseProtoShouldSetZero) {
PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
- PortableFileBackedProtoLog<DocumentProto>::Options(compress_,
- max_proto_size_)));
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -950,8 +1140,8 @@ TEST_F(PortableFileBackedProtoLogTest, EraseProtoShouldReturnNotFound) {
PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
- PortableFileBackedProtoLog<DocumentProto>::Options(compress_,
- max_proto_size_)));
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -993,7 +1183,7 @@ TEST_F(PortableFileBackedProtoLogTest, ChecksumShouldBeCorrectWithErasedProto) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -1021,7 +1211,7 @@ TEST_F(PortableFileBackedProtoLogTest, ChecksumShouldBeCorrectWithErasedProto) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -1041,7 +1231,7 @@ TEST_F(PortableFileBackedProtoLogTest, ChecksumShouldBeCorrectWithErasedProto) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -1063,7 +1253,7 @@ TEST_F(PortableFileBackedProtoLogTest, ChecksumShouldBeCorrectWithErasedProto) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
EXPECT_FALSE(create_result.has_data_loss());
}
diff --git a/icing/file/posting_list/flash-index-storage.cc b/icing/file/posting_list/flash-index-storage.cc
index f74bc55..21fea8a 100644
--- a/icing/file/posting_list/flash-index-storage.cc
+++ b/icing/file/posting_list/flash-index-storage.cc
@@ -15,12 +15,16 @@
#include "icing/file/posting_list/flash-index-storage.h"
#include <sys/types.h>
+#include <unistd.h>
#include <algorithm>
#include <cerrno>
+#include <cinttypes>
#include <cstdint>
#include <memory>
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/file/posting_list/index-block.h"
@@ -33,27 +37,14 @@
namespace icing {
namespace lib {
-namespace {
-
-uint32_t SelectBlockSize() {
- // This should be close to the flash page size.
- static constexpr uint32_t kMinBlockSize = 4096;
-
- // Determine a good block size.
- uint32_t page_size = getpagesize();
- uint32_t block_size = std::max(kMinBlockSize, page_size);
-
- // Align up to the nearest page size.
- return math_util::RoundUpTo(block_size, page_size);
-}
-
-} // namespace
-
libtextclassifier3::StatusOr<FlashIndexStorage> FlashIndexStorage::Create(
- const std::string& index_filename, const Filesystem* filesystem,
- PostingListUsedSerializer* serializer, bool in_memory) {
+ std::string index_filename, const Filesystem* filesystem,
+ PostingListSerializer* serializer, bool in_memory) {
ICING_RETURN_ERROR_IF_NULL(filesystem);
- FlashIndexStorage storage(index_filename, filesystem, serializer, in_memory);
+ ICING_RETURN_ERROR_IF_NULL(serializer);
+
+ FlashIndexStorage storage(filesystem, std::move(index_filename), serializer,
+ in_memory);
if (!storage.Init()) {
return absl_ports::InternalError(
"Unable to successfully read header block!");
@@ -61,15 +52,26 @@ libtextclassifier3::StatusOr<FlashIndexStorage> FlashIndexStorage::Create(
return storage;
}
-FlashIndexStorage::FlashIndexStorage(const std::string& index_filename,
- const Filesystem* filesystem,
- PostingListUsedSerializer* serializer,
- bool has_in_memory_freelists)
- : index_filename_(index_filename),
- num_blocks_(0),
- filesystem_(filesystem),
- serializer_(serializer),
- has_in_memory_freelists_(has_in_memory_freelists) {}
+/* static */ libtextclassifier3::StatusOr<int>
+FlashIndexStorage::ReadHeaderMagic(const Filesystem* filesystem,
+ const std::string& index_filename) {
+ ICING_RETURN_ERROR_IF_NULL(filesystem);
+
+ if (!filesystem->FileExists(index_filename.c_str())) {
+ return absl_ports::NotFoundError("Flash index file doesn't exist");
+ }
+
+ ScopedFd sfd(filesystem->OpenForRead(index_filename.c_str()));
+ if (!sfd.is_valid()) {
+ return absl_ports::InternalError("Fail to open flash index file");
+ }
+
+ uint32_t block_size = SelectBlockSize();
+ // Read and validate header.
+ ICING_ASSIGN_OR_RETURN(HeaderBlock header_block,
+ HeaderBlock::Read(filesystem, sfd.get(), block_size));
+ return header_block.header()->magic;
+}
FlashIndexStorage::~FlashIndexStorage() {
if (header_block_ != nullptr) {
@@ -78,9 +80,21 @@ FlashIndexStorage::~FlashIndexStorage() {
}
}
+/* static */ uint32_t FlashIndexStorage::SelectBlockSize() {
+ // This should be close to the flash page size.
+ static constexpr uint32_t kMinBlockSize = 4096;
+
+ // Determine a good block size.
+ uint32_t page_size = getpagesize();
+ uint32_t block_size = std::max(kMinBlockSize, page_size);
+
+ // Align up to the nearest page size.
+ return math_util::RoundUpTo(block_size, page_size);
+}
+
bool FlashIndexStorage::Init() {
- block_fd_ = ScopedFd(filesystem_->OpenForWrite(index_filename_.c_str()));
- if (!block_fd_.is_valid()) {
+ storage_sfd_ = ScopedFd(filesystem_->OpenForWrite(index_filename_.c_str()));
+ if (!storage_sfd_.is_valid()) {
return false;
}
@@ -90,7 +104,7 @@ bool FlashIndexStorage::Init() {
bool FlashIndexStorage::InitHeader() {
// Look for an existing file size.
- int64_t file_size = filesystem_->GetFileSize(block_fd_.get());
+ int64_t file_size = filesystem_->GetFileSize(storage_sfd_.get());
if (file_size == Filesystem::kBadFileSize) {
ICING_LOG(ERROR) << "Could not initialize main index. Bad file size.";
return false;
@@ -125,8 +139,7 @@ bool FlashIndexStorage::CreateHeader() {
// Work down from the largest posting list that fits in
// block_size. We don't care about locality of blocks because this
// is a flash index.
- for (uint32_t posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
- block_size, serializer_->GetDataTypeBytes());
+ for (uint32_t posting_list_bytes = max_posting_list_bytes();
posting_list_bytes >= serializer_->GetMinPostingListSize();
posting_list_bytes /= 2) {
uint32_t aligned_posting_list_bytes =
@@ -149,8 +162,8 @@ bool FlashIndexStorage::CreateHeader() {
}
// Write the header.
- if (!header_block_->Write(block_fd_.get())) {
- filesystem_->Truncate(block_fd_.get(), 0);
+ if (!header_block_->Write(storage_sfd_.get())) {
+ filesystem_->Truncate(storage_sfd_.get(), 0);
return false;
}
num_blocks_ = 1;
@@ -162,7 +175,7 @@ bool FlashIndexStorage::OpenHeader(int64_t file_size) {
// Read and validate header.
ICING_ASSIGN_OR_RETURN(
HeaderBlock read_header,
- HeaderBlock::Read(filesystem_, block_fd_.get(), block_size), false);
+ HeaderBlock::Read(filesystem_, storage_sfd_.get(), block_size), false);
if (read_header.header()->magic != HeaderBlock::Header::kMagic) {
ICING_LOG(ERROR) << "Index header block wrong magic";
return false;
@@ -200,7 +213,7 @@ bool FlashIndexStorage::OpenHeader(int64_t file_size) {
<< block_size << "). Defaulting to existing block size "
<< read_header.header()->block_size;
ICING_ASSIGN_OR_RETURN(HeaderBlock read_header,
- HeaderBlock::Read(filesystem_, block_fd_.get(),
+ HeaderBlock::Read(filesystem_, storage_sfd_.get(),
read_header.header()->block_size),
false);
}
@@ -226,20 +239,20 @@ bool FlashIndexStorage::OpenHeader(int64_t file_size) {
bool FlashIndexStorage::PersistToDisk() {
// First, write header.
- if (!header_block_->Write(block_fd_.get())) {
+ if (!header_block_->Write(storage_sfd_.get())) {
ICING_LOG(ERROR) << "Write index header failed: " << strerror(errno);
return false;
}
// Then sync.
- return filesystem_->DataSync(block_fd_.get());
+ return filesystem_->DataSync(storage_sfd_.get());
}
libtextclassifier3::Status FlashIndexStorage::Reset() {
// Reset in-memory members to default values.
num_blocks_ = 0;
header_block_.reset();
- block_fd_.reset();
+ storage_sfd_.reset();
in_memory_freelists_.clear();
// Delete the underlying file.
@@ -260,36 +273,36 @@ libtextclassifier3::StatusOr<PostingListHolder>
FlashIndexStorage::GetPostingList(PostingListIdentifier id) const {
ICING_ASSIGN_OR_RETURN(IndexBlock block, GetIndexBlock(id.block_index()));
ICING_ASSIGN_OR_RETURN(
- PostingListUsed posting_list,
+ IndexBlock::PostingListAndBlockInfo pl_block_info,
block.GetAllocatedPostingList(id.posting_list_index()));
- PostingListHolder holder = {std::move(posting_list), std::move(block), id};
- return holder;
+ return PostingListHolder(std::move(pl_block_info.posting_list_used), id,
+ pl_block_info.next_block_index);
}
libtextclassifier3::StatusOr<IndexBlock> FlashIndexStorage::GetIndexBlock(
- int block_index) const {
+ uint32_t block_index) const {
if (block_index >= num_blocks_) {
return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
- "Unable to create an index block at index %d when only %d blocks have "
- "been allocated.",
+ "Unable to create an index block at index %" PRIu32
+ " when only %d blocks have been allocated.",
block_index, num_blocks_));
}
off_t offset = static_cast<off_t>(block_index) * block_size();
return IndexBlock::CreateFromPreexistingIndexBlockRegion(
- *filesystem_, index_filename_, serializer_, offset, block_size());
+ filesystem_, serializer_, storage_sfd_.get(), offset, block_size());
}
libtextclassifier3::StatusOr<IndexBlock> FlashIndexStorage::CreateIndexBlock(
- int block_index, uint32_t posting_list_size) const {
+ uint32_t block_index, uint32_t posting_list_size) const {
if (block_index >= num_blocks_) {
return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
- "Unable to create an index block at index %d when only %d blocks have "
- "been allocated.",
+ "Unable to create an index block at index %" PRIu32
+ " when only %d blocks have been allocated.",
block_index, num_blocks_));
}
off_t offset = static_cast<off_t>(block_index) * block_size();
return IndexBlock::CreateFromUninitializedRegion(
- *filesystem_, index_filename_, serializer_, offset, block_size(),
+ filesystem_, serializer_, storage_sfd_.get(), offset, block_size(),
posting_list_size);
}
@@ -315,21 +328,19 @@ FlashIndexStorage::GetPostingListFromInMemoryFreeList(int block_info_index) {
// need to free this posting list.
ICING_ASSIGN_OR_RETURN(IndexBlock block,
GetIndexBlock(posting_list_id.block_index()));
- block.FreePostingList(posting_list_id.posting_list_index());
+ ICING_RETURN_IF_ERROR(
+ block.FreePostingList(posting_list_id.posting_list_index()));
// Now, we can allocate a posting list from the same index block. It may not
// be the same posting list that was just freed, but that's okay.
- ICING_ASSIGN_OR_RETURN(PostingListIndex posting_list_index,
+ ICING_ASSIGN_OR_RETURN(IndexBlock::PostingListAndBlockInfo pl_block_info,
block.AllocatePostingList());
- posting_list_id =
- PostingListIdentifier(posting_list_id.block_index(), posting_list_index,
- posting_list_id.posting_list_index_bits());
- ICING_ASSIGN_OR_RETURN(
- PostingListUsed posting_list,
- block.GetAllocatedPostingList(posting_list_id.posting_list_index()));
- PostingListHolder holder = {std::move(posting_list), std::move(block),
- posting_list_id};
- return holder;
+ posting_list_id = PostingListIdentifier(
+ posting_list_id.block_index(), pl_block_info.posting_list_index,
+ posting_list_id.posting_list_index_bits());
+
+ return PostingListHolder(std::move(pl_block_info.posting_list_used),
+ posting_list_id, pl_block_info.next_block_index);
}
libtextclassifier3::StatusOr<PostingListHolder>
@@ -344,19 +355,18 @@ FlashIndexStorage::GetPostingListFromOnDiskFreeList(int block_info_index) {
// Get the index block
ICING_ASSIGN_OR_RETURN(IndexBlock block, GetIndexBlock(block_index));
- ICING_ASSIGN_OR_RETURN(PostingListIndex posting_list_index,
+ ICING_ASSIGN_OR_RETURN(IndexBlock::PostingListAndBlockInfo pl_block_info,
block.AllocatePostingList());
- PostingListIdentifier posting_list_id = PostingListIdentifier(
- block_index, posting_list_index, block.posting_list_index_bits());
- ICING_ASSIGN_OR_RETURN(
- PostingListUsed posting_list,
- block.GetAllocatedPostingList(posting_list_id.posting_list_index()));
- if (!block.has_free_posting_lists()) {
- RemoveFromOnDiskFreeList(block_index, block_info_index, &block);
+ PostingListIdentifier posting_list_id =
+ PostingListIdentifier(block_index, pl_block_info.posting_list_index,
+ block.posting_list_index_bits());
+ if (!pl_block_info.has_free_posting_lists) {
+ ICING_RETURN_IF_ERROR(
+ RemoveFromOnDiskFreeList(block_index, block_info_index, &block));
}
- PostingListHolder holder = {std::move(posting_list), std::move(block),
- posting_list_id};
- return holder;
+
+ return PostingListHolder(std::move(pl_block_info.posting_list_used),
+ posting_list_id, pl_block_info.next_block_index);
}
libtextclassifier3::StatusOr<PostingListHolder>
@@ -371,29 +381,26 @@ FlashIndexStorage::AllocateNewPostingList(int block_info_index) {
CreateIndexBlock(block_index, header_block_->header()
->index_block_infos[block_info_index]
.posting_list_bytes));
- ICING_ASSIGN_OR_RETURN(PostingListIndex posting_list_index,
+ ICING_ASSIGN_OR_RETURN(IndexBlock::PostingListAndBlockInfo pl_block_info,
block.AllocatePostingList());
- PostingListIdentifier posting_list_id = PostingListIdentifier(
- block_index, posting_list_index, block.posting_list_index_bits());
- ICING_ASSIGN_OR_RETURN(
- PostingListUsed posting_list,
- block.GetAllocatedPostingList(posting_list_id.posting_list_index()));
- if (block.has_free_posting_lists()) {
+ PostingListIdentifier posting_list_id =
+ PostingListIdentifier(block_index, pl_block_info.posting_list_index,
+ block.posting_list_index_bits());
+ if (pl_block_info.has_free_posting_lists) {
AddToOnDiskFreeList(block_index, block_info_index, &block);
}
- PostingListHolder holder = {std::move(posting_list), std::move(block),
- posting_list_id};
- return holder;
+
+ return PostingListHolder(std::move(pl_block_info.posting_list_used),
+ posting_list_id, pl_block_info.next_block_index);
}
libtextclassifier3::StatusOr<PostingListHolder>
FlashIndexStorage::AllocatePostingList(uint32_t min_posting_list_bytes) {
- int max_block_size = IndexBlock::CalculateMaxPostingListBytes(
- block_size(), serializer_->GetDataTypeBytes());
- if (min_posting_list_bytes > max_block_size) {
+ int max_pl_size = max_posting_list_bytes();
+ if (min_posting_list_bytes > max_pl_size) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
"Requested posting list size %d exceeds max posting list size %d",
- min_posting_list_bytes, max_block_size));
+ min_posting_list_bytes, max_pl_size));
}
int best_block_info_index = FindBestIndexBlockInfo(min_posting_list_bytes);
@@ -411,29 +418,80 @@ FlashIndexStorage::AllocatePostingList(uint32_t min_posting_list_bytes) {
return AllocateNewPostingList(best_block_info_index);
}
+libtextclassifier3::StatusOr<PostingListHolder>
+FlashIndexStorage::AllocateAndChainMaxSizePostingList(
+ uint32_t prev_block_index) {
+ uint32_t max_pl_size = max_posting_list_bytes();
+ int best_block_info_index = FindBestIndexBlockInfo(max_pl_size);
+
+ auto holder_or = GetPostingListFromInMemoryFreeList(best_block_info_index);
+ if (!holder_or.ok()) {
+ // Nothing in memory. Look for something in the block file.
+ holder_or = GetPostingListFromOnDiskFreeList(best_block_info_index);
+ }
+
+ if (!holder_or.ok()) {
+ // Nothing in memory or block file. Allocate new block and posting list.
+ holder_or = AllocateNewPostingList(best_block_info_index);
+ }
+
+ if (!holder_or.ok()) {
+ return holder_or;
+ }
+
+ PostingListHolder holder = std::move(holder_or).ValueOrDie();
+ ICING_ASSIGN_OR_RETURN(IndexBlock block,
+ GetIndexBlock(holder.id.block_index()));
+ ICING_RETURN_IF_ERROR(block.SetNextBlockIndex(prev_block_index));
+ holder.next_block_index = prev_block_index;
+ return holder;
+}
+
void FlashIndexStorage::AddToOnDiskFreeList(uint32_t block_index,
int block_info_index,
IndexBlock* index_block) {
- index_block->set_next_block_index(header_block_->header()
- ->index_block_infos[block_info_index]
- .free_list_block_index);
+ libtextclassifier3::Status status =
+ index_block->SetNextBlockIndex(header_block_->header()
+ ->index_block_infos[block_info_index]
+ .free_list_block_index);
+ if (!status.ok()) {
+ // If an error occurs, then simply skip this block. It just prevents us from
+ // allocating posting lists from this free block in the future and thus
+ // wastes at most one block, but the entire storage (including the
+ // FlashIndexStorage header) is still valid. Therefore, we can swallow
+ // errors here.
+ ICING_VLOG(1) << "Fail to set next block index to chain blocks with free "
+ "lists on disk: "
+ << status.error_message();
+ return;
+ }
+
header_block_->header()
->index_block_infos[block_info_index]
.free_list_block_index = block_index;
}
-void FlashIndexStorage::RemoveFromOnDiskFreeList(uint32_t block_index,
- int block_info_index,
- IndexBlock* index_block) {
+libtextclassifier3::Status FlashIndexStorage::RemoveFromOnDiskFreeList(
+ uint32_t block_index, int block_info_index, IndexBlock* index_block) {
// Cannot be used anymore. Move free ptr to the next block.
+ ICING_ASSIGN_OR_RETURN(uint32_t next_block_index,
+ index_block->GetNextBlockIndex());
+ ICING_RETURN_IF_ERROR(index_block->SetNextBlockIndex(kInvalidBlockIndex));
header_block_->header()
->index_block_infos[block_info_index]
- .free_list_block_index = index_block->next_block_index();
- index_block->set_next_block_index(kInvalidBlockIndex);
+ .free_list_block_index = next_block_index;
+ return libtextclassifier3::Status::OK;
}
-void FlashIndexStorage::FreePostingList(PostingListHolder holder) {
- uint32_t posting_list_bytes = holder.block.get_posting_list_bytes();
+libtextclassifier3::Status FlashIndexStorage::FreePostingList(
+ PostingListHolder&& holder) {
+ ICING_ASSIGN_OR_RETURN(IndexBlock block,
+ GetIndexBlock(holder.id.block_index()));
+ if (block.posting_list_bytes() == max_posting_list_bytes()) {
+ block.SetNextBlockIndex(kInvalidBlockIndex);
+ }
+
+ uint32_t posting_list_bytes = block.posting_list_bytes();
int best_block_info_index = FindBestIndexBlockInfo(posting_list_bytes);
// It *should* be guaranteed elsewhere that FindBestIndexBlockInfo will not
@@ -443,14 +501,24 @@ void FlashIndexStorage::FreePostingList(PostingListHolder holder) {
best_block_info_index < in_memory_freelists_.size()) {
in_memory_freelists_[best_block_info_index].Push(holder.id);
} else {
- bool was_full = !holder.block.has_free_posting_lists();
- holder.block.FreePostingList(holder.id.posting_list_index());
+ ICING_ASSIGN_OR_RETURN(bool was_not_full, block.HasFreePostingLists());
+ ICING_RETURN_IF_ERROR(
+ block.FreePostingList(holder.id.posting_list_index()));
// If this block was not already full, then it is already in the free list.
- if (was_full) {
+ if (!was_not_full) {
AddToOnDiskFreeList(holder.id.block_index(), best_block_info_index,
- &holder.block);
+ &block);
}
}
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status FlashIndexStorage::WritePostingListToDisk(
+ const PostingListHolder& holder) {
+ ICING_ASSIGN_OR_RETURN(IndexBlock block,
+ GetIndexBlock(holder.id.block_index()));
+ return block.WritePostingListToDisk(holder.posting_list,
+ holder.id.posting_list_index());
}
int FlashIndexStorage::GrowIndex() {
@@ -461,7 +529,7 @@ int FlashIndexStorage::GrowIndex() {
// Grow the index file.
if (!filesystem_->Grow(
- block_fd_.get(),
+ storage_sfd_.get(),
static_cast<uint64_t>(num_blocks_ + 1) * block_size())) {
ICING_VLOG(1) << "Error growing index file: " << strerror(errno);
return kInvalidBlockIndex;
@@ -470,7 +538,7 @@ int FlashIndexStorage::GrowIndex() {
return num_blocks_++;
}
-void FlashIndexStorage::FlushInMemoryFreeList() {
+libtextclassifier3::Status FlashIndexStorage::FlushInMemoryFreeList() {
for (int i = 0; i < in_memory_freelists_.size(); ++i) {
FreeList& freelist = in_memory_freelists_.at(i);
auto freelist_elt_or = freelist.TryPop();
@@ -487,17 +555,19 @@ void FlashIndexStorage::FlushInMemoryFreeList() {
continue;
}
IndexBlock block = std::move(block_or).ValueOrDie();
- bool was_full = !block.has_free_posting_lists();
- block.FreePostingList(freelist_elt.posting_list_index());
+ ICING_ASSIGN_OR_RETURN(bool was_not_full, block.HasFreePostingLists());
+ ICING_RETURN_IF_ERROR(
+ block.FreePostingList(freelist_elt.posting_list_index()));
// If this block was not already full, then it is already in the free
// list.
- if (was_full) {
+ if (!was_not_full) {
AddToOnDiskFreeList(freelist_elt.block_index(), /*block_info_index=*/i,
&block);
}
freelist_elt_or = freelist.TryPop();
}
}
+ return libtextclassifier3::Status::OK;
}
void FlashIndexStorage::GetDebugInfo(DebugInfoVerbosity::Code verbosity,
@@ -518,10 +588,12 @@ void FlashIndexStorage::GetDebugInfo(DebugInfoVerbosity::Code verbosity,
IcingStringUtil::SStringAppendF(out, 100, "%u ", block_index);
++count;
+ block_index = kInvalidBlockIndex;
if (block_or.ok()) {
- block_index = block_or.ValueOrDie().next_block_index();
- } else {
- block_index = kInvalidBlockIndex;
+ auto block_index_or = block_or.ValueOrDie().GetNextBlockIndex();
+ if (block_index_or.ok()) {
+ block_index = block_index_or.ValueOrDie();
+ }
}
}
IcingStringUtil::SStringAppendF(out, 100, "(count=%d)\n", count);
diff --git a/icing/file/posting_list/flash-index-storage.h b/icing/file/posting_list/flash-index-storage.h
index 032bfd2..378b2dc 100644
--- a/icing/file/posting_list/flash-index-storage.h
+++ b/icing/file/posting_list/flash-index-storage.h
@@ -20,37 +20,47 @@
#include <string>
#include <vector>
+#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "icing/absl_ports/canonical_errors.h"
#include "icing/file/filesystem.h"
#include "icing/file/posting_list/flash-index-storage-header.h"
#include "icing/file/posting_list/index-block.h"
#include "icing/file/posting_list/posting-list-identifier.h"
#include "icing/file/posting_list/posting-list-used.h"
-#include "icing/legacy/core/icing-packed-pod.h"
#include "icing/proto/debug.pb.h"
#include "icing/store/document-id.h"
namespace icing {
namespace lib {
-// The PostingListHolder struct exists to group together related PostingListUsed
-// IndexBlock pairs and their ids.
+// PostingListHolder: group PostingListUsed, id, and some other useful info for
+// callers.
struct PostingListHolder {
- // PostingListUseds interpret data that they themselves do NOT own. The data
- // being interpreted is stored on a flash block and its memory mapping is
- // owned by the IndexBlock. As such, the lifecycle of the PostingListUsed must
- // NOT exceed the lifecycle of the IndexBlock.
+ // PostingListUsed owns an in-memory posting list data buffer. The data being
+ // interpreted is initialized via PRead from the storage. As such, we should
+ // sync it to disk after modifying it.
PostingListUsed posting_list;
- IndexBlock block;
- // The PostingListIdentifier, which identifies both the IndexBlock and the
- // PostingListUsed, is also returned for convenience.
+
+ // The PostingListIdentifier, which identifies both the block index and the
+ // posting list index on that block, is also returned for convenience.
PostingListIdentifier id;
+
+ // Next block index is also returned for convenience. If PostingListUsed is a
+ // max-sized posting list, then the caller has to use this value to handle
+ // chained max-sized posting list blocks.
+ uint32_t next_block_index;
+
+ explicit PostingListHolder(PostingListUsed&& posting_list_in,
+ PostingListIdentifier id_in,
+ uint32_t next_block_index_in)
+ : posting_list(std::move(posting_list_in)),
+ id(id_in),
+ next_block_index(next_block_index_in) {}
};
-// The FlashIndexStorage class manages the actual file that makes up the index.
-// It allocates IndexBlocks as needed and maintains freelists to prevent
-// excessive block fragmentation.
+// The FlashIndexStorage class manages the actual file that makes up blocks for
+// posting lists. It allocates IndexBlocks as needed and maintains freelists to
+// prevent excessive block fragmentation.
//
// It maintains two types of free lists:
// 1. On-disk, Header free list - This free list is stored in the Header
@@ -81,22 +91,42 @@ class FlashIndexStorage {
//
// RETURNS:
// - On success, a valid instance of FlashIndexStorage
- // - INTERNAL error if unable to create a new header or read the existing
+ // - FAILED_PRECONDITION_ERROR if filesystem or serializer is null
+ // - INTERNAL_ERROR if unable to create a new header or read the existing
// one from disk.
static libtextclassifier3::StatusOr<FlashIndexStorage> Create(
- const std::string& index_filename, const Filesystem* filesystem,
- PostingListUsedSerializer* serializer, bool in_memory = true);
+ std::string index_filename, const Filesystem* filesystem,
+ PostingListSerializer* serializer, bool in_memory = true);
+
+ // Reads magic from existing file header. We need this during Icing
+ // initialization phase to determine the version.
+ //
+ // RETURNS:
+ // - On success, a valid magic
+ // - FAILED_PRECONDITION_ERROR if filesystem is null
+ // - NOT_FOUND_ERROR if the flash index file doesn't exist
+ // - INTERNAL_ERROR on I/O error
+ static libtextclassifier3::StatusOr<int> ReadHeaderMagic(
+ const Filesystem* filesystem, const std::string& index_filename);
+
+ FlashIndexStorage(FlashIndexStorage&&) = default;
+ FlashIndexStorage(const FlashIndexStorage&) = delete;
+ FlashIndexStorage& operator=(FlashIndexStorage&&) = default;
+ FlashIndexStorage& operator=(const FlashIndexStorage&) = delete;
+
+ ~FlashIndexStorage();
+
+ // Selects block size to use.
+ static uint32_t SelectBlockSize();
- // Retrieve the PostingList referred to by PostingListIdentifier. This posting
- // list must have been previously allocated by a prior call to
+ // Retrieves the PostingList referred to by PostingListIdentifier. This
+ // posting list must have been previously allocated by a prior call to
// AllocatePostingList.
//
// RETURNS:
// - On success, a valid instance of PostingListHolder containing the
// requested PostingListUsed.
- // - INVALID_ARGUMENT if id.posting_list_index() is out of bounds in the
- // IndexBlock referred to by id.block_index()
- // - INTERNAL_ERROR if unable to access the region in file.
+ // - Any IndexBlock errors
libtextclassifier3::StatusOr<PostingListHolder> GetPostingList(
PostingListIdentifier id) const;
@@ -106,19 +136,51 @@ class FlashIndexStorage {
// RETURNS:
// - On success, a valid instance of PostingListHolder containing the
// requested PostingListUsed.
- // - RESOURCE_EXHAUSTED error if unable to grow the index to create a
+ // - INVALID_ARGUMENT_ERROR if min_posting_list_bytes >
+ // max_posting_list_bytes()
+ // - RESOURCE_EXHAUSTED_ERROR if unable to grow the index to create a
// PostingListUsed of the requested size.
+ // - Any IndexBlock errors
libtextclassifier3::StatusOr<PostingListHolder> AllocatePostingList(
uint32_t min_posting_list_bytes);
- ~FlashIndexStorage();
- FlashIndexStorage(FlashIndexStorage&&) = default;
- FlashIndexStorage(const FlashIndexStorage&) = delete;
- FlashIndexStorage& operator=(FlashIndexStorage&&) = default;
- FlashIndexStorage& operator=(const FlashIndexStorage&) = delete;
+ // Allocates a new IndexBlock with a single max-sized PostingListUsed. This
+ // chains index blocks by setting the next_block_index field of this new
+ // block's header to be prev_block_index and returns a PostingListHolder
+ // containing a max-sized PostingListUsed.
+ //
+ // RETURNS:
+ // - On success, a valid instance of PostingListHolder containing the
+ // requested PostingListUsed.
+ // - RESOURCE_EXHAUSTED_ERROR if unable to grow the index to create a
+ // PostingListUsed of max size
+ // - Any IndexBlock errors
+ libtextclassifier3::StatusOr<PostingListHolder>
+ AllocateAndChainMaxSizePostingList(uint32_t prev_block_index);
- // Free the PostingListUsed that this holder holds.
- void FreePostingList(PostingListHolder holder);
+ // Frees the PostingListUsed that this holder holds.
+ //
+ // RETURNS:
+ // - OK on success
+ // - Any IndexBlock errors
+ libtextclassifier3::Status FreePostingList(PostingListHolder&& holder);
+
+ // Writes back the PostingListUsed that this holder holds to disk.
+ //
+ // RETURNS:
+ // - OK on success
+ // - Any IndexBlock errors
+ libtextclassifier3::Status WritePostingListToDisk(
+ const PostingListHolder& holder);
+
+ // Discards all existing data by deleting the existing file and
+ // re-initializing a new one.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR if unable to delete existing files or initialize a new
+ // file with header
+ libtextclassifier3::Status Reset();
// Used to track the largest docid indexed in the index.
DocumentId get_last_indexed_docid() const {
@@ -134,7 +196,7 @@ class FlashIndexStorage {
// Returns the size of the index file in bytes.
int64_t GetDiskUsage() const {
- return filesystem_->GetDiskUsage(block_fd_.get());
+ return filesystem_->GetDiskUsage(storage_sfd_.get());
}
// Returns the size of the index file used to contains data.
@@ -145,6 +207,12 @@ class FlashIndexStorage {
int num_blocks() const { return num_blocks_; }
+ // Gets the byte size of max sized posting list.
+ uint32_t max_posting_list_bytes() const {
+ return IndexBlock::CalculateMaxPostingListBytes(
+ block_size(), serializer_->GetDataTypeBytes());
+ }
+
// Info about the index based on the block size.
int block_size() const { return header_block_->header()->block_size; }
@@ -158,19 +226,22 @@ class FlashIndexStorage {
return 1.0 - static_cast<double>(num_blocks_) / kMaxBlockIndex;
}
- const PostingListUsedSerializer* serializer() const { return serializer_; }
- PostingListUsedSerializer* serializer() { return serializer_; }
-
- libtextclassifier3::Status Reset();
+ const PostingListSerializer* serializer() const { return serializer_; }
+ PostingListSerializer* serializer() { return serializer_; }
// TODO(b/222349894) Convert the string output to a protocol buffer instead.
void GetDebugInfo(DebugInfoVerbosity::Code verbosity, std::string* out) const;
private:
- explicit FlashIndexStorage(const std::string& index_filename,
- const Filesystem* filesystem,
- PostingListUsedSerializer* serializer,
- bool has_in_memory_freelists);
+ explicit FlashIndexStorage(const Filesystem* filesystem,
+ std::string&& index_filename,
+ PostingListSerializer* serializer,
+ bool has_in_memory_freelists)
+ : filesystem_(filesystem),
+ index_filename_(std::move(index_filename)),
+ serializer_(serializer),
+ num_blocks_(0),
+ has_in_memory_freelists_(has_in_memory_freelists) {}
// Init the index from persistence. Create if file does not exist. We do not
// erase corrupt files.
@@ -189,31 +260,38 @@ class FlashIndexStorage {
// the values stored in it.
bool OpenHeader(int64_t file_size);
- // Add the IndexBlock referred to by block_index in the on-disk free list with
- // index block_info_index.
+ // Adds the IndexBlock referred to by block_index in the on-disk free list
+ // with index block_info_index.
void AddToOnDiskFreeList(uint32_t block_index, int block_info_index,
IndexBlock* index_block);
- // Remove the IndexBlock referred to by block_index from the Header free list
+ // Removes the IndexBlock referred to by block_index from the Header free list
// with index block_info_index.
- void RemoveFromOnDiskFreeList(uint32_t block_index, int block_info_index,
- IndexBlock* index_block);
+ //
+ // RETURNS:
+ // - OK on success
+ // - Any IndexBlock errors
+ libtextclassifier3::Status RemoveFromOnDiskFreeList(uint32_t block_index,
+ int block_info_index,
+ IndexBlock* index_block);
- // Returns:
+ // RETURNS:
// - On success, a valid PostingListHolder created from the first entry of
// the in-memory freelist at block_info_index
- // - NOT_FOUND if there was no entry in the freelist
- // - RESOURCE_EXHAUSTED if the PostingList in the freelist couldn't be
- // allocated for some reason.
+ // - OUT_OF_RANGE_ERROR if in_memory_freelists_ contains
+ // PostingListIdentifier with block_index >= num_blocks_
+ // - NOT_FOUND_ERROR if there was no entry in the freelist
+ // - Any IndexBlock errors
libtextclassifier3::StatusOr<PostingListHolder>
GetPostingListFromInMemoryFreeList(int block_info_index);
- // Returns:
+ // RETURNS:
// - On success, a valid PostingListHolder created from the first entry of
// the on-disk freelist at block_info_index
- // - NOT_FOUND if there was no entry in the freelist
- // - RESOURCE_EXHAUSTED if the PostingList in the freelist couldn't be
- // allocated for some reason.
+ // - OUT_OF_RANGE_ERROR if header()->index_block_infos[block_info_index]
+ // contains block_index >= num_blocks_
+ // - NOT_FOUND_ERROR if there was no entry in the freelist
+ // - Any IndexBlock errors
libtextclassifier3::StatusOr<PostingListHolder>
GetPostingListFromOnDiskFreeList(int block_info_index);
@@ -222,22 +300,24 @@ class FlashIndexStorage {
// IndexBlock.
// - RESOURCE_EXHAUSTED if the index couldn't be grown to fit a new
// IndexBlock.
+ // - Any IndexBlock errors
libtextclassifier3::StatusOr<PostingListHolder> AllocateNewPostingList(
int block_info_index);
// Returns:
// - On success, a newly created IndexBlock at block_index with posting
// lists of size posting_list_size
- // - INTERNAL_ERROR if unable to access the region in file representing the
- // IndexBlock
+ // - OUT_OF_RANGE_ERROR if block_index >= num_blocks_
+ // - Any IndexBlock errors
libtextclassifier3::StatusOr<IndexBlock> CreateIndexBlock(
- int block_index, uint32_t posting_list_size) const;
+ uint32_t block_index, uint32_t posting_list_size) const;
// Returns:
// - On success, the IndexBlock that exists at block_index
- // - INTERNAL_ERROR if unable to access the region in file representing the
- // IndexBlock
- libtextclassifier3::StatusOr<IndexBlock> GetIndexBlock(int block_index) const;
+ // - OUT_OF_RANGE_ERROR if block_index >= num_blocks_
+ // - Any IndexBlock errors
+ libtextclassifier3::StatusOr<IndexBlock> GetIndexBlock(
+ uint32_t block_index) const;
// Add a new block to the end of the file and return its block
// index. Returns kInvalidBlockIndex if unable to grow the index file.
@@ -249,13 +329,20 @@ class FlashIndexStorage {
int FindBestIndexBlockInfo(uint32_t posting_list_bytes) const;
// Flushes the in-memory free list to disk.
- void FlushInMemoryFreeList();
+ //
+ // RETURNS:
+ // - OK on success
+ // - Any IndexBlock errors
+ libtextclassifier3::Status FlushInMemoryFreeList();
- // Underlying filename.
+ const Filesystem* filesystem_; // not owned; can't be null
std::string index_filename_;
+ PostingListSerializer* serializer_; // not owned; can't be null
+
// We open the index file into this fd.
- ScopedFd block_fd_;
+ ScopedFd storage_sfd_;
+
int num_blocks_; // can be inferred from index file size
std::unique_ptr<HeaderBlock> header_block_;
@@ -285,10 +372,6 @@ class FlashIndexStorage {
};
std::vector<FreeList> in_memory_freelists_;
- const Filesystem* filesystem_; // not owned; can't be null
-
- PostingListUsedSerializer* serializer_; // not owned; can't be null
-
bool has_in_memory_freelists_;
};
diff --git a/icing/file/posting_list/flash-index-storage_test.cc b/icing/file/posting_list/flash-index-storage_test.cc
index 50f21f3..3e2d239 100644
--- a/icing/file/posting_list/flash-index-storage_test.cc
+++ b/icing/file/posting_list/flash-index-storage_test.cc
@@ -26,8 +26,9 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/file/filesystem.h"
+#include "icing/file/posting_list/flash-index-storage-header.h"
#include "icing/index/hit/hit.h"
-#include "icing/index/main/posting-list-used-hit-serializer.h"
+#include "icing/index/main/posting-list-hit-serializer.h"
#include "icing/store/document-id.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
@@ -42,6 +43,7 @@ using ::testing::Eq;
using ::testing::IsEmpty;
using ::testing::IsFalse;
using ::testing::IsTrue;
+using ::testing::Ne;
using ::testing::Not;
class FlashIndexStorageTest : public testing::Test {
@@ -52,7 +54,7 @@ class FlashIndexStorageTest : public testing::Test {
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(test_dir_.c_str()));
// TODO(b/249829533): test different serializers
- serializer_ = std::make_unique<PostingListUsedHitSerializer>();
+ serializer_ = std::make_unique<PostingListHitSerializer>();
}
void TearDown() override {
@@ -64,9 +66,53 @@ class FlashIndexStorageTest : public testing::Test {
std::string test_dir_;
std::string file_name_;
Filesystem filesystem_;
- std::unique_ptr<PostingListUsedHitSerializer> serializer_;
+ std::unique_ptr<PostingListHitSerializer> serializer_;
};
+TEST_F(FlashIndexStorageTest, ReadHeaderMagic) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get()));
+ }
+ EXPECT_THAT(FlashIndexStorage::ReadHeaderMagic(&filesystem_, file_name_),
+ IsOkAndHolds(HeaderBlock::Header::kMagic));
+}
+
+TEST_F(FlashIndexStorageTest, ReadHeaderMagicOldVersion) {
+ int block_size;
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get()));
+ block_size = flash_index_storage.block_size();
+ }
+
+ int old_magic = 0x6dfba6ae;
+ ASSERT_THAT(old_magic, Ne(HeaderBlock::Header::kMagic));
+ {
+ // Manually modify the header magic.
+ ScopedFd sfd(filesystem_.OpenForWrite(file_name_.c_str()));
+ ASSERT_THAT(sfd.is_valid(), IsTrue());
+
+ // Read and validate header.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ HeaderBlock header_block,
+ HeaderBlock::Read(&filesystem_, sfd.get(), block_size));
+ header_block.header()->magic = old_magic;
+ ASSERT_THAT(header_block.Write(sfd.get()), IsTrue());
+ }
+
+ EXPECT_THAT(FlashIndexStorage::ReadHeaderMagic(&filesystem_, file_name_),
+ IsOkAndHolds(old_magic));
+}
+
+TEST_F(FlashIndexStorageTest,
+ ReadHeaderMagicNonExistingFileShouldGetNotFoundError) {
+ EXPECT_THAT(FlashIndexStorage::ReadHeaderMagic(&filesystem_, file_name_),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
TEST_F(FlashIndexStorageTest, CorruptHeader) {
{
// Create the header file
diff --git a/icing/file/posting_list/index-block.cc b/icing/file/posting_list/index-block.cc
index 1b9982e..3fa397c 100644
--- a/icing/file/posting_list/index-block.cc
+++ b/icing/file/posting_list/index-block.cc
@@ -18,13 +18,14 @@
#include <cstdint>
#include <memory>
-#include <string_view>
+#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
-#include "icing/file/memory-mapped-file.h"
+#include "icing/absl_ports/str_cat.h"
#include "icing/file/posting_list/posting-list-common.h"
#include "icing/file/posting_list/posting-list-free.h"
+#include "icing/file/posting_list/posting-list-used.h"
#include "icing/file/posting_list/posting-list-utils.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/util/logging.h"
@@ -36,7 +37,7 @@ namespace lib {
namespace {
libtextclassifier3::Status ValidatePostingListBytes(
- PostingListUsedSerializer* serializer, uint32_t posting_list_bytes,
+ PostingListSerializer* serializer, uint32_t posting_list_bytes,
uint32_t block_size) {
if (posting_list_bytes > IndexBlock::CalculateMaxPostingListBytes(
block_size, serializer->GetDataTypeBytes()) ||
@@ -55,84 +56,50 @@ libtextclassifier3::Status ValidatePostingListBytes(
} // namespace
-libtextclassifier3::StatusOr<IndexBlock>
+/* static */ libtextclassifier3::StatusOr<IndexBlock>
IndexBlock::CreateFromPreexistingIndexBlockRegion(
- const Filesystem& filesystem, std::string_view file_path,
- PostingListUsedSerializer* serializer, off_t offset, uint32_t block_size) {
+ const Filesystem* filesystem, PostingListSerializer* serializer, int fd,
+ off_t block_file_offset, uint32_t block_size) {
if (block_size < sizeof(BlockHeader)) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
"Provided block_size %d is too small to fit even the BlockHeader!",
block_size));
}
- ICING_ASSIGN_OR_RETURN(MemoryMappedFile mmapped_file,
- MemoryMappedFile::Create(
- filesystem, file_path,
- MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
- ICING_RETURN_IF_ERROR(mmapped_file.Remap(offset, block_size));
- IndexBlock block(serializer, std::move(mmapped_file));
+
+ BlockHeader header;
+ if (!filesystem->PRead(fd, &header, sizeof(BlockHeader), block_file_offset)) {
+ return absl_ports::InternalError("PRead block header error");
+ }
+
ICING_RETURN_IF_ERROR(ValidatePostingListBytes(
- serializer, block.get_posting_list_bytes(), block_size));
- return block;
+ serializer, header.posting_list_bytes, block_size));
+
+ return IndexBlock(filesystem, serializer, fd, block_file_offset, block_size,
+ header.posting_list_bytes);
}
-libtextclassifier3::StatusOr<IndexBlock>
-IndexBlock::CreateFromUninitializedRegion(const Filesystem& filesystem,
- std::string_view file_path,
- PostingListUsedSerializer* serializer,
- off_t offset, uint32_t block_size,
+/* static */ libtextclassifier3::StatusOr<IndexBlock>
+IndexBlock::CreateFromUninitializedRegion(const Filesystem* filesystem,
+ PostingListSerializer* serializer,
+ int fd, off_t block_file_offset,
+ uint32_t block_size,
uint32_t posting_list_bytes) {
if (block_size < sizeof(BlockHeader)) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
"Provided block_size %d is too small to fit even the BlockHeader!",
block_size));
}
+
ICING_RETURN_IF_ERROR(
ValidatePostingListBytes(serializer, posting_list_bytes, block_size));
- ICING_ASSIGN_OR_RETURN(MemoryMappedFile mmapped_file,
- MemoryMappedFile::Create(
- filesystem, file_path,
- MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
- ICING_RETURN_IF_ERROR(mmapped_file.Remap(offset, block_size));
- IndexBlock block(serializer, std::move(mmapped_file));
- // Safe to ignore the return value of Reset. Reset returns an error if
- // posting_list_bytes is invalid, but this function ensures that
- // posting_list_bytes is valid thanks to the call to ValidatePostingListBytes
- // above.
- block.Reset(posting_list_bytes);
- return block;
-}
-
-IndexBlock::IndexBlock(PostingListUsedSerializer* serializer,
- MemoryMappedFile&& mmapped_block)
- : header_(reinterpret_cast<BlockHeader*>(mmapped_block.mutable_region())),
- posting_lists_start_ptr_(mmapped_block.mutable_region() +
- sizeof(BlockHeader)),
- block_size_in_bytes_(mmapped_block.region_size()),
- serializer_(serializer),
- mmapped_block_(
- std::make_unique<MemoryMappedFile>(std::move(mmapped_block))) {}
-
-libtextclassifier3::Status IndexBlock::Reset(int posting_list_bytes) {
- ICING_RETURN_IF_ERROR(ValidatePostingListBytes(
- serializer_, posting_list_bytes, mmapped_block_->region_size()));
- header_->free_list_posting_list_index = kInvalidPostingListIndex;
- header_->next_block_index = kInvalidBlockIndex;
- header_->posting_list_bytes = posting_list_bytes;
+ IndexBlock block(filesystem, serializer, fd, block_file_offset, block_size,
+ posting_list_bytes);
+ ICING_RETURN_IF_ERROR(block.Reset());
- // Starting with the last posting list, prepend each posting list to the free
- // list. At the end, the beginning of the free list should be the first
- // posting list.
- for (PostingListIndex posting_list_index = max_num_posting_lists() - 1;
- posting_list_index >= 0; --posting_list_index) {
- // Adding the posting list at posting_list_index to the free list will
- // modify both the posting list and also
- // header_->free_list_posting_list_index.
- FreePostingList(posting_list_index);
- }
- return libtextclassifier3::Status::OK;
+ return block;
}
-libtextclassifier3::StatusOr<PostingListUsed>
+libtextclassifier3::StatusOr<IndexBlock::PostingListAndBlockInfo>
IndexBlock::GetAllocatedPostingList(PostingListIndex posting_list_index) {
if (posting_list_index >= max_num_posting_lists() || posting_list_index < 0) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
@@ -140,71 +107,226 @@ IndexBlock::GetAllocatedPostingList(PostingListIndex posting_list_index) {
"posting lists.",
posting_list_index, max_num_posting_lists()));
}
- return PostingListUsed::CreateFromPreexistingPostingListUsedRegion(
- serializer_, get_posting_list_ptr(posting_list_index),
- get_posting_list_bytes());
+
+ // Read out the header from disk.
+ ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
+
+ // Read out the allocated posting list from disk.
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<uint8_t[]> posting_list_buffer,
+ ReadPostingList(posting_list_index));
+
+ ICING_ASSIGN_OR_RETURN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromPreexistingPostingListUsedRegion(
+ serializer_, std::move(posting_list_buffer), posting_list_bytes_));
+ return PostingListAndBlockInfo(
+ std::move(pl_used), posting_list_index, header.next_block_index,
+ /*has_free_posting_lists_in=*/header.free_list_posting_list_index !=
+ kInvalidPostingListIndex);
}
-libtextclassifier3::StatusOr<PostingListIndex>
+libtextclassifier3::StatusOr<IndexBlock::PostingListAndBlockInfo>
IndexBlock::AllocatePostingList() {
- if (!has_free_posting_lists()) {
+ // Read out the header from disk.
+ ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
+
+ if (header.free_list_posting_list_index == kInvalidPostingListIndex) {
return absl_ports::ResourceExhaustedError(
"No available posting lists to allocate.");
}
// Pull one off the free list.
- PostingListIndex posting_list_index = header_->free_list_posting_list_index;
+ PostingListIndex posting_list_index = header.free_list_posting_list_index;
- // We know at this point that posting_list_bytes will return a valid pl size
- // (because an already initialized IndexBlock instance can't have an invalid
- // posting_list_bytes). So CreateFromPreexistingPostingListFreeRegion will
- // always return OK and ValueOrDie is safe to call.
- auto posting_list_or =
+ // Read out the posting list from disk.
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<uint8_t[]> posting_list_buffer,
+ ReadPostingList(posting_list_index));
+ // Step 1: get the next (chained) free posting list index and set it to block
+ // header.
+ ICING_ASSIGN_OR_RETURN(
+ PostingListFree pl_free,
PostingListFree::CreateFromPreexistingPostingListFreeRegion(
- get_posting_list_ptr(posting_list_index), get_posting_list_bytes(),
+ posting_list_buffer.get(), posting_list_bytes_,
serializer_->GetDataTypeBytes(),
- serializer_->GetMinPostingListSize());
- PostingListFree plfree = std::move(posting_list_or).ValueOrDie();
-
- header_->free_list_posting_list_index = plfree.get_next_posting_list_index();
- if (header_->free_list_posting_list_index != kInvalidPostingListIndex &&
- header_->free_list_posting_list_index >= max_num_posting_lists()) {
+ serializer_->GetMinPostingListSize()));
+ header.free_list_posting_list_index = pl_free.get_next_posting_list_index();
+ if (header.free_list_posting_list_index != kInvalidPostingListIndex &&
+ header.free_list_posting_list_index >= max_num_posting_lists()) {
ICING_LOG(ERROR)
<< "Free Posting List points to an invalid posting list index!";
- header_->free_list_posting_list_index = kInvalidPostingListIndex;
+ header.free_list_posting_list_index = kInvalidPostingListIndex;
}
- // Make it a used posting list.
- PostingListUsed::CreateFromUnitializedRegion(
- serializer_, get_posting_list_ptr(posting_list_index),
- get_posting_list_bytes());
- return posting_list_index;
+ // Step 2: create PostingListUsed instance. The original content in the above
+ // posting_list_buffer is not important now because
+ // PostingListUsed::CreateFromUnitializedRegion will wipe it out, and
+ // we only need to sync it to disk after initializing.
+ ICING_ASSIGN_OR_RETURN(PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(
+ serializer_, posting_list_bytes_));
+
+ // Sync the initialized posting list (overwrite the original content of
+ // PostingListFree) and header to disk.
+ ICING_RETURN_IF_ERROR(
+ WritePostingList(posting_list_index, pl_used.posting_list_buffer()));
+ ICING_RETURN_IF_ERROR(WriteHeader(header));
+
+ return PostingListAndBlockInfo(
+ std::move(pl_used), posting_list_index, header.next_block_index,
+ /*has_free_posting_lists_in=*/header.free_list_posting_list_index !=
+ kInvalidPostingListIndex);
}
-void IndexBlock::FreePostingList(PostingListIndex posting_list_index) {
+libtextclassifier3::Status IndexBlock::FreePostingList(
+ PostingListIndex posting_list_index) {
if (posting_list_index >= max_num_posting_lists() || posting_list_index < 0) {
- ICING_LOG(ERROR) << "Cannot free posting list with index "
- << posting_list_index << " in IndexBlock with only "
- << max_num_posting_lists() << " posting lists.";
- return;
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Cannot free posting list with index %d in IndexBlock with only %d "
+ "posting lists.",
+ posting_list_index, max_num_posting_lists()));
+ }
+
+ ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
+ ICING_RETURN_IF_ERROR(FreePostingListImpl(header, posting_list_index));
+ ICING_RETURN_IF_ERROR(WriteHeader(header));
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IndexBlock::WritePostingListToDisk(
+ const PostingListUsed& posting_list_used,
+ PostingListIndex posting_list_index) {
+ if (posting_list_index >= max_num_posting_lists() || posting_list_index < 0) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Cannot write posting list with index %d in IndexBlock with only %d "
+ "posting lists.",
+ posting_list_index, max_num_posting_lists()));
+ }
+
+ if (posting_list_used.size_in_bytes() != posting_list_bytes_) {
+ return absl_ports::InvalidArgumentError(
+ "Cannot write posting list into a block with different posting list "
+ "bytes");
}
- // We know at this point that posting_list_bytes will return a valid pl size.
- // So CreateFromUninitializedRegion will always return OK and ValueOrDie is
- // safe to call.
- auto posting_list_or = PostingListFree::CreateFromUnitializedRegion(
- get_posting_list_ptr(posting_list_index), get_posting_list_bytes(),
- serializer_->GetDataTypeBytes(), serializer_->GetMinPostingListSize());
- PostingListFree plfree = std::move(posting_list_or).ValueOrDie();
+ if (!posting_list_used.is_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ // Write the allocated posting list to disk.
+ return WritePostingList(posting_list_index,
+ posting_list_used.posting_list_buffer());
+}
+
+libtextclassifier3::StatusOr<uint32_t> IndexBlock::GetNextBlockIndex() const {
+ ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
+ return header.next_block_index;
+}
+
+libtextclassifier3::Status IndexBlock::SetNextBlockIndex(
+ uint32_t next_block_index) {
+ ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
+ header.next_block_index = next_block_index;
+ ICING_RETURN_IF_ERROR(WriteHeader(header));
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<bool> IndexBlock::HasFreePostingLists() const {
+ ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
+ return header.free_list_posting_list_index != kInvalidPostingListIndex;
+}
+
+libtextclassifier3::Status IndexBlock::Reset() {
+ BlockHeader header;
+ header.free_list_posting_list_index = kInvalidPostingListIndex;
+ header.next_block_index = kInvalidBlockIndex;
+ header.posting_list_bytes = posting_list_bytes_;
+
+ // Starting with the last posting list, prepend each posting list to the free
+ // list. At the end, the beginning of the free list should be the first
+ // posting list.
+ for (PostingListIndex posting_list_index = max_num_posting_lists() - 1;
+ posting_list_index >= 0; --posting_list_index) {
+ // Adding the posting list at posting_list_index to the free list will
+ // modify both the posting list and also
+ // header.free_list_posting_list_index.
+ ICING_RETURN_IF_ERROR(FreePostingListImpl(header, posting_list_index));
+ }
+
+ // Sync the header to disk.
+ ICING_RETURN_IF_ERROR(WriteHeader(header));
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IndexBlock::FreePostingListImpl(
+ BlockHeader& header, PostingListIndex posting_list_index) {
+ // Read out the posting list from disk.
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<uint8_t[]> posting_list_buffer,
+ ReadPostingList(posting_list_index));
+
+ ICING_ASSIGN_OR_RETURN(PostingListFree plfree,
+ PostingListFree::CreateFromUnitializedRegion(
+ posting_list_buffer.get(), posting_list_bytes(),
+ serializer_->GetDataTypeBytes(),
+ serializer_->GetMinPostingListSize()));
// Put at the head of the list.
- plfree.set_next_posting_list_index(header_->free_list_posting_list_index);
- header_->free_list_posting_list_index = posting_list_index;
+ plfree.set_next_posting_list_index(header.free_list_posting_list_index);
+ header.free_list_posting_list_index = posting_list_index;
+
+ // Sync the posting list to disk.
+ ICING_RETURN_IF_ERROR(
+ WritePostingList(posting_list_index, posting_list_buffer.get()));
+ return libtextclassifier3::Status::OK;
}
-char* IndexBlock::get_posting_list_ptr(PostingListIndex posting_list_index) {
- return posting_lists_start_ptr_ +
- get_posting_list_bytes() * posting_list_index;
+libtextclassifier3::StatusOr<IndexBlock::BlockHeader> IndexBlock::ReadHeader()
+ const {
+ BlockHeader header;
+ if (!filesystem_->PRead(fd_, &header, sizeof(BlockHeader),
+ block_file_offset_)) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("PRead block header error: ", strerror(errno)));
+ }
+ if (header.posting_list_bytes != posting_list_bytes_) {
+ return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "Inconsistent posting list bytes between block header (%d) and class "
+ "instance (%d)",
+ header.posting_list_bytes, posting_list_bytes_));
+ }
+ return header;
+}
+
+libtextclassifier3::StatusOr<std::unique_ptr<uint8_t[]>>
+IndexBlock::ReadPostingList(PostingListIndex posting_list_index) const {
+ auto posting_list_buffer = std::make_unique<uint8_t[]>(posting_list_bytes_);
+ if (!filesystem_->PRead(fd_, posting_list_buffer.get(), posting_list_bytes_,
+ get_posting_list_file_offset(posting_list_index))) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("PRead posting list error: ", strerror(errno)));
+ }
+ return posting_list_buffer;
+}
+
+libtextclassifier3::Status IndexBlock::WriteHeader(const BlockHeader& header) {
+ if (!filesystem_->PWrite(fd_, block_file_offset_, &header,
+ sizeof(BlockHeader))) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("PWrite block header error: ", strerror(errno)));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IndexBlock::WritePostingList(
+ PostingListIndex posting_list_index, const uint8_t* posting_list_buffer) {
+ if (!filesystem_->PWrite(fd_,
+ get_posting_list_file_offset(posting_list_index),
+ posting_list_buffer, posting_list_bytes_)) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("PWrite posting list error: ", strerror(errno)));
+ }
+ return libtextclassifier3::Status::OK;
}
} // namespace lib
diff --git a/icing/file/posting_list/index-block.h b/icing/file/posting_list/index-block.h
index 589f155..21ad13f 100644
--- a/icing/file/posting_list/index-block.h
+++ b/icing/file/posting_list/index-block.h
@@ -19,9 +19,10 @@
#include <cstdint>
#include <memory>
-#include <string_view>
-#include "icing/file/memory-mapped-file.h"
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/filesystem.h"
#include "icing/file/posting_list/posting-list-common.h"
#include "icing/file/posting_list/posting-list-used.h"
#include "icing/legacy/index/icing-bit-util.h"
@@ -31,15 +32,27 @@ namespace lib {
// This class is used to manage I/O to a single flash block and to manage the
// division of that flash block into PostingLists. It provides an interface to
-// allocate, free and read posting lists.
+// allocate, free and read posting lists. Note that IndexBlock is stateless:
+// - Any changes to block header will be synced to disk before the method
+// returns.
+// - Any posting list allocation/freeing will be synced to disk before the
+// method returns.
+// - When getting an allocated posting list, it PReads the contents from disk to
+// a buffer and transfer the ownership to PostingListUsed. Any changes to
+// PostingListUsed will not be visible to other instances until calling
+// WritePostingListToDisk.
//
// An IndexBlock contains a small header and an array of fixed-size posting list
// buffers. Initially, all posting lists are chained in a singly-linked free
// list.
//
-// When we want to get a new PostingList from an IndexBlock, we just
-// pull one off the free list. When the user wants to return the
-// PostingList to the free pool, we prepend it to the free list.
+// When we want to get a new PostingList from an IndexBlock, we just pull one
+// off the free list. When the user wants to return the PostingList to the free
+// pool, we prepend it to the free list.
+//
+// Read-write the same block is NOT thread safe. If we try to read-write the
+// same block at the same time (either by the same or different IndexBlock
+// instances), then it causes race condition and the behavior is undefined.
class IndexBlock {
public:
// What is the maximum posting list size in bytes that can be stored in this
@@ -50,35 +63,57 @@ class IndexBlock {
data_type_bytes;
}
- // Create an IndexBlock to reference the previously used region of the
- // mmapped_file starting at offset with size block_size
+ // Creates an IndexBlock to reference the previously used region of the file
+ // descriptor starting at block_file_offset with size block_size.
+ //
+ // - serializer: for reading/writing posting list. Also some additional
+ // information (e.g. data size) should be provided by the
+ // serializer.
+ // - fd: a valid file descriptor opened for write by the caller.
+ // - block_file_offset: absolute offset of the file (fd).
+ // - block_size: byte size of this block.
+ //
+ // Unlike CreateFromUninitializedRegion, a pre-existing index block has
+ // already determined and written posting list bytes into block header, so it
+ // will be read from block header and the caller doesn't have to provide.
//
// RETURNS:
- // - a valid IndexBlock on success
- // - INVALID_ARGUMENT if size is too small for even just the BlockHeader or
- // if the posting list size stored in the region is not a valid posting
- // list size or it exceeds max_posting_list_bytes(size).
- // - INTERNAL_ERROR if unable to mmap the region [offset, offset+block_size)
+ // - A valid IndexBlock instance on success
+ // - INVALID_ARGUMENT_ERROR
+ // - If block_size is too small for even just the BlockHeader
+ // - If the posting list size stored in the region is not a valid posting
+ // list size (e.g. exceeds max_posting_list_bytes(size))
+ // - INTERNAL_ERROR on I/O error
static libtextclassifier3::StatusOr<IndexBlock>
- CreateFromPreexistingIndexBlockRegion(const Filesystem& filesystem,
- std::string_view file_path,
- PostingListUsedSerializer* serializer,
- off_t offset, uint32_t block_size);
+ CreateFromPreexistingIndexBlockRegion(const Filesystem* filesystem,
+ PostingListSerializer* serializer,
+ int fd, off_t block_file_offset,
+ uint32_t block_size);
- // Create an IndexBlock to reference an uninitialized region of the
- // mmapped_file starting at offset with size block_size. The IndexBlock will
- // initialize the region to be an empty IndexBlock with posting lists of size
- // posting_list_bytes.
+ // Creates an IndexBlock to reference an uninitialized region of the file
+ // descriptor starting at block_file_offset with size block_size. The
+ // IndexBlock will initialize the region to be an empty IndexBlock with
+ // posting lists of size posting_list_bytes.
+ //
+ // - serializer: for reading/writing posting list. Also some additional
+ // information (e.g. data size) should be provided by the
+ // serializer.
+ // - fd: a valid file descriptor opened for write by the caller.
+ // - block_file_offset: absolute offset of the file (fd).
+ // - block_size: byte size of this block.
+ // - posting_list_bytes: byte size of all posting lists in this block. This
+ // information will be written into block header.
//
// RETURNS:
- // - a valid IndexBlock on success
- // - INVALID_ARGUMENT if size is too small for even just the BlockHeader or
- // if posting_list_bytes is not a valid posting list size or it exceeds
- // max_posting_list_bytes(size).
- // - INTERNAL_ERROR if unable to mmap the region [offset, offset+block_size)
+ // - A valid IndexBlock instance on success
+ // - INVALID_ARGUMENT_ERROR
+ // - If block_size is too small for even just the BlockHeader
+ // - If the posting list size stored in the region is not a valid posting
+ // list size (e.g. exceeds max_posting_list_bytes(size))
+ // - INTERNAL_ERROR on I/O error
static libtextclassifier3::StatusOr<IndexBlock> CreateFromUninitializedRegion(
- const Filesystem& filesystem, std::string_view file_path,
- PostingListUsedSerializer* serializer, off_t offset, uint32_t block_size,
+ const Filesystem* filesystem, PostingListSerializer* serializer, int fd,
+ off_t block_file_offset, uint32_t block_size,
uint32_t posting_list_bytes);
IndexBlock(const IndexBlock&) = delete;
@@ -86,30 +121,53 @@ class IndexBlock {
IndexBlock(IndexBlock&&) = default;
IndexBlock& operator=(IndexBlock&&) = default;
- ~IndexBlock() {
- if (mmapped_block_ != nullptr) {
- mmapped_block_->PersistToDisk();
- }
- }
+ ~IndexBlock() = default;
- // Instantiate a PostingListUsed at posting_list_index with the existing
- // content in the IndexBlock.
+ struct PostingListAndBlockInfo {
+ PostingListUsed posting_list_used;
+ PostingListIndex posting_list_index;
+
+ uint32_t next_block_index;
+
+ // Flag indicating if there are any free posting lists available after this
+ // allocation request.
+ bool has_free_posting_lists;
+
+ explicit PostingListAndBlockInfo(PostingListUsed&& posting_list_used_in,
+ PostingListIndex posting_list_index_in,
+ uint32_t next_block_index_in,
+ bool has_free_posting_lists_in)
+ : posting_list_used(std::move(posting_list_used_in)),
+ posting_list_index(posting_list_index_in),
+ next_block_index(next_block_index_in),
+ has_free_posting_lists(has_free_posting_lists_in) {}
+ };
+
+ // PReads existing posting list content at posting_list_index, instantiates a
+ // PostingListUsed, and returns it with some additional index block info.
//
// RETURNS:
- // - a valid PostingListUsed on success
- // - INVALID_ARGUMENT if posting_list_index >= max_num_posting_lists()
- libtextclassifier3::StatusOr<PostingListUsed> GetAllocatedPostingList(
+ // - A valid PostingListAndBlockInfo on success
+ // - INVALID_ARGUMENT_ERROR if posting_list_index < 0 or posting_list_index
+ // >= max_num_posting_lists()
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::StatusOr<PostingListAndBlockInfo> GetAllocatedPostingList(
PostingListIndex posting_list_index);
- // Allocates a PostingListUsed in the IndexBlock, if possible.
+ // Allocates a PostingListUsed in the IndexBlock, initializes the content
+ // (by serializer), and returns the initialized PostingListUsed instance,
+ // PostingListIndex, and some additional index block info.
//
// RETURNS:
- // - a valid PostingListIndex that can be used to retrieve the allocated
- // PostingListUsed via a call to GetAllocatedPostingList
- // - RESOURCE_EXHAUSTED if !has_free_posting_lists()
- libtextclassifier3::StatusOr<PostingListIndex> AllocatePostingList();
+ // - A valid PostingListAndBlockInfo instance on success
+ // - RESOURCE_EXHAUSTED_ERROR if there is already no free posting list
+ // available, i.e. !HasFreePostingLists()
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::StatusOr<PostingListAndBlockInfo> AllocatePostingList();
- // Free posting list at posting_list_index.
+ // Frees a posting list at posting_list_index, adds it into the free list
+ // chain and updates block header. Both changes on posting list free and
+ // header will be synced to disk.
//
// It is considered an error to "double-free" a posting list. You should never
// call FreePostingList(index) with the same index twice, unless that index
@@ -127,86 +185,182 @@ class IndexBlock {
// index = block.AllocatePostingList();
// DoSomethingElse(block.GetAllocatedPostingList(index));
// // A-Ok! We called AllocatePostingList() since the last FreePostingList()
- // call. block.FreePostingList(index);
+ // // call.
+ // block.FreePostingList(index);
//
- // Has no effect if posting_list_index >= max_num_posting_lists().
- void FreePostingList(PostingListIndex posting_list_index);
+ // RETURNS:
+ // - OK on success
+ // - INVALID_ARGUMENT_ERROR if posting_list_index < 0 or posting_list_index
+ // >= max_num_posting_lists()
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status FreePostingList(
+ PostingListIndex posting_list_index);
- // Blocks can be chained. The interpretation of the chaining is up
- // to the caller.
- uint32_t next_block_index() const { return header_->next_block_index; }
+ // Writes back an allocated posting list (PostingListUsed) at
+ // posting_list_index to disk.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INVALID_ARGUMENT_ERROR
+ // - If posting_list_index < 0 or posting_list_index >=
+ // max_num_posting_lists()
+ // - If posting_list_used.size_in_bytes() != posting_list_bytes_
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status WritePostingListToDisk(
+ const PostingListUsed& posting_list_used,
+ PostingListIndex posting_list_index);
- void set_next_block_index(uint32_t next_block_index) {
- header_->next_block_index = next_block_index;
- }
+ // PReads to get the index of next block from block header. Blocks can be
+ // chained, and the interpretation of the chaining is up to the caller.
+ //
+ // RETURNS:
+ // - Next block index on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::StatusOr<uint32_t> GetNextBlockIndex() const;
+
+ // PWrites block header to set the index of next block.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status SetNextBlockIndex(uint32_t next_block_index);
+
+ // PReads to get whether or not there are available posting lists in the free
+ // list.
+ //
+ // RETURNS:
+ // - A bool value on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::StatusOr<bool> HasFreePostingLists() const;
// Retrieves the size (in bytes) of the posting lists in this IndexBlock.
- uint32_t get_posting_list_bytes() const {
- return header_->posting_list_bytes;
- }
+ uint32_t posting_list_bytes() const { return posting_list_bytes_; }
- // Maximum number of posting lists in the block.
+ // Retrieves maximum number of posting lists in the block.
uint32_t max_num_posting_lists() const {
- return total_posting_lists_bytes() / get_posting_list_bytes();
+ return total_posting_lists_bytes() / posting_list_bytes_;
}
- // Number of bits required to store the largest PostingListIndex in this
- // block.
+ // Retrieves number of bits required to store the largest PostingListIndex in
+ // this block.
int posting_list_index_bits() const {
return BitsToStore(max_num_posting_lists());
}
- // Returns whether or not there are available posting lists in the free list.
- bool has_free_posting_lists() const {
- return header_->free_list_posting_list_index != kInvalidPostingListIndex;
- }
-
private:
- // Assumes that mmapped_file already has established a valid mapping to the
- // requested block.
- explicit IndexBlock(PostingListUsedSerializer* serializer,
- MemoryMappedFile&& mmapped_block);
+ struct BlockHeader {
+ // Index of the next block if this block is being chained or part of a free
+ // list.
+ uint32_t next_block_index;
+
+ // Index to the first PostingListFree in the IndexBlock. This is the start
+ // of the free list.
+ PostingListIndex free_list_posting_list_index;
+
+ // The size of each posting list in the IndexBlock. This value will be
+ // initialized when calling CreateFromUninitializedRegion once and remain
+ // unchanged.
+ uint32_t posting_list_bytes;
+ };
+
+ // Assumes that fd has been opened for write.
+ explicit IndexBlock(const Filesystem* filesystem,
+ PostingListSerializer* serializer, int fd,
+ off_t block_file_offset, uint32_t block_size_in_bytes,
+ uint32_t posting_list_bytes)
+ : filesystem_(filesystem),
+ serializer_(serializer),
+ fd_(fd),
+ block_file_offset_(block_file_offset),
+ block_size_in_bytes_(block_size_in_bytes),
+ posting_list_bytes_(posting_list_bytes) {}
// Resets IndexBlock to hold posting lists of posting_list_bytes size and adds
// all posting lists to the free list.
//
// RETURNS:
- // - OK, on success
- // - INVALID_ARGUMENT if posting_list_bytes is a valid posting list size.
- libtextclassifier3::Status Reset(int posting_list_bytes);
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status Reset();
- char* get_posting_list_ptr(PostingListIndex posting_list_index);
+ // Frees a posting list at posting_list_index, adds it into the free list
+ // chain and updates (sets) the given block header instance.
+ //
+ // - This function is served to avoid redundant block header PWrite when
+ // freeing multiple posting lists.
+ // - The caller should provide a BlockHeader instance for updating the free
+ // list chain, and finally sync it to disk.
+ //
+ // REQUIRES: 0 <= posting_list_index < max_posting_list_bytes()
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status FreePostingListImpl(
+ BlockHeader& header, PostingListIndex posting_list_index);
- // Bytes in the block available for posting lists (minus header,
- // alignment, etc.).
+ // PReads block header from the file.
+ //
+ // RETURNS:
+ // - A BlockHeader instance on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::StatusOr<BlockHeader> ReadHeader() const;
+
+ // PReads posting list content at posting_list_index. Note that it can be a
+ // freed or allocated posting list.
+ //
+ // REQUIRES: 0 <= posting_list_index < max_posting_list_bytes()
+ //
+ // RETURNS:
+ // - A data buffer with size = posting_list_bytes_ on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::StatusOr<std::unique_ptr<uint8_t[]>> ReadPostingList(
+ PostingListIndex posting_list_index) const;
+
+ // PWrites block header to the file.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status WriteHeader(const BlockHeader& header);
+
+ // PWrites posting list content at posting_list_index. Note that it can be a
+ // freed or allocated posting list.
+ //
+ // REQUIRES: 0 <= posting_list_index < max_posting_list_bytes() and size of
+ // posting_list_buffer is posting_list_bytes_.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status WritePostingList(
+ PostingListIndex posting_list_index, const uint8_t* posting_list_buffer);
+
+ // Retrieves the absolute file (fd) offset of a posting list at
+ // posting_list_index.
+ //
+ // REQUIRES: 0 <= posting_list_index < max_posting_list_bytes()
+ off_t get_posting_list_file_offset(
+ PostingListIndex posting_list_index) const {
+ return block_file_offset_ + sizeof(BlockHeader) +
+ posting_list_bytes_ * posting_list_index;
+ }
+
+ // Retrieves the byte size in the block available for posting lists (excluding
+ // the size of block header).
uint32_t total_posting_lists_bytes() const {
return block_size_in_bytes_ - sizeof(BlockHeader);
}
- struct BlockHeader {
- // Index of the next block if this block is being chained or part of a free
- // list.
- uint32_t next_block_index;
-
- // Index to the first PostingListFree in the IndexBlock. This is the start
- // of the free list.
- PostingListIndex free_list_posting_list_index;
+ const Filesystem* filesystem_; // Does not own.
- // The size of each posting list in the IndexBlock.
- uint32_t posting_list_bytes;
- };
- // Pointer to the header of this block. The header is used to store info about
- // this block and its posting lists.
- BlockHeader* header_;
- // Pointer to the beginning of the posting lists region - the area the block
- // after the header.
- char* posting_lists_start_ptr_;
- uint32_t block_size_in_bytes_;
+ PostingListSerializer* serializer_; // Does not own.
- PostingListUsedSerializer* serializer_; // Does not own.
+ int fd_; // Does not own.
- // MemoryMappedFile used to interact with the underlying flash block.
- std::unique_ptr<MemoryMappedFile> mmapped_block_;
+ off_t block_file_offset_;
+ uint32_t block_size_in_bytes_;
+ uint32_t posting_list_bytes_;
};
} // namespace lib
diff --git a/icing/file/posting_list/index-block_test.cc b/icing/file/posting_list/index-block_test.cc
index 775858d..fcc134a 100644
--- a/icing/file/posting_list/index-block_test.cc
+++ b/icing/file/posting_list/index-block_test.cc
@@ -19,7 +19,7 @@
#include "gtest/gtest.h"
#include "icing/file/filesystem.h"
#include "icing/file/posting_list/posting-list-used.h"
-#include "icing/index/main/posting-list-used-hit-serializer.h"
+#include "icing/index/main/posting-list-hit-serializer.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
@@ -30,6 +30,8 @@ namespace {
using ::testing::ElementsAreArray;
using ::testing::Eq;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
static constexpr int kBlockSize = 4096;
@@ -40,22 +42,28 @@ class IndexBlockTest : public ::testing::Test {
flash_file_ = test_dir_ + "/0";
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(test_dir_.c_str()));
+ sfd_ = std::make_unique<ScopedFd>(
+ filesystem_.OpenForWrite(flash_file_.c_str()));
+ ASSERT_TRUE(sfd_->is_valid());
+
// Grow the file by one block for the IndexBlock to use.
- ASSERT_TRUE(filesystem_.Grow(flash_file_.c_str(), kBlockSize));
+ ASSERT_TRUE(filesystem_.Grow(sfd_->get(), kBlockSize));
// TODO: test different serializers
- serializer_ = std::make_unique<PostingListUsedHitSerializer>();
+ serializer_ = std::make_unique<PostingListHitSerializer>();
}
void TearDown() override {
serializer_.reset();
+ sfd_.reset();
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
}
+ Filesystem filesystem_;
std::string test_dir_;
std::string flash_file_;
- Filesystem filesystem_;
- std::unique_ptr<PostingListUsedHitSerializer> serializer_;
+ std::unique_ptr<ScopedFd> sfd_;
+ std::unique_ptr<PostingListHitSerializer> serializer_;
};
TEST_F(IndexBlockTest, CreateFromUninitializedRegionProducesEmptyBlock) {
@@ -65,9 +73,9 @@ TEST_F(IndexBlockTest, CreateFromUninitializedRegionProducesEmptyBlock) {
// Create an IndexBlock from this newly allocated file block.
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
- filesystem_, flash_file_, serializer_.get(),
+ &filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize, kPostingListBytes));
- EXPECT_TRUE(block.has_free_posting_lists());
+ EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsTrue()));
}
}
@@ -77,9 +85,9 @@ TEST_F(IndexBlockTest, SizeAccessorsWorkCorrectly) {
// Create an IndexBlock from this newly allocated file block.
ICING_ASSERT_OK_AND_ASSIGN(IndexBlock block,
IndexBlock::CreateFromUninitializedRegion(
- filesystem_, flash_file_, serializer_.get(),
+ &filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize, kPostingListBytes1));
- EXPECT_THAT(block.get_posting_list_bytes(), Eq(kPostingListBytes1));
+ EXPECT_THAT(block.posting_list_bytes(), Eq(kPostingListBytes1));
// There should be (4096 - 12) / 20 = 204 posting lists
// (sizeof(BlockHeader)==12). We can store a PostingListIndex of 203 in only 8
// bits.
@@ -91,9 +99,9 @@ TEST_F(IndexBlockTest, SizeAccessorsWorkCorrectly) {
// Create an IndexBlock from this newly allocated file block.
ICING_ASSERT_OK_AND_ASSIGN(
block, IndexBlock::CreateFromUninitializedRegion(
- filesystem_, flash_file_, serializer_.get(), /*offset=*/0,
+ &filesystem_, serializer_.get(), sfd_->get(), /*offset=*/0,
kBlockSize, kPostingListBytes2));
- EXPECT_THAT(block.get_posting_list_bytes(), Eq(kPostingListBytes2));
+ EXPECT_THAT(block.posting_list_bytes(), Eq(kPostingListBytes2));
// There should be (4096 - 12) / 200 = 20 posting lists
// (sizeof(BlockHeader)==12). We can store a PostingListIndex of 19 in only 5
// bits.
@@ -116,32 +124,36 @@ TEST_F(IndexBlockTest, IndexBlockChangesPersistAcrossInstances) {
// Create an IndexBlock from this newly allocated file block.
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
- filesystem_, flash_file_, serializer_.get(),
- /*offset=*/0,
- /*block_size=*/kBlockSize, kPostingListBytes));
+ &filesystem_, serializer_.get(), sfd_->get(),
+ /*offset=*/0, kBlockSize, kPostingListBytes));
// Add hits to the first posting list.
- ICING_ASSERT_OK_AND_ASSIGN(allocated_index, block.AllocatePostingList());
- ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used,
- block.GetAllocatedPostingList(allocated_index));
+ ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info,
+ block.AllocatePostingList());
for (const Hit& hit : test_hits) {
- ICING_ASSERT_OK(serializer_->PrependHit(&pl_used, hit));
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&alloc_info.posting_list_used, hit));
}
EXPECT_THAT(
- serializer_->GetHits(&pl_used),
+ serializer_->GetHits(&alloc_info.posting_list_used),
IsOkAndHolds(ElementsAreArray(test_hits.rbegin(), test_hits.rend())));
+
+ ICING_ASSERT_OK(block.WritePostingListToDisk(
+ alloc_info.posting_list_used, alloc_info.posting_list_index));
+ allocated_index = alloc_info.posting_list_index;
}
{
// Create an IndexBlock from the previously allocated file block.
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock block, IndexBlock::CreateFromPreexistingIndexBlockRegion(
- filesystem_, flash_file_, serializer_.get(),
+ &filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize));
- ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used,
- block.GetAllocatedPostingList(allocated_index));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ IndexBlock::PostingListAndBlockInfo pl_block_info,
+ block.GetAllocatedPostingList(allocated_index));
EXPECT_THAT(
- serializer_->GetHits(&pl_used),
+ serializer_->GetHits(&pl_block_info.posting_list_used),
IsOkAndHolds(ElementsAreArray(test_hits.rbegin(), test_hits.rend())));
- EXPECT_TRUE(block.has_free_posting_lists());
+ EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsTrue()));
}
}
@@ -168,58 +180,64 @@ TEST_F(IndexBlockTest, IndexBlockMultiplePostingLists) {
// Create an IndexBlock from this newly allocated file block.
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
- filesystem_, flash_file_, serializer_.get(),
+ &filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize, kPostingListBytes));
// Add hits to the first posting list.
- ICING_ASSERT_OK_AND_ASSIGN(allocated_index_1, block.AllocatePostingList());
- ICING_ASSERT_OK_AND_ASSIGN(
- PostingListUsed pl_used_1,
- block.GetAllocatedPostingList(allocated_index_1));
+ ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info_1,
+ block.AllocatePostingList());
for (const Hit& hit : hits_in_posting_list1) {
- ICING_ASSERT_OK(serializer_->PrependHit(&pl_used_1, hit));
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&alloc_info_1.posting_list_used, hit));
}
- EXPECT_THAT(serializer_->GetHits(&pl_used_1),
+ EXPECT_THAT(serializer_->GetHits(&alloc_info_1.posting_list_used),
IsOkAndHolds(ElementsAreArray(hits_in_posting_list1.rbegin(),
hits_in_posting_list1.rend())));
// Add hits to the second posting list.
- ICING_ASSERT_OK_AND_ASSIGN(allocated_index_2, block.AllocatePostingList());
- ICING_ASSERT_OK_AND_ASSIGN(
- PostingListUsed pl_used_2,
- block.GetAllocatedPostingList(allocated_index_2));
+ ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info_2,
+ block.AllocatePostingList());
for (const Hit& hit : hits_in_posting_list2) {
- ICING_ASSERT_OK(serializer_->PrependHit(&pl_used_2, hit));
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&alloc_info_2.posting_list_used, hit));
}
- EXPECT_THAT(serializer_->GetHits(&pl_used_2),
+ EXPECT_THAT(serializer_->GetHits(&alloc_info_2.posting_list_used),
IsOkAndHolds(ElementsAreArray(hits_in_posting_list2.rbegin(),
hits_in_posting_list2.rend())));
EXPECT_THAT(block.AllocatePostingList(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
- EXPECT_FALSE(block.has_free_posting_lists());
+ EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsFalse()));
+
+ // Write both posting lists to disk.
+ ICING_ASSERT_OK(block.WritePostingListToDisk(
+ alloc_info_1.posting_list_used, alloc_info_1.posting_list_index));
+ ICING_ASSERT_OK(block.WritePostingListToDisk(
+ alloc_info_2.posting_list_used, alloc_info_2.posting_list_index));
+ allocated_index_1 = alloc_info_1.posting_list_index;
+ allocated_index_2 = alloc_info_2.posting_list_index;
}
{
// Create an IndexBlock from the previously allocated file block.
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock block, IndexBlock::CreateFromPreexistingIndexBlockRegion(
- filesystem_, flash_file_, serializer_.get(),
+ &filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize));
ICING_ASSERT_OK_AND_ASSIGN(
- PostingListUsed pl_used_1,
+ IndexBlock::PostingListAndBlockInfo pl_block_info_1,
block.GetAllocatedPostingList(allocated_index_1));
- EXPECT_THAT(serializer_->GetHits(&pl_used_1),
+ EXPECT_THAT(serializer_->GetHits(&pl_block_info_1.posting_list_used),
IsOkAndHolds(ElementsAreArray(hits_in_posting_list1.rbegin(),
hits_in_posting_list1.rend())));
ICING_ASSERT_OK_AND_ASSIGN(
- PostingListUsed pl_used_2,
+ IndexBlock::PostingListAndBlockInfo pl_block_info_2,
block.GetAllocatedPostingList(allocated_index_2));
- EXPECT_THAT(serializer_->GetHits(&pl_used_2),
+ EXPECT_THAT(serializer_->GetHits(&pl_block_info_2.posting_list_used),
IsOkAndHolds(ElementsAreArray(hits_in_posting_list2.rbegin(),
hits_in_posting_list2.rend())));
EXPECT_THAT(block.AllocatePostingList(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
- EXPECT_FALSE(block.has_free_posting_lists());
+ EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsFalse()));
}
}
@@ -229,7 +247,7 @@ TEST_F(IndexBlockTest, IndexBlockReallocatingPostingLists) {
// Create an IndexBlock from this newly allocated file block.
ICING_ASSERT_OK_AND_ASSIGN(IndexBlock block,
IndexBlock::CreateFromUninitializedRegion(
- filesystem_, flash_file_, serializer_.get(),
+ &filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize, kPostingListBytes));
// Add hits to the first posting list.
@@ -240,14 +258,13 @@ TEST_F(IndexBlockTest, IndexBlockReallocatingPostingLists) {
Hit(/*section_id=*/3, /*document_id=*/3, /*term_frequency=*/17),
Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultTermFrequency),
};
- ICING_ASSERT_OK_AND_ASSIGN(PostingListIndex allocated_index_1,
+ ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info_1,
block.AllocatePostingList());
- ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used_1,
- block.GetAllocatedPostingList(allocated_index_1));
for (const Hit& hit : hits_in_posting_list1) {
- ICING_ASSERT_OK(serializer_->PrependHit(&pl_used_1, hit));
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&alloc_info_1.posting_list_used, hit));
}
- EXPECT_THAT(serializer_->GetHits(&pl_used_1),
+ EXPECT_THAT(serializer_->GetHits(&alloc_info_1.posting_list_used),
IsOkAndHolds(ElementsAreArray(hits_in_posting_list1.rbegin(),
hits_in_posting_list1.rend())));
@@ -259,45 +276,44 @@ TEST_F(IndexBlockTest, IndexBlockReallocatingPostingLists) {
Hit(/*section_id=*/11, /*document_id=*/306, /*term_frequency=*/12),
Hit(/*section_id=*/10, /*document_id=*/306, Hit::kDefaultTermFrequency),
};
- ICING_ASSERT_OK_AND_ASSIGN(PostingListIndex allocated_index_2,
+ ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info_2,
block.AllocatePostingList());
- ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used_2,
- block.GetAllocatedPostingList(allocated_index_2));
for (const Hit& hit : hits_in_posting_list2) {
- ICING_ASSERT_OK(serializer_->PrependHit(&pl_used_2, hit));
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&alloc_info_2.posting_list_used, hit));
}
- EXPECT_THAT(serializer_->GetHits(&pl_used_2),
+ EXPECT_THAT(serializer_->GetHits(&alloc_info_2.posting_list_used),
IsOkAndHolds(ElementsAreArray(hits_in_posting_list2.rbegin(),
hits_in_posting_list2.rend())));
EXPECT_THAT(block.AllocatePostingList(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
- EXPECT_FALSE(block.has_free_posting_lists());
+ EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsFalse()));
// Now free the first posting list. Then, reallocate it and fill it with a
// different set of hits.
- block.FreePostingList(allocated_index_1);
- EXPECT_TRUE(block.has_free_posting_lists());
+ block.FreePostingList(alloc_info_1.posting_list_index);
+ EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsTrue()));
std::vector<Hit> hits_in_posting_list3{
Hit(/*section_id=*/12, /*document_id=*/0, /*term_frequency=*/88),
Hit(/*section_id=*/17, /*document_id=*/1, Hit::kDefaultTermFrequency),
Hit(/*section_id=*/0, /*document_id=*/2, /*term_frequency=*/2),
};
- ICING_ASSERT_OK_AND_ASSIGN(PostingListIndex allocated_index_3,
+ ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info_3,
block.AllocatePostingList());
- EXPECT_THAT(allocated_index_3, Eq(allocated_index_1));
- ICING_ASSERT_OK_AND_ASSIGN(pl_used_1,
- block.GetAllocatedPostingList(allocated_index_3));
+ EXPECT_THAT(alloc_info_3.posting_list_index,
+ Eq(alloc_info_3.posting_list_index));
for (const Hit& hit : hits_in_posting_list3) {
- ICING_ASSERT_OK(serializer_->PrependHit(&pl_used_1, hit));
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&alloc_info_3.posting_list_used, hit));
}
- EXPECT_THAT(serializer_->GetHits(&pl_used_1),
+ EXPECT_THAT(serializer_->GetHits(&alloc_info_3.posting_list_used),
IsOkAndHolds(ElementsAreArray(hits_in_posting_list3.rbegin(),
hits_in_posting_list3.rend())));
EXPECT_THAT(block.AllocatePostingList(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
- EXPECT_FALSE(block.has_free_posting_lists());
+ EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsFalse()));
}
TEST_F(IndexBlockTest, IndexBlockNextBlockIndex) {
@@ -309,29 +325,29 @@ TEST_F(IndexBlockTest, IndexBlockNextBlockIndex) {
// next block index.
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
- filesystem_, flash_file_, serializer_.get(),
+ &filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize, kPostingListBytes));
- EXPECT_THAT(block.next_block_index(), Eq(kInvalidBlockIndex));
- block.set_next_block_index(kSomeBlockIndex);
- EXPECT_THAT(block.next_block_index(), Eq(kSomeBlockIndex));
+ EXPECT_THAT(block.GetNextBlockIndex(), IsOkAndHolds(kInvalidBlockIndex));
+ EXPECT_THAT(block.SetNextBlockIndex(kSomeBlockIndex), IsOk());
+ EXPECT_THAT(block.GetNextBlockIndex(), IsOkAndHolds(kSomeBlockIndex));
}
{
// Create an IndexBlock from this previously allocated file block and make
// sure that next_block_index is still set properly.
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock block, IndexBlock::CreateFromPreexistingIndexBlockRegion(
- filesystem_, flash_file_, serializer_.get(),
+ &filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize));
- EXPECT_THAT(block.next_block_index(), Eq(kSomeBlockIndex));
+ EXPECT_THAT(block.GetNextBlockIndex(), IsOkAndHolds(kSomeBlockIndex));
}
{
// Create an IndexBlock, treating this file block as uninitialized. This
// reset the next_block_index to kInvalidBlockIndex.
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
- filesystem_, flash_file_, serializer_.get(),
+ &filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize, kPostingListBytes));
- EXPECT_THAT(block.next_block_index(), Eq(kInvalidBlockIndex));
+ EXPECT_THAT(block.GetNextBlockIndex(), IsOkAndHolds(kInvalidBlockIndex));
}
}
diff --git a/icing/file/posting_list/posting-list-accessor.cc b/icing/file/posting_list/posting-list-accessor.cc
index 00f4417..67d7a21 100644
--- a/icing/file/posting_list/posting-list-accessor.cc
+++ b/icing/file/posting_list/posting-list-accessor.cc
@@ -19,7 +19,6 @@
#include "icing/absl_ports/canonical_errors.h"
#include "icing/file/posting_list/flash-index-storage.h"
-#include "icing/file/posting_list/index-block.h"
#include "icing/file/posting_list/posting-list-identifier.h"
#include "icing/file/posting_list/posting-list-used.h"
#include "icing/util/status-macros.h"
@@ -27,19 +26,21 @@
namespace icing {
namespace lib {
-void PostingListAccessor::FlushPreexistingPostingList() {
- if (preexisting_posting_list_->block.max_num_posting_lists() == 1) {
- // If this is a max-sized posting list, then just keep track of the id for
- // chaining. It'll be flushed to disk when preexisting_posting_list_ is
- // destructed.
+libtextclassifier3::Status PostingListAccessor::FlushPreexistingPostingList() {
+ if (preexisting_posting_list_->posting_list.size_in_bytes() ==
+ storage_->max_posting_list_bytes()) {
+ // If this is a max-sized posting list, then sync to disk and keep track of
+ // the id.
+ ICING_RETURN_IF_ERROR(
+ storage_->WritePostingListToDisk(*preexisting_posting_list_));
prev_block_identifier_ = preexisting_posting_list_->id;
} else {
// If this is NOT a max-sized posting list, then our data have outgrown this
// particular posting list. Move the data into the in-memory posting list
// and free this posting list.
//
- // Move will always succeed since posting_list_buffer_ is max_pl_bytes.
- GetSerializer()->MoveFrom(/*dst=*/&posting_list_buffer_,
+ // Move will always succeed since in_memory_posting_list_ is max_pl_bytes.
+ GetSerializer()->MoveFrom(/*dst=*/&in_memory_posting_list_,
/*src=*/&preexisting_posting_list_->posting_list);
// Now that all the contents of this posting list have been copied, there's
@@ -48,58 +49,78 @@ void PostingListAccessor::FlushPreexistingPostingList() {
storage_->FreePostingList(std::move(*preexisting_posting_list_));
}
preexisting_posting_list_.reset();
+ return libtextclassifier3::Status::OK;
}
libtextclassifier3::Status PostingListAccessor::FlushInMemoryPostingList() {
- // We exceeded max_pl_bytes(). Need to flush posting_list_buffer_ and update
- // the chain.
- uint32_t max_posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
- storage_->block_size(), GetSerializer()->GetDataTypeBytes());
+ // We exceeded max_pl_bytes(). Need to flush in_memory_posting_list_ and
+ // update the chain.
ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
- storage_->AllocatePostingList(max_posting_list_bytes));
- holder.block.set_next_block_index(prev_block_identifier_.block_index());
+ storage_->AllocateAndChainMaxSizePostingList(
+ prev_block_identifier_.block_index()));
+ ICING_RETURN_IF_ERROR(
+ GetSerializer()->MoveFrom(/*dst=*/&holder.posting_list,
+ /*src=*/&in_memory_posting_list_));
+ ICING_RETURN_IF_ERROR(storage_->WritePostingListToDisk(holder));
+
+ // Set prev block id only if persist to disk succeeded.
prev_block_identifier_ = holder.id;
- return GetSerializer()->MoveFrom(/*dst=*/&holder.posting_list,
- /*src=*/&posting_list_buffer_);
+ return libtextclassifier3::Status::OK;
}
PostingListAccessor::FinalizeResult PostingListAccessor::Finalize() && {
if (preexisting_posting_list_ != nullptr) {
- // Our data are already in an existing posting list. Nothing else to do, but
- // return its id.
- return FinalizeResult(libtextclassifier3::Status::OK,
- preexisting_posting_list_->id);
+ // Sync to disk.
+ return FinalizeResult(
+ storage_->WritePostingListToDisk(*preexisting_posting_list_),
+ preexisting_posting_list_->id);
}
- if (GetSerializer()->GetBytesUsed(&posting_list_buffer_) <= 0) {
+
+ if (GetSerializer()->GetBytesUsed(&in_memory_posting_list_) <= 0) {
return FinalizeResult(absl_ports::InvalidArgumentError(
"Can't finalize an empty PostingListAccessor. "
"There's nothing to Finalize!"),
PostingListIdentifier::kInvalid);
}
- uint32_t posting_list_bytes =
- GetSerializer()->GetMinPostingListSizeToFit(&posting_list_buffer_);
+
+ libtextclassifier3::StatusOr<PostingListHolder> holder_or;
if (prev_block_identifier_.is_valid()) {
- posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
- storage_->block_size(), GetSerializer()->GetDataTypeBytes());
+ // If prev_block_identifier_ is valid, then it means there was a max-sized
+ // posting list, so we have to allocate another new max size posting list
+ // and chain them together.
+ holder_or = storage_->AllocateAndChainMaxSizePostingList(
+ prev_block_identifier_.block_index());
+ } else {
+ // Otherwise, it is the first posting list, and we can use smaller size pl.
+ // Note that even if it needs a max-sized posting list here, it is ok to
+ // call AllocatePostingList without setting next block index since we don't
+ // have any previous posting list to chain and AllocatePostingList will set
+ // next block index to kInvalidBlockIndex.
+ uint32_t posting_list_bytes =
+ GetSerializer()->GetMinPostingListSizeToFit(&in_memory_posting_list_);
+ holder_or = storage_->AllocatePostingList(posting_list_bytes);
}
- auto holder_or = storage_->AllocatePostingList(posting_list_bytes);
+
if (!holder_or.ok()) {
return FinalizeResult(std::move(holder_or).status(),
prev_block_identifier_);
}
PostingListHolder holder = std::move(holder_or).ValueOrDie();
- if (prev_block_identifier_.is_valid()) {
- holder.block.set_next_block_index(prev_block_identifier_.block_index());
- }
// Move to allocated area. This should never actually return an error. We know
// that editor.posting_list() is valid because it wouldn't have successfully
- // returned by AllocatePostingList if it wasn't. We know posting_list_buffer_
- // is valid because we created it in-memory. And finally, we know that the
- // data from posting_list_buffer_ will fit in editor.posting_list() because we
- // requested it be at at least posting_list_bytes large.
+ // returned by AllocatePostingList if it wasn't. We know
+ // in_memory_posting_list_ is valid because we created it in-memory. And
+ // finally, we know that the data from in_memory_posting_list_ will fit in
+ // editor.posting_list() because we requested it be at at least
+ // posting_list_bytes large.
auto status = GetSerializer()->MoveFrom(/*dst=*/&holder.posting_list,
- /*src=*/&posting_list_buffer_);
+ /*src=*/&in_memory_posting_list_);
+ if (!status.ok()) {
+ return FinalizeResult(std::move(status), prev_block_identifier_);
+ }
+
+ status = storage_->WritePostingListToDisk(holder);
if (!status.ok()) {
return FinalizeResult(std::move(status), prev_block_identifier_);
}
diff --git a/icing/file/posting_list/posting-list-accessor.h b/icing/file/posting_list/posting-list-accessor.h
index c7d614f..91f1f2d 100644
--- a/icing/file/posting_list/posting-list-accessor.h
+++ b/icing/file/posting_list/posting-list-accessor.h
@@ -28,9 +28,8 @@ namespace lib {
// This class serves to:
// 1. Expose PostingListUseds to clients of FlashIndexStorage
-// 2. Ensure the corresponding instance of IndexBlock has the same lifecycle as
-// the instance of PostingListUsed that the client has access to, while
-// not exposing IndexBlock's api surface.
+// 2. Handles flushing posting list properly, including choosing the most
+// efficient size of PL, chaining max-sized PL correctly, etc.
// 3. Ensure that PostingListUseds can only be freed by calling methods which
// will also properly maintain the FlashIndexStorage free list and prevent
// callers from modifying the Posting List after freeing.
@@ -62,30 +61,28 @@ class PostingListAccessor {
// second-to-last posting list.
FinalizeResult Finalize() &&;
- virtual PostingListUsedSerializer* GetSerializer() = 0;
+ virtual PostingListSerializer* GetSerializer() = 0;
protected:
- explicit PostingListAccessor(
- FlashIndexStorage* storage,
- std::unique_ptr<uint8_t[]> posting_list_buffer_array,
- PostingListUsed posting_list_buffer)
+ explicit PostingListAccessor(FlashIndexStorage* storage,
+ PostingListUsed in_memory_posting_list)
: storage_(storage),
prev_block_identifier_(PostingListIdentifier::kInvalid),
- posting_list_buffer_array_(std::move(posting_list_buffer_array)),
- posting_list_buffer_(std::move(posting_list_buffer)),
+ in_memory_posting_list_(std::move(in_memory_posting_list)),
has_reached_posting_list_chain_end_(false) {}
// Flushes preexisting_posting_list_ to disk if it's a max-sized posting list
// and populates prev_block_identifier.
// If it's not a max-sized posting list, moves the contents of
- // preexisting_posting_list_ to posting_list_buffer_ and frees
+ // preexisting_posting_list_ to in_memory_posting_list_ and frees
// preexisting_posting_list_.
// Sets preexisting_posting_list_ to nullptr.
- void FlushPreexistingPostingList();
+ libtextclassifier3::Status FlushPreexistingPostingList();
- // Flushes posting_list_buffer_ to a max-sized posting list on disk, setting
- // its next pointer to prev_block_identifier_ and updating
- // prev_block_identifier_ to point to the just-written posting list.
+ // Flushes in_memory_posting_list_ to a max-sized posting list on disk, chains
+ // the newly allocated max-size posting list block by setting its next pointer
+ // to prev_block_identifier_, and updates prev_block_identifier_ to point to
+ // the newly allocated posting list.
libtextclassifier3::Status FlushInMemoryPostingList();
// Frees all posting lists in the posting list chain starting at
@@ -102,19 +99,15 @@ class PostingListAccessor {
// An editor to an existing posting list on disk. If available (non-NULL),
// we'll try to add all data to this posting list. Once this posting list
// fills up, we'll either 1) chain it (if a max-sized posting list) and put
- // future data in posting_list_buffer_ or 2) copy all of its data into
- // posting_list_buffer_ and free this pl (if not a max-sized posting list).
+ // future data in in_memory_posting_list_ or 2) copy all of its data into
+ // in_memory_posting_list_ and free this pl (if not a max-sized posting list).
// TODO(tjbarron) provide a benchmark to demonstrate the effects that re-using
// existing posting lists has on latency.
std::unique_ptr<PostingListHolder> preexisting_posting_list_;
// In-memory posting list used to buffer data before writing them to the
// smallest on-disk posting list that will fit them.
- // posting_list_buffer_array_ owns the memory region that posting_list_buffer_
- // interprets. Therefore, posting_list_buffer_array_ must have the same
- // lifecycle as posting_list_buffer_.
- std::unique_ptr<uint8_t[]> posting_list_buffer_array_;
- PostingListUsed posting_list_buffer_;
+ PostingListUsed in_memory_posting_list_;
bool has_reached_posting_list_chain_end_;
};
diff --git a/icing/file/posting_list/posting-list-free_test.cc b/icing/file/posting_list/posting-list-free_test.cc
index 99e3cf5..503012d 100644
--- a/icing/file/posting_list/posting-list-free_test.cc
+++ b/icing/file/posting_list/posting-list-free_test.cc
@@ -19,7 +19,7 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gtest/gtest.h"
-#include "icing/index/main/posting-list-used-hit-serializer.h"
+#include "icing/index/main/posting-list-hit-serializer.h"
#include "icing/testing/common-matchers.h"
namespace icing {
@@ -30,7 +30,7 @@ namespace {
// TODO(b/249829533): test different serializers
TEST(PostingListTest, PostingListFree) {
- PostingListUsedHitSerializer serializer;
+ PostingListHitSerializer serializer;
static const size_t kHitsSize = 2551 * sizeof(Hit);
std::unique_ptr<char[]> hits_buf = std::make_unique<char[]>(kHitsSize);
@@ -43,7 +43,7 @@ TEST(PostingListTest, PostingListFree) {
}
TEST(PostingListTest, PostingListTooSmallInvalidArgument) {
- PostingListUsedHitSerializer serializer;
+ PostingListHitSerializer serializer;
const size_t kHitSizeTooSmall =
serializer.GetMinPostingListSize() - sizeof(Hit);
@@ -61,7 +61,7 @@ TEST(PostingListTest, PostingListTooSmallInvalidArgument) {
}
TEST(PostingListTest, PostingListNotAlignedInvalidArgument) {
- PostingListUsedHitSerializer serializer;
+ PostingListHitSerializer serializer;
const size_t kHitSizeNotAligned = serializer.GetMinPostingListSize() + 1;
std::unique_ptr<char[]> hits_buf =
@@ -79,7 +79,7 @@ TEST(PostingListTest, PostingListNotAlignedInvalidArgument) {
}
TEST(PostingListTest, PostingListNullBufferFailedPrecondition) {
- PostingListUsedHitSerializer serializer;
+ PostingListHitSerializer serializer;
const size_t kHitSize = serializer.GetMinPostingListSize();
// nullptr posting_list_buffer
@@ -96,7 +96,7 @@ TEST(PostingListTest, PostingListNullBufferFailedPrecondition) {
}
TEST(PostingListTest, PostingListFreePreexistingRegion) {
- PostingListUsedHitSerializer serializer;
+ PostingListHitSerializer serializer;
constexpr PostingListIndex kOtherPostingListIndex = 12;
static const size_t kHitsSize = 2551 * sizeof(Hit);
@@ -124,7 +124,7 @@ TEST(PostingListTest, PostingListFreePreexistingRegion) {
}
TEST(PostingListTest, PostingListFreeUninitializedRegion) {
- PostingListUsedHitSerializer serializer;
+ PostingListHitSerializer serializer;
constexpr PostingListIndex kOtherPostingListIndex = 12;
static const size_t kHitsSize = 2551 * sizeof(Hit);
diff --git a/icing/file/posting_list/posting-list-identifier.h b/icing/file/posting_list/posting-list-identifier.h
index 54b2888..78821e8 100644
--- a/icing/file/posting_list/posting-list-identifier.h
+++ b/icing/file/posting_list/posting-list-identifier.h
@@ -78,7 +78,7 @@ class PostingListIdentifier {
/*val=*/block_index);
}
- int block_index() const {
+ uint32_t block_index() const {
return BITFIELD_GET(val_, kEncodedPostingListIndexBits, kBlockIndexBits);
}
diff --git a/icing/file/posting_list/posting-list-used.cc b/icing/file/posting_list/posting-list-used.cc
index 370b9c7..d049b13 100644
--- a/icing/file/posting_list/posting-list-used.cc
+++ b/icing/file/posting_list/posting-list-used.cc
@@ -15,6 +15,7 @@
#include "icing/file/posting_list/posting-list-used.h"
#include <cstdint>
+#include <memory>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
@@ -27,8 +28,8 @@ namespace lib {
libtextclassifier3::StatusOr<PostingListUsed>
PostingListUsed::CreateFromPreexistingPostingListUsedRegion(
- PostingListUsedSerializer* serializer, void* posting_list_buffer,
- uint32_t size_in_bytes) {
+ PostingListSerializer* serializer,
+ std::unique_ptr<uint8_t[]> posting_list_buffer, uint32_t size_in_bytes) {
ICING_RETURN_ERROR_IF_NULL(serializer);
ICING_RETURN_ERROR_IF_NULL(posting_list_buffer);
@@ -38,16 +39,17 @@ PostingListUsed::CreateFromPreexistingPostingListUsedRegion(
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
"Requested posting list size %d is invalid!", size_in_bytes));
}
- return PostingListUsed(posting_list_buffer, size_in_bytes);
+ return PostingListUsed(std::move(posting_list_buffer), size_in_bytes);
}
libtextclassifier3::StatusOr<PostingListUsed>
-PostingListUsed::CreateFromUnitializedRegion(
- PostingListUsedSerializer* serializer, void* posting_list_buffer,
- uint32_t size_in_bytes) {
- ICING_ASSIGN_OR_RETURN(PostingListUsed posting_list_used,
- CreateFromPreexistingPostingListUsedRegion(
- serializer, posting_list_buffer, size_in_bytes));
+PostingListUsed::CreateFromUnitializedRegion(PostingListSerializer* serializer,
+ uint32_t size_in_bytes) {
+ ICING_ASSIGN_OR_RETURN(
+ PostingListUsed posting_list_used,
+ CreateFromPreexistingPostingListUsedRegion(
+ serializer, std::make_unique<uint8_t[]>(size_in_bytes),
+ size_in_bytes));
serializer->Clear(&posting_list_used);
return posting_list_used;
}
diff --git a/icing/file/posting_list/posting-list-used.h b/icing/file/posting_list/posting-list-used.h
index 5821880..980d210 100644
--- a/icing/file/posting_list/posting-list-used.h
+++ b/icing/file/posting_list/posting-list-used.h
@@ -16,6 +16,7 @@
#define ICING_FILE_POSTING_LIST_POSTING_LIST_USED_H_
#include <cstdint>
+#include <memory>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
@@ -41,9 +42,9 @@ class PostingListUsed;
// related methods to serialize/deserialize Hit data to/from posting lists.
// - FlashIndexStorage, IndexBlock, PostingListUsed use the serializer created
// by MainIndex, but hold the reference/pointer in the interface format
-// (PostingListUsedSerializer) and only use common interface methods to manage
+// (PostingListSerializer) and only use common interface methods to manage
// posting list.
-class PostingListUsedSerializer {
+class PostingListSerializer {
public:
// Special data is either a DataType instance or data_start_offset.
template <typename DataType>
@@ -67,7 +68,7 @@ class PostingListUsedSerializer {
static constexpr uint32_t kNumSpecialData = 2;
- virtual ~PostingListUsedSerializer() = default;
+ virtual ~PostingListSerializer() = default;
// Returns byte size of the data type.
virtual uint32_t GetDataTypeBytes() const = 0;
@@ -106,57 +107,65 @@ class PostingListUsedSerializer {
PostingListUsed* src) const = 0;
};
-// A posting list with data in it. Layout depends on the serializer.
+// A posting list with in-memory data. The caller should sync it to disk via
+// FlashIndexStorage. Layout depends on the serializer.
class PostingListUsed {
public:
- // Creates a PostingListUsed that points to a buffer of size_in_bytes bytes.
- // 'Preexisting' means that posting_list_buffer was previously modified by
- // another instance of PostingListUsed.
- //
- // Caller owns the data buffer and must not free it while using a
- // PostingListUsed.
+ // Creates a PostingListUsed that takes over the ownership of
+ // posting_list_buffer with size_in_bytes bytes. 'Preexisting' means that
+ // the data in posting_list_buffer was previously modified by another instance
+ // of PostingListUsed, and the caller should read the data from disk to
+ // posting_list_buffer.
//
// RETURNS:
// - A valid PostingListUsed if successful
// - INVALID_ARGUMENT if posting_list_utils::IsValidPostingListSize check
- // fails
+ // fails
// - FAILED_PRECONDITION if serializer or posting_list_buffer is null
static libtextclassifier3::StatusOr<PostingListUsed>
CreateFromPreexistingPostingListUsedRegion(
- PostingListUsedSerializer* serializer, void* posting_list_buffer,
- uint32_t size_in_bytes);
+ PostingListSerializer* serializer,
+ std::unique_ptr<uint8_t[]> posting_list_buffer, uint32_t size_in_bytes);
- // Creates a PostingListUsed that points to a buffer of size_in_bytes bytes
- // and initializes the content of the buffer so that the returned
- // PostingListUsed is empty.
- //
- // Caller owns the posting_list_buffer buffer and must not free it while using
- // a PostingListUsed.
+ // Creates a PostingListUsed that owns a buffer of size_in_bytes bytes and
+ // initializes the content of the buffer so that the returned PostingListUsed
+ // is empty.
//
// RETURNS:
// - A valid PostingListUsed if successful
// - INVALID_ARGUMENT if posting_list_utils::IsValidPostingListSize check
- // fails
- // - FAILED_PRECONDITION if serializer or posting_list_buffer is null
+ // fails
+ // - FAILED_PRECONDITION if serializer is null
static libtextclassifier3::StatusOr<PostingListUsed>
- CreateFromUnitializedRegion(PostingListUsedSerializer* serializer,
- void* posting_list_buffer,
+ CreateFromUnitializedRegion(PostingListSerializer* serializer,
uint32_t size_in_bytes);
- uint8_t* posting_list_buffer() { return posting_list_buffer_; }
- const uint8_t* posting_list_buffer() const { return posting_list_buffer_; }
+ uint8_t* posting_list_buffer() {
+ is_dirty_ = true;
+ return posting_list_buffer_.get();
+ }
+
+ const uint8_t* posting_list_buffer() const {
+ return posting_list_buffer_.get();
+ }
uint32_t size_in_bytes() const { return size_in_bytes_; }
+ bool is_dirty() const { return is_dirty_; }
+
private:
- explicit PostingListUsed(void* posting_list_buffer, uint32_t size_in_bytes)
- : posting_list_buffer_(static_cast<uint8_t*>(posting_list_buffer)),
- size_in_bytes_(size_in_bytes) {}
+ explicit PostingListUsed(std::unique_ptr<uint8_t[]> posting_list_buffer,
+ uint32_t size_in_bytes)
+ : posting_list_buffer_(std::move(posting_list_buffer)),
+ size_in_bytes_(size_in_bytes),
+ is_dirty_(false) {}
// A byte array of size size_in_bytes_ containing encoded data for this
// posting list.
- uint8_t* posting_list_buffer_; // does not own!
+ std::unique_ptr<uint8_t[]> posting_list_buffer_;
uint32_t size_in_bytes_;
+
+ bool is_dirty_;
};
} // namespace lib
diff --git a/icing/file/version-util.cc b/icing/file/version-util.cc
new file mode 100644
index 0000000..7684262
--- /dev/null
+++ b/icing/file/version-util.cc
@@ -0,0 +1,146 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/version-util.h"
+
+#include <cstdint>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/index.h"
+
+namespace icing {
+namespace lib {
+
+namespace version_util {
+
+libtextclassifier3::StatusOr<VersionInfo> ReadVersion(
+ const Filesystem& filesystem, const std::string& version_file_path,
+ const std::string& index_base_dir) {
+ // 1. Read the version info.
+ VersionInfo existing_version_info(-1, -1);
+ if (filesystem.FileExists(version_file_path.c_str()) &&
+ !filesystem.PRead(version_file_path.c_str(), &existing_version_info,
+ sizeof(VersionInfo), /*offset=*/0)) {
+ return absl_ports::InternalError("Fail to read version");
+ }
+
+ // 2. Check the Index magic to see if we're actually on version 0.
+ libtextclassifier3::StatusOr<int> existing_flash_index_magic_or =
+ Index::ReadFlashIndexMagic(&filesystem, index_base_dir);
+ if (!existing_flash_index_magic_or.ok()) {
+ if (absl_ports::IsNotFound(existing_flash_index_magic_or.status())) {
+ // Flash index magic doesn't exist. In this case, we're unable to
+ // determine the version change state correctly (regardless of the
+ // existence of the version file), so invalidate VersionInfo by setting
+ // version to -1, but still keep the max_version value read in step 1.
+ existing_version_info.version = -1;
+ return existing_version_info;
+ }
+ // Real error.
+ return std::move(existing_flash_index_magic_or).status();
+ }
+ if (existing_flash_index_magic_or.ValueOrDie() ==
+ kVersionZeroFlashIndexMagic) {
+ existing_version_info.version = 0;
+ if (existing_version_info.max_version == -1) {
+ existing_version_info.max_version = 0;
+ }
+ }
+
+ return existing_version_info;
+}
+
+libtextclassifier3::Status WriteVersion(const Filesystem& filesystem,
+ const std::string& version_file_path,
+ const VersionInfo& version_info) {
+ ScopedFd scoped_fd(filesystem.OpenForWrite(version_file_path.c_str()));
+ if (!scoped_fd.is_valid() ||
+ !filesystem.PWrite(scoped_fd.get(), /*offset=*/0, &version_info,
+ sizeof(VersionInfo)) ||
+ !filesystem.DataSync(scoped_fd.get())) {
+ return absl_ports::InternalError("Fail to write version");
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+StateChange GetVersionStateChange(const VersionInfo& existing_version_info,
+ int32_t curr_version) {
+ if (!existing_version_info.IsValid()) {
+ return StateChange::kUndetermined;
+ }
+
+ if (existing_version_info.version == 0) {
+ return (existing_version_info.max_version == existing_version_info.version)
+ ? StateChange::kVersionZeroUpgrade
+ : StateChange::kVersionZeroRollForward;
+ }
+
+ if (existing_version_info.version == curr_version) {
+ return StateChange::kCompatible;
+ } else if (existing_version_info.version > curr_version) {
+ return StateChange::kRollBack;
+ } else { // existing_version_info.version < curr_version
+ return (existing_version_info.max_version == existing_version_info.version)
+ ? StateChange::kUpgrade
+ : StateChange::kRollForward;
+ }
+}
+
+bool ShouldRebuildDerivedFiles(const VersionInfo& existing_version_info,
+ int32_t curr_version) {
+ StateChange state_change =
+ GetVersionStateChange(existing_version_info, curr_version);
+ switch (state_change) {
+ case StateChange::kCompatible:
+ return false;
+ case StateChange::kUndetermined:
+ [[fallthrough]];
+ case StateChange::kRollBack:
+ [[fallthrough]];
+ case StateChange::kRollForward:
+ [[fallthrough]];
+ case StateChange::kVersionZeroRollForward:
+ [[fallthrough]];
+ case StateChange::kVersionZeroUpgrade:
+ return true;
+ case StateChange::kUpgrade:
+ break;
+ }
+
+ bool should_rebuild = false;
+ int32_t existing_version = existing_version_info.version;
+ while (existing_version < curr_version) {
+ switch (existing_version) {
+ case 1: {
+ // version 1 -> version 2 upgrade, no need to rebuild
+ break;
+ }
+ default:
+ // This should not happen. Rebuild anyway if unsure.
+ should_rebuild |= true;
+ }
+ ++existing_version;
+ }
+ return should_rebuild;
+}
+
+} // namespace version_util
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/version-util.h b/icing/file/version-util.h
new file mode 100644
index 0000000..30c457d
--- /dev/null
+++ b/icing/file/version-util.h
@@ -0,0 +1,114 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_FILE_VERSION_UTIL_H_
+#define ICING_FILE_VERSION_UTIL_H_
+
+#include <cstdint>
+#include <string>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/filesystem.h"
+
+namespace icing {
+namespace lib {
+
+namespace version_util {
+
+// - Version 0: Android T. Can be identified only by flash index magic.
+// - Version 1: Android U release 2023-06.
+// - Version 2: Android U 1st mainline release 2023-09. Schema is compatible
+// with version 1.
+// TODO(b/288969109): bump kVersion to 2 before finalizing the 1st Android U
+// mainline release.
+// LINT.IfChange(kVersion)
+inline static constexpr int32_t kVersion = 2;
+// LINT.ThenChange(//depot/google3/icing/schema/schema-store.cc:min_overlay_version_compatibility)
+inline static constexpr int32_t kVersionOne = 1;
+inline static constexpr int32_t kVersionTwo = 2;
+
+inline static constexpr int kVersionZeroFlashIndexMagic = 0x6dfba6ae;
+
+struct VersionInfo {
+ int32_t version;
+ int32_t max_version;
+
+ explicit VersionInfo(int32_t version_in, int32_t max_version_in)
+ : version(version_in), max_version(max_version_in) {}
+
+ bool IsValid() const { return version >= 0 && max_version >= 0; }
+
+ bool operator==(const VersionInfo& other) const {
+ return version == other.version && max_version == other.max_version;
+ }
+} __attribute__((packed));
+static_assert(sizeof(VersionInfo) == 8, "");
+
+enum class StateChange {
+ kUndetermined,
+ kCompatible,
+ kRollForward,
+ kRollBack,
+ kUpgrade,
+ kVersionZeroUpgrade,
+ kVersionZeroRollForward,
+};
+
+// Helper method to read version info (using version file and flash index header
+// magic) from the existing data. If the state is invalid (e.g. flash index
+// header file is missing), then return an invalid VersionInfo.
+//
+// RETURNS:
+// - Existing data's VersionInfo on success
+// - INTERNAL_ERROR on I/O errors
+libtextclassifier3::StatusOr<VersionInfo> ReadVersion(
+ const Filesystem& filesystem, const std::string& version_file_path,
+ const std::string& index_base_dir);
+
+// Helper method to write version file.
+//
+// RETURNS:
+// - OK on success
+// - INTERNAL_ERROR on I/O errors
+libtextclassifier3::Status WriteVersion(const Filesystem& filesystem,
+ const std::string& version_file_path,
+ const VersionInfo& version_info);
+
+// Helper method to determine the change state between the existing data version
+// and the current code version.
+//
+// REQUIRES: curr_version > 0. We implement version checking in version 1, so
+// the callers (except unit tests) will always use a version # greater than 0.
+//
+// RETURNS: StateChange
+StateChange GetVersionStateChange(const VersionInfo& existing_version_info,
+ int32_t curr_version = kVersion);
+
+// Helper method to determine whether Icing should rebuild all derived files.
+// Sometimes it is not required to rebuild derived files when
+// roll-forward/upgrading. This function "encodes" upgrade paths and checks if
+// the roll-forward/upgrading requires derived files to be rebuilt or not.
+//
+// REQUIRES: curr_version > 0. We implement version checking in version 1, so
+// the callers (except unit tests) will always use a version # greater than 0.
+bool ShouldRebuildDerivedFiles(const VersionInfo& existing_version_info,
+ int32_t curr_version = kVersion);
+
+} // namespace version_util
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_VERSION_UTIL_H_
diff --git a/icing/file/version-util_test.cc b/icing/file/version-util_test.cc
new file mode 100644
index 0000000..e94c351
--- /dev/null
+++ b/icing/file/version-util_test.cc
@@ -0,0 +1,474 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/version-util.h"
+
+#include <optional>
+#include <string>
+#include <utility>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/posting_list/flash-index-storage-header.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+namespace version_util {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
+
+struct VersionUtilReadVersionTestParam {
+ std::optional<VersionInfo> existing_version_info;
+ std::optional<int> existing_flash_index_magic;
+ VersionInfo expected_version_info;
+
+ explicit VersionUtilReadVersionTestParam(
+ std::optional<VersionInfo> existing_version_info_in,
+ std::optional<int> existing_flash_index_magic_in,
+ VersionInfo expected_version_info_in)
+ : existing_version_info(std::move(existing_version_info_in)),
+ existing_flash_index_magic(std::move(existing_flash_index_magic_in)),
+ expected_version_info(std::move(expected_version_info_in)) {}
+};
+
+class VersionUtilReadVersionTest
+ : public ::testing::TestWithParam<VersionUtilReadVersionTestParam> {
+ protected:
+ void SetUp() override {
+ base_dir_ = GetTestTempDir() + "/version_util_test";
+ version_file_path_ = base_dir_ + "/version";
+ index_path_ = base_dir_ + "/index";
+
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()));
+ }
+
+ void TearDown() override {
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(base_dir_.c_str()));
+ }
+
+ const Filesystem& filesystem() const { return filesystem_; }
+
+ Filesystem filesystem_;
+ std::string base_dir_;
+ std::string version_file_path_;
+ std::string index_path_;
+};
+
+TEST_P(VersionUtilReadVersionTest, ReadVersion) {
+ const VersionUtilReadVersionTestParam& param = GetParam();
+
+ // Prepare version file and flash index file.
+ if (param.existing_version_info.has_value()) {
+ ICING_ASSERT_OK(WriteVersion(filesystem_, version_file_path_,
+ param.existing_version_info.value()));
+ }
+
+ if (param.existing_flash_index_magic.has_value()) {
+ HeaderBlock header_block(&filesystem_, /*block_size=*/4096);
+ header_block.header()->magic = param.existing_flash_index_magic.value();
+
+ std::string main_index_dir = index_path_ + "/idx/main";
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(main_index_dir.c_str()));
+ std::string flash_index_file_path = main_index_dir + "/main_index";
+
+ ScopedFd sfd(filesystem_.OpenForWrite(flash_index_file_path.c_str()));
+ ASSERT_TRUE(sfd.is_valid());
+ ASSERT_TRUE(header_block.Write(sfd.get()));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ VersionInfo version_info,
+ ReadVersion(filesystem_, version_file_path_, index_path_));
+ EXPECT_THAT(version_info, Eq(param.expected_version_info));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ VersionUtilReadVersionTest, VersionUtilReadVersionTest,
+ testing::Values(
+ // - Version file doesn't exist
+ // - Flash index doesn't exist
+ // - Result: version -1, max_version -1 (invalid)
+ VersionUtilReadVersionTestParam(
+ /*existing_version_info_in=*/std::nullopt,
+ /*existing_flash_index_magic_in=*/std::nullopt,
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/-1, /*max_version=*/-1)),
+
+ // - Version file doesn't exist
+ // - Flash index exists with version 0 magic
+ // - Result: version 0, max_version 0
+ VersionUtilReadVersionTestParam(
+ /*existing_version_info_in=*/std::nullopt,
+ /*existing_flash_index_magic_in=*/
+ std::make_optional<int>(kVersionZeroFlashIndexMagic),
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/0, /*max_version=*/0)),
+
+ // - Version file doesn't exist
+ // - Flash index exists with non version 0 magic
+ // - Result: version -1, max_version -1 (invalid)
+ VersionUtilReadVersionTestParam(
+ /*existing_version_info_in=*/std::nullopt,
+ /*existing_flash_index_magic_in=*/
+ std::make_optional<int>(kVersionZeroFlashIndexMagic + 1),
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/-1, /*max_version=*/-1)),
+
+ // - Version file exists
+ // - Flash index doesn't exist
+ // - Result: version -1, max_version 1 (invalid)
+ VersionUtilReadVersionTestParam(
+ /*existing_version_info_in=*/std::make_optional<VersionInfo>(
+ /*version_in=*/1, /*max_version=*/1),
+ /*existing_flash_index_magic_in=*/std::nullopt,
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/-1, /*max_version=*/1)),
+
+ // - Version file exists: version 1, max_version 1
+ // - Flash index exists with version 0 magic
+ // - Result: version 0, max_version 1
+ VersionUtilReadVersionTestParam(
+ /*existing_version_info_in=*/std::make_optional<VersionInfo>(
+ /*version_in=*/1, /*max_version=*/1),
+ /*existing_flash_index_magic_in=*/
+ std::make_optional<int>(kVersionZeroFlashIndexMagic),
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/0, /*max_version=*/1)),
+
+ // - Version file exists: version 2, max_version 3
+ // - Flash index exists with version 0 magic
+ // - Result: version 0, max_version 3
+ VersionUtilReadVersionTestParam(
+ /*existing_version_info_in=*/std::make_optional<VersionInfo>(
+ /*version_in=*/2, /*max_version=*/3),
+ /*existing_flash_index_magic_in=*/
+ std::make_optional<int>(kVersionZeroFlashIndexMagic),
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/0, /*max_version=*/3)),
+
+ // - Version file exists: version 1, max_version 1
+ // - Flash index exists with non version 0 magic
+ // - Result: version 1, max_version 1
+ VersionUtilReadVersionTestParam(
+ /*existing_version_info_in=*/std::make_optional<VersionInfo>(
+ /*version_in=*/1, /*max_version=*/1),
+ /*existing_flash_index_magic_in=*/
+ std::make_optional<int>(kVersionZeroFlashIndexMagic + 1),
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/1, /*max_version=*/1)),
+
+ // - Version file exists: version 2, max_version 3
+ // - Flash index exists with non version 0 magic
+ // - Result: version 2, max_version 3
+ VersionUtilReadVersionTestParam(
+ /*existing_version_info_in=*/std::make_optional<VersionInfo>(
+ /*version_in=*/2, /*max_version=*/3),
+ /*existing_flash_index_magic_in=*/
+ std::make_optional<int>(kVersionZeroFlashIndexMagic + 1),
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/2, /*max_version=*/3))));
+
+struct VersionUtilStateChangeTestParam {
+ VersionInfo existing_version_info;
+ int32_t curr_version;
+ StateChange expected_state_change;
+
+ explicit VersionUtilStateChangeTestParam(VersionInfo existing_version_info_in,
+ int32_t curr_version_in,
+ StateChange expected_state_change_in)
+ : existing_version_info(std::move(existing_version_info_in)),
+ curr_version(curr_version_in),
+ expected_state_change(expected_state_change_in) {}
+};
+
+class VersionUtilStateChangeTest
+ : public ::testing::TestWithParam<VersionUtilStateChangeTestParam> {};
+
+TEST_P(VersionUtilStateChangeTest, GetVersionStateChange) {
+ const VersionUtilStateChangeTestParam& param = GetParam();
+
+ EXPECT_THAT(
+ GetVersionStateChange(param.existing_version_info, param.curr_version),
+ Eq(param.expected_state_change));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ VersionUtilStateChangeTest, VersionUtilStateChangeTest,
+ testing::Values(
+ // - version -1, max_version -1 (invalid)
+ // - Current version = 1
+ // - Result: undetermined
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(-1, -1),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kUndetermined),
+
+ // - version -1, max_version 1 (invalid)
+ // - Current version = 1
+ // - Result: undetermined
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(-1, 1),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kUndetermined),
+
+ // - version -1, max_version -1 (invalid)
+ // - Current version = 2
+ // - Result: undetermined
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(-1, -1),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kUndetermined),
+
+ // - version -1, max_version 1 (invalid)
+ // - Current version = 2
+ // - Result: undetermined
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(-1, 1),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kUndetermined),
+
+ // - version 0, max_version 0
+ // - Current version = 1
+ // - Result: version 0 upgrade
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(0, 0),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kVersionZeroUpgrade),
+
+ // - version 0, max_version 1
+ // - Current version = 1
+ // - Result: version 0 roll forward
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(0, 1),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kVersionZeroRollForward),
+
+ // - version 0, max_version 2
+ // - Current version = 1
+ // - Result: version 0 roll forward
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(0, 2),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kVersionZeroRollForward),
+
+ // - version 0, max_version 0
+ // - Current version = 2
+ // - Result: version 0 upgrade
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(0, 0),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kVersionZeroUpgrade),
+
+ // - version 0, max_version 1
+ // - Current version = 2
+ // - Result: version 0 upgrade
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(0, 1),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kVersionZeroRollForward),
+
+ // - version 0, max_version 2
+ // - Current version = 2
+ // - Result: version 0 roll forward
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(0, 2),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kVersionZeroRollForward),
+
+ // - version 1, max_version 1
+ // - Current version = 1
+ // - Result: compatible
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(1, 1),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kCompatible),
+
+ // - version 1, max_version 2
+ // - Current version = 1
+ // - Result: compatible
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(1, 2),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kCompatible),
+
+ // - version 2, max_version 2
+ // - Current version = 1
+ // - Result: roll back
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(2, 2),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kRollBack),
+
+ // - version 2, max_version 3
+ // - Current version = 1
+ // - Result: roll back
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(2, 3),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kRollBack),
+
+ // - version 1, max_version 1
+ // - Current version = 2
+ // - Result: upgrade
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(1, 1),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kUpgrade),
+
+ // - version 1, max_version 2
+ // - Current version = 2
+ // - Result: roll forward
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(1, 2),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kRollForward),
+
+ // - version 1, max_version 2
+ // - Current version = 3
+ // - Result: roll forward
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(1, 2),
+ /*curr_version_in=*/3,
+ /*expected_state_change_in=*/StateChange::kRollForward),
+
+ // - version 1, max_version 3
+ // - Current version = 2
+ // - Result: roll forward
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(1, 3),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kRollForward),
+
+ // - version 2, max_version 2
+ // - Current version = 2
+ // - Result: compatible
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(2, 2),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kCompatible),
+
+ // - version 2, max_version 3
+ // - Current version = 2
+ // - Result: compatible
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(2, 3),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kCompatible),
+
+ // - version 3, max_version 3
+ // - Current version = 2
+ // - Result: rollback
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(3, 3),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kRollBack),
+
+ // - version 3, max_version 4
+ // - Current version = 2
+ // - Result: rollback
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(3, 4),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kRollBack)));
+
+TEST(VersionUtilTest, ShouldRebuildDerivedFilesUndeterminedVersion) {
+ EXPECT_THAT(
+ ShouldRebuildDerivedFiles(VersionInfo(-1, -1), /*curr_version=*/1),
+ IsTrue());
+ EXPECT_THAT(
+ ShouldRebuildDerivedFiles(VersionInfo(-1, -1), /*curr_version=*/2),
+ IsTrue());
+}
+
+TEST(VersionUtilTest, ShouldRebuildDerivedFilesVersionZeroUpgrade) {
+ // 0 -> 1
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(0, 0), /*curr_version=*/1),
+ IsTrue());
+
+ // 0 -> 2
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(0, 0), /*curr_version=*/2),
+ IsTrue());
+}
+
+TEST(VersionUtilTest, ShouldRebuildDerivedFilesVersionZeroRollForward) {
+ // (1 -> 0), 0 -> 1
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(0, 1), /*curr_version=*/1),
+ IsTrue());
+
+ // (1 -> 0), 0 -> 2
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(0, 1), /*curr_version=*/2),
+ IsTrue());
+
+ // (2 -> 0), 0 -> 1
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(0, 2), /*curr_version=*/1),
+ IsTrue());
+}
+
+TEST(VersionUtilTest, ShouldRebuildDerivedFilesRollBack) {
+ // 2 -> 1
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(2, 2), /*curr_version=*/1),
+ IsTrue());
+
+ // 3 -> 1
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(3, 3), /*curr_version=*/1),
+ IsTrue());
+
+ // (3 -> 2), 2 -> 1
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(2, 3), /*curr_version=*/1),
+ IsTrue());
+}
+
+TEST(VersionUtilTest, ShouldRebuildDerivedFilesRollForward) {
+ // (2 -> 1), 1 -> 2
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(1, 2), /*curr_version=*/2),
+ IsTrue());
+
+ // (2 -> 1), 1 -> 3
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(1, 2), /*curr_version=*/3),
+ IsTrue());
+
+ // (3 -> 1), 1 -> 2
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(1, 3), /*curr_version=*/2),
+ IsTrue());
+}
+
+TEST(VersionUtilTest, ShouldRebuildDerivedFilesCompatible) {
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(2, 2), /*curr_version=*/2),
+ IsFalse());
+
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(2, 3), /*curr_version=*/2),
+ IsFalse());
+}
+
+TEST(VersionUtilTest, ShouldRebuildDerivedFilesUpgrade) {
+ // Unlike other state changes, upgrade depends on the actual "encoded path".
+
+ // kVersionOne -> kVersionTwo
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(kVersionOne, kVersionOne),
+ /*curr_version=*/kVersionTwo),
+ IsFalse());
+}
+
+} // namespace
+
+} // namespace version_util
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc
index bf9c102..6680dae 100644
--- a/icing/icing-search-engine.cc
+++ b/icing/icing-search-engine.cc
@@ -32,12 +32,18 @@
#include "icing/file/destructible-file.h"
#include "icing/file/file-backed-proto.h"
#include "icing/file/filesystem.h"
+#include "icing/file/version-util.h"
+#include "icing/index/data-indexing-handler.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/index-processor.h"
#include "icing/index/index.h"
+#include "icing/index/integer-section-indexing-handler.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
-#include "icing/index/numeric/dummy-numeric-index.h"
+#include "icing/index/numeric/integer-index.h"
+#include "icing/index/string-section-indexing-handler.h"
#include "icing/join/join-processor.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/join/qualified-id-join-indexing-handler.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/portable/endian.h"
#include "icing/proto/debug.pb.h"
@@ -63,6 +69,7 @@
#include "icing/result/page-result.h"
#include "icing/result/projection-tree.h"
#include "icing/result/projector.h"
+#include "icing/result/result-adjustment-info.h"
#include "icing/result/result-retriever-v2.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/schema-util.h"
@@ -74,7 +81,6 @@
#include "icing/scoring/scoring-processor.h"
#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
-#include "icing/store/suggestion-result-checker-impl.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer-factory.h"
@@ -91,8 +97,12 @@ namespace lib {
namespace {
+constexpr std::string_view kVersionFilename = "version";
constexpr std::string_view kDocumentSubfolderName = "document_dir";
constexpr std::string_view kIndexSubfolderName = "index_dir";
+constexpr std::string_view kIntegerIndexSubfolderName = "integer_index_dir";
+constexpr std::string_view kQualifiedIdJoinIndexSubfolderName =
+ "qualified_id_join_index_dir";
constexpr std::string_view kSchemaSubfolderName = "schema_dir";
constexpr std::string_view kSetSchemaMarkerFilename = "set_schema_marker";
constexpr std::string_view kInitMarkerFilename = "init_marker";
@@ -131,6 +141,15 @@ libtextclassifier3::Status ValidateResultSpec(
"ResultSpecProto.num_total_bytes_per_page_threshold cannot be "
"non-positive.");
}
+ if (result_spec.max_joined_children_per_parent_to_return() < 0) {
+ return absl_ports::InvalidArgumentError(
+ "ResultSpecProto.max_joined_children_per_parent_to_return cannot be "
+ "negative.");
+ }
+ if (result_spec.num_to_score() <= 0) {
+ return absl_ports::InvalidArgumentError(
+ "ResultSpecProto.num_to_score cannot be non-positive.");
+ }
// Validate ResultGroupings.
std::unordered_set<int32_t> unique_entry_ids;
ResultSpecProto::ResultGroupingType result_grouping_type =
@@ -208,118 +227,10 @@ libtextclassifier3::Status ValidateSuggestionSpec(
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::StatusOr<
- std::unordered_map<NamespaceId, std::unordered_set<DocumentId>>>
-PopulateDocumentIdFilters(
- const DocumentStore* document_store,
- const icing::lib::SuggestionSpecProto& suggestion_spec,
- const std::unordered_set<NamespaceId>& namespace_ids) {
- std::unordered_map<NamespaceId, std::unordered_set<DocumentId>>
- document_id_filter_map;
- document_id_filter_map.reserve(suggestion_spec.document_uri_filters_size());
- for (const NamespaceDocumentUriGroup& namespace_document_uri_group :
- suggestion_spec.document_uri_filters()) {
- auto namespace_id_or = document_store->GetNamespaceId(
- namespace_document_uri_group.namespace_());
- if (!namespace_id_or.ok()) {
- // The current namespace doesn't exist.
- continue;
- }
- NamespaceId namespace_id = namespace_id_or.ValueOrDie();
- if (!namespace_ids.empty() &&
- namespace_ids.find(namespace_id) == namespace_ids.end()) {
- // The current namespace doesn't appear in the namespace filter.
- return absl_ports::InvalidArgumentError(absl_ports::StrCat(
- "The namespace : ", namespace_document_uri_group.namespace_(),
- " appears in the document uri filter, but doesn't appear in the "
- "namespace filter."));
- }
-
- if (namespace_document_uri_group.document_uris().empty()) {
- // Client should use namespace filter to filter out all document under
- // a namespace.
- return absl_ports::InvalidArgumentError(absl_ports::StrCat(
- "The namespace : ", namespace_document_uri_group.namespace_(),
- " has empty document uri in the document uri filter. Please use the "
- "namespace filter to exclude a namespace instead of the document uri "
- "filter."));
- }
-
- // Translate namespace document Uris into document_ids
- std::unordered_set<DocumentId> target_document_ids;
- target_document_ids.reserve(
- namespace_document_uri_group.document_uris_size());
- for (std::string_view document_uri :
- namespace_document_uri_group.document_uris()) {
- auto document_id_or = document_store->GetDocumentId(
- namespace_document_uri_group.namespace_(), document_uri);
- if (!document_id_or.ok()) {
- continue;
- }
- target_document_ids.insert(document_id_or.ValueOrDie());
- }
- document_id_filter_map.insert({namespace_id, target_document_ids});
- }
- return document_id_filter_map;
-}
-
-libtextclassifier3::StatusOr<std::unordered_map<SchemaTypeId, SectionIdMask>>
-PopulatePropertyFilters(
- const SchemaStore* schema_store,
- const icing::lib::SuggestionSpecProto& suggestion_spec,
- const std::unordered_set<SchemaTypeId>& schema_type_ids) {
- std::unordered_map<SchemaTypeId, SectionIdMask> property_filter_map;
- property_filter_map.reserve(suggestion_spec.type_property_filters_size());
- for (const TypePropertyMask& type_field_mask :
- suggestion_spec.type_property_filters()) {
- auto schema_type_id_or =
- schema_store->GetSchemaTypeId(type_field_mask.schema_type());
- if (!schema_type_id_or.ok()) {
- // The current schema doesn't exist
- continue;
- }
- SchemaTypeId schema_type_id = schema_type_id_or.ValueOrDie();
-
- if (!schema_type_ids.empty() &&
- schema_type_ids.find(schema_type_id) == schema_type_ids.end()) {
- // The current schema type doesn't appear in the schema type filter.
- return absl_ports::InvalidArgumentError(absl_ports::StrCat(
- "The schema : ", type_field_mask.schema_type(),
- " appears in the property filter, but doesn't appear in the schema"
- " type filter."));
- }
-
- if (type_field_mask.paths().empty()) {
- return absl_ports::InvalidArgumentError(absl_ports::StrCat(
- "The schema type : ", type_field_mask.schema_type(),
- " has empty path in the property filter. Please use the schema type"
- " filter to exclude a schema type instead of the property filter."));
- }
-
- // Translate property paths into section id mask
- SectionIdMask section_mask = kSectionIdMaskNone;
- auto section_metadata_list_or =
- schema_store->GetSectionMetadata(type_field_mask.schema_type());
- if (!section_metadata_list_or.ok()) {
- // The current schema doesn't has section metadata.
- continue;
- }
- std::unordered_set<std::string> target_property_paths;
- target_property_paths.reserve(type_field_mask.paths_size());
- for (const std::string& target_property_path : type_field_mask.paths()) {
- target_property_paths.insert(target_property_path);
- }
- const std::vector<SectionMetadata>* section_metadata_list =
- section_metadata_list_or.ValueOrDie();
- for (const SectionMetadata& section_metadata : *section_metadata_list) {
- if (target_property_paths.find(section_metadata.path) !=
- target_property_paths.end()) {
- section_mask |= UINT64_C(1) << section_metadata.id;
- }
- }
- property_filter_map.insert({schema_type_id, section_mask});
- }
- return property_filter_map;
+// Version file is a single file under base_dir containing version info of the
+// existing data.
+std::string MakeVersionFilePath(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", kVersionFilename);
}
// Document store files are in a standalone subfolder for easier file
@@ -343,6 +254,22 @@ std::string MakeIndexDirectoryPath(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kIndexSubfolderName);
}
+// Working path for integer index. Integer index is derived from
+// PersistentStorage and it will take full ownership of this working path,
+// including creation/deletion. See PersistentStorage for more details about
+// working path.
+std::string MakeIntegerIndexWorkingPath(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", kIntegerIndexSubfolderName);
+}
+
+// Working path for qualified id join index. It is derived from
+// PersistentStorage and it will take full ownership of this working path,
+// including creation/deletion. See PersistentStorage for more details about
+// working path.
+std::string MakeQualifiedIdJoinIndexWorkingPath(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", kQualifiedIdJoinIndexSubfolderName);
+}
+
// SchemaStore files are in a standalone subfolder for easier file management.
// We can delete and recreate the subfolder and not touch/affect anything
// else.
@@ -450,15 +377,12 @@ libtextclassifier3::Status RetrieveAndAddDocumentInfo(
return libtextclassifier3::Status::OK;
}
-bool ShouldRebuildIndex(const OptimizeStatsProto& optimize_stats) {
+bool ShouldRebuildIndex(const OptimizeStatsProto& optimize_stats,
+ float optimize_rebuild_index_threshold) {
int num_invalid_documents = optimize_stats.num_deleted_documents() +
optimize_stats.num_expired_documents();
- // Rebuilding the index could be faster than optimizing the index if we have
- // removed most of the documents.
- // Based on benchmarks, 85%~95% seems to be a good threshold for most cases.
- // TODO(b/238236206): Try using the number of remaining hits in this
- // condition, and allow clients to configure the threshold.
- return num_invalid_documents >= optimize_stats.num_original_documents() * 0.9;
+ return num_invalid_documents >= optimize_stats.num_original_documents() *
+ optimize_rebuild_index_threshold;
}
// Useful method to get RankingStrategy if advanced scoring is enabled. When the
@@ -531,6 +455,7 @@ void IcingSearchEngine::ResetMembers() {
normalizer_.reset();
index_.reset();
integer_index_.reset();
+ qualified_id_join_index_.reset();
}
libtextclassifier3::Status IcingSearchEngine::CheckInitMarkerFile(
@@ -546,20 +471,34 @@ libtextclassifier3::Status IcingSearchEngine::CheckInitMarkerFile(
// fails, then just assume the value is zero (the most likely reason for
// failure would be non-existence because the last init was successful
// anyways).
- ScopedFd marker_file_fd(filesystem_->OpenForWrite(marker_filepath.c_str()));
+ std::unique_ptr<ScopedFd> marker_file_fd = std::make_unique<ScopedFd>(
+ filesystem_->OpenForWrite(marker_filepath.c_str()));
libtextclassifier3::Status status;
if (file_exists &&
- filesystem_->PRead(marker_file_fd.get(), &network_init_attempts,
+ filesystem_->PRead(marker_file_fd->get(), &network_init_attempts,
sizeof(network_init_attempts), /*offset=*/0)) {
host_init_attempts = GNetworkToHostL(network_init_attempts);
if (host_init_attempts > kMaxUnsuccessfulInitAttempts) {
// We're tried and failed to init too many times. We need to throw
// everything out and start from scratch.
ResetMembers();
+ marker_file_fd.reset();
+
+ // Delete the entire base directory.
if (!filesystem_->DeleteDirectoryRecursively(
options_.base_dir().c_str())) {
return absl_ports::InternalError("Failed to delete icing base dir!");
}
+
+ // Create the base directory again and reopen marker file.
+ if (!filesystem_->CreateDirectoryRecursively(
+ options_.base_dir().c_str())) {
+ return absl_ports::InternalError("Failed to create icing base dir!");
+ }
+
+ marker_file_fd = std::make_unique<ScopedFd>(
+ filesystem_->OpenForWrite(marker_filepath.c_str()));
+
status = absl_ports::DataLossError(
"Encountered failed initialization limit. Cleared all data.");
host_init_attempts = 0;
@@ -574,10 +513,10 @@ libtextclassifier3::Status IcingSearchEngine::CheckInitMarkerFile(
++host_init_attempts;
network_init_attempts = GHostToNetworkL(host_init_attempts);
// Write the updated number of attempts before we get started.
- if (!filesystem_->PWrite(marker_file_fd.get(), /*offset=*/0,
+ if (!filesystem_->PWrite(marker_file_fd->get(), /*offset=*/0,
&network_init_attempts,
sizeof(network_init_attempts)) ||
- !filesystem_->DataSync(marker_file_fd.get())) {
+ !filesystem_->DataSync(marker_file_fd->get())) {
return absl_ports::InternalError(
"Failed to write and sync init marker file");
}
@@ -639,6 +578,33 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
return status;
}
+ // Read version file and determine the state change.
+ const std::string version_filepath = MakeVersionFilePath(options_.base_dir());
+ const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir());
+ ICING_ASSIGN_OR_RETURN(
+ version_util::VersionInfo version_info,
+ version_util::ReadVersion(*filesystem_, version_filepath, index_dir));
+ version_util::StateChange version_state_change =
+ version_util::GetVersionStateChange(version_info);
+ if (version_state_change != version_util::StateChange::kCompatible) {
+ // Step 1: migrate schema according to the version state change.
+ ICING_RETURN_IF_ERROR(SchemaStore::MigrateSchema(
+ filesystem_.get(), MakeSchemaDirectoryPath(options_.base_dir()),
+ version_state_change, version_util::kVersion));
+
+ // Step 2: discard all derived data if needed rebuild.
+ if (version_util::ShouldRebuildDerivedFiles(version_info)) {
+ ICING_RETURN_IF_ERROR(DiscardDerivedFiles());
+ }
+
+ // Step 3: update version file
+ version_util::VersionInfo new_version_info(
+ version_util::kVersion,
+ std::max(version_info.max_version, version_util::kVersion));
+ ICING_RETURN_IF_ERROR(version_util::WriteVersion(
+ *filesystem_, version_filepath, new_version_info));
+ }
+
ICING_RETURN_IF_ERROR(InitializeSchemaStore(initialize_stats));
// TODO(b/156383798) : Resolve how to specify the locale.
@@ -653,21 +619,25 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
std::string marker_filepath =
MakeSetSchemaMarkerFilePath(options_.base_dir());
- // TODO(b/249829533): switch to use persistent numeric index after
- // implementing and initialize numeric index.
- integer_index_ = std::make_unique<DummyNumericIndex<int64_t>>();
-
libtextclassifier3::Status index_init_status;
if (absl_ports::IsNotFound(schema_store_->GetSchema().status())) {
// The schema was either lost or never set before. Wipe out the doc store
// and index directories and initialize them from scratch.
const std::string doc_store_dir =
MakeDocumentDirectoryPath(options_.base_dir());
- const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir());
+ const std::string integer_index_dir =
+ MakeIntegerIndexWorkingPath(options_.base_dir());
+ const std::string qualified_id_join_index_dir =
+ MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir());
if (!filesystem_->DeleteDirectoryRecursively(doc_store_dir.c_str()) ||
- !filesystem_->DeleteDirectoryRecursively(index_dir.c_str())) {
+ !filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) ||
+ !IntegerIndex::Discard(*filesystem_, integer_index_dir).ok() ||
+ !QualifiedIdJoinIndex::Discard(*filesystem_,
+ qualified_id_join_index_dir)
+ .ok()) {
return absl_ports::InternalError(absl_ports::StrCat(
- "Could not delete directories: ", index_dir, " and ", doc_store_dir));
+ "Could not delete directories: ", index_dir, ", ", integer_index_dir,
+ ", ", qualified_id_join_index_dir, " and ", doc_store_dir));
}
ICING_RETURN_IF_ERROR(InitializeDocumentStore(
/*force_recovery_and_revalidate_documents=*/false, initialize_stats));
@@ -682,9 +652,11 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
/*force_recovery_and_revalidate_documents=*/true, initialize_stats));
// We're going to need to build the index from scratch. So just delete its
- // files now.
- const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir());
- Index::Options index_options(index_dir, options_.index_merge_size());
+ // directory now.
+ // Discard index directory and instantiate a new one.
+ Index::Options index_options(index_dir, options_.index_merge_size(),
+ options_.lite_index_sort_at_indexing(),
+ options_.lite_index_sort_size());
if (!filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) ||
!filesystem_->CreateDirectoryRecursively(index_dir.c_str())) {
return absl_ports::InternalError(
@@ -694,6 +666,28 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
Index::Create(index_options, filesystem_.get(),
icing_filesystem_.get()));
+ // Discard integer index directory and instantiate a new one.
+ std::string integer_index_dir =
+ MakeIntegerIndexWorkingPath(options_.base_dir());
+ ICING_RETURN_IF_ERROR(
+ IntegerIndex::Discard(*filesystem_, integer_index_dir));
+ ICING_ASSIGN_OR_RETURN(
+ integer_index_,
+ IntegerIndex::Create(*filesystem_, std::move(integer_index_dir),
+ options_.integer_index_bucket_split_threshold(),
+ options_.pre_mapping_fbv()));
+
+ // Discard qualified id join index directory and instantiate a new one.
+ std::string qualified_id_join_index_dir =
+ MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir());
+ ICING_RETURN_IF_ERROR(QualifiedIdJoinIndex::Discard(
+ *filesystem_, qualified_id_join_index_dir));
+ ICING_ASSIGN_OR_RETURN(
+ qualified_id_join_index_,
+ QualifiedIdJoinIndex::Create(
+ *filesystem_, std::move(qualified_id_join_index_dir),
+ options_.pre_mapping_fbv(), options_.use_persistent_hash_map()));
+
std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer();
IndexRestorationResult restore_result = RestoreIndexIfNeeded();
index_init_status = std::move(restore_result.status);
@@ -712,6 +706,28 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
restore_timer->GetElapsedMilliseconds());
initialize_stats->set_index_restoration_cause(
InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC);
+ initialize_stats->set_integer_index_restoration_cause(
+ InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC);
+ initialize_stats->set_qualified_id_join_index_restoration_cause(
+ InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC);
+ } else if (version_state_change != version_util::StateChange::kCompatible) {
+ ICING_RETURN_IF_ERROR(InitializeDocumentStore(
+ /*force_recovery_and_revalidate_documents=*/true, initialize_stats));
+ index_init_status = InitializeIndex(initialize_stats);
+ if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) {
+ return index_init_status;
+ }
+
+ initialize_stats->set_schema_store_recovery_cause(
+ InitializeStatsProto::VERSION_CHANGED);
+ initialize_stats->set_document_store_recovery_cause(
+ InitializeStatsProto::VERSION_CHANGED);
+ initialize_stats->set_index_restoration_cause(
+ InitializeStatsProto::VERSION_CHANGED);
+ initialize_stats->set_integer_index_restoration_cause(
+ InitializeStatsProto::VERSION_CHANGED);
+ initialize_stats->set_qualified_id_join_index_restoration_cause(
+ InitializeStatsProto::VERSION_CHANGED);
} else {
ICING_RETURN_IF_ERROR(InitializeDocumentStore(
/*force_recovery_and_revalidate_documents=*/false, initialize_stats));
@@ -726,8 +742,7 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
}
result_state_manager_ = std::make_unique<ResultStateManager>(
- performance_configuration_.max_num_total_hits, *document_store_,
- clock_.get());
+ performance_configuration_.max_num_total_hits, *document_store_);
return status;
}
@@ -766,7 +781,10 @@ libtextclassifier3::Status IcingSearchEngine::InitializeDocumentStore(
DocumentStore::CreateResult create_result,
DocumentStore::Create(
filesystem_.get(), document_dir, clock_.get(), schema_store_.get(),
- force_recovery_and_revalidate_documents, initialize_stats));
+ force_recovery_and_revalidate_documents,
+ options_.document_store_namespace_id_fingerprint(),
+ options_.pre_mapping_fbv(), options_.use_persistent_hash_map(),
+ options_.compression_level(), initialize_stats));
document_store_ = std::move(create_result.document_store);
return libtextclassifier3::Status::OK;
@@ -782,9 +800,12 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
return absl_ports::InternalError(
absl_ports::StrCat("Could not create directory: ", index_dir));
}
- Index::Options index_options(index_dir, options_.index_merge_size());
+ Index::Options index_options(index_dir, options_.index_merge_size(),
+ options_.lite_index_sort_at_indexing(),
+ options_.lite_index_sort_size());
- InitializeStatsProto::RecoveryCause recovery_cause;
+ // Term index
+ InitializeStatsProto::RecoveryCause index_recovery_cause;
auto index_or =
Index::Create(index_options, filesystem_.get(), icing_filesystem_.get());
if (!index_or.ok()) {
@@ -794,7 +815,7 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
absl_ports::StrCat("Could not recreate directory: ", index_dir));
}
- recovery_cause = InitializeStatsProto::IO_ERROR;
+ index_recovery_cause = InitializeStatsProto::IO_ERROR;
// Try recreating it from scratch and re-indexing everything.
ICING_ASSIGN_OR_RETURN(index_,
@@ -805,15 +826,86 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
index_ = std::move(index_or).ValueOrDie();
// If a recover does have to happen, then it must be because the index is
// out of sync with the document store.
- recovery_cause = InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH;
+ index_recovery_cause = InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH;
+ }
+
+ // Integer index
+ std::string integer_index_dir =
+ MakeIntegerIndexWorkingPath(options_.base_dir());
+ InitializeStatsProto::RecoveryCause integer_index_recovery_cause;
+ auto integer_index_or =
+ IntegerIndex::Create(*filesystem_, integer_index_dir,
+ options_.integer_index_bucket_split_threshold(),
+ options_.pre_mapping_fbv());
+ if (!integer_index_or.ok()) {
+ ICING_RETURN_IF_ERROR(
+ IntegerIndex::Discard(*filesystem_, integer_index_dir));
+
+ integer_index_recovery_cause = InitializeStatsProto::IO_ERROR;
+
+ // Try recreating it from scratch and re-indexing everything.
+ ICING_ASSIGN_OR_RETURN(
+ integer_index_,
+ IntegerIndex::Create(*filesystem_, std::move(integer_index_dir),
+ options_.integer_index_bucket_split_threshold(),
+ options_.pre_mapping_fbv()));
+ } else {
+ // Integer index was created fine.
+ integer_index_ = std::move(integer_index_or).ValueOrDie();
+ // If a recover does have to happen, then it must be because the index is
+ // out of sync with the document store.
+ integer_index_recovery_cause =
+ InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH;
+ }
+
+ // Qualified id join index
+ std::string qualified_id_join_index_dir =
+ MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir());
+ InitializeStatsProto::RecoveryCause qualified_id_join_index_recovery_cause;
+ auto qualified_id_join_index_or = QualifiedIdJoinIndex::Create(
+ *filesystem_, qualified_id_join_index_dir, options_.pre_mapping_fbv(),
+ options_.use_persistent_hash_map());
+ if (!qualified_id_join_index_or.ok()) {
+ ICING_RETURN_IF_ERROR(QualifiedIdJoinIndex::Discard(
+ *filesystem_, qualified_id_join_index_dir));
+
+ qualified_id_join_index_recovery_cause = InitializeStatsProto::IO_ERROR;
+
+ // Try recreating it from scratch and rebuild everything.
+ ICING_ASSIGN_OR_RETURN(
+ qualified_id_join_index_,
+ QualifiedIdJoinIndex::Create(
+ *filesystem_, std::move(qualified_id_join_index_dir),
+ options_.pre_mapping_fbv(), options_.use_persistent_hash_map()));
+ } else {
+ // Qualified id join index was created fine.
+ qualified_id_join_index_ =
+ std::move(qualified_id_join_index_or).ValueOrDie();
+ // If a recover does have to happen, then it must be because the index is
+ // out of sync with the document store.
+ qualified_id_join_index_recovery_cause =
+ InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH;
}
std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer();
IndexRestorationResult restore_result = RestoreIndexIfNeeded();
- if (restore_result.needed_restoration) {
+ if (restore_result.index_needed_restoration ||
+ restore_result.integer_index_needed_restoration ||
+ restore_result.qualified_id_join_index_needed_restoration) {
initialize_stats->set_index_restoration_latency_ms(
restore_timer->GetElapsedMilliseconds());
- initialize_stats->set_index_restoration_cause(recovery_cause);
+
+ if (restore_result.index_needed_restoration) {
+ initialize_stats->set_index_restoration_cause(index_recovery_cause);
+ }
+ if (restore_result.integer_index_needed_restoration) {
+ initialize_stats->set_integer_index_restoration_cause(
+ integer_index_recovery_cause);
+ }
+ if (restore_result.qualified_id_join_index_needed_restoration) {
+ initialize_stats->set_qualified_id_join_index_restoration_cause(
+ qualified_id_join_index_recovery_cause);
+ }
}
return restore_result.status;
}
@@ -857,7 +949,8 @@ SetSchemaResultProto IcingSearchEngine::SetSchema(
DestructibleFile marker_file(marker_filepath, filesystem_.get());
auto set_schema_result_or = schema_store_->SetSchema(
- std::move(new_schema), ignore_errors_and_delete_documents);
+ std::move(new_schema), ignore_errors_and_delete_documents,
+ options_.allow_circular_schema_definitions());
if (!set_schema_result_or.ok()) {
TransformStatus(set_schema_result_or.status(), result_status);
return result_proto;
@@ -894,6 +987,14 @@ SetSchemaResultProto IcingSearchEngine::SetSchema(
std::move(index_incompatible_type));
}
+ bool join_incompatible =
+ !set_schema_result.schema_types_join_incompatible_by_name.empty();
+ for (const std::string& join_incompatible_type :
+ set_schema_result.schema_types_join_incompatible_by_name) {
+ result_proto.add_join_incompatible_changed_schema_types(
+ std::move(join_incompatible_type));
+ }
+
libtextclassifier3::Status status;
if (set_schema_result.success) {
if (lost_previous_schema) {
@@ -916,19 +1017,24 @@ SetSchemaResultProto IcingSearchEngine::SetSchema(
}
if (lost_previous_schema || index_incompatible) {
- // Clears all index files
- status = index_->Reset();
+ // Clears search indices
+ status = ClearSearchIndices();
if (!status.ok()) {
TransformStatus(status, result_status);
return result_proto;
}
+ }
- status = integer_index_->Reset();
+ if (lost_previous_schema || join_incompatible) {
+ // Clears join indices
+ status = ClearJoinIndices();
if (!status.ok()) {
TransformStatus(status, result_status);
return result_proto;
}
+ }
+ if (lost_previous_schema || index_incompatible || join_incompatible) {
IndexRestorationResult restore_result = RestoreIndexIfNeeded();
// DATA_LOSS means that we have successfully re-added content to the
// index. Some indexed content was lost, but otherwise the index is in a
@@ -1037,24 +1143,23 @@ PutResultProto IcingSearchEngine::Put(DocumentProto&& document) {
}
DocumentId document_id = document_id_or.ValueOrDie();
- auto index_processor_or = IndexProcessor::Create(
- normalizer_.get(), index_.get(), integer_index_.get(), clock_.get());
- if (!index_processor_or.ok()) {
- TransformStatus(index_processor_or.status(), result_status);
+ auto data_indexing_handlers_or = CreateDataIndexingHandlers();
+ if (!data_indexing_handlers_or.ok()) {
+ TransformStatus(data_indexing_handlers_or.status(), result_status);
return result_proto;
}
- std::unique_ptr<IndexProcessor> index_processor =
- std::move(index_processor_or).ValueOrDie();
+ IndexProcessor index_processor(
+ std::move(data_indexing_handlers_or).ValueOrDie(), clock_.get());
- auto index_status = index_processor->IndexDocument(
+ auto index_status = index_processor.IndexDocument(
tokenized_document, document_id, put_document_stats);
// Getting an internal error from the index could possibly mean that the index
- // is broken. Try to rebuild the index to recover.
+ // is broken. Try to rebuild them to recover.
if (absl_ports::IsInternal(index_status)) {
ICING_LOG(ERROR) << "Got an internal error from the index. Trying to "
"rebuild the index!\n"
<< index_status.error_message();
- index_status = index_->Reset();
+ index_status = ClearAllIndices();
if (index_status.ok()) {
index_status = RestoreIndexIfNeeded().status;
if (!index_status.ok()) {
@@ -1062,16 +1167,17 @@ PutResultProto IcingSearchEngine::Put(DocumentProto&& document) {
"indexing a document.";
}
} else {
- ICING_LOG(ERROR) << "Failed to reset the index after a failure of "
- "indexing a document.";
+ ICING_LOG(ERROR)
+ << "Failed to clear indices after a failure of indexing a document.";
}
}
if (!index_status.ok()) {
// If we encountered a failure or cannot resolve an internal error while
// indexing this document, then mark it as deleted.
+ int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
libtextclassifier3::Status delete_status =
- document_store_->Delete(document_id);
+ document_store_->Delete(document_id, current_time_ms);
if (!delete_status.ok()) {
// This is pretty dire (and, hopefully, unlikely). We can't roll back the
// document that we just added. Wipeout the whole index.
@@ -1107,12 +1213,13 @@ GetResultProto IcingSearchEngine::Get(const std::string_view name_space,
DocumentProto document = std::move(document_or).ValueOrDie();
std::unique_ptr<ProjectionTree> type_projection_tree;
std::unique_ptr<ProjectionTree> wildcard_projection_tree;
- for (const TypePropertyMask& type_field_mask :
- result_spec.type_property_masks()) {
- if (type_field_mask.schema_type() == document.schema()) {
+ for (const SchemaStore::ExpandedTypePropertyMask& type_field_mask :
+ schema_store_->ExpandTypePropertyMasks(
+ result_spec.type_property_masks())) {
+ if (type_field_mask.schema_type == document.schema()) {
type_projection_tree = std::make_unique<ProjectionTree>(type_field_mask);
- } else if (type_field_mask.schema_type() ==
- ProjectionTree::kSchemaTypeWildcard) {
+ } else if (type_field_mask.schema_type ==
+ SchemaStore::kSchemaTypeWildcard) {
wildcard_projection_tree =
std::make_unique<ProjectionTree>(type_field_mask);
}
@@ -1189,7 +1296,9 @@ DeleteResultProto IcingSearchEngine::Delete(const std::string_view name_space,
std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
// TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
// that can support error logging.
- libtextclassifier3::Status status = document_store_->Delete(name_space, uri);
+ int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
+ libtextclassifier3::Status status =
+ document_store_->Delete(name_space, uri, current_time_ms);
if (!status.ok()) {
LogSeverity::Code severity = ERROR;
if (absl_ports::IsNotFound(status)) {
@@ -1323,8 +1432,9 @@ DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery(
std::unique_ptr<QueryProcessor> query_processor =
std::move(query_processor_or).ValueOrDie();
+ int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
auto query_results_or = query_processor->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::NONE);
+ search_spec, ScoringSpecProto::RankingStrategy::NONE, current_time_ms);
if (!query_results_or.ok()) {
TransformStatus(query_results_or.status(), result_status);
delete_stats->set_parse_query_latency_ms(
@@ -1361,7 +1471,8 @@ DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery(
}
}
status = document_store_->Delete(
- query_results.root_iterator->doc_hit_info().document_id());
+ query_results.root_iterator->doc_hit_info().document_id(),
+ current_time_ms);
if (!status.ok()) {
TransformStatus(status, result_status);
delete_stats->set_document_removal_latency_ms(
@@ -1464,7 +1575,9 @@ OptimizeResultProto IcingSearchEngine::Optimize() {
// guaranteed to work, so we update index according to the new document store.
std::unique_ptr<Timer> optimize_index_timer = clock_->GetNewTimer();
bool should_rebuild_index =
- !document_id_old_to_new_or.ok() || ShouldRebuildIndex(*optimize_stats);
+ !document_id_old_to_new_or.ok() ||
+ ShouldRebuildIndex(*optimize_stats,
+ options_.optimize_rebuild_index_threshold());
if (!should_rebuild_index) {
optimize_stats->set_index_restoration_mode(
OptimizeStatsProto::INDEX_TRANSLATION);
@@ -1476,10 +1589,30 @@ OptimizeResultProto IcingSearchEngine::Optimize() {
<< index_optimize_status.error_message();
should_rebuild_index = true;
}
+
+ libtextclassifier3::Status integer_index_optimize_status =
+ integer_index_->Optimize(document_id_old_to_new_or.ValueOrDie(),
+ document_store_->last_added_document_id());
+ if (!integer_index_optimize_status.ok()) {
+ ICING_LOG(WARNING) << "Failed to optimize integer index. Error: "
+ << integer_index_optimize_status.error_message();
+ should_rebuild_index = true;
+ }
+
+ libtextclassifier3::Status qualified_id_join_index_optimize_status =
+ qualified_id_join_index_->Optimize(
+ document_id_old_to_new_or.ValueOrDie(),
+ document_store_->last_added_document_id());
+ if (!qualified_id_join_index_optimize_status.ok()) {
+ ICING_LOG(WARNING)
+ << "Failed to optimize qualified id join index. Error: "
+ << qualified_id_join_index_optimize_status.error_message();
+ should_rebuild_index = true;
+ }
}
// If we received a DATA_LOSS error from OptimizeDocumentStore, we have a
// valid document store, but it might be the old one or the new one. So throw
- // out the index and rebuild from scratch.
+ // out the index data and rebuild from scratch.
// Likewise, if Index::Optimize failed, then attempt to recover the index by
// rebuilding from scratch.
// If ShouldRebuildIndex() returns true, we will also rebuild the index for
@@ -1487,26 +1620,13 @@ OptimizeResultProto IcingSearchEngine::Optimize() {
if (should_rebuild_index) {
optimize_stats->set_index_restoration_mode(
OptimizeStatsProto::FULL_INDEX_REBUILD);
- ICING_LOG(WARNING) << "Resetting the entire index!";
+ ICING_LOG(WARNING) << "Clearing the entire index!";
- // Reset string index
- libtextclassifier3::Status index_reset_status = index_->Reset();
- if (!index_reset_status.ok()) {
+ libtextclassifier3::Status index_clear_status = ClearAllIndices();
+ if (!index_clear_status.ok()) {
status = absl_ports::Annotate(
- absl_ports::InternalError("Failed to reset index."),
- index_reset_status.error_message());
- TransformStatus(status, result_status);
- optimize_stats->set_index_restoration_latency_ms(
- optimize_index_timer->GetElapsedMilliseconds());
- return result_proto;
- }
-
- // Reset integer index
- index_reset_status = integer_index_->Reset();
- if (!index_reset_status.ok()) {
- status = absl_ports::Annotate(
- absl_ports::InternalError("Failed to reset integer index."),
- index_reset_status.error_message());
+ absl_ports::InternalError("Failed to clear index."),
+ index_clear_status.error_message());
TransformStatus(status, result_status);
optimize_stats->set_index_restoration_latency_ms(
optimize_index_timer->GetElapsedMilliseconds());
@@ -1709,6 +1829,7 @@ libtextclassifier3::Status IcingSearchEngine::InternalPersistToDisk(
ICING_RETURN_IF_ERROR(document_store_->PersistToDisk(PersistType::FULL));
ICING_RETURN_IF_ERROR(index_->PersistToDisk());
ICING_RETURN_IF_ERROR(integer_index_->PersistToDisk());
+ ICING_RETURN_IF_ERROR(qualified_id_join_index_->PersistToDisk());
return libtextclassifier3::Status::OK;
}
@@ -1716,18 +1837,61 @@ libtextclassifier3::Status IcingSearchEngine::InternalPersistToDisk(
SearchResultProto IcingSearchEngine::Search(
const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
const ResultSpecProto& result_spec) {
+ if (search_spec.use_read_only_search()) {
+ return SearchLockedShared(search_spec, scoring_spec, result_spec);
+ } else {
+ return SearchLockedExclusive(search_spec, scoring_spec, result_spec);
+ }
+}
+
+SearchResultProto IcingSearchEngine::SearchLockedShared(
+ const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
+ const ResultSpecProto& result_spec) {
+ std::unique_ptr<Timer> overall_timer = clock_->GetNewTimer();
+
+ // Only acquire an overall read-lock for this implementation. Finer-grained
+ // locks are implemented around code paths that write changes to Icing's data
+ // members.
+ absl_ports::shared_lock l(&mutex_);
+ int64_t lock_acquisition_latency = overall_timer->GetElapsedMilliseconds();
+
+ SearchResultProto result_proto =
+ InternalSearch(search_spec, scoring_spec, result_spec);
+
+ result_proto.mutable_query_stats()->set_lock_acquisition_latency_ms(
+ lock_acquisition_latency);
+ result_proto.mutable_query_stats()->set_latency_ms(
+ overall_timer->GetElapsedMilliseconds());
+ return result_proto;
+}
+
+SearchResultProto IcingSearchEngine::SearchLockedExclusive(
+ const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
+ const ResultSpecProto& result_spec) {
+ std::unique_ptr<Timer> overall_timer = clock_->GetNewTimer();
+
+ // Acquire the overall write-lock for this locked implementation.
+ absl_ports::unique_lock l(&mutex_);
+ int64_t lock_acquisition_latency = overall_timer->GetElapsedMilliseconds();
+
+ SearchResultProto result_proto =
+ InternalSearch(search_spec, scoring_spec, result_spec);
+
+ result_proto.mutable_query_stats()->set_lock_acquisition_latency_ms(
+ lock_acquisition_latency);
+ result_proto.mutable_query_stats()->set_latency_ms(
+ overall_timer->GetElapsedMilliseconds());
+ return result_proto;
+}
+
+SearchResultProto IcingSearchEngine::InternalSearch(
+ const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
+ const ResultSpecProto& result_spec) {
SearchResultProto result_proto;
StatusProto* result_status = result_proto.mutable_status();
QueryStatsProto* query_stats = result_proto.mutable_query_stats();
query_stats->set_query_length(search_spec.query().length());
- ScopedTimer overall_timer(clock_->GetNewTimer(), [query_stats](int64_t t) {
- query_stats->set_latency_ms(t);
- });
- // TODO(b/146008613) Explore ideas to make this function read-only.
- absl_ports::unique_lock l(&mutex_);
- query_stats->set_lock_acquisition_latency_ms(
- overall_timer.timer().GetElapsedMilliseconds());
if (!initialized_) {
result_status->set_code(StatusProto::FAILED_PRECONDITION);
result_status->set_message("IcingSearchEngine has not been initialized!");
@@ -1754,9 +1918,52 @@ SearchResultProto IcingSearchEngine::Search(
query_stats->set_is_first_page(true);
query_stats->set_requested_page_size(result_spec.num_per_page());
- // Process query and score
+ const JoinSpecProto& join_spec = search_spec.join_spec();
+ std::unique_ptr<JoinChildrenFetcher> join_children_fetcher;
+ std::unique_ptr<ResultAdjustmentInfo> child_result_adjustment_info;
+ int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
+ if (!join_spec.parent_property_expression().empty() &&
+ !join_spec.child_property_expression().empty()) {
+ // Process child query
+ QueryScoringResults nested_query_scoring_results = ProcessQueryAndScore(
+ join_spec.nested_spec().search_spec(),
+ join_spec.nested_spec().scoring_spec(),
+ join_spec.nested_spec().result_spec(),
+ /*join_children_fetcher=*/nullptr, current_time_ms);
+ // TOOD(b/256022027): set different kinds of latency for 2nd query.
+ if (!nested_query_scoring_results.status.ok()) {
+ TransformStatus(nested_query_scoring_results.status, result_status);
+ return result_proto;
+ }
+
+ JoinProcessor join_processor(document_store_.get(), schema_store_.get(),
+ qualified_id_join_index_.get(),
+ current_time_ms);
+ // Building a JoinChildrenFetcher where child documents are grouped by
+ // their joinable values.
+ libtextclassifier3::StatusOr<JoinChildrenFetcher> join_children_fetcher_or =
+ join_processor.GetChildrenFetcher(
+ search_spec.join_spec(),
+ std::move(nested_query_scoring_results.scored_document_hits));
+ if (!join_children_fetcher_or.ok()) {
+ TransformStatus(join_children_fetcher_or.status(), result_status);
+ return result_proto;
+ }
+ join_children_fetcher = std::make_unique<JoinChildrenFetcher>(
+ std::move(join_children_fetcher_or).ValueOrDie());
+
+ // Assign child's ResultAdjustmentInfo.
+ child_result_adjustment_info = std::make_unique<ResultAdjustmentInfo>(
+ join_spec.nested_spec().search_spec(),
+ join_spec.nested_spec().scoring_spec(),
+ join_spec.nested_spec().result_spec(), schema_store_.get(),
+ std::move(nested_query_scoring_results.query_terms));
+ }
+
+ // Process parent query
QueryScoringResults query_scoring_results =
- ProcessQueryAndScore(search_spec, scoring_spec, result_spec);
+ ProcessQueryAndScore(search_spec, scoring_spec, result_spec,
+ join_children_fetcher.get(), current_time_ms);
int term_count = 0;
for (const auto& section_and_terms : query_scoring_results.query_terms) {
term_count += section_and_terms.second.size();
@@ -1778,33 +1985,30 @@ SearchResultProto IcingSearchEngine::Search(
return result_proto;
}
- std::unique_ptr<ScoredDocumentHitsRanker> ranker;
- if (search_spec.has_join_spec()) {
- // Process 2nd query
- QueryScoringResults nested_query_scoring_results = ProcessQueryAndScore(
- search_spec.join_spec().nested_spec().search_spec(),
- search_spec.join_spec().nested_spec().scoring_spec(),
- search_spec.join_spec().nested_spec().result_spec());
- // TOOD(b/256022027): set different kinds of latency for 2nd query.
- if (!nested_query_scoring_results.status.ok()) {
- TransformStatus(nested_query_scoring_results.status, result_status);
- return result_proto;
- }
+ // Construct parent's result adjustment info.
+ auto parent_result_adjustment_info = std::make_unique<ResultAdjustmentInfo>(
+ search_spec, scoring_spec, result_spec, schema_store_.get(),
+ std::move(query_scoring_results.query_terms));
+ std::unique_ptr<ScoredDocumentHitsRanker> ranker;
+ if (join_children_fetcher != nullptr) {
+ std::unique_ptr<Timer> join_timer = clock_->GetNewTimer();
// Join 2 scored document hits
- JoinProcessor join_processor(document_store_.get());
+ JoinProcessor join_processor(document_store_.get(), schema_store_.get(),
+ qualified_id_join_index_.get(),
+ current_time_ms);
libtextclassifier3::StatusOr<std::vector<JoinedScoredDocumentHit>>
joined_result_document_hits_or = join_processor.Join(
- search_spec.join_spec(),
- std::move(query_scoring_results.scored_document_hits),
- std::move(nested_query_scoring_results.scored_document_hits));
+ join_spec, std::move(query_scoring_results.scored_document_hits),
+ *join_children_fetcher);
if (!joined_result_document_hits_or.ok()) {
TransformStatus(joined_result_document_hits_or.status(), result_status);
return result_proto;
}
std::vector<JoinedScoredDocumentHit> joined_result_document_hits =
std::move(joined_result_document_hits_or).ValueOrDie();
- // TODO(b/256022027): set join latency
+
+ query_stats->set_join_latency_ms(join_timer->GetElapsedMilliseconds());
std::unique_ptr<Timer> component_timer = clock_->GetNewTimer();
// Ranks results
@@ -1845,9 +2049,9 @@ SearchResultProto IcingSearchEngine::Search(
libtextclassifier3::StatusOr<std::pair<uint64_t, PageResult>>
page_result_info_or = result_state_manager_->CacheAndRetrieveFirstPage(
- std::move(ranker), std::move(query_scoring_results.query_terms),
- search_spec, scoring_spec, result_spec, *document_store_,
- *result_retriever);
+ std::move(ranker), std::move(parent_result_adjustment_info),
+ std::move(child_result_adjustment_info), result_spec,
+ *document_store_, *result_retriever, current_time_ms);
if (!page_result_info_or.ok()) {
TransformStatus(page_result_info_or.status(), result_status);
query_stats->set_document_retrieval_latency_ms(
@@ -1860,8 +2064,11 @@ SearchResultProto IcingSearchEngine::Search(
// Assembles the final search result proto
result_proto.mutable_results()->Reserve(
page_result_info.second.results.size());
+
+ int32_t child_count = 0;
for (SearchResultProto::ResultProto& result :
page_result_info.second.results) {
+ child_count += result.joined_results_size();
result_proto.mutable_results()->Add(std::move(result));
}
@@ -1874,6 +2081,9 @@ SearchResultProto IcingSearchEngine::Search(
component_timer->GetElapsedMilliseconds());
query_stats->set_num_results_returned_current_page(
result_proto.results_size());
+
+ query_stats->set_num_joined_results_returned_current_page(child_count);
+
query_stats->set_num_results_with_snippets(
page_result_info.second.num_results_with_snippets);
return result_proto;
@@ -1881,7 +2091,8 @@ SearchResultProto IcingSearchEngine::Search(
IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore(
const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
- const ResultSpecProto& result_spec) {
+ const ResultSpecProto& result_spec,
+ const JoinChildrenFetcher* join_children_fetcher, int64_t current_time_ms) {
std::unique_ptr<Timer> component_timer = clock_->GetNewTimer();
// Gets unordered results from query processor
@@ -1902,7 +2113,7 @@ IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore(
libtextclassifier3::StatusOr<QueryResults> query_results_or;
if (ranking_strategy_or.ok()) {
query_results_or = query_processor->ParseSearch(
- search_spec, ranking_strategy_or.ValueOrDie());
+ search_spec, ranking_strategy_or.ValueOrDie(), current_time_ms);
} else {
query_results_or = ranking_strategy_or.status();
}
@@ -1920,7 +2131,8 @@ IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore(
// Scores but does not rank the results.
libtextclassifier3::StatusOr<std::unique_ptr<ScoringProcessor>>
scoring_processor_or = ScoringProcessor::Create(
- scoring_spec, document_store_.get(), schema_store_.get());
+ scoring_spec, document_store_.get(), schema_store_.get(),
+ current_time_ms, join_children_fetcher);
if (!scoring_processor_or.ok()) {
return QueryScoringResults(std::move(scoring_processor_or).status(),
std::move(query_results.query_terms),
@@ -1932,7 +2144,7 @@ IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore(
std::move(scoring_processor_or).ValueOrDie();
std::vector<ScoredDocumentHit> scored_document_hits =
scoring_processor->Score(std::move(query_results.root_iterator),
- performance_configuration_.num_to_score,
+ result_spec.num_to_score(),
&query_results.query_term_iterators);
int64_t scoring_latency_ms = component_timer->GetElapsedMilliseconds();
@@ -1970,9 +2182,10 @@ SearchResultProto IcingSearchEngine::GetNextPage(uint64_t next_page_token) {
std::unique_ptr<ResultRetrieverV2> result_retriever =
std::move(result_retriever_or).ValueOrDie();
+ int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
libtextclassifier3::StatusOr<std::pair<uint64_t, PageResult>>
page_result_info_or = result_state_manager_->GetNextPage(
- next_page_token, *result_retriever);
+ next_page_token, *result_retriever, current_time_ms);
if (!page_result_info_or.ok()) {
if (absl_ports::IsNotFound(page_result_info_or.status())) {
// NOT_FOUND means an empty result.
@@ -1992,8 +2205,11 @@ SearchResultProto IcingSearchEngine::GetNextPage(uint64_t next_page_token) {
// Assembles the final search result proto
result_proto.mutable_results()->Reserve(
page_result_info.second.results.size());
+
+ int32_t child_count = 0;
for (SearchResultProto::ResultProto& result :
page_result_info.second.results) {
+ child_count += result.joined_results_size();
result_proto.mutable_results()->Add(std::move(result));
}
@@ -2012,6 +2228,8 @@ SearchResultProto IcingSearchEngine::GetNextPage(uint64_t next_page_token) {
result_proto.results_size());
query_stats->set_num_results_with_snippets(
page_result_info.second.num_results_with_snippets);
+ query_stats->set_num_joined_results_returned_current_page(child_count);
+
return result_proto;
}
@@ -2077,9 +2295,12 @@ IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) {
// Tries to rebuild document store if swapping fails, to avoid leaving the
// system in the broken state for future operations.
- auto create_result_or =
- DocumentStore::Create(filesystem_.get(), current_document_dir,
- clock_.get(), schema_store_.get());
+ auto create_result_or = DocumentStore::Create(
+ filesystem_.get(), current_document_dir, clock_.get(),
+ schema_store_.get(), /*force_recovery_and_revalidate_documents=*/false,
+ options_.document_store_namespace_id_fingerprint(),
+ options_.pre_mapping_fbv(), options_.use_persistent_hash_map(),
+ options_.compression_level(), /*initialize_stats=*/nullptr);
// TODO(b/144458732): Implement a more robust version of
// TC_ASSIGN_OR_RETURN that can support error logging.
if (!create_result_or.ok()) {
@@ -2093,8 +2314,7 @@ IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) {
}
document_store_ = std::move(create_result_or.ValueOrDie().document_store);
result_state_manager_ = std::make_unique<ResultStateManager>(
- performance_configuration_.max_num_total_hits, *document_store_,
- clock_.get());
+ performance_configuration_.max_num_total_hits, *document_store_);
// Potential data loss
// TODO(b/147373249): Find a way to detect true data loss error
@@ -2103,9 +2323,12 @@ IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) {
}
// Recreates the doc store instance
- auto create_result_or =
- DocumentStore::Create(filesystem_.get(), current_document_dir,
- clock_.get(), schema_store_.get());
+ auto create_result_or = DocumentStore::Create(
+ filesystem_.get(), current_document_dir, clock_.get(),
+ schema_store_.get(), /*force_recovery_and_revalidate_documents=*/false,
+ options_.document_store_namespace_id_fingerprint(),
+ options_.pre_mapping_fbv(), options_.use_persistent_hash_map(),
+ options_.compression_level(), /*initialize_stats=*/nullptr);
if (!create_result_or.ok()) {
// Unable to create DocumentStore from the new file. Mark as uninitialized
// and return INTERNAL.
@@ -2116,8 +2339,7 @@ IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) {
}
document_store_ = std::move(create_result_or.ValueOrDie().document_store);
result_state_manager_ = std::make_unique<ResultStateManager>(
- performance_configuration_.max_num_total_hits, *document_store_,
- clock_.get());
+ performance_configuration_.max_num_total_hits, *document_store_);
// Deletes tmp directory
if (!filesystem_->DeleteDirectoryRecursively(
@@ -2132,52 +2354,51 @@ IcingSearchEngine::IndexRestorationResult
IcingSearchEngine::RestoreIndexIfNeeded() {
DocumentId last_stored_document_id =
document_store_->last_added_document_id();
- DocumentId last_indexed_document_id = index_->last_added_document_id();
-
- if (last_stored_document_id == last_indexed_document_id) {
+ if (last_stored_document_id == index_->last_added_document_id() &&
+ last_stored_document_id == integer_index_->last_added_document_id() &&
+ last_stored_document_id ==
+ qualified_id_join_index_->last_added_document_id()) {
// No need to recover.
- return {libtextclassifier3::Status::OK, false};
+ return {libtextclassifier3::Status::OK, false, false, false};
}
if (last_stored_document_id == kInvalidDocumentId) {
- // Document store is empty but index is not. Reset the index.
- return {index_->Reset(), false};
+ // Document store is empty but index is not. Clear the index.
+ return {ClearAllIndices(), false, false, false};
}
- // TruncateTo ensures that the index does not hold any data that is not
- // present in the ground truth. If the document store lost some documents,
- // TruncateTo will ensure that the index does not contain any hits from those
- // lost documents. If the index does not contain any hits for documents with
- // document id greater than last_stored_document_id, then TruncateTo will have
- // no effect.
- auto status = index_->TruncateTo(last_stored_document_id);
- if (!status.ok()) {
- return {status, false};
+ // Truncate indices first.
+ auto truncate_result_or = TruncateIndicesTo(last_stored_document_id);
+ if (!truncate_result_or.ok()) {
+ return {std::move(truncate_result_or).status(), false, false, false};
}
- // Last indexed document id may have changed thanks to TruncateTo.
- last_indexed_document_id = index_->last_added_document_id();
- DocumentId first_document_to_reindex =
- (last_indexed_document_id != kInvalidDocumentId)
- ? index_->last_added_document_id() + 1
- : kMinDocumentId;
- if (first_document_to_reindex > last_stored_document_id) {
+ TruncateIndexResult truncate_result =
+ std::move(truncate_result_or).ValueOrDie();
+
+ if (truncate_result.first_document_to_reindex > last_stored_document_id) {
// Nothing to restore. Just return.
- return {libtextclassifier3::Status::OK, false};
+ return {libtextclassifier3::Status::OK, false, false, false};
}
- auto index_processor_or = IndexProcessor::Create(
- normalizer_.get(), index_.get(), integer_index_.get(), clock_.get());
- if (!index_processor_or.ok()) {
- return {index_processor_or.status(), true};
+ auto data_indexing_handlers_or = CreateDataIndexingHandlers();
+ if (!data_indexing_handlers_or.ok()) {
+ return {data_indexing_handlers_or.status(),
+ truncate_result.index_needed_restoration,
+ truncate_result.integer_index_needed_restoration,
+ truncate_result.qualified_id_join_index_needed_restoration};
}
- std::unique_ptr<IndexProcessor> index_processor =
- std::move(index_processor_or).ValueOrDie();
+ // By using recovery_mode for IndexProcessor, we're able to replay documents
+ // from smaller document id and it will skip documents that are already been
+ // indexed.
+ IndexProcessor index_processor(
+ std::move(data_indexing_handlers_or).ValueOrDie(), clock_.get(),
+ /*recovery_mode=*/true);
ICING_VLOG(1) << "Restoring index by replaying documents from document id "
- << first_document_to_reindex << " to document id "
- << last_stored_document_id;
+ << truncate_result.first_document_to_reindex
+ << " to document id " << last_stored_document_id;
libtextclassifier3::Status overall_status;
- for (DocumentId document_id = first_document_to_reindex;
+ for (DocumentId document_id = truncate_result.first_document_to_reindex;
document_id <= last_stored_document_id; ++document_id) {
libtextclassifier3::StatusOr<DocumentProto> document_or =
document_store_->Get(document_id);
@@ -2189,7 +2410,9 @@ IcingSearchEngine::RestoreIndexIfNeeded() {
continue;
} else {
// Returns other errors
- return {document_or.status(), true};
+ return {document_or.status(), truncate_result.index_needed_restoration,
+ truncate_result.integer_index_needed_restoration,
+ truncate_result.qualified_id_join_index_needed_restoration};
}
}
DocumentProto document(std::move(document_or).ValueOrDie());
@@ -2199,25 +2422,33 @@ IcingSearchEngine::RestoreIndexIfNeeded() {
language_segmenter_.get(),
std::move(document));
if (!tokenized_document_or.ok()) {
- return {tokenized_document_or.status(), true};
+ return {tokenized_document_or.status(),
+ truncate_result.index_needed_restoration,
+ truncate_result.integer_index_needed_restoration,
+ truncate_result.qualified_id_join_index_needed_restoration};
}
TokenizedDocument tokenized_document(
std::move(tokenized_document_or).ValueOrDie());
libtextclassifier3::Status status =
- index_processor->IndexDocument(tokenized_document, document_id);
+ index_processor.IndexDocument(tokenized_document, document_id);
if (!status.ok()) {
if (!absl_ports::IsDataLoss(status)) {
// Real error. Stop recovering and pass it up.
- return {status, true};
+ return {status, truncate_result.index_needed_restoration,
+ truncate_result.integer_index_needed_restoration,
+ truncate_result.qualified_id_join_index_needed_restoration};
}
+ // FIXME: why can we skip data loss error here?
// Just a data loss. Keep trying to add the remaining docs, but report the
// data loss when we're done.
overall_status = status;
}
}
- return {overall_status, true};
+ return {overall_status, truncate_result.index_needed_restoration,
+ truncate_result.integer_index_needed_restoration,
+ truncate_result.qualified_id_join_index_needed_restoration};
}
libtextclassifier3::StatusOr<bool> IcingSearchEngine::LostPreviousSchema() {
@@ -2243,6 +2474,179 @@ libtextclassifier3::StatusOr<bool> IcingSearchEngine::LostPreviousSchema() {
return document_store_->last_added_document_id() != kInvalidDocumentId;
}
+libtextclassifier3::StatusOr<std::vector<std::unique_ptr<DataIndexingHandler>>>
+IcingSearchEngine::CreateDataIndexingHandlers() {
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
+
+ // Term index handler
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<StringSectionIndexingHandler>
+ string_section_indexing_handler,
+ StringSectionIndexingHandler::Create(
+ clock_.get(), normalizer_.get(), index_.get()));
+ handlers.push_back(std::move(string_section_indexing_handler));
+
+ // Integer index handler
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<IntegerSectionIndexingHandler>
+ integer_section_indexing_handler,
+ IntegerSectionIndexingHandler::Create(
+ clock_.get(), integer_index_.get()));
+ handlers.push_back(std::move(integer_section_indexing_handler));
+
+ // Qualified id join index handler
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<QualifiedIdJoinIndexingHandler>
+ qualified_id_join_indexing_handler,
+ QualifiedIdJoinIndexingHandler::Create(
+ clock_.get(), qualified_id_join_index_.get()));
+ handlers.push_back(std::move(qualified_id_join_indexing_handler));
+
+ return handlers;
+}
+
+libtextclassifier3::StatusOr<IcingSearchEngine::TruncateIndexResult>
+IcingSearchEngine::TruncateIndicesTo(DocumentId last_stored_document_id) {
+ // Attempt to truncate term index.
+ // TruncateTo ensures that the index does not hold any data that is not
+ // present in the ground truth. If the document store lost some documents,
+ // TruncateTo will ensure that the index does not contain any hits from those
+ // lost documents. If the index does not contain any hits for documents with
+ // document id greater than last_stored_document_id, then TruncateTo will have
+ // no effect.
+ ICING_RETURN_IF_ERROR(index_->TruncateTo(last_stored_document_id));
+
+ // Get last indexed document id for term index after truncating.
+ DocumentId term_index_last_added_document_id =
+ index_->last_added_document_id();
+ DocumentId first_document_to_reindex =
+ (term_index_last_added_document_id != kInvalidDocumentId)
+ ? term_index_last_added_document_id + 1
+ : kMinDocumentId;
+ bool index_needed_restoration =
+ (last_stored_document_id != term_index_last_added_document_id);
+
+ // Attempt to truncate integer index.
+ bool integer_index_needed_restoration = false;
+ DocumentId integer_index_last_added_document_id =
+ integer_index_->last_added_document_id();
+ if (integer_index_last_added_document_id == kInvalidDocumentId ||
+ last_stored_document_id > integer_index_last_added_document_id) {
+ // If last_stored_document_id is greater than
+ // integer_index_last_added_document_id, then we only have to replay docs
+ // starting from integer_index_last_added_document_id + 1. Also use std::min
+ // since we might need to replay even smaller doc ids for term index.
+ integer_index_needed_restoration = true;
+ if (integer_index_last_added_document_id != kInvalidDocumentId) {
+ first_document_to_reindex = std::min(
+ first_document_to_reindex, integer_index_last_added_document_id + 1);
+ } else {
+ first_document_to_reindex = kMinDocumentId;
+ }
+ } else if (last_stored_document_id < integer_index_last_added_document_id) {
+ // Clear the entire integer index if last_stored_document_id is smaller than
+ // integer_index_last_added_document_id, because there is no way to remove
+ // data with doc_id > last_stored_document_id from integer index and we have
+ // to rebuild.
+ ICING_RETURN_IF_ERROR(integer_index_->Clear());
+
+ // Since the entire integer index is discarded, we start to rebuild it by
+ // setting first_document_to_reindex to kMinDocumentId.
+ integer_index_needed_restoration = true;
+ first_document_to_reindex = kMinDocumentId;
+ }
+
+ // Attempt to truncate qualified id join index
+ bool qualified_id_join_index_needed_restoration = false;
+ DocumentId qualified_id_join_index_last_added_document_id =
+ qualified_id_join_index_->last_added_document_id();
+ if (qualified_id_join_index_last_added_document_id == kInvalidDocumentId ||
+ last_stored_document_id >
+ qualified_id_join_index_last_added_document_id) {
+ // If last_stored_document_id is greater than
+ // qualified_id_join_index_last_added_document_id, then we only have to
+ // replay docs starting from (qualified_id_join_index_last_added_document_id
+ // + 1). Also use std::min since we might need to replay even smaller doc
+ // ids for other components.
+ qualified_id_join_index_needed_restoration = true;
+ if (qualified_id_join_index_last_added_document_id != kInvalidDocumentId) {
+ first_document_to_reindex =
+ std::min(first_document_to_reindex,
+ qualified_id_join_index_last_added_document_id + 1);
+ } else {
+ first_document_to_reindex = kMinDocumentId;
+ }
+ } else if (last_stored_document_id <
+ qualified_id_join_index_last_added_document_id) {
+ // Clear the entire qualified id join index if last_stored_document_id is
+ // smaller than qualified_id_join_index_last_added_document_id, because
+ // there is no way to remove data with doc_id > last_stored_document_id from
+ // join index efficiently and we have to rebuild.
+ ICING_RETURN_IF_ERROR(qualified_id_join_index_->Clear());
+
+ // Since the entire qualified id join index is discarded, we start to
+ // rebuild it by setting first_document_to_reindex to kMinDocumentId.
+ qualified_id_join_index_needed_restoration = true;
+ first_document_to_reindex = kMinDocumentId;
+ }
+
+ return TruncateIndexResult(first_document_to_reindex,
+ index_needed_restoration,
+ integer_index_needed_restoration,
+ qualified_id_join_index_needed_restoration);
+}
+
+libtextclassifier3::Status IcingSearchEngine::DiscardDerivedFiles() {
+ if (schema_store_ != nullptr || document_store_ != nullptr ||
+ index_ != nullptr || integer_index_ != nullptr ||
+ qualified_id_join_index_ != nullptr) {
+ return absl_ports::FailedPreconditionError(
+ "Cannot discard derived files while having valid instances");
+ }
+
+ // Schema store
+ ICING_RETURN_IF_ERROR(
+ SchemaStore::DiscardDerivedFiles(filesystem_.get(), options_.base_dir()));
+
+ // Document store
+ ICING_RETURN_IF_ERROR(DocumentStore::DiscardDerivedFiles(
+ filesystem_.get(), options_.base_dir()));
+
+ // Term index
+ if (!filesystem_->DeleteDirectoryRecursively(
+ MakeIndexDirectoryPath(options_.base_dir()).c_str())) {
+ return absl_ports::InternalError("Failed to discard index");
+ }
+
+ // Integer index
+ if (!filesystem_->DeleteDirectoryRecursively(
+ MakeIntegerIndexWorkingPath(options_.base_dir()).c_str())) {
+ return absl_ports::InternalError("Failed to discard integer index");
+ }
+
+ // Qualified id join index
+ if (!filesystem_->DeleteDirectoryRecursively(
+ MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir()).c_str())) {
+ return absl_ports::InternalError(
+ "Failed to discard qualified id join index");
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IcingSearchEngine::ClearSearchIndices() {
+ ICING_RETURN_IF_ERROR(index_->Reset());
+ ICING_RETURN_IF_ERROR(integer_index_->Clear());
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IcingSearchEngine::ClearJoinIndices() {
+ return qualified_id_join_index_->Clear();
+}
+
+libtextclassifier3::Status IcingSearchEngine::ClearAllIndices() {
+ ICING_RETURN_IF_ERROR(ClearSearchIndices());
+ ICING_RETURN_IF_ERROR(ClearJoinIndices());
+ return libtextclassifier3::Status::OK;
+}
+
ResetResultProto IcingSearchEngine::Reset() {
absl_ports::unique_lock l(&mutex_);
return ResetInternal();
@@ -2302,7 +2706,8 @@ SuggestionResponse IcingSearchEngine::SearchSuggestions(
// Create the suggestion processor.
auto suggestion_processor_or = SuggestionProcessor::Create(
- index_.get(), language_segmenter_.get(), normalizer_.get());
+ index_.get(), integer_index_.get(), language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(), schema_store_.get());
if (!suggestion_processor_or.ok()) {
TransformStatus(suggestion_processor_or.status(), response_status);
return response;
@@ -2310,74 +2715,10 @@ SuggestionResponse IcingSearchEngine::SearchSuggestions(
std::unique_ptr<SuggestionProcessor> suggestion_processor =
std::move(suggestion_processor_or).ValueOrDie();
- // Populate target namespace filter.
- std::unordered_set<NamespaceId> namespace_ids;
- namespace_ids.reserve(suggestion_spec.namespace_filters_size());
- for (std::string_view name_space : suggestion_spec.namespace_filters()) {
- auto namespace_id_or = document_store_->GetNamespaceId(name_space);
- if (!namespace_id_or.ok()) {
- // The current namespace doesn't exist.
- continue;
- }
- namespace_ids.insert(namespace_id_or.ValueOrDie());
- }
- if (namespace_ids.empty() && !suggestion_spec.namespace_filters().empty()) {
- // None of desired namespace exists, we should return directly.
- response_status->set_code(StatusProto::OK);
- return response;
- }
-
- // Populate target document id filter.
- auto document_id_filter_map_or = PopulateDocumentIdFilters(
- document_store_.get(), suggestion_spec, namespace_ids);
- if (!document_id_filter_map_or.ok()) {
- TransformStatus(document_id_filter_map_or.status(), response_status);
- return response;
- }
- std::unordered_map<NamespaceId, std::unordered_set<DocumentId>>
- document_id_filter_map = document_id_filter_map_or.ValueOrDie();
- if (document_id_filter_map.empty() &&
- !suggestion_spec.document_uri_filters().empty()) {
- // None of desired DocumentId exists, we should return directly.
- response_status->set_code(StatusProto::OK);
- return response;
- }
-
- // Populate target schema type filter.
- std::unordered_set<SchemaTypeId> schema_type_ids;
- schema_type_ids.reserve(suggestion_spec.schema_type_filters_size());
- for (std::string_view schema_type : suggestion_spec.schema_type_filters()) {
- auto schema_type_id_or = schema_store_->GetSchemaTypeId(schema_type);
- if (!schema_type_id_or.ok()) {
- continue;
- }
- schema_type_ids.insert(schema_type_id_or.ValueOrDie());
- }
- if (schema_type_ids.empty() &&
- !suggestion_spec.schema_type_filters().empty()) {
- // None of desired schema type exists, we should return directly.
- response_status->set_code(StatusProto::OK);
- return response;
- }
-
- // Populate target properties filter.
- auto property_filter_map_or = PopulatePropertyFilters(
- schema_store_.get(), suggestion_spec, schema_type_ids);
- if (!property_filter_map_or.ok()) {
- TransformStatus(property_filter_map_or.status(), response_status);
- return response;
- }
- std::unordered_map<SchemaTypeId, SectionIdMask> property_filter_map =
- property_filter_map_or.ValueOrDie();
-
// Run suggestion based on given SuggestionSpec.
- SuggestionResultCheckerImpl suggestion_result_checker_impl(
- document_store_.get(), std::move(namespace_ids),
- std::move(document_id_filter_map), std::move(schema_type_ids),
- std::move(property_filter_map));
+ int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
libtextclassifier3::StatusOr<std::vector<TermMetadata>> terms_or =
- suggestion_processor->QuerySuggestions(suggestion_spec,
- &suggestion_result_checker_impl);
+ suggestion_processor->QuerySuggestions(suggestion_spec, current_time_ms);
if (!terms_or.ok()) {
TransformStatus(terms_or.status(), response_status);
return response;
diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h
index 221d86c..d9d5ff6 100644
--- a/icing/icing-search-engine.h
+++ b/icing/icing-search-engine.h
@@ -26,9 +26,12 @@
#include "icing/absl_ports/mutex.h"
#include "icing/absl_ports/thread_annotations.h"
#include "icing/file/filesystem.h"
+#include "icing/index/data-indexing-handler.h"
#include "icing/index/index.h"
#include "icing/index/numeric/numeric-index.h"
#include "icing/jni/jni-cache.h"
+#include "icing/join/join-children-fetcher.h"
+#include "icing/join/qualified-id-join-index.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/performance-configuration.h"
#include "icing/proto/debug.pb.h"
@@ -472,10 +475,13 @@ class IcingSearchEngine {
std::unique_ptr<Index> index_ ICING_GUARDED_BY(mutex_);
// Storage for all hits of numeric contents from the document store.
- // TODO(b/249829533): integrate more functions with integer_index_.
std::unique_ptr<NumericIndex<int64_t>> integer_index_
ICING_GUARDED_BY(mutex_);
+ // Storage for all join qualified ids from the document store.
+ std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index_
+ ICING_GUARDED_BY(mutex_);
+
// Pointer to JNI class references
const std::unique_ptr<const JniCache> jni_cache_;
@@ -548,8 +554,8 @@ class IcingSearchEngine {
InitializeStatsProto* initialize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
- // Do any initialization/recovery necessary to create a DocumentStore
- // instance.
+ // Do any initialization/recovery necessary to create term index, integer
+ // index, and qualified id join index instances.
//
// Returns:
// OK on success
@@ -561,6 +567,30 @@ class IcingSearchEngine {
InitializeStatsProto* initialize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+ // Implementation of IcingSearchEngine::Search that only grabs the overall
+ // read-lock, allowing for parallel non-exclusive operations.
+ // This implementation is used if search_spec.use_read_only_search is true.
+ SearchResultProto SearchLockedShared(const SearchSpecProto& search_spec,
+ const ScoringSpecProto& scoring_spec,
+ const ResultSpecProto& result_spec)
+ ICING_LOCKS_EXCLUDED(mutex_);
+
+ // Implementation of IcingSearchEngine::Search that requires the overall
+ // write lock. No other operations of any kind can be executed in parallel if
+ // this version is used.
+ // This implementation is used if search_spec.use_read_only_search is false.
+ SearchResultProto SearchLockedExclusive(const SearchSpecProto& search_spec,
+ const ScoringSpecProto& scoring_spec,
+ const ResultSpecProto& result_spec)
+ ICING_LOCKS_EXCLUDED(mutex_);
+
+ // Helper method for the actual work to Search. We need this separate
+ // method to manage locking for Search.
+ SearchResultProto InternalSearch(const SearchSpecProto& search_spec,
+ const ScoringSpecProto& scoring_spec,
+ const ResultSpecProto& result_spec)
+ ICING_SHARED_LOCKS_REQUIRED(mutex_);
+
// Processes query and scores according to the specs. It is a helper function
// (called by Search) to process and score normal query and the nested child
// query for join search.
@@ -587,10 +617,11 @@ class IcingSearchEngine {
parse_query_latency_ms(parse_query_latency_ms_in),
scoring_latency_ms(scoring_latency_ms_in) {}
};
- QueryScoringResults ProcessQueryAndScore(const SearchSpecProto& search_spec,
- const ScoringSpecProto& scoring_spec,
- const ResultSpecProto& result_spec)
- ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+ QueryScoringResults ProcessQueryAndScore(
+ const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
+ const ResultSpecProto& result_spec,
+ const JoinChildrenFetcher* join_children_fetcher, int64_t current_time_ms)
+ ICING_SHARED_LOCKS_REQUIRED(mutex_);
// Many of the internal components rely on other components' derived data.
// Check that everything is consistent with each other so that we're not
@@ -607,6 +638,15 @@ class IcingSearchEngine {
libtextclassifier3::Status CheckConsistency()
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+ // Discards all derived data.
+ //
+ // Returns:
+ // OK on success
+ // FAILED_PRECONDITION_ERROR if those instances are valid (non nullptr)
+ // INTERNAL_ERROR on any I/O errors
+ libtextclassifier3::Status DiscardDerivedFiles()
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
// Repopulates derived data off our ground truths.
//
// Returns:
@@ -637,9 +677,10 @@ class IcingSearchEngine {
OptimizeStatsProto* optimize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
- // Helper method to restore missing document data in index_. All documents
- // will be reindexed. This does not clear the index, so it is recommended to
- // call Index::Reset first.
+ // Helper method to restore missing document data in index_, integer_index_,
+ // and qualified_id_join_index_. All documents will be reindexed. This does
+ // not clear the index, so it is recommended to call ClearAllIndices,
+ // ClearSearchIndices, or ClearJoinIndices first if needed.
//
// Returns:
// On success, OK and a bool indicating whether or not restoration was
@@ -652,7 +693,9 @@ class IcingSearchEngine {
// INTERNAL_ERROR on any IO errors
struct IndexRestorationResult {
libtextclassifier3::Status status;
- bool needed_restoration;
+ bool index_needed_restoration;
+ bool integer_index_needed_restoration;
+ bool qualified_id_join_index_needed_restoration;
};
IndexRestorationResult RestoreIndexIfNeeded()
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
@@ -668,6 +711,70 @@ class IcingSearchEngine {
// INTERNAL_ERROR on I/O error
libtextclassifier3::StatusOr<bool> LostPreviousSchema()
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Helper method to create all types of data indexing handlers to index term,
+ // integer, and join qualified ids.
+ libtextclassifier3::StatusOr<
+ std::vector<std::unique_ptr<DataIndexingHandler>>>
+ CreateDataIndexingHandlers() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Helper method to discard parts of (term, integer, qualified id join)
+ // indices if they contain data for document ids greater than
+ // last_stored_document_id.
+ //
+ // REQUIRES: last_stored_document_id is valid (!= kInvalidDocumentId). Note:
+ // if we want to truncate everything in the index, then please call
+ // ClearSearchIndices/ClearJoinIndices/ClearAllIndices instead.
+ //
+ // Returns:
+ // On success, a DocumentId indicating the first document to start for
+ // reindexing and 2 bool flags indicating whether term or integer index
+ // needs restoration.
+ // INTERNAL on any I/O errors
+ struct TruncateIndexResult {
+ DocumentId first_document_to_reindex;
+ bool index_needed_restoration;
+ bool integer_index_needed_restoration;
+ bool qualified_id_join_index_needed_restoration;
+
+ explicit TruncateIndexResult(
+ DocumentId first_document_to_reindex_in,
+ bool index_needed_restoration_in,
+ bool integer_index_needed_restoration_in,
+ bool qualified_id_join_index_needed_restoration_in)
+ : first_document_to_reindex(first_document_to_reindex_in),
+ index_needed_restoration(index_needed_restoration_in),
+ integer_index_needed_restoration(integer_index_needed_restoration_in),
+ qualified_id_join_index_needed_restoration(
+ qualified_id_join_index_needed_restoration_in) {}
+ };
+ libtextclassifier3::StatusOr<TruncateIndexResult> TruncateIndicesTo(
+ DocumentId last_stored_document_id)
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Helper method to discard search (term, integer) indices.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL_ERROR on any I/O errors
+ libtextclassifier3::Status ClearSearchIndices()
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Helper method to discard join (qualified id) indices.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL_ERROR on any I/O errors
+ libtextclassifier3::Status ClearJoinIndices()
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Helper method to discard all search and join indices.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL_ERROR on any I/O errors
+ libtextclassifier3::Status ClearAllIndices()
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
};
} // namespace lib
diff --git a/icing/icing-search-engine_backwards_compatibility_test.cc b/icing/icing-search-engine_backwards_compatibility_test.cc
index b9233cb..178e923 100644
--- a/icing/icing-search-engine_backwards_compatibility_test.cc
+++ b/icing/icing-search-engine_backwards_compatibility_test.cc
@@ -118,16 +118,17 @@ TEST_F(IcingSearchEngineBackwardsCompatibilityTest,
IcingSearchEngine icing(icing_options, GetTestJniCache());
InitializeResultProto init_result = icing.Initialize();
EXPECT_THAT(init_result.status(), ProtoIsOk());
+
+ // Since there will be version change, the recovery cause will be
+ // VERSION_CHANGED.
EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
Eq(InitializeStatsProto::NO_DATA_LOSS));
EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
- Eq(InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT));
+ Eq(InitializeStatsProto::VERSION_CHANGED));
EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- // The main and lite indexes are in legacy formats and therefore will need to
- // be rebuilt from scratch.
+ Eq(InitializeStatsProto::VERSION_CHANGED));
EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::IO_ERROR));
+ Eq(InitializeStatsProto::VERSION_CHANGED));
// Set up schema, this is the one used to validate documents in the testdata
// files. Do not change unless you're also updating the testdata files.
@@ -257,17 +258,17 @@ TEST_F(IcingSearchEngineBackwardsCompatibilityTest, MigrateToLargerScale) {
IcingSearchEngine icing(icing_options, GetTestJniCache());
InitializeResultProto init_result = icing.Initialize();
EXPECT_THAT(init_result.status(), ProtoIsOk());
+
+ // Since there will be version change, the recovery cause will be
+ // VERSION_CHANGED.
EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
Eq(InitializeStatsProto::NO_DATA_LOSS));
- // No recovery is required for the document store.
EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
+ Eq(InitializeStatsProto::VERSION_CHANGED));
EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- // The main and lite indexes are in legacy formats and therefore will need to
- // be rebuilt from scratch.
+ Eq(InitializeStatsProto::VERSION_CHANGED));
EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::IO_ERROR));
+ Eq(InitializeStatsProto::VERSION_CHANGED));
// Verify that the schema stored in the index matches the one that we expect.
// Do not change unless you're also updating the testdata files.
@@ -384,6 +385,185 @@ TEST_F(IcingSearchEngineBackwardsCompatibilityTest, MigrateToLargerScale) {
expected_document3_search));
}
+TEST_F(IcingSearchEngineBackwardsCompatibilityTest,
+ MigrateToAppendOnlySchemaStorage) {
+ // Copy the testdata files into our IcingSearchEngine directory
+ std::string test_data_dir = GetTestDataDir("blob_schema_store");
+
+ // Create dst directory that we'll initialize the IcingSearchEngine over.
+ std::string base_dir = GetTestBaseDir() + "_migrate";
+ ASSERT_THAT(filesystem()->DeleteDirectoryRecursively(base_dir.c_str()), true);
+ ASSERT_THAT(filesystem()->CreateDirectoryRecursively(base_dir.c_str()), true);
+
+ ASSERT_TRUE(filesystem()->CopyDirectory(test_data_dir.c_str(),
+ base_dir.c_str(),
+ /*recursive=*/true));
+
+ IcingSearchEngineOptions icing_options;
+ icing_options.set_base_dir(base_dir);
+
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ EXPECT_THAT(init_result.status(), ProtoIsOk());
+
+ // Since there will be version change, the recovery cause will be
+ // VERSION_CHANGED.
+ EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::VERSION_CHANGED));
+ // TODO: create enum code for legacy schema store recovery after schema store
+ // change is made.
+ EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::VERSION_CHANGED));
+ EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::VERSION_CHANGED));
+
+ // Verify that the schema stored in the index matches the one that we expect.
+ // Do not change unless you're also updating the testdata files.
+ SchemaProto expected_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("transaction")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("unindexedStringProperty")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("unindexedIntegerProperty")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableIntegerProperty")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("stringExactProperty")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("stringPrefixProperty")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ GetSchemaResultProto expected_get_schema_result_proto;
+ expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_schema_result_proto.mutable_schema() = expected_schema;
+ ASSERT_THAT(icing.GetSchema(), EqualsProto(expected_get_schema_result_proto));
+
+ // These are the documents that are stored in the testdata files. Do not
+ // change unless you're also updating the testdata files.
+ DocumentProto expected_document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .AddStringProperty("body", "bar")
+ .Build();
+
+ DocumentProto expected_document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(20)
+ .SetScore(123)
+ .AddStringProperty("subject", "phoo")
+ .Build();
+
+ DocumentProto expected_document3 =
+ DocumentBuilder()
+ .SetKey("namespace3", "uri3")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(30)
+ .SetScore(123)
+ .AddStringProperty("stringExactProperty", "foo")
+ .AddInt64Property("indexableIntegerProperty", 10)
+ .Build();
+
+ EXPECT_THAT(
+ icing
+ .Get(expected_document1.namespace_(), expected_document1.uri(),
+ GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(expected_document1));
+ EXPECT_THAT(
+ icing
+ .Get(expected_document2.namespace_(), expected_document2.uri(),
+ GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(expected_document2));
+ EXPECT_THAT(
+ icing
+ .Get(expected_document3.namespace_(), expected_document3.uri(),
+ GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(expected_document3));
+
+ // Searching for "foo" should get us document1 and not document3 due to the
+ // schema type filter.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("foo");
+ search_spec.add_schema_type_filters("email");
+
+ SearchResultProto expected_document1_search;
+ expected_document1_search.mutable_status()->set_code(StatusProto::OK);
+ *expected_document1_search.mutable_results()->Add()->mutable_document() =
+ expected_document1;
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_document1_search));
+
+ // Searching for "phoo" should get us document2.
+ search_spec.set_query("phoo");
+
+ SearchResultProto expected_document2_search;
+ expected_document2_search.mutable_status()->set_code(StatusProto::OK);
+ *expected_document2_search.mutable_results()->Add()->mutable_document() =
+ expected_document2;
+
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_document2_search));
+
+ // Searching for "foo" should get us both document 1 and document3 now that
+ // schema type 'transaction' has been added to the schema filter.
+ search_spec.set_query("foo");
+ search_spec.add_schema_type_filters("transaction");
+
+ SearchResultProto expected_document_1_and_3_search;
+ expected_document_1_and_3_search.mutable_status()->set_code(StatusProto::OK);
+ *expected_document_1_and_3_search.mutable_results()
+ ->Add()
+ ->mutable_document() = expected_document3;
+ *expected_document_1_and_3_search.mutable_results()
+ ->Add()
+ ->mutable_document() = expected_document1;
+
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_document_1_and_3_search));
+}
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc
index 5849ac8..354d11c 100644
--- a/icing/icing-search-engine_benchmark.cc
+++ b/icing/icing-search-engine_benchmark.cc
@@ -16,7 +16,9 @@
#include <fstream>
#include <iostream>
+#include <limits>
#include <memory>
+#include <numeric>
#include <ostream>
#include <random>
#include <sstream>
@@ -32,17 +34,22 @@
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/icing-search-engine.h"
+#include "icing/join/join-processor.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/initialize.pb.h"
+#include "icing/proto/persist.pb.h"
#include "icing/proto/reset.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
#include "icing/proto/status.pb.h"
#include "icing/proto/term.pb.h"
+#include "icing/query/query-features.h"
#include "icing/schema-builder.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/document-generator.h"
+#include "icing/testing/numeric/number-generator.h"
+#include "icing/testing/numeric/uniform-distribution-integer-generator.h"
#include "icing/testing/random-string.h"
#include "icing/testing/schema-generator.h"
#include "icing/testing/tmp-directory.h"
@@ -90,14 +97,6 @@ constexpr int kAvgDocumentSize = 300;
// ASSUME: ~75% of the document's size comes from it's content.
constexpr float kContentSizePct = 0.7;
-// Average length of word in English is 4.7 characters.
-constexpr int kAvgTokenLen = 5;
-// Made up value. This results in a fairly reasonable language - the majority of
-// generated words are 3-9 characters, ~3% of words are >=20 chars, and the
-// longest ones are 27 chars, (roughly consistent with the longest,
-// non-contrived English words
-// https://en.wikipedia.org/wiki/Longest_word_in_English)
-constexpr int kTokenStdDev = 7;
constexpr int kLanguageSize = 1000;
// Lite Index size required to fit 128k docs, each doc requires ~64 bytes of
@@ -117,22 +116,6 @@ std::vector<std::string> CreateNamespaces(int num_namespaces) {
return namespaces;
}
-// Creates a vector containing num_words randomly-generated words for use by
-// documents.
-template <typename Rand>
-std::vector<std::string> CreateLanguages(int num_words, Rand* r) {
- std::vector<std::string> language;
- std::normal_distribution<> norm_dist(kAvgTokenLen, kTokenStdDev);
- while (--num_words >= 0) {
- int word_length = 0;
- while (word_length < 1) {
- word_length = std::round(norm_dist(*r));
- }
- language.push_back(RandomString(kAlNumAlphabet, word_length, r));
- }
- return language;
-}
-
SearchSpecProto CreateSearchSpec(const std::string& query,
const std::vector<std::string>& namespaces,
TermMatchType::Code match_type) {
@@ -202,6 +185,25 @@ std::vector<DocumentProto> GenerateRandomDocuments(
return random_docs;
}
+std::unique_ptr<NumberGenerator<int64_t>> CreateIntegerGenerator(
+ size_t num_documents) {
+ // Since the collision # follows poisson distribution with lambda =
+ // (num_keys / range), we set the range 10x (lambda = 0.1) to avoid too many
+ // collisions.
+ //
+ // Distribution:
+ // - keys in range being picked for 0 times: 90.5%
+ // - keys in range being picked for 1 time: 9%
+ // - keys in range being picked for 2 times: 0.45%
+ // - keys in range being picked for 3 times: 0.015%
+ //
+ // For example, num_keys = 1M, range = 10M. Then there will be ~904837 unique
+ // keys, 45242 keys being picked twice, 1508 keys being picked thrice ...
+ return std::make_unique<UniformDistributionIntegerGenerator<int64_t>>(
+ /*seed=*/12345, /*range_lower=*/0,
+ /*range_upper=*/static_cast<int64_t>(num_documents) * 10 - 1);
+}
+
void BM_IndexLatency(benchmark::State& state) {
// Initialize the filesystem
std::string test_dir = GetTestTempDir() + "/icing/benchmark";
@@ -240,33 +242,7 @@ void BM_IndexLatency(benchmark::State& state) {
}
BENCHMARK(BM_IndexLatency)
// Arguments: num_indexed_documents, num_sections
- ->ArgPair(1, 1)
- ->ArgPair(2, 1)
- ->ArgPair(8, 1)
- ->ArgPair(32, 1)
- ->ArgPair(128, 1)
- ->ArgPair(1 << 10, 1)
- ->ArgPair(1 << 13, 1)
- ->ArgPair(1 << 15, 1)
- ->ArgPair(1 << 17, 1)
- ->ArgPair(1, 5)
- ->ArgPair(2, 5)
- ->ArgPair(8, 5)
- ->ArgPair(32, 5)
- ->ArgPair(128, 5)
- ->ArgPair(1 << 10, 5)
- ->ArgPair(1 << 13, 5)
- ->ArgPair(1 << 15, 5)
- ->ArgPair(1 << 17, 5)
- ->ArgPair(1, 10)
- ->ArgPair(2, 10)
- ->ArgPair(8, 10)
- ->ArgPair(32, 10)
- ->ArgPair(128, 10)
- ->ArgPair(1 << 10, 10)
- ->ArgPair(1 << 13, 10)
- ->ArgPair(1 << 15, 10)
- ->ArgPair(1 << 17, 10);
+ ->ArgPair(1000000, 5);
void BM_QueryLatency(benchmark::State& state) {
// Initialize the filesystem
@@ -303,7 +279,7 @@ void BM_QueryLatency(benchmark::State& state) {
SearchSpecProto search_spec = CreateSearchSpec(
language.at(0), std::vector<std::string>(), TermMatchType::PREFIX);
- ResultSpecProto result_spec = CreateResultSpec(1000000, 1000000, 1000000);
+ ResultSpecProto result_spec = CreateResultSpec(1, 1000000, 1000000);
ScoringSpecProto scoring_spec =
CreateScoringSpec(ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
for (auto _ : state) {
@@ -313,10 +289,7 @@ void BM_QueryLatency(benchmark::State& state) {
}
BENCHMARK(BM_QueryLatency)
// Arguments: num_indexed_documents, num_sections
- ->ArgPair(32, 2)
- ->ArgPair(128, 2)
- ->ArgPair(1 << 10, 2)
- ->ArgPair(1 << 13, 2);
+ ->ArgPair(1000000, 2);
void BM_IndexThroughput(benchmark::State& state) {
// Initialize the filesystem
@@ -793,6 +766,507 @@ void BM_PutMaxAllowedDocuments(benchmark::State& state) {
}
BENCHMARK(BM_PutMaxAllowedDocuments);
+void BM_QueryWithSnippet(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ std::string body = "message body";
+ for (int i = 0; i < 100; i++) {
+ body = body +
+ " invent invention inventory invest investigate investigation "
+ "investigator investment nvestor invisible invitation invite "
+ "involve involved involvement IraqiI rish island";
+ }
+ for (int i = 0; i < 50; i++) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri" + std::to_string(i))
+ .SetSchema("Message")
+ .AddStringProperty("body", body)
+ .Build();
+ ASSERT_THAT(icing->Put(std::move(document)).status(), ProtoIsOk());
+ }
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("i");
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(10000);
+ result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(10000);
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(10000);
+
+ for (auto s : state) {
+ SearchResultProto results = icing->Search(
+ search_spec, ScoringSpecProto::default_instance(), result_spec);
+ }
+}
+BENCHMARK(BM_QueryWithSnippet);
+
+void BM_NumericIndexing(benchmark::State& state) {
+ int num_documents = state.range(0);
+ int num_integers_per_doc = state.range(1);
+
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("integer")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ std::unique_ptr<NumberGenerator<int64_t>> integer_generator =
+ CreateIntegerGenerator(num_documents);
+ std::vector<DocumentProto> documents;
+ documents.reserve(num_documents);
+ for (int i = 0; i < num_documents; ++i) {
+ std::vector<int64_t> integers;
+ integers.reserve(num_integers_per_doc);
+ for (int j = 0; j < num_integers_per_doc; ++j) {
+ integers.push_back(integer_generator->Generate());
+ }
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri" + std::to_string(i))
+ .SetSchema("Message")
+ .AddStringProperty("body", "body hello world")
+ .AddInt64Property("integer", integers.begin(), integers.end())
+ .Build();
+ documents.push_back(std::move(document));
+ }
+
+ for (auto s : state) {
+ state.PauseTiming();
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+ state.ResumeTiming();
+
+ for (const DocumentProto& document : documents) {
+ ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
+ }
+
+ state.PauseTiming();
+ icing.reset();
+ ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(test_dir.c_str()));
+ state.ResumeTiming();
+ }
+}
+
+BENCHMARK(BM_NumericIndexing)
+ // Arguments: num_documents, num_integers_per_doc
+ ->ArgPair(1000000, 5);
+
+void BM_NumericExactQuery(benchmark::State& state) {
+ int num_documents = state.range(0);
+ int num_integers_per_doc = state.range(1);
+
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("integer")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ std::unique_ptr<NumberGenerator<int64_t>> integer_generator =
+ CreateIntegerGenerator(num_documents);
+ std::unordered_set<int64_t> chosen_integer_set;
+ for (int i = 0; i < num_documents; ++i) {
+ std::vector<int64_t> integers;
+ integers.reserve(num_integers_per_doc);
+ for (int j = 0; j < num_integers_per_doc; ++j) {
+ int64_t chosen_int = integer_generator->Generate();
+ integers.push_back(chosen_int);
+ chosen_integer_set.insert(chosen_int);
+ }
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri" + std::to_string(i))
+ .SetSchema("Message")
+ .AddStringProperty("body", "body hello world")
+ .AddInt64Property("integer", integers.begin(), integers.end())
+ .Build();
+ ASSERT_THAT(icing->Put(std::move(document)).status(), ProtoIsOk());
+ }
+
+ SearchSpecProto search_spec;
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+
+ std::vector<int64_t> chosen_integers(chosen_integer_set.begin(),
+ chosen_integer_set.end());
+ std::uniform_int_distribution<> distrib(0, chosen_integers.size() - 1);
+ std::default_random_engine e(/*seed=*/12345);
+ for (auto s : state) {
+ int64_t exact = chosen_integers[distrib(e)];
+ search_spec.set_query("integer == " + std::to_string(exact));
+
+ SearchResultProto results =
+ icing->Search(search_spec, scoring_spec, result_spec);
+ ASSERT_THAT(results.status(), ProtoIsOk());
+ ASSERT_GT(results.results_size(), 0);
+ if (results.next_page_token() != kInvalidNextPageToken) {
+ icing->InvalidateNextPageToken(results.next_page_token());
+ }
+ }
+}
+BENCHMARK(BM_NumericExactQuery)
+ // Arguments: num_documents, num_integers_per_doc
+ ->ArgPair(1000000, 5);
+
+void BM_NumericRangeQueryAll(benchmark::State& state) {
+ int num_documents = state.range(0);
+ int num_integers_per_doc = state.range(1);
+
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("integer")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ std::unique_ptr<NumberGenerator<int64_t>> integer_generator =
+ CreateIntegerGenerator(num_documents);
+ for (int i = 0; i < num_documents; ++i) {
+ std::vector<int64_t> integers;
+ integers.reserve(num_integers_per_doc);
+ for (int j = 0; j < num_integers_per_doc; ++j) {
+ integers.push_back(integer_generator->Generate());
+ }
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri" + std::to_string(i))
+ .SetSchema("Message")
+ .AddStringProperty("body", "body hello world")
+ .AddInt64Property("integer", integers.begin(), integers.end())
+ .Build();
+ ASSERT_THAT(icing->Put(std::move(document)).status(), ProtoIsOk());
+ }
+
+ SearchSpecProto search_spec;
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+ search_spec.set_query("integer >= " +
+ std::to_string(std::numeric_limits<int64_t>::min()));
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+
+ for (auto s : state) {
+ SearchResultProto results =
+ icing->Search(search_spec, scoring_spec, result_spec);
+ ASSERT_THAT(results.status(), ProtoIsOk());
+ ASSERT_GT(results.results_size(), 0);
+ if (results.next_page_token() != kInvalidNextPageToken) {
+ icing->InvalidateNextPageToken(results.next_page_token());
+ }
+ }
+}
+BENCHMARK(BM_NumericRangeQueryAll)
+ // Arguments: num_documents, num_integers_per_doc
+ ->ArgPair(1000000, 5);
+
+void BM_JoinQueryQualifiedId(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("firstName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("personQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ // Create Person documents (parent)
+ static constexpr int kNumPersonDocuments = 1000;
+ for (int i = 0; i < kNumPersonDocuments; ++i) {
+ std::string person_id = std::to_string(i);
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person" + person_id)
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first" + person_id)
+ .AddStringProperty("lastName", "last" + person_id)
+ .AddStringProperty("emailAddress",
+ "person" + person_id + "@gmail.com")
+ .Build();
+ ASSERT_THAT(icing->Put(std::move(person)).status(), ProtoIsOk());
+ }
+
+ // Create Email documents (child)
+ static constexpr int kNumEmailDocuments = 10000;
+ std::uniform_int_distribution<> distrib(0, kNumPersonDocuments - 1);
+ std::default_random_engine e(/*seed=*/12345);
+ for (int i = 0; i < kNumEmailDocuments; ++i) {
+ std::string email_id = std::to_string(i);
+ std::string person_id = std::to_string(distrib(e));
+ DocumentProto email =
+ DocumentBuilder()
+ .SetKey("namespace", "email" + email_id)
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject " + email_id)
+ .AddStringProperty("body", "message body")
+ .AddStringProperty("personQualifiedId",
+ "pkg$db/namespace#person" + person_id)
+ .Build();
+ ASSERT_THAT(icing->Put(std::move(email)).status(), ProtoIsOk());
+ }
+
+ // Parent SearchSpec
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("firstName:first");
+
+ // JoinSpec
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("personQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::MAX);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
+ nested_search_spec->set_query("subject:test");
+ *nested_spec->mutable_scoring_spec() = ScoringSpecProto::default_instance();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ static constexpr int kNumPerPage = 10;
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(kNumPerPage);
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ ScoringSpecProto score_spec = ScoringSpecProto::default_instance();
+
+ const auto child_count_reduce_func =
+ [](int child_count, const SearchResultProto::ResultProto& result) -> int {
+ return child_count + result.joined_results_size();
+ };
+ for (auto s : state) {
+ int total_parent_count = 0;
+ int total_child_count = 0;
+ SearchResultProto results =
+ icing->Search(search_spec, score_spec, result_spec);
+ total_parent_count += results.results_size();
+ total_child_count +=
+ std::reduce(results.results().begin(), results.results().end(), 0,
+ child_count_reduce_func);
+
+ // Get all pages.
+ while (results.next_page_token() != kInvalidNextPageToken) {
+ results = icing->GetNextPage(results.next_page_token());
+ total_parent_count += results.results_size();
+ total_child_count +=
+ std::reduce(results.results().begin(), results.results().end(), 0,
+ child_count_reduce_func);
+ }
+
+ ASSERT_THAT(total_parent_count, Eq(kNumPersonDocuments));
+ ASSERT_THAT(total_child_count, Eq(kNumEmailDocuments));
+ }
+}
+BENCHMARK(BM_JoinQueryQualifiedId);
+
+void BM_PersistToDisk(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ std::default_random_engine random;
+ int num_types = kAvgNumNamespaces * kAvgNumTypes;
+ ExactStringPropertyGenerator property_generator;
+ SchemaGenerator<ExactStringPropertyGenerator> schema_generator(
+ /*num_properties=*/state.range(1), &property_generator);
+ SchemaProto schema = schema_generator.GenerateSchema(num_types);
+ EvenDistributionTypeSelector type_selector(schema);
+
+ // Generate documents.
+ int num_docs = state.range(0);
+ std::vector<std::string> language = CreateLanguages(kLanguageSize, &random);
+ const std::vector<DocumentProto> random_docs =
+ GenerateRandomDocuments(&type_selector, num_docs, language);
+
+ for (auto _ : state) {
+ state.PauseTiming();
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ options.set_use_persistent_hash_map(true);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Reset().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ for (const DocumentProto& doc : random_docs) {
+ ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk());
+ }
+
+ state.ResumeTiming();
+
+ ASSERT_THAT(icing->PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
+
+ state.PauseTiming();
+ icing.reset();
+ ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(test_dir.c_str()));
+ state.ResumeTiming();
+ }
+}
+BENCHMARK(BM_PersistToDisk)
+ // Arguments: num_indexed_documents, num_sections
+ ->ArgPair(1024, 5);
+
} // namespace
} // namespace lib
diff --git a/icing/icing-search-engine_delete_test.cc b/icing/icing-search-engine_delete_test.cc
new file mode 100644
index 0000000..c3b1ccd
--- /dev/null
+++ b/icing/icing-search-engine_delete_test.cc
@@ -0,0 +1,768 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/icing-search-engine.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/portable/endian.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/reset.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::Eq;
+using ::testing::Ge;
+using ::testing::Gt;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::Return;
+using ::testing::SizeIs;
+using ::testing::StrEq;
+using ::testing::UnorderedElementsAre;
+
+// For mocking purpose, we allow tests to provide a custom Filesystem.
+class TestIcingSearchEngine : public IcingSearchEngine {
+ public:
+ TestIcingSearchEngine(const IcingSearchEngineOptions& options,
+ std::unique_ptr<const Filesystem> filesystem,
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
+ std::unique_ptr<Clock> clock,
+ std::unique_ptr<JniCache> jni_cache)
+ : IcingSearchEngine(options, std::move(filesystem),
+ std::move(icing_filesystem), std::move(clock),
+ std::move(jni_cache)) {}
+};
+
+std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; }
+
+// This test is meant to cover all tests relating to IcingSearchEngine::Delete*.
+class IcingSearchEngineDeleteTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ // If we've specified using the reverse-JNI method for segmentation (i.e.
+ // not ICU), then we won't have the ICU data file included to set up.
+ // Technically, we could choose to use reverse-JNI for segmentation AND
+ // include an ICU data file, but that seems unlikely and our current BUILD
+ // setup doesn't do this.
+ // File generated via icu_data_file rule in //icing/BUILD.
+ std::string icu_data_file_path =
+ GetTestFilePath("icing/icu.dat");
+ ICING_ASSERT_OK(
+ icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
+ }
+ filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ const Filesystem* filesystem() const { return &filesystem_; }
+
+ private:
+ Filesystem filesystem_;
+};
+
+// Non-zero value so we don't override it to be the current time
+constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;
+
+IcingSearchEngineOptions GetDefaultIcingOptions() {
+ IcingSearchEngineOptions icing_options;
+ icing_options.set_base_dir(GetTestBaseDir());
+ return icing_options;
+}
+
+SchemaProto CreateMessageSchema() {
+ return SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+}
+
+SchemaProto CreateEmailSchema() {
+ return SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+}
+
+ScoringSpecProto GetDefaultScoringSpec() {
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ return scoring_spec;
+}
+
+TEST_F(IcingSearchEngineDeleteTest, DeleteBySchemaType) {
+ SchemaProto schema;
+ // Add an email type
+ auto type = schema.add_types();
+ type->set_schema_type("email");
+ auto property = type->add_properties();
+ property->set_property_name("subject");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+ // Add an message type
+ type = schema.add_types();
+ type->set_schema_type("message");
+ property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("message")
+ .AddStringProperty("body", "message body1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("email")
+ .AddStringProperty("subject", "message body2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(7);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document1;
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Delete the first type. The first doc should be irretrievable. The
+ // second should still be present.
+ DeleteBySchemaTypeResultProto result_proto =
+ icing.DeleteBySchemaType("message");
+ EXPECT_THAT(result_proto.status(), ProtoIsOk());
+ DeleteStatsProto exp_stats;
+ exp_stats.set_delete_type(DeleteStatsProto::DeleteType::SCHEMA_TYPE);
+ exp_stats.set_latency_ms(7);
+ exp_stats.set_num_documents_deleted(1);
+ EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace1, uri1) not found.");
+ expected_get_result_proto.clear_document();
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ expected_get_result_proto.mutable_status()->clear_message();
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Search for "message", only document2 should show up.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("message");
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineDeleteTest, DeleteSchemaTypeByQuery) {
+ SchemaProto schema = CreateMessageSchema();
+ // Add an email type
+ SchemaProto tmp = CreateEmailSchema();
+ *schema.add_types() = tmp.types(0);
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema(schema.types(0).schema_type())
+ .AddStringProperty("body", "message body1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema(schema.types(1).schema_type())
+ .AddStringProperty("subject", "subject subject2")
+ .AddStringProperty("body", "message body2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document1;
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Delete the first type. The first doc should be irretrievable. The
+ // second should still be present.
+ SearchSpecProto search_spec;
+ search_spec.add_schema_type_filters(schema.types(0).schema_type());
+ EXPECT_THAT(icing.DeleteByQuery(search_spec).status(), ProtoIsOk());
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace1, uri1) not found.");
+ expected_get_result_proto.clear_document();
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ expected_get_result_proto.mutable_status()->clear_message();
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ search_spec = SearchSpecProto::default_instance();
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineDeleteTest, DeleteByNamespace) {
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace3", "uri3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(7);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document1;
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ *expected_get_result_proto.mutable_document() = document3;
+ EXPECT_THAT(
+ icing.Get("namespace3", "uri3", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Delete namespace1. Document1 and document2 should be irretrievable.
+ // Document3 should still be present.
+ DeleteByNamespaceResultProto result_proto =
+ icing.DeleteByNamespace("namespace1");
+ EXPECT_THAT(result_proto.status(), ProtoIsOk());
+ DeleteStatsProto exp_stats;
+ exp_stats.set_delete_type(DeleteStatsProto::DeleteType::NAMESPACE);
+ exp_stats.set_latency_ms(7);
+ exp_stats.set_num_documents_deleted(2);
+ EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace1, uri1) not found.");
+ expected_get_result_proto.clear_document();
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace1, uri2) not found.");
+ expected_get_result_proto.clear_document();
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ expected_get_result_proto.mutable_status()->clear_message();
+ *expected_get_result_proto.mutable_document() = document3;
+ EXPECT_THAT(
+ icing.Get("namespace3", "uri3", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Search for "message", only document3 should show up.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("message");
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineDeleteTest, DeleteNamespaceByQuery) {
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document1;
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Delete the first namespace. The first doc should be irretrievable. The
+ // second should still be present.
+ SearchSpecProto search_spec;
+ search_spec.add_namespace_filters("namespace1");
+ EXPECT_THAT(icing.DeleteByQuery(search_spec).status(), ProtoIsOk());
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace1, uri1) not found.");
+ expected_get_result_proto.clear_document();
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ expected_get_result_proto.mutable_status()->clear_message();
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ search_spec = SearchSpecProto::default_instance();
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineDeleteTest, DeleteByQuery) {
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(7);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document1;
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Delete all docs containing 'body1'. The first doc should be irretrievable.
+ // The second should still be present.
+ SearchSpecProto search_spec;
+ search_spec.set_query("body1");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ DeleteByQueryResultProto result_proto = icing.DeleteByQuery(search_spec);
+ EXPECT_THAT(result_proto.status(), ProtoIsOk());
+ DeleteByQueryStatsProto exp_stats;
+ exp_stats.set_latency_ms(7);
+ exp_stats.set_num_documents_deleted(1);
+ exp_stats.set_query_length(search_spec.query().length());
+ exp_stats.set_num_terms(1);
+ exp_stats.set_num_namespaces_filtered(0);
+ exp_stats.set_num_schema_types_filtered(0);
+ exp_stats.set_parse_query_latency_ms(7);
+ exp_stats.set_document_removal_latency_ms(7);
+ EXPECT_THAT(result_proto.delete_by_query_stats(), EqualsProto(exp_stats));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace1, uri1) not found.");
+ expected_get_result_proto.clear_document();
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ expected_get_result_proto.mutable_status()->clear_message();
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ search_spec = SearchSpecProto::default_instance();
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineDeleteTest, DeleteByQueryReturnInfo) {
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body3")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(7);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document1;
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ *expected_get_result_proto.mutable_document() = document3;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri3", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Delete all docs to test the information is correctly grouped.
+ SearchSpecProto search_spec;
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ DeleteByQueryResultProto result_proto =
+ icing.DeleteByQuery(search_spec, true);
+ EXPECT_THAT(result_proto.status(), ProtoIsOk());
+ DeleteByQueryStatsProto exp_stats;
+ exp_stats.set_latency_ms(7);
+ exp_stats.set_num_documents_deleted(3);
+ exp_stats.set_query_length(search_spec.query().length());
+ exp_stats.set_num_terms(1);
+ exp_stats.set_num_namespaces_filtered(0);
+ exp_stats.set_num_schema_types_filtered(0);
+ exp_stats.set_parse_query_latency_ms(7);
+ exp_stats.set_document_removal_latency_ms(7);
+ EXPECT_THAT(result_proto.delete_by_query_stats(), EqualsProto(exp_stats));
+
+ // Check that DeleteByQuery can return information for deleted documents.
+ DeleteByQueryResultProto::DocumentGroupInfo info1, info2;
+ info1.set_namespace_("namespace1");
+ info1.set_schema("Message");
+ info1.add_uris("uri1");
+ info2.set_namespace_("namespace2");
+ info2.set_schema("Message");
+ info2.add_uris("uri3");
+ info2.add_uris("uri2");
+ EXPECT_THAT(result_proto.deleted_documents(),
+ UnorderedElementsAre(EqualsProto(info1), EqualsProto(info2)));
+
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance())
+ .status()
+ .code(),
+ Eq(StatusProto::NOT_FOUND));
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance())
+ .status()
+ .code(),
+ Eq(StatusProto::NOT_FOUND));
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri3", GetResultSpecProto::default_instance())
+ .status()
+ .code(),
+ Eq(StatusProto::NOT_FOUND));
+}
+
+TEST_F(IcingSearchEngineDeleteTest, DeleteByQueryNotFound) {
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document1;
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Delete all docs containing 'foo', which should be none of them. Both docs
+ // should still be present.
+ SearchSpecProto search_spec;
+ search_spec.set_query("foo");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ EXPECT_THAT(icing.DeleteByQuery(search_spec).status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ expected_get_result_proto.mutable_status()->clear_message();
+ *expected_get_result_proto.mutable_document() = document1;
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ expected_get_result_proto.mutable_status()->clear_message();
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ search_spec = SearchSpecProto::default_instance();
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine_initialization_test.cc b/icing/icing-search-engine_initialization_test.cc
new file mode 100644
index 0000000..b4853b4
--- /dev/null
+++ b/icing/icing-search-engine_initialization_test.cc
@@ -0,0 +1,5462 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/file/version-util.h"
+#include "icing/icing-search-engine.h"
+#include "icing/index/index-processor.h"
+#include "icing/index/index.h"
+#include "icing/index/integer-section-indexing-handler.h"
+#include "icing/index/numeric/integer-index.h"
+#include "icing/index/string-section-indexing-handler.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/join/doc-join-info.h"
+#include "icing/join/join-processor.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/join/qualified-id-join-indexing-handler.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/legacy/index/icing-mock-filesystem.h"
+#include "icing/portable/endian.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/reset.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/query/query-features.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-log-creator.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/transform/normalizer-factory.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/tokenized-document.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::_;
+using ::testing::AtLeast;
+using ::testing::DoDefault;
+using ::testing::EndsWith;
+using ::testing::Eq;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::Matcher;
+using ::testing::Ne;
+using ::testing::Return;
+using ::testing::SizeIs;
+
+constexpr std::string_view kIpsumText =
+ "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla convallis "
+ "scelerisque orci quis hendrerit. Sed augue turpis, sodales eu gravida "
+ "nec, scelerisque nec leo. Maecenas accumsan interdum commodo. Aliquam "
+ "mattis sapien est, sit amet interdum risus dapibus sed. Maecenas leo "
+ "erat, fringilla in nisl a, venenatis gravida metus. Phasellus venenatis, "
+ "orci in aliquet mattis, lectus sapien volutpat arcu, sed hendrerit ligula "
+ "arcu nec mauris. Integer dolor mi, rhoncus eget gravida et, pulvinar et "
+ "nunc. Aliquam ac sollicitudin nisi. Vivamus sit amet urna vestibulum, "
+ "tincidunt eros sed, efficitur nisl. Fusce non neque accumsan, sagittis "
+ "nisi eget, sagittis turpis. Ut pulvinar nibh eu purus feugiat faucibus. "
+ "Donec tellus nulla, tincidunt vel lacus id, bibendum fermentum turpis. "
+ "Nullam ultrices sed nibh vitae aliquet. Ut risus neque, consectetur "
+ "vehicula posuere vitae, convallis eu lorem. Donec semper augue eu nibh "
+ "placerat semper.";
+
+PortableFileBackedProtoLog<DocumentWrapper>::Header ReadDocumentLogHeader(
+ Filesystem filesystem, const std::string& file_path) {
+ PortableFileBackedProtoLog<DocumentWrapper>::Header header;
+ filesystem.PRead(file_path.c_str(), &header,
+ sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header),
+ /*offset=*/0);
+ return header;
+}
+
+void WriteDocumentLogHeader(
+ Filesystem filesystem, const std::string& file_path,
+ PortableFileBackedProtoLog<DocumentWrapper>::Header& header) {
+ filesystem.Write(file_path.c_str(), &header,
+ sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header));
+}
+
+// For mocking purpose, we allow tests to provide a custom Filesystem.
+class TestIcingSearchEngine : public IcingSearchEngine {
+ public:
+ TestIcingSearchEngine(const IcingSearchEngineOptions& options,
+ std::unique_ptr<const Filesystem> filesystem,
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
+ std::unique_ptr<Clock> clock,
+ std::unique_ptr<JniCache> jni_cache)
+ : IcingSearchEngine(options, std::move(filesystem),
+ std::move(icing_filesystem), std::move(clock),
+ std::move(jni_cache)) {}
+};
+
+std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; }
+
+// This test is meant to cover all tests relating to
+// IcingSearchEngine::Initialize.
+class IcingSearchEngineInitializationTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ // If we've specified using the reverse-JNI method for segmentation (i.e.
+ // not ICU), then we won't have the ICU data file included to set up.
+ // Technically, we could choose to use reverse-JNI for segmentation AND
+ // include an ICU data file, but that seems unlikely and our current BUILD
+ // setup doesn't do this.
+ // File generated via icu_data_file rule in //icing/BUILD.
+ std::string icu_data_file_path =
+ GetTestFilePath("icing/icu.dat");
+ ICING_ASSERT_OK(
+ icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
+ }
+ filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());
+
+ language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ lang_segmenter_,
+ language_segmenter_factory::Create(std::move(segmenter_options)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ normalizer_,
+ normalizer_factory::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int32_t>::max()));
+ }
+
+ void TearDown() override {
+ normalizer_.reset();
+ lang_segmenter_.reset();
+ filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ const Filesystem* filesystem() const { return &filesystem_; }
+
+ const IcingFilesystem* icing_filesystem() const { return &icing_filesystem_; }
+
+ Filesystem filesystem_;
+ IcingFilesystem icing_filesystem_;
+ std::unique_ptr<LanguageSegmenter> lang_segmenter_;
+ std::unique_ptr<Normalizer> normalizer_;
+};
+
+// Non-zero value so we don't override it to be the current time
+constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;
+
+std::string GetVersionFilename() { return GetTestBaseDir() + "/version"; }
+
+std::string GetDocumentDir() { return GetTestBaseDir() + "/document_dir"; }
+
+std::string GetIndexDir() { return GetTestBaseDir() + "/index_dir"; }
+
+std::string GetIntegerIndexDir() {
+ return GetTestBaseDir() + "/integer_index_dir";
+}
+
+std::string GetQualifiedIdJoinIndexDir() {
+ return GetTestBaseDir() + "/qualified_id_join_index_dir";
+}
+
+std::string GetSchemaDir() { return GetTestBaseDir() + "/schema_dir"; }
+
+std::string GetHeaderFilename() {
+ return GetTestBaseDir() + "/icing_search_engine_header";
+}
+
+IcingSearchEngineOptions GetDefaultIcingOptions() {
+ IcingSearchEngineOptions icing_options;
+ icing_options.set_base_dir(GetTestBaseDir());
+ return icing_options;
+}
+
+DocumentProto CreateMessageDocument(std::string name_space, std::string uri) {
+ return DocumentBuilder()
+ .SetKey(std::move(name_space), std::move(uri))
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+}
+
+DocumentProto CreateEmailDocument(const std::string& name_space,
+ const std::string& uri, int score,
+ const std::string& subject_content,
+ const std::string& body_content) {
+ return DocumentBuilder()
+ .SetKey(name_space, uri)
+ .SetSchema("Email")
+ .SetScore(score)
+ .AddStringProperty("subject", subject_content)
+ .AddStringProperty("body", body_content)
+ .Build();
+}
+
+SchemaTypeConfigProto CreateMessageSchemaTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+}
+
+SchemaTypeConfigProto CreateEmailSchemaTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+}
+
+SchemaProto CreateMessageSchema() {
+ return SchemaBuilder().AddType(CreateMessageSchemaTypeConfig()).Build();
+}
+
+SchemaProto CreateEmailSchema() {
+ return SchemaBuilder().AddType(CreateEmailSchemaTypeConfig()).Build();
+}
+
+ScoringSpecProto GetDefaultScoringSpec() {
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ return scoring_spec;
+}
+
+// TODO(b/272145329): create SearchSpecBuilder, JoinSpecBuilder,
+// SearchResultProtoBuilder and ResultProtoBuilder for unit tests and build all
+// instances by them.
+
+TEST_F(IcingSearchEngineInitializationTest, UninitializedInstanceFailsSafely) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+
+ SchemaProto email_schema = CreateMessageSchema();
+ EXPECT_THAT(icing.SetSchema(email_schema).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.GetSchema().status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.GetSchemaType(email_schema.types(0).schema_type()).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+
+ DocumentProto doc = CreateMessageDocument("namespace", "uri");
+ EXPECT_THAT(icing.Put(doc).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing
+ .Get(doc.namespace_(), doc.uri(),
+ GetResultSpecProto::default_instance())
+ .status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.Delete(doc.namespace_(), doc.uri()).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.DeleteByNamespace(doc.namespace_()).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.DeleteBySchemaType(email_schema.types(0).schema_type())
+ .status()
+ .code(),
+ Eq(StatusProto::FAILED_PRECONDITION));
+
+ SearchSpecProto search_spec = SearchSpecProto::default_instance();
+ ScoringSpecProto scoring_spec = ScoringSpecProto::default_instance();
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ constexpr int kSomePageToken = 12;
+ EXPECT_THAT(icing.GetNextPage(kSomePageToken).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ icing.InvalidateNextPageToken(kSomePageToken); // Verify this doesn't crash.
+
+ EXPECT_THAT(icing.PersistToDisk(PersistType::FULL).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.Optimize().status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+}
+
+TEST_F(IcingSearchEngineInitializationTest, SimpleInitialization) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(DocumentProto(document)).status(), ProtoIsOk());
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializingAgainSavesNonPersistedData) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document;
+
+ ASSERT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ MaxIndexMergeSizeReturnsInvalidArgument) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(std::numeric_limits<int32_t>::max());
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ NegativeMergeSizeReturnsInvalidArgument) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(-1);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ ZeroMergeSizeReturnsInvalidArgument) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(0);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineInitializationTest, GoodIndexMergeSizeReturnsOk) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ // One is fine, if a bit weird. It just means that the lite index will be
+ // smaller and will request a merge any time content is added to it.
+ options.set_index_merge_size(1);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ NegativeMaxTokenLenReturnsInvalidArgument) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_max_token_length(-1);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ ZeroMaxTokenLenReturnsInvalidArgument) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_max_token_length(0);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ NegativeCompressionLevelReturnsInvalidArgument) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_compression_level(-1);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ GreaterThanMaxCompressionLevelReturnsInvalidArgument) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_compression_level(10);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineInitializationTest, GoodCompressionLevelReturnsOk) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_compression_level(0);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ ReinitializingWithDifferentCompressionLevelReturnsOk) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_compression_level(3);
+ {
+ IcingSearchEngine icing(options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ ASSERT_THAT(icing.PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
+ }
+ options.set_compression_level(9);
+ {
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ }
+ options.set_compression_level(0);
+ {
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest, FailToCreateDocStore) {
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ // This fails DocumentStore::Create()
+ ON_CALL(*mock_filesystem, CreateDirectoryRecursively(_))
+ .WillByDefault(Return(false));
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(),
+ ProtoStatusIs(StatusProto::INTERNAL));
+ EXPECT_THAT(initialize_result_proto.status().message(),
+ HasSubstr("Could not create directory"));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitMarkerFilePreviousFailuresAtThreshold) {
+ Filesystem filesystem;
+ DocumentProto email1 =
+ CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1");
+ email1.set_creation_timestamp_ms(10000);
+ DocumentProto email2 =
+ CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2");
+ email2.set_creation_timestamp_ms(10000);
+
+ {
+ // Create an index with a few documents.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(), ProtoIsOk());
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(0));
+ ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+ }
+
+ // Write an init marker file with 5 previously failed attempts.
+ std::string marker_filepath = GetTestBaseDir() + "/init_marker";
+
+ {
+ ScopedFd marker_file_fd(filesystem.OpenForWrite(marker_filepath.c_str()));
+ int network_init_attempts = GHostToNetworkL(5);
+ // Write the updated number of attempts before we get started.
+ ASSERT_TRUE(filesystem.PWrite(marker_file_fd.get(), 0,
+ &network_init_attempts,
+ sizeof(network_init_attempts)));
+ ASSERT_TRUE(filesystem.DataSync(marker_file_fd.get()));
+ }
+
+ {
+ // Create the index again and verify that initialization succeeds and no
+ // data is thrown out.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(), ProtoIsOk());
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(5));
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(email1));
+ EXPECT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(email2));
+ }
+
+ // The successful init should have thrown out the marker file.
+ ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str()));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitMarkerFilePreviousFailuresBeyondThreshold) {
+ Filesystem filesystem;
+ DocumentProto email1 =
+ CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1");
+ DocumentProto email2 =
+ CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2");
+
+ {
+ // Create an index with a few documents.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(), ProtoIsOk());
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(0));
+ ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+ }
+
+ // Write an init marker file with 6 previously failed attempts.
+ std::string marker_filepath = GetTestBaseDir() + "/init_marker";
+
+ {
+ ScopedFd marker_file_fd(filesystem.OpenForWrite(marker_filepath.c_str()));
+ int network_init_attempts = GHostToNetworkL(6);
+ // Write the updated number of attempts before we get started.
+ ASSERT_TRUE(filesystem.PWrite(marker_file_fd.get(), 0,
+ &network_init_attempts,
+ sizeof(network_init_attempts)));
+ ASSERT_TRUE(filesystem.DataSync(marker_file_fd.get()));
+ }
+
+ {
+ // Create the index again and verify that initialization succeeds and all
+ // data is thrown out.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(),
+ ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(6));
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
+ EXPECT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
+ }
+
+ // The successful init should have thrown out the marker file.
+ ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str()));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ SuccessiveInitFailuresIncrementsInitMarker) {
+ Filesystem filesystem;
+ DocumentProto email1 =
+ CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1");
+ DocumentProto email2 =
+ CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2");
+
+ {
+ // 1. Create an index with a few documents.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(), ProtoIsOk());
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(0));
+ ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+ }
+
+ {
+ // 2. Create an index that will encounter an IO failure when trying to
+ // create the document log.
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ std::string document_log_filepath =
+ icing_options.base_dir() + "/document_dir/document_log_v1";
+ ON_CALL(*mock_filesystem,
+ GetFileSize(Matcher<const char*>(Eq(document_log_filepath))))
+ .WillByDefault(Return(Filesystem::kBadFileSize));
+
+ TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ // Fail to initialize six times in a row.
+ InitializeResultProto init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(0));
+
+ init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(1));
+
+ init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(2));
+
+ init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(3));
+
+ init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(4));
+
+ init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(5));
+ }
+
+ {
+ // 3. Create the index again and verify that initialization succeeds and all
+ // data is thrown out.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(),
+ ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(6));
+
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
+ EXPECT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
+ }
+
+ // The successful init should have thrown out the marker file.
+ std::string marker_filepath = GetTestBaseDir() + "/init_marker";
+ ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str()));
+}
+
+TEST_F(IcingSearchEngineInitializationTest, RecoverFromMissingHeaderFile) {
+ SearchSpecProto search_spec;
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ CreateMessageDocument("namespace", "uri");
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() =
+ CreateMessageDocument("namespace", "uri");
+
+ {
+ // Basic initialization/setup
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
+ EXPECT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ EXPECT_TRUE(filesystem()->DeleteFile(GetHeaderFilename().c_str()));
+
+ // We should be able to recover from this and access all our previous data
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Checks that DocumentLog is still ok
+ EXPECT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Checks that the term index is still ok so we can search over it
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Checks that the integer index is still ok so we can search over it
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto search_result_google::protobuf =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_google::protobuf, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Checks that Schema is still since it'll be needed to validate the document
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
+}
+
+TEST_F(IcingSearchEngineInitializationTest, UnableToRecoverFromCorruptSchema) {
+ {
+ // Basic initialization/setup
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() =
+ CreateMessageDocument("namespace", "uri");
+
+ EXPECT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ const std::string schema_file =
+ absl_ports::StrCat(GetSchemaDir(), "/schema.pb");
+ const std::string corrupt_data = "1234";
+ EXPECT_TRUE(filesystem()->Write(schema_file.c_str(), corrupt_data.data(),
+ corrupt_data.size()));
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INTERNAL));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ UnableToRecoverFromCorruptDocumentLog) {
+ {
+ // Basic initialization/setup
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() =
+ CreateMessageDocument("namespace", "uri");
+
+ EXPECT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ const std::string document_log_file = absl_ports::StrCat(
+ GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
+ const std::string corrupt_data = "1234";
+ EXPECT_TRUE(filesystem()->Write(document_log_file.c_str(),
+ corrupt_data.data(), corrupt_data.size()));
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INTERNAL));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RecoverFromInconsistentSchemaStore) {
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2_with_additional_property =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("additional", "content")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ {
+ // Initializes folder and schema
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder(CreateMessageSchemaTypeConfig())
+ // Add non-indexable property "additional"
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("additional")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document2_with_additional_property).status(),
+ ProtoIsOk());
+
+ // Won't get us anything because "additional" isn't marked as an indexed
+ // property in the schema
+ SearchSpecProto search_spec;
+ search_spec.set_query("additional:content");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ {
+ // This schema will change the SchemaTypeIds from the previous schema_
+ // (since SchemaTypeIds are assigned based on order of the types, and this
+ // new schema changes the ordering of previous types)
+ SchemaProto new_schema;
+ auto type = new_schema.add_types();
+ type->set_schema_type("Email");
+
+ // Switching a non-indexable property to indexable changes the SectionIds
+ // (since SectionIds are assigned based on alphabetical order of indexed
+ // sections, marking "additional" as an indexed property will push the
+ // "body" and "indexableInteger" property to different SectionIds)
+ *new_schema.add_types() =
+ SchemaTypeConfigBuilder(CreateMessageSchemaTypeConfig())
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("additional")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ // Write the marker file
+ std::string marker_filepath =
+ absl_ports::StrCat(options.base_dir(), "/set_schema_marker");
+ ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str()));
+ ASSERT_TRUE(sfd.is_valid());
+
+ // Write the new schema
+ FakeClock fake_clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ new_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+ } // Will persist new schema
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // We can insert a Email document since we kept the new schema
+ DocumentProto email_document =
+ DocumentBuilder()
+ .SetKey("namespace", "email_uri")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = email_document;
+
+ EXPECT_THAT(icing.Get("namespace", "email_uri",
+ GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Verify term search
+ SearchSpecProto search_spec1;
+
+ // The section restrict will ensure we are using the correct, updated
+ // SectionId in the Index
+ search_spec1.set_query("additional:content");
+
+ // Schema type filter will ensure we're using the correct, updated
+ // SchemaTypeId in the DocumentStore
+ search_spec1.add_schema_type_filters("Message");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_search_result_proto1;
+ expected_search_result_proto1.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto1.mutable_results()->Add()->mutable_document() =
+ document2_with_additional_property;
+
+ SearchResultProto search_result_proto1 =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto1));
+
+ // Verify numeric (integer) search
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec1.add_schema_type_filters("Message");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto expected_search_result_google::protobuf;
+ expected_search_result_google::protobuf.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_google::protobuf.mutable_results()->Add()->mutable_document() =
+ document2_with_additional_property;
+ *expected_search_result_google::protobuf.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ SearchResultProto search_result_google::protobuf =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_google::protobuf, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_google::protobuf));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RecoverFromInconsistentDocumentStore) {
+ // Test the following scenario: document store is ahead of term, integer and
+ // qualified id join index. IcingSearchEngine should be able to recover all
+ // indices. Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+ // - Still, we need to replay and reindex documents.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message1 =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message2 =
+ DocumentBuilder()
+ .SetKey("namespace", "message/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ {
+ // Initializes folder and schema, index one document
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message1).status(), ProtoIsOk());
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ {
+ FakeClock fake_clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
+
+ // Puts message2 into DocumentStore but doesn't index it.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ filesystem(), GetDocumentDir(), &fake_clock, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ ICING_EXPECT_OK(document_store->Put(message2));
+ }
+
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory
+ // "*/integer_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ EXPECT_THAT(initialize_result.status(), ProtoIsOk());
+ // Index Restoration should be triggered here and document2 should be
+ // indexed.
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = message1;
+
+ // DocumentStore kept the additional document
+ EXPECT_THAT(icing.Get("namespace", "message/1",
+ GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ *expected_get_result_proto.mutable_document() = message2;
+ EXPECT_THAT(icing.Get("namespace", "message/2",
+ GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ message2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ message1;
+
+ // We indexed the additional document in all indices.
+ // Verify term search
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("message");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto search_result_proto1 =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto search_result_google::protobuf =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_google::protobuf, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `body:message` based on the child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:message");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto expected_join_search_result_proto;
+ expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_join_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message2;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message1;
+
+ SearchResultProto search_result_proto3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
+ EXPECT_THAT(search_result_proto3, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIndex) {
+ // Test the following scenario: term index is corrupted (e.g. checksum doesn't
+ // match). IcingSearchEngine should be able to recover term index. Several
+ // additional behaviors are also tested:
+ // - Index directory handling:
+ // - Should discard the entire term index directory and start it from
+ // scratch.
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect since we start it
+ // from scratch.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("body:message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ message;
+
+ {
+ // Initializes folder and schema, index one document
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ // Manually corrupt term index
+ {
+ const std::string index_hit_buffer_file = GetIndexDir() + "/idx/lite.hb";
+ ScopedFd fd(filesystem()->OpenForWrite(index_hit_buffer_file.c_str()));
+ ASSERT_TRUE(fd.is_valid());
+ ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
+ }
+
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should be discarded once.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(1);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory "*/integer_index_dir/*"
+ // should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ EXPECT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Check that our index is ok by searching over the restored index
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIntegerIndex) {
+ // Test the following scenario: integer index is corrupted (e.g. checksum
+ // doesn't match). IcingSearchEngine should be able to recover integer index.
+ // Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Should discard the entire integer index directory and start it from
+ // scratch.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded, since we start it from scratch.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("indexableInteger == 123");
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ message;
+
+ {
+ // Initializes folder and schema, index one document
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ // Manually corrupt integer index
+ {
+ const std::string integer_index_metadata_file =
+ GetIntegerIndexDir() + "/integer_index.m";
+ ScopedFd fd(
+ filesystem()->OpenForWrite(integer_index_metadata_file.c_str()));
+ ASSERT_TRUE(fd.is_valid());
+ ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
+ }
+
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should be discarded once, and Clear()
+ // should never be called (i.e. storage sub directory "*/integer_index_dir/*"
+ // should never be discarded) since we start it from scratch.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(1);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ EXPECT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Check that our index is ok by searching over the restored index
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RecoverFromIntegerIndexBucketSplitThresholdChange) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddInt64Property("indexableInteger", 123)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with a message document.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+ // 2. Create the index again with different
+ // integer_index_bucket_split_threshold. This should trigger index
+ // restoration.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should be discarded once, and Clear()
+ // should never be called (i.e. storage sub directory
+ // "*/integer_index_dir/*" should never be discarded) since we start it from
+ // scratch.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(1);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ static constexpr int32_t kNewIntegerIndexBucketSplitThreshold = 1000;
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ ASSERT_THAT(kNewIntegerIndexBucketSplitThreshold,
+ Ne(options.integer_index_bucket_split_threshold()));
+ options.set_integer_index_bucket_split_threshold(
+ kNewIntegerIndexBucketSplitThreshold);
+
+ TestIcingSearchEngine icing(options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec;
+ search_spec.set_query("indexableInteger == 123");
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document().uri(), Eq("message/1"));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RecoverFromCorruptQualifiedIdJoinIndex) {
+ // Test the following scenario: qualified id join index is corrupted (e.g.
+ // checksum doesn't match). IcingSearchEngine should be able to recover
+ // qualified id join index. Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Should discard the entire qualified id join index directory and start
+ // it from scratch.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded, since we start
+ // it from scratch.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // Prepare join search spec to join a query for `name:person` with a child
+ // query for `body:message` based on the child's `senderQualifiedId` field.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:message");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message;
+
+ {
+ // Initializes folder and schema, index one document
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ // Manually corrupt qualified id join index
+ {
+ const std::string qualified_id_join_index_metadata_file =
+ GetQualifiedIdJoinIndexDir() + "/metadata";
+ ScopedFd fd(filesystem()->OpenForWrite(
+ qualified_id_join_index_metadata_file.c_str()));
+ ASSERT_TRUE(fd.is_valid());
+ ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
+ }
+
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory "*/integer_index_dir/*"
+ // should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should be discarded once, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(1);
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ EXPECT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+
+ // Check that our index is ok by searching over the restored index
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseTermIndex) {
+ // Test the following scenario: losing the entire term index. Since we need
+ // flash index magic to determine the version, in this test we will throw out
+ // the entire term index and re-initialize an empty one, to bypass
+ // undetermined version state change and correctly trigger "lose term index"
+ // scenario.
+ // IcingSearchEngine should be able to recover term index. Several additional
+ // behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should not be discarded (but instead just being
+ // rebuilt by replaying all docs).
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect since it is empty.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with 3 message documents.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/3").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+ // 2. Delete and re-initialize an empty term index to trigger
+ // RestoreIndexIfNeeded.
+ {
+ std::string idx_subdir = GetIndexDir() + "/idx";
+ ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(Index::Options(GetIndexDir(),
+ /*index_merge_size=*/100,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/50),
+ filesystem(), icing_filesystem()));
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ // 3. Create the index again. This should trigger index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded since we've already
+ // lost it.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory
+ // "*/integer_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Verify term index works normally
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("body:consectetur");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results1 =
+ icing.Search(search_spec1, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results1.status(), ProtoIsOk());
+ EXPECT_THAT(results1.next_page_token(), Eq(0));
+ // All documents should be retrievable.
+ ASSERT_THAT(results1.results(), SizeIs(3));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results2.results(), SizeIs(3));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
+ ASSERT_THAT(results3.results(), SizeIs(1));
+ EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
+ EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
+ Eq("message/3"));
+ EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
+ Eq("message/1"));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseIntegerIndex) {
+ // Test the following scenario: losing the entire integer index directory.
+ // IcingSearchEngine should be able to recover integer index. Several
+ // additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should not be discarded since we've already
+ // lost it. Start it from scratch.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded, since we start it from scratch.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with 3 message documents.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/3").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+ // 2. Delete the integer index file to trigger RestoreIndexIfNeeded.
+ std::string integer_index_dir = GetIntegerIndexDir();
+ filesystem()->DeleteDirectoryRecursively(integer_index_dir.c_str());
+
+ // 3. Create the index again. This should trigger index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded since we've
+ // already lost it, and Clear() should never be called (i.e. storage sub
+ // directory "*/integer_index_dir/*" should never be discarded) since we
+ // start it from scratch.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Verify term index works normally
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("body:consectetur");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results1 =
+ icing.Search(search_spec1, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results1.status(), ProtoIsOk());
+ EXPECT_THAT(results1.next_page_token(), Eq(0));
+ // All documents should be retrievable.
+ ASSERT_THAT(results1.results(), SizeIs(3));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results2.results(), SizeIs(3));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
+ ASSERT_THAT(results3.results(), SizeIs(1));
+ EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
+ EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
+ Eq("message/3"));
+ EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
+ Eq("message/1"));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexLoseQualifiedIdJoinIndex) {
+ // Test the following scenario: losing the entire qualified id join index
+ // directory. IcingSearchEngine should be able to recover qualified id join
+ // index. Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should not be discarded since we've
+ // already lost it. Start it from scratch.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded, since we start
+ // it from scratch.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with 3 message documents.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/3").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+ // 2. Delete the qualified id join index file to trigger RestoreIndexIfNeeded.
+ std::string qualified_id_join_index_dir = GetQualifiedIdJoinIndexDir();
+ filesystem()->DeleteDirectoryRecursively(qualified_id_join_index_dir.c_str());
+
+ // 3. Create the index again. This should trigger index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded since we've
+ // already lost it, and Clear() should never be called (i.e. storage sub
+ // directory "*/integer_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded)
+ // since we start it from scratch.
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+
+ // Verify term index works normally
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("body:consectetur");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results1 =
+ icing.Search(search_spec1, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results1.status(), ProtoIsOk());
+ EXPECT_THAT(results1.next_page_token(), Eq(0));
+ // All documents should be retrievable.
+ ASSERT_THAT(results1.results(), SizeIs(3));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results2.results(), SizeIs(3));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
+ ASSERT_THAT(results3.results(), SizeIs(1));
+ EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
+ EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
+ Eq("message/3"));
+ EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
+ Eq("message/1"));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexTruncateLiteIndexWithoutReindexing) {
+ // Test the following scenario: term lite index is *completely* ahead of
+ // document store. IcingSearchEngine should be able to recover term index.
+ // Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index should take effect and throw out the
+ // entire lite index. This should be sufficient to make term index
+ // consistent with document store, so reindexing should not take place.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with a LiteIndex that will only allow a person and a
+ // message document before needing a merge.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(person.ByteSizeLong() +
+ message.ByteSizeLong());
+ TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ // Add two message documents. These should get merged into the main index.
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+ // 2. Manually add some data into term lite index and increment
+ // last_added_document_id, but don't merge into the main index. This will
+ // cause mismatched last_added_document_id with term index.
+ // - Document store: [0, 1, 2]
+ // - Term index
+ // - Main index: [0, 1, 2]
+ // - Lite index: [3]
+ // - Integer index: [0, 1, 2]
+ // - Qualified id join index: [0, 1, 2]
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(
+ Index::Options(GetIndexDir(),
+ /*index_merge_size=*/message.ByteSizeLong(),
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/8),
+ filesystem(), icing_filesystem()));
+ DocumentId original_last_added_doc_id = index->last_added_document_id();
+ index->set_last_added_document_id(original_last_added_doc_id + 1);
+ Index::Editor editor =
+ index->Edit(original_last_added_doc_id + 1, /*section_id=*/0,
+ TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+ }
+
+ // 3. Create the index again.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded. since we only call
+ // TruncateTo for term index.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory
+ // "*/integer_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(message.ByteSizeLong());
+ TestIcingSearchEngine icing(options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ // Since truncating lite index is sufficient to make term index consistent
+ // with document store, replaying documents or reindex shouldn't take place.
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Verify term index works normally
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("body:consectetur");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results1 =
+ icing.Search(search_spec1, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results1.status(), ProtoIsOk());
+ EXPECT_THAT(results1.next_page_token(), Eq(0));
+ // Only the documents that were in the main index should be retrievable.
+ ASSERT_THAT(results1.results(), SizeIs(2));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("message/1"));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results2.results(), SizeIs(2));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("message/1"));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
+ ASSERT_THAT(results3.results(), SizeIs(1));
+ EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results3.results(0).joined_results(), SizeIs(2));
+ EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
+ Eq("message/1"));
+ }
+
+ // 4. Since document 3 doesn't exist, testing query = "foo" is not enough to
+ // verify the correctness of term index restoration. Instead, we have to check
+ // hits for "foo" should not be found in term index.
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(
+ Index::Options(GetIndexDir(),
+ /*index_merge_size=*/message.ByteSizeLong(),
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/8),
+ filesystem(), icing_filesystem()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
+ index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(doc_hit_info_iter->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexTruncateLiteIndexWithReindexing) {
+ // Test the following scenario: term lite index is *partially* ahead of
+ // document store. IcingSearchEngine should be able to recover term index.
+ // Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index should take effect and throw out the
+ // entire lite index. However, some valid data in term lite index were
+ // discarded together, so reindexing should still take place to recover
+ // them after truncating.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with a LiteIndex that will only allow a person and a
+ // message document before needing a merge.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(message.ByteSizeLong());
+ TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ // Add two message documents. These should get merged into the main index.
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ // Add one document. This one should get remain in the lite index.
+ message = DocumentBuilder(message).SetUri("message/3").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+ // 2. Manually add some data into term lite index and increment
+ // last_added_document_id, but don't merge into the main index. This will
+ // cause mismatched last_added_document_id with term index.
+ // - Document store: [0, 1, 2, 3]
+ // - Term index
+ // - Main index: [0, 1, 2]
+ // - Lite index: [3, 4]
+ // - Integer index: [0, 1, 2, 3]
+ // - Qualified id join index: [0, 1, 2, 3]
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(
+ Index::Options(GetIndexDir(),
+ /*index_merge_size=*/message.ByteSizeLong(),
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/8),
+ filesystem(), icing_filesystem()));
+ DocumentId original_last_added_doc_id = index->last_added_document_id();
+ index->set_last_added_document_id(original_last_added_doc_id + 1);
+ Index::Editor editor =
+ index->Edit(original_last_added_doc_id + 1, /*section_id=*/0,
+ TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+ }
+
+ // 3. Create the index again.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded. since we only call
+ // TruncateTo for term index.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory
+ // "*/integer_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(message.ByteSizeLong());
+ TestIcingSearchEngine icing(options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ // Truncating lite index not only deletes data ahead document store, but
+ // also deletes valid data. Therefore, we still have to replay documents and
+ // reindex.
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Verify term index works normally
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("body:consectetur");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results1 =
+ icing.Search(search_spec1, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results1.status(), ProtoIsOk());
+ EXPECT_THAT(results1.next_page_token(), Eq(0));
+ // Only the documents that were in the main index should be retrievable.
+ ASSERT_THAT(results1.results(), SizeIs(3));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results2.results(), SizeIs(3));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
+ ASSERT_THAT(results3.results(), SizeIs(1));
+ EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
+ EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
+ Eq("message/3"));
+ EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
+ Eq("message/1"));
+ }
+
+ // 4. Since document 4 doesn't exist, testing query = "foo" is not enough to
+ // verify the correctness of term index restoration. Instead, we have to check
+ // hits for "foo" should not be found in term index.
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(
+ Index::Options(GetIndexDir(),
+ /*index_merge_size=*/message.ByteSizeLong(),
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/8),
+ filesystem(), icing_filesystem()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
+ index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(doc_hit_info_iter->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexTruncateMainIndexWithoutReindexing) {
+ // Test the following scenario: term main index is *completely* ahead of
+ // document store. IcingSearchEngine should be able to recover term index.
+ // Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index should take effect and throw out the
+ // entire lite and main index. This should be sufficient to make term
+ // index consistent with document store (in this case, document store is
+ // empty as well), so reindexing should not take place.
+ // - "Clear()" should be called for integer index. It is a special case when
+ // document store has no document. Since there is no integer index storage
+ // sub directories (path_expr = "*/integer_index_dir/*"), nothing will be
+ // discarded.
+ // - "Clear()" should be called for qualified id join index. It is a special
+ // case when document store has no document.
+
+ // 1. Create an index with no document.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ }
+
+ // 2. Manually add some data into term lite index and increment
+ // last_added_document_id. Merge some of them into the main index and keep
+ // others in the lite index. This will cause mismatched document id with
+ // document store.
+ // - Document store: []
+ // - Term index
+ // - Main index: [0]
+ // - Lite index: [1]
+ // - Integer index: []
+ // - Qualified id join index: []
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(
+ // index merge size is not important here because we will manually
+ // invoke merge below.
+ Index::Options(GetIndexDir(), /*index_merge_size=*/100,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/50),
+ filesystem(), icing_filesystem()));
+ // Add hits for document 0 and merge.
+ ASSERT_THAT(index->last_added_document_id(), kInvalidDocumentId);
+ index->set_last_added_document_id(0);
+ Index::Editor editor =
+ index->Edit(/*document_id=*/0, /*section_id=*/0,
+ TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+ ICING_ASSERT_OK(index->Merge());
+
+ // Add hits for document 1 and don't merge.
+ index->set_last_added_document_id(1);
+ editor = index->Edit(/*document_id=*/1, /*section_id=*/0,
+ TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+ }
+
+ // 3. Create the index again. This should throw out the lite and main index.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded. since we only call
+ // TruncateTo for term index.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded. Even though
+ // Clear() was called, it shouldn't take effect since there is no storage
+ // sub directory ("*/integer_index_dir/*") and nothing will be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should never be discarded.
+ // Clear() was called and should discard and reinitialize the underlying
+ // mapper.
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(AtLeast(1));
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ // Since truncating main index is sufficient to make term index consistent
+ // with document store, replaying documents or reindexing shouldn't take
+ // place.
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ }
+
+ // 4. Since document 0, 1 don't exist, testing queries = "foo", "bar" are not
+ // enough to verify the correctness of term index restoration. Instead, we
+ // have to check hits for "foo", "bar" should not be found in term index.
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(Index::Options(GetIndexDir(), /*index_merge_size=*/100,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/50),
+ filesystem(), icing_filesystem()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
+ index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(doc_hit_info_iter->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ doc_hit_info_iter,
+ index->GetIterator("bar", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(doc_hit_info_iter->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexTruncateMainIndexWithReindexing) {
+ // Test the following scenario: term main index is *partially* ahead of
+ // document store. IcingSearchEngine should be able to recover term index.
+ // Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
+ // - In RestoreIndexIfNecessary():
+ // - "TruncateTo()" for term index should take effect and throw out the
+ // entire lite and main index. However, some valid data in term main index
+ // were discarded together, so reindexing should still take place to
+ // recover them after truncating.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with 3 message documents.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/3").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+ // 2. Manually add some data into term lite index and increment
+ // last_added_document_id. Merge some of them into the main index and keep
+ // others in the lite index. This will cause mismatched document id with
+ // document store.
+ // - Document store: [0, 1, 2, 3]
+ // - Term index
+ // - Main index: [0, 1, 2, 3, 4]
+ // - Lite index: [5]
+ // - Integer index: [0, 1, 2, 3]
+ // - Qualified id join index: [0, 1, 2, 3]
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(
+ Index::Options(GetIndexDir(),
+ /*index_merge_size=*/message.ByteSizeLong(),
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/8),
+ filesystem(), icing_filesystem()));
+ // Add hits for document 4 and merge.
+ DocumentId original_last_added_doc_id = index->last_added_document_id();
+ index->set_last_added_document_id(original_last_added_doc_id + 1);
+ Index::Editor editor =
+ index->Edit(original_last_added_doc_id + 1, /*section_id=*/0,
+ TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+ ICING_ASSERT_OK(index->Merge());
+
+ // Add hits for document 5 and don't merge.
+ index->set_last_added_document_id(original_last_added_doc_id + 2);
+ editor = index->Edit(original_last_added_doc_id + 2, /*section_id=*/0,
+ TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+ }
+
+ // 3. Create the index again. This should throw out the lite and main index
+ // and trigger index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded. since we only call
+ // TruncateTo for term index.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory
+ // "*/integer_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ // Truncating main index not only deletes data ahead document store, but
+ // also deletes valid data. Therefore, we still have to replay documents and
+ // reindex.
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Verify term index works normally
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("body:consectetur");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results1 =
+ icing.Search(search_spec1, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results1.status(), ProtoIsOk());
+ EXPECT_THAT(results1.next_page_token(), Eq(0));
+ // Only the first document should be retrievable.
+ ASSERT_THAT(results1.results(), SizeIs(3));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results2.results(), SizeIs(3));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
+ ASSERT_THAT(results3.results(), SizeIs(1));
+ EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
+ EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
+ Eq("message/3"));
+ EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
+ Eq("message/1"));
+ }
+
+ // 4. Since document 4, 5 don't exist, testing queries = "foo", "bar" are not
+ // enough to verify the correctness of term index restoration. Instead, we
+ // have to check hits for "foo", "bar" should not be found in term index.
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(Index::Options(GetIndexDir(), /*index_merge_size=*/100,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/50),
+ filesystem(), icing_filesystem()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
+ index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(doc_hit_info_iter->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ doc_hit_info_iter,
+ index->GetIterator("bar", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(doc_hit_info_iter->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexTruncateIntegerIndexWithoutReindexing) {
+ // Test the following scenario: integer index is *completely* ahead of
+ // document store. IcingSearchEngine should be able to recover integer index.
+ // Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" should be called for integer index and throw out all integer
+ // index storages, i.e. all storage sub directories (path_expr =
+ // "*/integer_index_dir/*") should be discarded. This should be sufficient
+ // to make integer index consistent with document store (in this case,
+ // document store is empty as well), so reindexing should not take place.
+ // - "Clear()" should be called for qualified id join index. It is a special
+ // case when document store has no document.
+
+ // 1. Create an index with no document.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ }
+
+ // 2. Manually add some data into integer index and increment
+ // last_added_document_id. This will cause mismatched document id with
+ // document store.
+ // - Document store: []
+ // - Term index: []
+ // - Integer index: [0]
+ // - Qualified id join index: []
+ {
+ Filesystem filesystem;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem, GetIntegerIndexDir(),
+ /*num_data_threshold_for_bucket_split=*/65536,
+ /*pre_mapping_fbv=*/false));
+ // Add hits for document 0.
+ ASSERT_THAT(integer_index->last_added_document_id(), kInvalidDocumentId);
+ integer_index->set_last_added_document_id(0);
+ std::unique_ptr<NumericIndex<int64_t>::Editor> editor = integer_index->Edit(
+ /*property_path=*/"indexableInteger", /*document_id=*/0,
+ /*section_id=*/0);
+ ICING_ASSERT_OK(editor->BufferKey(123));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+ }
+
+ // 3. Create the index again. This should trigger index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ // Clear() should be called to truncate integer index and thus storage sub
+ // directory (path_expr = "*/integer_index_dir/*") should be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(1);
+ // Ensure qualified id join index directory should never be discarded.
+ // Clear() was called and should discard and reinitialize the underlying
+ // mapper.
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(AtLeast(1));
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ // Since truncating integer index is sufficient to make it consistent with
+ // document store, replaying documents or reindexing shouldn't take place.
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Verify that numeric query safely wiped out the pre-existing hit for
+ // 'indexableInteger' == 123. Add a new document without that value for
+ // 'indexableInteger' that will take docid=0. If the integer index was not
+ // rebuilt correctly, then it will still have the previously added hit for
+ // 'indexableInteger' == 123 for docid 0 and incorrectly return this new
+ // doc in a query.
+ DocumentProto another_message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 456)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ EXPECT_THAT(icing.Put(another_message).status(), ProtoIsOk());
+ // Verify integer index works normally
+ SearchSpecProto search_spec;
+ search_spec.set_query("indexableInteger == 123");
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.results(), IsEmpty());
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexTruncateIntegerIndexWithReindexing) {
+ // Test the following scenario: integer index is *partially* ahead of document
+ // store. IcingSearchEngine should be able to recover integer index. Several
+ // additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" should be called for integer index and throw out all integer
+ // index storages, i.e. all storage sub directories (path_expr =
+ // "*/integer_index_dir/*") should be discarded. However, some valid data
+ // in integer index were discarded together, so reindexing should still
+ // take place to recover them after clearing.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with message 3 documents.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/3").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+ // 2. Manually add some data into integer index and increment
+ // last_added_document_id. This will cause mismatched document id with
+ // document store.
+ // - Document store: [0, 1, 2, 3]
+ // - Term index: [0, 1, 2, 3]
+ // - Integer index: [0, 1, 2, 3, 4]
+ // - Qualified id join index: [0, 1, 2, 3]
+ {
+ Filesystem filesystem;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem, GetIntegerIndexDir(),
+ /*num_data_threshold_for_bucket_split=*/65536,
+ /*pre_mapping_fbv=*/false));
+ // Add hits for document 4.
+ DocumentId original_last_added_doc_id =
+ integer_index->last_added_document_id();
+ integer_index->set_last_added_document_id(original_last_added_doc_id + 1);
+ std::unique_ptr<NumericIndex<int64_t>::Editor> editor = integer_index->Edit(
+ /*property_path=*/"indexableInteger",
+ /*document_id=*/original_last_added_doc_id + 1, /*section_id=*/0);
+ ICING_ASSERT_OK(editor->BufferKey(456));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+ }
+
+ // 3. Create the index again. This should trigger index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ // Clear() should be called to truncate integer index and thus storage sub
+ // directory (path_expr = "*/integer_index_dir/*") should be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(1);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Verify term index works normally
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("body:consectetur");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results1 =
+ icing.Search(search_spec1, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results1.status(), ProtoIsOk());
+ EXPECT_THAT(results1.next_page_token(), Eq(0));
+ // All documents should be retrievable.
+ ASSERT_THAT(results1.results(), SizeIs(3));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results2.results(), SizeIs(3));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
+ ASSERT_THAT(results3.results(), SizeIs(1));
+ EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
+ EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
+ Eq("message/3"));
+ EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
+ Eq("message/1"));
+
+ // Verify that numeric index safely wiped out the pre-existing hit for
+ // 'indexableInteger' == 456. Add a new document without that value for
+ // 'indexableInteger' that will take docid=0. If the integer index was not
+ // rebuilt correctly, then it will still have the previously added hit for
+ // 'indexableInteger' == 456 for docid 0 and incorrectly return this new
+ // doc in a query.
+ DocumentProto another_message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/4")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ EXPECT_THAT(icing.Put(another_message).status(), ProtoIsOk());
+ // Verify integer index works normally
+ SearchSpecProto search_spec;
+ search_spec.set_query("indexableInteger == 456");
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.results(), IsEmpty());
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexTruncateQualifiedIdJoinIndexWithoutReindexing) {
+ // Test the following scenario: qualified id join index is *completely* ahead
+ // of document store. IcingSearchEngine should be able to recover qualified id
+ // join index. Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" should be called for integer index. It is a special case when
+ // document store has no document. Since there is no integer index storage
+ // sub directories (path_expr = "*/integer_index_dir/*"), nothing will be
+ // discarded.
+ // - "Clear()" should be called for qualified id join index and throw out
+ // all data, i.e. discarding the underlying mapper (path_expr =
+ // "*/qualified_id_join_index_dir/*") and reinitialize. This should be
+ // sufficient to make qualified id join index consistent with document
+ // store (in this case, document store is empty as well), so reindexing
+ // should not take place.
+
+ // 1. Create an index with no document.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ }
+
+ // 2. Manually add some data into integer index and increment
+ // last_added_document_id. This will cause mismatched document id with
+ // document store.
+ // - Document store: []
+ // - Term index: []
+ // - Integer index: []
+ // - Qualified id join index: [0]
+ {
+ Filesystem filesystem;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index,
+ QualifiedIdJoinIndex::Create(filesystem, GetQualifiedIdJoinIndexDir(),
+ /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false));
+ // Add data for document 0.
+ ASSERT_THAT(qualified_id_join_index->last_added_document_id(),
+ kInvalidDocumentId);
+ qualified_id_join_index->set_last_added_document_id(0);
+ ICING_ASSERT_OK(qualified_id_join_index->Put(
+ DocJoinInfo(/*document_id=*/0, /*joinable_property_id=*/0),
+ /*ref_qualified_id_str=*/"namespace#person"));
+ }
+
+ // 3. Create the index again. This should trigger index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded. Even though
+ // Clear() was called, it shouldn't take effect since there is no storage
+ // sub directory ("*/integer_index_dir/*") and nothing will be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ // Clear() should be called to truncate qualified id join index and thus
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(AtLeast(1));
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ // Since truncating qualified id join index is sufficient to make it
+ // consistent with document store, replaying documents or reindexing
+ // shouldn't take place.
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ }
+
+ // 4. Since document 0 doesn't exist, testing join query is not enough to
+ // verify the correctness of qualified id join index restoration. Instead, we
+ // have to check the previously added data should not be found in qualified id
+ // join index.
+ {
+ Filesystem filesystem;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index,
+ QualifiedIdJoinIndex::Create(filesystem, GetQualifiedIdJoinIndexDir(),
+ /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false));
+ EXPECT_THAT(qualified_id_join_index->Get(
+ DocJoinInfo(/*document_id=*/0, /*joinable_property_id=*/0)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexTruncateQualifiedIdJoinIndexWithReindexing) {
+ // Test the following scenario: qualified id join index is *partially* ahead
+ // of document store. IcingSearchEngine should be able to recover qualified id
+ // join index. Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ // - "Clear()" should be called for qualified id join index and throw out
+ // all data, i.e. discarding the underlying mapper (path_expr =
+ // "*/qualified_id_join_index_dir/*") and reinitialize. However, some
+ // valid data in qualified id join index were discarded together, so
+ // reindexing should still take place to recover them after clearing.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with message 3 documents.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/3").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+ DocJoinInfo additional_data_key;
+ // 2. Manually add some data into qualified id join index and increment
+ // last_added_document_id. This will cause mismatched document id with
+ // document store.
+ // - Document store: [0, 1, 2, 3]
+ // - Term index: [0, 1, 2, 3]
+ // - Integer index: [0, 1, 2, 3]
+ // - Qualified id join index: [0, 1, 2, 3, 4]
+ {
+ Filesystem filesystem;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index,
+ QualifiedIdJoinIndex::Create(filesystem, GetQualifiedIdJoinIndexDir(),
+ /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false));
+ // Add data for document 4.
+ DocumentId original_last_added_doc_id =
+ qualified_id_join_index->last_added_document_id();
+ qualified_id_join_index->set_last_added_document_id(
+ original_last_added_doc_id + 1);
+ additional_data_key =
+ DocJoinInfo(/*document_id=*/original_last_added_doc_id + 1,
+ /*joinable_property_id=*/0);
+ ICING_ASSERT_OK(qualified_id_join_index->Put(
+ additional_data_key,
+ /*ref_qualified_id_str=*/"namespace#person"));
+ }
+
+ // 3. Create the index again. This should trigger index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory
+ // "*/integer_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ // Clear() should be called to truncate qualified id join index and thus
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(AtLeast(1));
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+
+ // Verify term index works normally
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("body:consectetur");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results1 =
+ icing.Search(search_spec1, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results1.status(), ProtoIsOk());
+ EXPECT_THAT(results1.next_page_token(), Eq(0));
+ // All documents should be retrievable.
+ ASSERT_THAT(results1.results(), SizeIs(3));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results2.results(), SizeIs(3));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+
+ // Add document 4 without "senderQualifiedId". If join index is not rebuilt
+ // correctly, then it will still have the previously added senderQualifiedId
+ // for document 4 and include document 4 incorrectly in the right side.
+ DocumentProto another_message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/4")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ EXPECT_THAT(icing.Put(another_message).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
+ ASSERT_THAT(results3.results(), SizeIs(1));
+ EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
+ EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
+ Eq("message/3"));
+ EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
+ Eq("message/1"));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ DocumentWithNoIndexedPropertyDoesntCauseRestoreIndex) {
+ // 1. Create an index with a single document in it that has no indexed
+ // content.
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Set a schema for a single type that has no indexed properties.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("unindexedField")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN,
+ TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("unindexedInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ // Add a document that contains no indexed properties.
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("unindexedField",
+ "Don't you dare search over this!")
+ .AddInt64Property("unindexedInteger", -123)
+ .Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ // 2. Create the index again. This should NOT trigger a recovery of any kind.
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ EXPECT_THAT(init_result.status(), ProtoIsOk());
+ EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ init_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ DocumentWithNoValidIndexedContentDoesntCauseRestoreIndex) {
+ // 1. Create an index with a single document in it that has no valid indexed
+ // tokens in its content.
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ // Set a schema for a single type that has no term, integer, join indexed
+ // contents.
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ // Add a document that contains:
+ // - No valid indexed string content - just punctuation
+ // - No integer content - since it is an optional property
+ // - No qualified id content - since it is an optional property
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "?...!")
+ .Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ // 2. Create the index again. This should NOT trigger a recovery of any kind.
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ EXPECT_THAT(init_result.status(), ProtoIsOk());
+ EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ init_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogFunctionLatency) {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.initialize_stats().latency_ms(), Eq(10));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogNumberOfDocuments) {
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("icing", "fake_type/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 456)
+ .Build();
+
+ {
+ // Initialize and put a document.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(),
+ Eq(0));
+
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(),
+ Eq(1));
+
+ // Put another document.
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(),
+ Eq(2));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldNotLogRecoveryCauseForFirstTimeInitialize) {
+ // Even though the fake timer will return 10, all the latency numbers related
+ // to recovery / restoration should be 0 during the first-time initialization.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCausePartialDataLoss) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .Build();
+
+ {
+ // Initialize and put a document.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ {
+ // Append a non-checksummed document. This will mess up the checksum of the
+ // proto log, forcing it to rewind and later return a DATA_LOSS error.
+ const std::string serialized_document = document.SerializeAsString();
+ const std::string document_log_file = absl_ports::StrCat(
+ GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
+
+ int64_t file_size = filesystem()->GetFileSize(document_log_file.c_str());
+ filesystem()->PWrite(document_log_file.c_str(), file_size,
+ serialized_document.data(),
+ serialized_document.size());
+ }
+
+ {
+ // Document store will rewind to previous checkpoint. The cause should be
+ // DATA_LOSS and the data status should be PARTIAL_LOSS.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::PARTIAL_LOSS));
+ // Since document store rewinds to previous checkpoint, last stored doc id
+ // will be consistent with last added document ids in term/integer indices,
+ // so there will be no index restoration.
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .index_restoration_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCauseCompleteDataLoss) {
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .Build();
+
+ const std::string document_log_file = absl_ports::StrCat(
+ GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
+ int64_t corruptible_offset;
+
+ {
+ // Initialize and put a document.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // There's some space at the beginning of the file (e.g. header, kmagic,
+ // etc) that is necessary to initialize the FileBackedProtoLog. We can't
+ // corrupt that region, so we need to figure out the offset at which
+ // documents will be written to - which is the file size after
+ // initialization.
+ corruptible_offset = filesystem()->GetFileSize(document_log_file.c_str());
+
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ }
+
+ {
+ // "Corrupt" the content written in the log. Make the corrupt document
+ // smaller than our original one so we don't accidentally write past our
+ // file.
+ DocumentProto document =
+ DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build();
+ std::string serialized_document = document.SerializeAsString();
+ ASSERT_TRUE(filesystem()->PWrite(
+ document_log_file.c_str(), corruptible_offset,
+ serialized_document.data(), serialized_document.size()));
+
+ PortableFileBackedProtoLog<DocumentWrapper>::Header header =
+ ReadDocumentLogHeader(*filesystem(), document_log_file);
+
+ // Set dirty bit to true to reflect that something changed in the log.
+ header.SetDirtyFlag(true);
+ header.SetHeaderChecksum(header.CalculateHeaderChecksum());
+
+ WriteDocumentLogHeader(*filesystem(), document_log_file, header);
+ }
+
+ {
+ // Document store will completely rewind. The cause should be DATA_LOSS and
+ // the data status should be COMPLETE_LOSS.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::COMPLETE_LOSS));
+ // The complete rewind of ground truth causes us to clear the index, but
+ // that's not considered a restoration.
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .index_restoration_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCauseIndexInconsistentWithGroundTruth) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .Build();
+ {
+ // Initialize and put a document.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ {
+ // Delete and re-initialize an empty index file to trigger
+ // RestoreIndexIfNeeded.
+ std::string idx_subdir = GetIndexDir() + "/idx";
+ ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(Index::Options(GetIndexDir(),
+ /*index_merge_size=*/100,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/50),
+ filesystem(), icing_filesystem()));
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ {
+ // Index is empty but ground truth is not. Index should be restored due to
+ // the inconsistency.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .index_restoration_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+ }
+}
+
+TEST_F(
+ IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCauseIntegerIndexInconsistentWithGroundTruth) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .Build();
+ {
+ // Initialize and put a document.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ {
+ // Delete the integer index file to trigger RestoreIndexIfNeeded.
+ std::string integer_index_dir = GetIntegerIndexDir();
+ filesystem()->DeleteDirectoryRecursively(integer_index_dir.c_str());
+ }
+
+ {
+ // Index is empty but ground truth is not. Index should be restored due to
+ // the inconsistency.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .index_restoration_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+ }
+}
+
+TEST_F(
+ IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCauseQualifiedIdJoinIndexInconsistentWithGroundTruth) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ {
+ // Initialize and put documents.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+ {
+ // Delete the qualified id join index file to trigger RestoreIndexIfNeeded.
+ std::string qualified_id_join_index_dir = GetQualifiedIdJoinIndexDir();
+ filesystem()->DeleteDirectoryRecursively(
+ qualified_id_join_index_dir.c_str());
+ }
+
+ {
+ // Index is empty but ground truth is not. Index should be restored due to
+ // the inconsistency.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .index_restoration_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCauseSchemaChangesOutOfSync) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .Build();
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ {
+ // Initialize and put one document.
+ IcingSearchEngine icing(options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ {
+ // Simulate a schema change where power is lost after the schema is written.
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder(CreateMessageSchemaTypeConfig())
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ // Write the marker file
+ std::string marker_filepath =
+ absl_ports::StrCat(options.base_dir(), "/set_schema_marker");
+ ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str()));
+ ASSERT_TRUE(sfd.is_valid());
+
+ // Write the new schema
+ FakeClock fake_clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ new_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+ }
+
+ {
+ // Both document store and index should be recovered from checksum mismatch.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .index_restoration_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+ }
+
+ {
+ // No recovery should be needed.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .index_restoration_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCauseIndexIOError) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .Build();
+ {
+ // Initialize and put one document.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ std::string lite_index_buffer_file_path =
+ absl_ports::StrCat(GetIndexDir(), "/idx/lite.hb");
+ auto mock_icing_filesystem = std::make_unique<IcingMockFilesystem>();
+ EXPECT_CALL(*mock_icing_filesystem, OpenForWrite(_))
+ .WillRepeatedly(DoDefault());
+ // This fails Index::Create() once.
+ EXPECT_CALL(*mock_icing_filesystem,
+ OpenForWrite(Eq(lite_index_buffer_file_path)))
+ .WillOnce(Return(-1))
+ .WillRepeatedly(DoDefault());
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::move(mock_icing_filesystem),
+ std::move(fake_clock), GetTestJniCache());
+
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCauseIntegerIndexIOError) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .Build();
+ {
+ // Initialize and put one document.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ std::string integer_index_metadata_file =
+ absl_ports::StrCat(GetIntegerIndexDir(), "/integer_index.m");
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, OpenForWrite(_)).WillRepeatedly(DoDefault());
+ // This fails IntegerIndex::Create() once.
+ EXPECT_CALL(*mock_filesystem, OpenForWrite(Eq(integer_index_metadata_file)))
+ .WillOnce(Return(-1))
+ .WillRepeatedly(DoDefault());
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCauseQualifiedIdJoinIndexIOError) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ {
+ // Initialize and put documents.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+ std::string qualified_id_join_index_metadata_file =
+ absl_ports::StrCat(GetQualifiedIdJoinIndexDir(), "/metadata");
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, PRead(A<const char*>(), _, _, _))
+ .WillRepeatedly(DoDefault());
+ // This fails QualifiedIdJoinIndex::Create() once.
+ EXPECT_CALL(
+ *mock_filesystem,
+ PRead(Matcher<const char*>(Eq(qualified_id_join_index_metadata_file)), _,
+ _, _))
+ .WillOnce(Return(false))
+ .WillRepeatedly(DoDefault());
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCauseDocStoreIOError) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .Build();
+ {
+ // Initialize and put one document.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ std::string document_store_header_file_path =
+ absl_ports::StrCat(GetDocumentDir(), "/document_store_header");
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, Read(A<const char*>(), _, _))
+ .WillRepeatedly(DoDefault());
+ // This fails DocumentStore::InitializeDerivedFiles() once.
+ EXPECT_CALL(
+ *mock_filesystem,
+ Read(Matcher<const char*>(Eq(document_store_header_file_path)), _, _))
+ .WillOnce(Return(false))
+ .WillRepeatedly(DoDefault());
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCauseSchemaStoreIOError) {
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ }
+
+ {
+ // Delete the schema store type mapper to trigger an I/O error.
+ std::string schema_store_header_file_path =
+ GetSchemaDir() + "/schema_type_mapper";
+ ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(
+ schema_store_header_file_path.c_str()));
+ }
+
+ {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .index_restoration_latency_ms(),
+ Eq(0));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogNumberOfSchemaTypes) {
+ {
+ // Initialize an empty storage.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ // There should be 0 schema types.
+ EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(),
+ Eq(0));
+
+ // Set a schema with one type config.
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ // There should be 1 schema type.
+ EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(),
+ Eq(1));
+
+ // Create and set a schema with two type configs: Email and Message.
+ SchemaProto schema = CreateEmailSchema();
+ *schema.add_types() = CreateMessageSchemaTypeConfig();
+
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(),
+ Eq(2));
+ }
+}
+
+class IcingSearchEngineInitializationVersionChangeTest
+ : public IcingSearchEngineInitializationTest,
+ public ::testing::WithParamInterface<version_util::VersionInfo> {};
+
+TEST_P(IcingSearchEngineInitializationVersionChangeTest,
+ RecoverFromVersionChange) {
+ // TODO(b/280697513): test backup schema migration
+ // Test the following scenario: version change. All derived data should be
+ // rebuilt. We test this by manually adding some invalid derived data and
+ // verifying they're removed due to rebuild.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("namespace", "person/1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("namespace", "person/2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message")
+ .SetSchema("Message")
+ .AddStringProperty("body", "correct message")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person/1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ {
+ // Initializes folder and schema, index person1 and person2
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ {
+ // Manually:
+ // - Put message into DocumentStore
+ // - But add some incorrect data for message into 3 indices
+ // - Change version file
+ //
+ // These will make sure last_added_document_id is consistent with
+ // last_stored_document_id, so if Icing didn't handle version change
+ // correctly, then the index won't be rebuilt.
+ FakeClock fake_clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
+
+ // Put message into DocumentStore
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ filesystem(), GetDocumentDir(), &fake_clock, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, document_store->Put(message));
+
+ // Index doc_id with incorrect data
+ Index::Options options(GetIndexDir(), /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(options, filesystem(), icing_filesystem()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(*filesystem(), GetIntegerIndexDir(),
+ /*num_data_threshold_for_bucket_split=*/65536,
+ /*pre_mapping_fbv=*/false));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index,
+ QualifiedIdJoinIndex::Create(
+ *filesystem(), GetQualifiedIdJoinIndexDir(),
+ /*pre_mapping_fbv=*/false, /*use_persistent_hash_map=*/false));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<StringSectionIndexingHandler>
+ string_section_indexing_handler,
+ StringSectionIndexingHandler::Create(&fake_clock, normalizer_.get(),
+ index.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerSectionIndexingHandler>
+ integer_section_indexing_handler,
+ IntegerSectionIndexingHandler::Create(
+ &fake_clock, integer_index.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<QualifiedIdJoinIndexingHandler>
+ qualified_id_join_indexing_handler,
+ QualifiedIdJoinIndexingHandler::Create(
+ &fake_clock, qualified_id_join_index.get()));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
+ handlers.push_back(std::move(string_section_indexing_handler));
+ handlers.push_back(std::move(integer_section_indexing_handler));
+ handlers.push_back(std::move(qualified_id_join_indexing_handler));
+ IndexProcessor index_processor(std::move(handlers), &fake_clock);
+
+ DocumentProto incorrect_message =
+ DocumentBuilder()
+ .SetKey("namespace", "message")
+ .SetSchema("Message")
+ .AddStringProperty("body", "wrong message")
+ .AddInt64Property("indexableInteger", 456)
+ .AddStringProperty("senderQualifiedId", "namespace#person/2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store.get(), lang_segmenter_.get(),
+ std::move(incorrect_message)));
+ ICING_ASSERT_OK(index_processor.IndexDocument(tokenized_document, doc_id));
+
+ // Change existing data's version file
+ const version_util::VersionInfo& existing_version_info = GetParam();
+ ICING_ASSERT_OK(version_util::WriteVersion(
+ *filesystem(), GetVersionFilename(), existing_version_info));
+ }
+
+ // Mock filesystem to observe and check the behavior of all indices.
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ EXPECT_THAT(initialize_result.status(), ProtoIsOk());
+ // Index Restoration should be triggered here. Incorrect data should be
+ // deleted and correct data of message should be indexed.
+ EXPECT_THAT(
+ initialize_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::VERSION_CHANGED));
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::VERSION_CHANGED));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::VERSION_CHANGED));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::VERSION_CHANGED));
+
+ // Manually check version file
+ ICING_ASSERT_OK_AND_ASSIGN(
+ version_util::VersionInfo version_info_after_init,
+ version_util::ReadVersion(*filesystem(), GetVersionFilename(),
+ GetIndexDir()));
+ EXPECT_THAT(version_info_after_init.version, Eq(version_util::kVersion));
+ EXPECT_THAT(version_info_after_init.max_version,
+ Eq(std::max(version_util::kVersion, GetParam().max_version)));
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ message;
+
+ // Verify term search
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("body:correct");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto search_result_proto1 =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto search_result_google::protobuf =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_google::protobuf, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `body:message` based on the child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:message");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto expected_join_search_result_proto;
+ expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ // Person 1 with message
+ SearchResultProto::ResultProto* result_proto =
+ expected_join_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person1;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message;
+ // Person 2 without children
+ *expected_join_search_result_proto.mutable_results()
+ ->Add()
+ ->mutable_document() = person2;
+
+ SearchResultProto search_result_proto3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
+ EXPECT_THAT(search_result_proto3, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_search_result_proto));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ IcingSearchEngineInitializationVersionChangeTest,
+ IcingSearchEngineInitializationVersionChangeTest,
+ testing::Values(
+ // Manually change existing data set's version to kVersion + 1. When
+ // initializing, it will detect "rollback".
+ version_util::VersionInfo(
+ /*version_in=*/version_util::kVersion + 1,
+ /*max_version_in=*/version_util::kVersion + 1),
+
+ // Currently we don't have any "upgrade" that requires rebuild derived
+ // files, so skip this case until we have a case for it.
+
+ // Manually change existing data set's version to kVersion - 1 and
+ // max_version to kVersion. When initializing, it will detect "roll
+ // forward".
+ version_util::VersionInfo(
+ /*version_in=*/version_util::kVersion - 1,
+ /*max_version_in=*/version_util::kVersion),
+
+ // Manually change existing data set's version to 0 and max_version to
+ // 0. When initializing, it will detect "version 0 upgrade".
+ //
+ // Note: in reality, version 0 won't be written into version file, but
+ // it is ok here since it is hack to simulate version 0 situation.
+ version_util::VersionInfo(
+ /*version_in=*/0,
+ /*max_version_in=*/0),
+
+ // Manually change existing data set's version to 0 and max_version to
+ // kVersion. When initializing, it will detect "version 0 roll forward".
+ //
+ // Note: in reality, version 0 won't be written into version file, but
+ // it is ok here since it is hack to simulate version 0 situation.
+ version_util::VersionInfo(
+ /*version_in=*/0,
+ /*max_version_in=*/version_util::kVersion)));
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine_optimize_test.cc b/icing/icing-search-engine_optimize_test.cc
new file mode 100644
index 0000000..3127171
--- /dev/null
+++ b/icing/icing-search-engine_optimize_test.cc
@@ -0,0 +1,1843 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <unistd.h>
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/icing-search-engine.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/join/join-processor.h"
+#include "icing/portable/endian.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/reset.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/query/query-features.h"
+#include "icing/schema-builder.h"
+#include "icing/store/document-log-creator.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::Eq;
+using ::testing::Ge;
+using ::testing::Gt;
+using ::testing::HasSubstr;
+using ::testing::Lt;
+using ::testing::Return;
+
+// For mocking purpose, we allow tests to provide a custom Filesystem.
+class TestIcingSearchEngine : public IcingSearchEngine {
+ public:
+ TestIcingSearchEngine(const IcingSearchEngineOptions& options,
+ std::unique_ptr<const Filesystem> filesystem,
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
+ std::unique_ptr<Clock> clock,
+ std::unique_ptr<JniCache> jni_cache)
+ : IcingSearchEngine(options, std::move(filesystem),
+ std::move(icing_filesystem), std::move(clock),
+ std::move(jni_cache)) {}
+};
+
+std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; }
+
+// This test is meant to cover all tests relating to
+// IcingSearchEngine::Optimize.
+class IcingSearchEngineOptimizeTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ // If we've specified using the reverse-JNI method for segmentation (i.e.
+ // not ICU), then we won't have the ICU data file included to set up.
+ // Technically, we could choose to use reverse-JNI for segmentation AND
+ // include an ICU data file, but that seems unlikely and our current BUILD
+ // setup doesn't do this.
+ // File generated via icu_data_file rule in //icing/BUILD.
+ std::string icu_data_file_path =
+ GetTestFilePath("icing/icu.dat");
+ ICING_ASSERT_OK(
+ icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
+ }
+ filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ const Filesystem* filesystem() const { return &filesystem_; }
+
+ private:
+ Filesystem filesystem_;
+};
+
+// Non-zero value so we don't override it to be the current time
+constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;
+
+IcingSearchEngineOptions GetDefaultIcingOptions() {
+ IcingSearchEngineOptions icing_options;
+ icing_options.set_base_dir(GetTestBaseDir());
+ return icing_options;
+}
+
+ScoringSpecProto GetDefaultScoringSpec() {
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ return scoring_spec;
+}
+
+// TODO(b/272145329): create SearchSpecBuilder, JoinSpecBuilder,
+// SearchResultProtoBuilder and ResultProtoBuilder for unit tests and build all
+// instances by them.
+
+TEST_F(IcingSearchEngineOptimizeTest,
+ AllPageTokensShouldBeInvalidatedAfterOptimization) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+
+ // Searches and gets the first page, 1 result
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken));
+ uint64_t next_page_token = search_result_proto.next_page_token();
+ // Since the token is a random number, we don't need to verify
+ expected_search_result_proto.set_next_page_token(next_page_token);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ // Now document1 is still to be fetched.
+
+ OptimizeResultProto optimize_result_proto;
+ optimize_result_proto.mutable_status()->set_code(StatusProto::OK);
+ optimize_result_proto.mutable_status()->set_message("");
+ OptimizeResultProto actual_result = icing.Optimize();
+ actual_result.clear_optimize_stats();
+ ASSERT_THAT(actual_result, EqualsProto(optimize_result_proto));
+
+ // Tries to fetch the second page, no results since all tokens have been
+ // invalidated during Optimize()
+ expected_search_result_proto.clear_results();
+ expected_search_result_proto.clear_next_page_token();
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest, OptimizationShouldRemoveDeletedDocs) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace, uri1) not found.");
+ {
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ // Deletes document1
+ ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk());
+ const std::string document_log_path =
+ icing_options.base_dir() + "/document_dir/" +
+ DocumentLogCreator::GetDocumentLogFilename();
+ int64_t document_log_size_before =
+ filesystem()->GetFileSize(document_log_path.c_str());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+ int64_t document_log_size_after =
+ filesystem()->GetFileSize(document_log_path.c_str());
+
+ // Validates that document can't be found right after Optimize()
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ // Validates that document is actually removed from document log
+ EXPECT_THAT(document_log_size_after, Lt(document_log_size_before));
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest,
+ OptimizationShouldDeleteTemporaryDirectory) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ // Create a tmp dir that will be used in Optimize() to swap files,
+ // this validates that any tmp dirs will be deleted before using.
+ const std::string tmp_dir =
+ icing_options.base_dir() + "/document_dir_optimize_tmp";
+
+ const std::string tmp_file = tmp_dir + "/file";
+ ASSERT_TRUE(filesystem()->CreateDirectory(tmp_dir.c_str()));
+ ScopedFd fd(filesystem()->OpenForWrite(tmp_file.c_str()));
+ ASSERT_TRUE(fd.is_valid());
+ ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
+ fd.reset();
+
+ EXPECT_THAT(icing.Optimize().status(), ProtoIsOk());
+
+ EXPECT_FALSE(filesystem()->DirectoryExists(tmp_dir.c_str()));
+ EXPECT_FALSE(filesystem()->FileExists(tmp_file.c_str()));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest, GetOptimizeInfoHasCorrectStats) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(500)
+ .Build();
+
+ {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetSystemTimeMilliseconds(1000);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Just initialized, nothing is optimizable yet.
+ GetOptimizeInfoResultProto optimize_info = icing.GetOptimizeInfo();
+ EXPECT_THAT(optimize_info.status(), ProtoIsOk());
+ EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
+ EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
+ EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0));
+
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ // Only have active documents, nothing is optimizable yet.
+ optimize_info = icing.GetOptimizeInfo();
+ EXPECT_THAT(optimize_info.status(), ProtoIsOk());
+ EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
+ EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
+ EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0));
+
+ // Deletes document1
+ ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk());
+
+ optimize_info = icing.GetOptimizeInfo();
+ EXPECT_THAT(optimize_info.status(), ProtoIsOk());
+ EXPECT_THAT(optimize_info.optimizable_docs(), Eq(1));
+ EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Gt(0));
+ EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0));
+ int64_t first_estimated_optimizable_bytes =
+ optimize_info.estimated_optimizable_bytes();
+
+ // Add a second document, but it'll be expired since the time (1000) is
+ // greater than the document's creation timestamp (100) + the document's ttl
+ // (500)
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ optimize_info = icing.GetOptimizeInfo();
+ EXPECT_THAT(optimize_info.status(), ProtoIsOk());
+ EXPECT_THAT(optimize_info.optimizable_docs(), Eq(2));
+ EXPECT_THAT(optimize_info.estimated_optimizable_bytes(),
+ Gt(first_estimated_optimizable_bytes));
+ EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0));
+
+ // Optimize
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+ }
+
+ {
+ // Recreate with new time
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetSystemTimeMilliseconds(5000);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Nothing is optimizable now that everything has been optimized away.
+ GetOptimizeInfoResultProto optimize_info = icing.GetOptimizeInfo();
+ EXPECT_THAT(optimize_info.status(), ProtoIsOk());
+ EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
+ EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
+ EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(4000));
+ }
+}
+
+TEST_F(IcingSearchEngineOptimizeTest, GetAndPutShouldWorkAfterOptimization) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body three")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document4 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri4")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body four")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document5 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri5")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body five")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Delete("namespace", "uri2").status(), ProtoIsOk());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+
+ // Validates that Get() and Put() are good right after Optimize()
+ *expected_get_result_proto.mutable_document() = document1;
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ EXPECT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
+ .status()
+ .code(),
+ Eq(StatusProto::NOT_FOUND));
+ *expected_get_result_proto.mutable_document() = document3;
+ EXPECT_THAT(
+ icing.Get("namespace", "uri3", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ EXPECT_THAT(icing.Put(document4).status(), ProtoIsOk());
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ *expected_get_result_proto.mutable_document() = document1;
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ EXPECT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
+ .status()
+ .code(),
+ Eq(StatusProto::NOT_FOUND));
+ *expected_get_result_proto.mutable_document() = document3;
+ EXPECT_THAT(
+ icing.Get("namespace", "uri3", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ *expected_get_result_proto.mutable_document() = document4;
+ EXPECT_THAT(
+ icing.Get("namespace", "uri4", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ EXPECT_THAT(icing.Put(document5).status(), ProtoIsOk());
+}
+
+TEST_F(IcingSearchEngineOptimizeTest,
+ GetAndPutShouldWorkAfterOptimizationWithEmptyDocuments) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto empty_document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto empty_document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto empty_document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(std::move(schema)).status(), ProtoIsOk());
+
+ ASSERT_THAT(icing.Put(empty_document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(empty_document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Delete("namespace", "uri2").status(), ProtoIsOk());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+
+ // Validates that Get() and Put() are good right after Optimize()
+ *expected_get_result_proto.mutable_document() = empty_document1;
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ EXPECT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
+ .status()
+ .code(),
+ Eq(StatusProto::NOT_FOUND));
+ EXPECT_THAT(icing.Put(empty_document3).status(), ProtoIsOk());
+}
+
+TEST_F(IcingSearchEngineOptimizeTest, DeleteShouldWorkAfterOptimization) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+
+ // Validates that Delete() works right after Optimize()
+ EXPECT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(
+ StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace, uri1) not found.");
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ expected_get_result_proto.mutable_status()->clear_message();
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.Delete("namespace", "uri2").status(), ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace, uri1) not found.");
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace, uri2) not found.");
+ EXPECT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest, OptimizationFailureUninitializesIcing) {
+ // Setup filesystem to fail
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ bool just_swapped_files = false;
+ auto create_dir_lambda = [this, &just_swapped_files](const char* dir_name) {
+ if (just_swapped_files) {
+ // We should fail the first call immediately after swapping files.
+ just_swapped_files = false;
+ return false;
+ }
+ return filesystem()->CreateDirectoryRecursively(dir_name);
+ };
+ ON_CALL(*mock_filesystem, CreateDirectoryRecursively)
+ .WillByDefault(create_dir_lambda);
+
+ auto swap_lambda = [&just_swapped_files](const char* first_dir,
+ const char* second_dir) {
+ just_swapped_files = true;
+ return false;
+ };
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ ON_CALL(*mock_filesystem, SwapFiles(HasSubstr("document_dir_optimize_tmp"),
+ HasSubstr("document_dir")))
+ .WillByDefault(swap_lambda);
+ TestIcingSearchEngine icing(options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // The mocks should cause an unrecoverable error during Optimize - returning
+ // INTERNAL.
+ ASSERT_THAT(icing.Optimize().status(), ProtoStatusIs(StatusProto::INTERNAL));
+
+ // Ordinary operations should fail safely.
+ SchemaProto simple_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("type0").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop0")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ DocumentProto simple_doc = DocumentBuilder()
+ .SetKey("namespace0", "uri0")
+ .SetSchema("type0")
+ .AddStringProperty("prop0", "foo")
+ .Build();
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("foo");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ ResultSpecProto result_spec;
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+
+ EXPECT_THAT(icing.SetSchema(simple_schema).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.Put(simple_doc).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing
+ .Get(simple_doc.namespace_(), simple_doc.uri(),
+ GetResultSpecProto::default_instance())
+ .status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+
+ // Reset should get icing back to a safe (empty) and working state.
+ EXPECT_THAT(icing.Reset().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(simple_schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(simple_doc).status(), ProtoIsOk());
+ EXPECT_THAT(icing
+ .Get(simple_doc.namespace_(), simple_doc.uri(),
+ GetResultSpecProto::default_instance())
+ .status(),
+ ProtoIsOk());
+ EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(),
+ ProtoIsOk());
+}
+
+TEST_F(IcingSearchEngineOptimizeTest, SetSchemaShouldWorkAfterOptimization) {
+ // Creates 3 test schemas
+ SchemaProto schema1 =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ SchemaProto schema2 = SchemaProto(schema1);
+ *schema2.mutable_types(0)->add_properties() =
+ PropertyConfigBuilder()
+ .SetName("property2")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+
+ SchemaProto schema3 = SchemaProto(schema2);
+ *schema3.mutable_types(0)->add_properties() =
+ PropertyConfigBuilder()
+ .SetName("property3")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+
+ // Validates that SetSchema() works right after Optimize()
+ EXPECT_THAT(icing.SetSchema(schema2).status(), ProtoIsOk());
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema3).status(), ProtoIsOk());
+}
+
+TEST_F(IcingSearchEngineOptimizeTest, SearchShouldWorkAfterOptimization) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ SearchSpecProto search_spec1;
+ search_spec1.set_term_match_type(TermMatchType::PREFIX);
+ search_spec1.set_query("m");
+
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+
+ // Validates that Search() works right after Optimize()
+ // Term search
+ SearchResultProto search_result_proto1 =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Numeric (integer) search
+ SearchResultProto search_result_google::protobuf =
+ icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_google::protobuf, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Verify term search
+ SearchResultProto search_result_proto1 =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search
+ SearchResultProto search_result_google::protobuf =
+ icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_google::protobuf, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest,
+ JoinShouldWorkAfterOptimizationDeleteParent) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person two")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ DocumentProto message1 =
+ DocumentBuilder()
+ .SetKey("namespace", "message1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .AddStringProperty("senderQualifiedId", "namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message2 =
+ DocumentBuilder()
+ .SetKey("namespace", "message2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .AddStringProperty("senderQualifiedId", "namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message3 =
+ DocumentBuilder()
+ .SetKey("namespace", "message3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body three")
+ .AddStringProperty("senderQualifiedId", "namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // Prepare join search spec to join a query for `name:person` with a child
+ // query for `body:message` based on the child's `senderQualifiedId` field.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:message");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ // Person1 is going to be deleted below. Only person2 which is joined with
+ // message3 should match the query.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person2;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message3;
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message3).status(), ProtoIsOk());
+ // Delete parent document: person1
+ ASSERT_THAT(icing.Delete("namespace", "person1").status(), ProtoIsOk());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+
+ // Validates that join search query works right after Optimize()
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest,
+ JoinShouldWorkAfterOptimizationDeleteChild) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person two")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ DocumentProto message1 =
+ DocumentBuilder()
+ .SetKey("namespace", "message1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .AddStringProperty("senderQualifiedId", "namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message2 =
+ DocumentBuilder()
+ .SetKey("namespace", "message2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .AddStringProperty("senderQualifiedId", "namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message3 =
+ DocumentBuilder()
+ .SetKey("namespace", "message3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body three")
+ .AddStringProperty("senderQualifiedId", "namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // Prepare join search spec to join a query for `name:person` with a child
+ // query for `body:message` based on the child's `senderQualifiedId` field.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:message");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ // Message1 and message3 are going to be deleted below. Both person1 and
+ // person2 should be included even though person2 has no child (since we're
+ // doing left join).
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto1 =
+ expected_search_result_proto.mutable_results()->Add();
+ *result_proto1->mutable_document() = person1;
+ *result_proto1->mutable_joined_results()->Add()->mutable_document() =
+ message2;
+ SearchResultProto::ResultProto* result_google::protobuf =
+ expected_search_result_proto.mutable_results()->Add();
+ *result_google::protobuf->mutable_document() = person2;
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message3).status(), ProtoIsOk());
+ // Delete child documents: message1 and message3
+ ASSERT_THAT(icing.Delete("namespace", "message1").status(), ProtoIsOk());
+ ASSERT_THAT(icing.Delete("namespace", "message3").status(), ProtoIsOk());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+
+ // Validates that join search query works right after Optimize()
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest,
+ IcingShouldWorkFineIfOptimizationIsAborted) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ DocumentProto message1 =
+ DocumentBuilder()
+ .SetKey("namespace", "message1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ {
+ // Initializes a normal icing to create files needed
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message1).status(), ProtoIsOk());
+ }
+
+ // Creates a mock filesystem in which DeleteDirectoryRecursively() always
+ // fails. This will fail IcingSearchEngine::OptimizeDocumentStore() and makes
+ // it return ABORTED_ERROR.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ ON_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("_optimize_tmp")))
+ .WillByDefault(Return(false));
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.Optimize().status(), ProtoStatusIs(StatusProto::ABORTED));
+
+ // Now optimization is aborted, we verify that document-related functions
+ // still work as expected.
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = message1;
+ EXPECT_THAT(icing.Get("namespace", "message1",
+ GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ DocumentProto message2 =
+ DocumentBuilder()
+ .SetKey("namespace", "message2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ EXPECT_THAT(icing.Put(message2).status(), ProtoIsOk());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ message2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ message1;
+
+ // Verify term search
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("body:m");
+ search_spec1.set_term_match_type(TermMatchType::PREFIX);
+
+ SearchResultProto search_result_proto1 =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto search_result_google::protobuf =
+ icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_google::protobuf, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `body:message` based on the child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:message");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto expected_join_search_result_proto;
+ expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_join_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message2;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message1;
+
+ SearchResultProto search_result_proto3 =
+ icing.Search(search_spec3, GetDefaultScoringSpec(), result_spec3);
+ EXPECT_THAT(search_result_proto3, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest,
+ OptimizationShouldRecoverIfFileDirectoriesAreMissing) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // Creates a mock filesystem in which SwapFiles() always fails and deletes the
+ // directories. This will fail IcingSearchEngine::OptimizeDocumentStore().
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ ON_CALL(*mock_filesystem, SwapFiles(HasSubstr("document_dir_optimize_tmp"),
+ HasSubstr("document_dir")))
+ .WillByDefault([this](const char* one, const char* two) {
+ filesystem()->DeleteDirectoryRecursively(one);
+ filesystem()->DeleteDirectoryRecursively(two);
+ return false;
+ });
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // Optimize() fails due to filesystem error
+ OptimizeResultProto result = icing.Optimize();
+ EXPECT_THAT(result.status(), ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
+ // Should rebuild the index for data loss.
+ EXPECT_THAT(result.optimize_stats().index_restoration_mode(),
+ Eq(OptimizeStatsProto::FULL_INDEX_REBUILD));
+
+ // Document is not found because original file directory is missing
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace, uri) not found.");
+ EXPECT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ DocumentProto new_document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "new body")
+ .AddInt64Property("indexableInteger", 456)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ EXPECT_THAT(icing.Put(new_document).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("m");
+ search_spec1.set_term_match_type(TermMatchType::PREFIX);
+
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+
+ // Searching old content returns nothing because original file directory is
+ // missing
+ // Term search
+ SearchResultProto search_result_proto1 =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Numeric (integer) search
+ SearchResultProto search_result_google::protobuf =
+ icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_google::protobuf, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Searching new content returns the new document
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ new_document;
+ // Term search
+ search_spec1.set_query("n");
+ search_result_proto1 = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Numeric (integer) search
+ search_spec2.set_query("indexableInteger == 456");
+ search_result_google::protobuf = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_google::protobuf, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest,
+ OptimizationShouldRecoverIfDataFilesAreMissing) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // Creates a mock filesystem in which SwapFiles() always fails and empties the
+ // directories. This will fail IcingSearchEngine::OptimizeDocumentStore().
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ ON_CALL(*mock_filesystem, SwapFiles(HasSubstr("document_dir_optimize_tmp"),
+ HasSubstr("document_dir")))
+ .WillByDefault([this](const char* one, const char* two) {
+ filesystem()->DeleteDirectoryRecursively(one);
+ filesystem()->CreateDirectoryRecursively(one);
+ filesystem()->DeleteDirectoryRecursively(two);
+ filesystem()->CreateDirectoryRecursively(two);
+ return false;
+ });
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // Optimize() fails due to filesystem error
+ OptimizeResultProto result = icing.Optimize();
+ EXPECT_THAT(result.status(), ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
+ // Should rebuild the index for data loss.
+ EXPECT_THAT(result.optimize_stats().index_restoration_mode(),
+ Eq(OptimizeStatsProto::FULL_INDEX_REBUILD));
+
+ // Document is not found because original files are missing
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace, uri) not found.");
+ EXPECT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ DocumentProto new_document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "new body")
+ .AddInt64Property("indexableInteger", 456)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ EXPECT_THAT(icing.Put(new_document).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("m");
+ search_spec1.set_term_match_type(TermMatchType::PREFIX);
+
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+
+ // Searching old content returns nothing because original files are missing
+ // Term search
+ SearchResultProto search_result_proto1 =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Numeric (integer) search
+ SearchResultProto search_result_google::protobuf =
+ icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_google::protobuf, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Searching new content returns the new document
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ new_document;
+ // Term search
+ search_spec1.set_query("n");
+ search_result_proto1 = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Numeric (integer) search
+ search_spec2.set_query("indexableInteger == 456");
+ search_result_google::protobuf = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_google::protobuf, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest, OptimizeThresholdTest) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .AddInt64Property("indexableInteger", 1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .AddInt64Property("indexableInteger", 2)
+ .SetCreationTimestampMs(9000)
+ .SetTtlMs(500)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body three")
+ .AddInt64Property("indexableInteger", 3)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(5);
+ fake_clock->SetSystemTimeMilliseconds(10000);
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ // Set the threshold to 0.9 to test that the threshold works.
+ options.set_optimize_rebuild_index_threshold(0.9);
+ auto icing = std::make_unique<TestIcingSearchEngine>(
+ options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::move(fake_clock),
+ GetTestJniCache());
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ // Add three documents.
+ ASSERT_THAT(icing->Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing->Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing->Put(document3).status(), ProtoIsOk());
+
+ // Delete the first document.
+ ASSERT_THAT(icing->Delete(document1.namespace_(), document1.uri()).status(),
+ ProtoIsOk());
+ ASSERT_THAT(icing->PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
+
+ OptimizeStatsProto expected;
+ expected.set_latency_ms(5);
+ expected.set_document_store_optimize_latency_ms(5);
+ expected.set_index_restoration_latency_ms(5);
+ expected.set_num_original_documents(3);
+ expected.set_num_deleted_documents(1);
+ expected.set_num_expired_documents(1);
+ expected.set_index_restoration_mode(OptimizeStatsProto::INDEX_TRANSLATION);
+
+ // Run Optimize
+ OptimizeResultProto result = icing->Optimize();
+ // Depending on how many blocks the documents end up spread across, it's
+ // possible that Optimize can remove documents without shrinking storage. The
+ // first Optimize call will also write the OptimizeStatusProto for the first
+ // time which will take up 1 block. So make sure that before_size is no less
+ // than after_size - 1 block.
+ uint32_t page_size = getpagesize();
+ EXPECT_THAT(result.optimize_stats().storage_size_before(),
+ Ge(result.optimize_stats().storage_size_after() - page_size));
+ result.mutable_optimize_stats()->clear_storage_size_before();
+ result.mutable_optimize_stats()->clear_storage_size_after();
+ EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
+
+ fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(5);
+ fake_clock->SetSystemTimeMilliseconds(20000);
+ icing = std::make_unique<TestIcingSearchEngine>(
+ options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::move(fake_clock),
+ GetTestJniCache());
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+
+ expected = OptimizeStatsProto();
+ expected.set_latency_ms(5);
+ expected.set_document_store_optimize_latency_ms(5);
+ expected.set_index_restoration_latency_ms(5);
+ expected.set_num_original_documents(1);
+ expected.set_num_deleted_documents(0);
+ expected.set_num_expired_documents(0);
+ expected.set_time_since_last_optimize_ms(10000);
+ expected.set_index_restoration_mode(OptimizeStatsProto::INDEX_TRANSLATION);
+
+ // Run Optimize
+ result = icing->Optimize();
+ EXPECT_THAT(result.optimize_stats().storage_size_before(),
+ Eq(result.optimize_stats().storage_size_after()));
+ result.mutable_optimize_stats()->clear_storage_size_before();
+ result.mutable_optimize_stats()->clear_storage_size_after();
+ EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
+
+ // Delete the last document.
+ ASSERT_THAT(icing->Delete(document3.namespace_(), document3.uri()).status(),
+ ProtoIsOk());
+
+ expected = OptimizeStatsProto();
+ expected.set_latency_ms(5);
+ expected.set_document_store_optimize_latency_ms(5);
+ expected.set_index_restoration_latency_ms(5);
+ expected.set_num_original_documents(1);
+ expected.set_num_deleted_documents(1);
+ expected.set_num_expired_documents(0);
+ expected.set_time_since_last_optimize_ms(0);
+ // Should rebuild the index since all documents are removed.
+ expected.set_index_restoration_mode(OptimizeStatsProto::FULL_INDEX_REBUILD);
+
+ // Run Optimize
+ result = icing->Optimize();
+ EXPECT_THAT(result.optimize_stats().storage_size_before(),
+ Ge(result.optimize_stats().storage_size_after()));
+ result.mutable_optimize_stats()->clear_storage_size_before();
+ result.mutable_optimize_stats()->clear_storage_size_after();
+ EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest, OptimizeStatsProtoTest) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .AddInt64Property("indexableInteger", 1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .AddInt64Property("indexableInteger", 2)
+ .SetCreationTimestampMs(9000)
+ .SetTtlMs(500)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body three")
+ .AddInt64Property("indexableInteger", 3)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(5);
+ fake_clock->SetSystemTimeMilliseconds(10000);
+ // Use the default Icing options, so that a change to the default value will
+ // require updating this test.
+ auto icing = std::make_unique<TestIcingSearchEngine>(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::move(fake_clock),
+ GetTestJniCache());
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ // Add three documents.
+ ASSERT_THAT(icing->Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing->Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing->Put(document3).status(), ProtoIsOk());
+
+ // Delete the first document.
+ ASSERT_THAT(icing->Delete(document1.namespace_(), document1.uri()).status(),
+ ProtoIsOk());
+ ASSERT_THAT(icing->PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
+
+ OptimizeStatsProto expected;
+ expected.set_latency_ms(5);
+ expected.set_document_store_optimize_latency_ms(5);
+ expected.set_index_restoration_latency_ms(5);
+ expected.set_num_original_documents(3);
+ expected.set_num_deleted_documents(1);
+ expected.set_num_expired_documents(1);
+ expected.set_index_restoration_mode(OptimizeStatsProto::FULL_INDEX_REBUILD);
+
+ // Run Optimize
+ OptimizeResultProto result = icing->Optimize();
+ // Depending on how many blocks the documents end up spread across, it's
+ // possible that Optimize can remove documents without shrinking storage. The
+ // first Optimize call will also write the OptimizeStatusProto for the first
+ // time which will take up 1 block. So make sure that before_size is no less
+ // than after_size - 1 block.
+ uint32_t page_size = getpagesize();
+ EXPECT_THAT(result.optimize_stats().storage_size_before(),
+ Ge(result.optimize_stats().storage_size_after() - page_size));
+ result.mutable_optimize_stats()->clear_storage_size_before();
+ result.mutable_optimize_stats()->clear_storage_size_after();
+ EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
+
+ fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(5);
+ fake_clock->SetSystemTimeMilliseconds(20000);
+ // Use the default Icing options, so that a change to the default value will
+ // require updating this test.
+ icing = std::make_unique<TestIcingSearchEngine>(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::move(fake_clock),
+ GetTestJniCache());
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+
+ expected = OptimizeStatsProto();
+ expected.set_latency_ms(5);
+ expected.set_document_store_optimize_latency_ms(5);
+ expected.set_index_restoration_latency_ms(5);
+ expected.set_num_original_documents(1);
+ expected.set_num_deleted_documents(0);
+ expected.set_num_expired_documents(0);
+ expected.set_time_since_last_optimize_ms(10000);
+ expected.set_index_restoration_mode(OptimizeStatsProto::FULL_INDEX_REBUILD);
+
+ // Run Optimize
+ result = icing->Optimize();
+ EXPECT_THAT(result.optimize_stats().storage_size_before(),
+ Eq(result.optimize_stats().storage_size_after()));
+ result.mutable_optimize_stats()->clear_storage_size_before();
+ result.mutable_optimize_stats()->clear_storage_size_after();
+ EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
+
+ // Delete the last document.
+ ASSERT_THAT(icing->Delete(document3.namespace_(), document3.uri()).status(),
+ ProtoIsOk());
+
+ expected = OptimizeStatsProto();
+ expected.set_latency_ms(5);
+ expected.set_document_store_optimize_latency_ms(5);
+ expected.set_index_restoration_latency_ms(5);
+ expected.set_num_original_documents(1);
+ expected.set_num_deleted_documents(1);
+ expected.set_num_expired_documents(0);
+ expected.set_time_since_last_optimize_ms(0);
+ expected.set_index_restoration_mode(OptimizeStatsProto::FULL_INDEX_REBUILD);
+
+ // Run Optimize
+ result = icing->Optimize();
+ EXPECT_THAT(result.optimize_stats().storage_size_before(),
+ Ge(result.optimize_stats().storage_size_after()));
+ result.mutable_optimize_stats()->clear_storage_size_before();
+ result.mutable_optimize_stats()->clear_storage_size_after();
+ EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest,
+ OptimizationRewritesDocsWithNewCompressionLevel) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+ icing_options.set_compression_level(3);
+ int64_t document_log_size_compression_3;
+ int64_t document_log_size_after_opti_no_compression;
+ int64_t document_log_size_after_opti_compression_3;
+ const std::string document_log_path =
+ icing_options.base_dir() + "/document_dir/" +
+ DocumentLogCreator::GetDocumentLogFilename();
+ {
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
+ document_log_size_compression_3 =
+ filesystem()->GetFileSize(document_log_path.c_str());
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ // Turn off compression
+ icing_options.set_compression_level(0);
+
+ {
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ // Document log size is the same even after reopening with a different
+ // compression level
+ ASSERT_EQ(document_log_size_compression_3,
+ filesystem()->GetFileSize(document_log_path.c_str()));
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+ document_log_size_after_opti_no_compression =
+ filesystem()->GetFileSize(document_log_path.c_str());
+ // Document log size is larger after optimizing since optimizing rewrites
+ // with the new compression level which is 0 or none
+ ASSERT_GT(document_log_size_after_opti_no_compression,
+ document_log_size_compression_3);
+ }
+
+ // Restore the original compression level
+ icing_options.set_compression_level(3);
+
+ {
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ // Document log size is the same even after reopening with a different
+ // compression level
+ ASSERT_EQ(document_log_size_after_opti_no_compression,
+ filesystem()->GetFileSize(document_log_path.c_str()));
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+ document_log_size_after_opti_compression_3 =
+ filesystem()->GetFileSize(document_log_path.c_str());
+ // Document log size should be the same as it was originally
+ ASSERT_EQ(document_log_size_after_opti_compression_3,
+ document_log_size_compression_3);
+ }
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine_put_test.cc b/icing/icing-search-engine_put_test.cc
new file mode 100644
index 0000000..ed72f17
--- /dev/null
+++ b/icing/icing-search-engine_put_test.cc
@@ -0,0 +1,481 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/icing-search-engine.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/legacy/index/icing-mock-filesystem.h"
+#include "icing/portable/endian.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/reset.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/random-string.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::Ge;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::Le;
+using ::testing::SizeIs;
+
+constexpr std::string_view kIpsumText =
+ "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla convallis "
+ "scelerisque orci quis hendrerit. Sed augue turpis, sodales eu gravida "
+ "nec, scelerisque nec leo. Maecenas accumsan interdum commodo. Aliquam "
+ "mattis sapien est, sit amet interdum risus dapibus sed. Maecenas leo "
+ "erat, fringilla in nisl a, venenatis gravida metus. Phasellus venenatis, "
+ "orci in aliquet mattis, lectus sapien volutpat arcu, sed hendrerit ligula "
+ "arcu nec mauris. Integer dolor mi, rhoncus eget gravida et, pulvinar et "
+ "nunc. Aliquam ac sollicitudin nisi. Vivamus sit amet urna vestibulum, "
+ "tincidunt eros sed, efficitur nisl. Fusce non neque accumsan, sagittis "
+ "nisi eget, sagittis turpis. Ut pulvinar nibh eu purus feugiat faucibus. "
+ "Donec tellus nulla, tincidunt vel lacus id, bibendum fermentum turpis. "
+ "Nullam ultrices sed nibh vitae aliquet. Ut risus neque, consectetur "
+ "vehicula posuere vitae, convallis eu lorem. Donec semper augue eu nibh "
+ "placerat semper.";
+
+// For mocking purpose, we allow tests to provide a custom Filesystem.
+class TestIcingSearchEngine : public IcingSearchEngine {
+ public:
+ TestIcingSearchEngine(const IcingSearchEngineOptions& options,
+ std::unique_ptr<const Filesystem> filesystem,
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
+ std::unique_ptr<Clock> clock,
+ std::unique_ptr<JniCache> jni_cache)
+ : IcingSearchEngine(options, std::move(filesystem),
+ std::move(icing_filesystem), std::move(clock),
+ std::move(jni_cache)) {}
+};
+
+std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; }
+
+// This test is meant to cover all tests relating to IcingSearchEngine::Put.
+class IcingSearchEnginePutTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ // If we've specified using the reverse-JNI method for segmentation (i.e.
+ // not ICU), then we won't have the ICU data file included to set up.
+ // Technically, we could choose to use reverse-JNI for segmentation AND
+ // include an ICU data file, but that seems unlikely and our current BUILD
+ // setup doesn't do this.
+ // File generated via icu_data_file rule in //icing/BUILD.
+ std::string icu_data_file_path =
+ GetTestFilePath("icing/icu.dat");
+ ICING_ASSERT_OK(
+ icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
+ }
+ filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ const Filesystem* filesystem() const { return &filesystem_; }
+
+ private:
+ Filesystem filesystem_;
+};
+
+constexpr int kMaxSupportedDocumentSize = (1u << 24) - 1;
+
+// Non-zero value so we don't override it to be the current time
+constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;
+
+std::string GetIndexDir() { return GetTestBaseDir() + "/index_dir"; }
+
+IcingSearchEngineOptions GetDefaultIcingOptions() {
+ IcingSearchEngineOptions icing_options;
+ icing_options.set_base_dir(GetTestBaseDir());
+ return icing_options;
+}
+
+DocumentProto CreateMessageDocument(std::string name_space, std::string uri) {
+ return DocumentBuilder()
+ .SetKey(std::move(name_space), std::move(uri))
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+}
+
+SchemaProto CreateMessageSchema() {
+ return SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+}
+
+ScoringSpecProto GetDefaultScoringSpec() {
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ return scoring_spec;
+}
+
+TEST_F(IcingSearchEnginePutTest, MaxTokenLenReturnsOkAndTruncatesTokens) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ // A length of 1 is allowed - even though it would be strange to want
+ // this.
+ options.set_max_token_length(1);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // "message" should have been truncated to "m"
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ // The indexed tokens were truncated to length of 1, so "m" will match
+ search_spec.set_query("m");
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // The query token is also truncated to length of 1, so "me"->"m" matches "m"
+ search_spec.set_query("me");
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // The query token is still truncated to length of 1, so "massage"->"m"
+ // matches "m"
+ search_spec.set_query("massage");
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEnginePutTest,
+ MaxIntMaxTokenLenReturnsOkTooLargeTokenReturnsResourceExhausted) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ // Set token length to max. This is allowed (it just means never to
+ // truncate tokens). However, this does mean that tokens that exceed the
+ // size of the lexicon will cause indexing to fail.
+ options.set_max_token_length(std::numeric_limits<int32_t>::max());
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Add a document that just barely fits under the max document limit.
+ // This will still fail to index because we won't actually have enough
+ // room in the lexicon to fit this content.
+ std::string enormous_string(kMaxSupportedDocumentSize - 256, 'p');
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Message")
+ .AddStringProperty("body", std::move(enormous_string))
+ .Build();
+ EXPECT_THAT(icing.Put(document).status(),
+ ProtoStatusIs(StatusProto::OUT_OF_SPACE));
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("p");
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEnginePutTest, PutWithoutSchemaFailedPrecondition) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
+ PutResultProto put_result_proto = icing.Put(document);
+ EXPECT_THAT(put_result_proto.status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(put_result_proto.status().message(), HasSubstr("Schema not set"));
+}
+
+TEST_F(IcingSearchEnginePutTest, IndexingDocMergeFailureResets) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .Build();
+ // 1. Create an index with a LiteIndex that will only allow one document
+ // before needing a merge.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(document.ByteSizeLong());
+ IcingSearchEngine icing(options, GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Add two documents. These should get merged into the main index.
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = DocumentBuilder(document).SetUri("fake_type/1").Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ // Add one document. This one should get remain in the lite index.
+ document = DocumentBuilder(document).SetUri("fake_type/2").Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ // 2. Delete the index file to trigger RestoreIndexIfNeeded.
+ std::string idx_subdir = GetIndexDir() + "/idx";
+ filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str());
+
+ // 3. Setup a mock filesystem to fail to grow the main index once.
+ bool has_failed_already = false;
+ auto open_write_lambda = [this, &has_failed_already](const char* filename) {
+ std::string main_lexicon_suffix = "/main-lexicon.prop.2";
+ std::string filename_string(filename);
+ if (!has_failed_already &&
+ filename_string.length() >= main_lexicon_suffix.length() &&
+ filename_string.substr(
+ filename_string.length() - main_lexicon_suffix.length(),
+ main_lexicon_suffix.length()) == main_lexicon_suffix) {
+ has_failed_already = true;
+ return -1;
+ }
+ return this->filesystem()->OpenForWrite(filename);
+ };
+ auto mock_icing_filesystem = std::make_unique<IcingMockFilesystem>();
+ ON_CALL(*mock_icing_filesystem, OpenForWrite)
+ .WillByDefault(open_write_lambda);
+
+ // 4. Create the index again. This should trigger index restoration.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(document.ByteSizeLong());
+ TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
+ std::move(mock_icing_filesystem),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("consectetur");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.next_page_token(), Eq(0));
+ // Only the last document that was added should still be retrievable.
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/2"));
+ }
+}
+
+TEST_F(IcingSearchEnginePutTest, PutDocumentShouldLogFunctionLatency) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .Build();
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ PutResultProto put_result_proto = icing.Put(document);
+ EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(put_result_proto.put_document_stats().latency_ms(), Eq(10));
+}
+
+TEST_F(IcingSearchEnginePutTest, PutDocumentShouldLogDocumentStoreStats) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .AddStringProperty("body", "message body")
+ .Build();
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ PutResultProto put_result_proto = icing.Put(document);
+ EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(put_result_proto.put_document_stats().document_store_latency_ms(),
+ Eq(10));
+ size_t document_size = put_result_proto.put_document_stats().document_size();
+ EXPECT_THAT(document_size, Ge(document.ByteSizeLong()));
+ EXPECT_THAT(document_size, Le(document.ByteSizeLong() +
+ sizeof(DocumentProto::InternalFields)));
+}
+
+TEST_F(IcingSearchEnginePutTest, PutDocumentShouldLogIndexingStats) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .Build();
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ PutResultProto put_result_proto = icing.Put(document);
+ EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(put_result_proto.put_document_stats().index_latency_ms(), Eq(10));
+ // No merge should happen.
+ EXPECT_THAT(put_result_proto.put_document_stats().index_merge_latency_ms(),
+ Eq(0));
+ // The input document has 2 tokens.
+ EXPECT_THAT(put_result_proto.put_document_stats()
+ .tokenization_stats()
+ .num_tokens_indexed(),
+ Eq(2));
+}
+
+TEST_F(IcingSearchEnginePutTest, PutDocumentShouldLogIndexMergeLatency) {
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("icing", "fake_type/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .Build();
+
+ // Create an icing instance with index_merge_size = document1's size.
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+ icing_options.set_index_merge_size(document1.ByteSizeLong());
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(icing_options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ // Putting document2 should trigger an index merge.
+ PutResultProto put_result_proto = icing.Put(document2);
+ EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(put_result_proto.put_document_stats().index_merge_latency_ms(),
+ Eq(10));
+}
+
+TEST_F(IcingSearchEnginePutTest, PutDocumentIndexFailureDeletion) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Testing has shown that adding ~600,000 terms generated this way will
+ // fill up the hit buffer.
+ std::vector<std::string> terms = GenerateUniqueTerms(600000);
+ std::string content = absl_ports::StrJoin(terms, " ");
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "foo " + content)
+ .Build();
+ // We failed to add the document to the index fully. This means that we should
+ // reject the document from Icing entirely.
+ ASSERT_THAT(icing.Put(document).status(),
+ ProtoStatusIs(StatusProto::OUT_OF_SPACE));
+
+ // Make sure that the document isn't searchable.
+ SearchSpecProto search_spec;
+ search_spec.set_query("foo");
+ search_spec.set_term_match_type(TERM_MATCH_PREFIX);
+
+ SearchResultProto search_results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(search_results.status(), ProtoIsOk());
+ ASSERT_THAT(search_results.results(), IsEmpty());
+
+ // Make sure that the document isn't retrievable.
+ GetResultProto get_result =
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance());
+ ASSERT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine_schema_test.cc b/icing/icing-search-engine_schema_test.cc
new file mode 100644
index 0000000..0e88c5a
--- /dev/null
+++ b/icing/icing-search-engine_schema_test.cc
@@ -0,0 +1,3136 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/icing-search-engine.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/join/join-processor.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/reset.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/query/query-features.h"
+#include "icing/schema-builder.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::Eq;
+using ::testing::HasSubstr;
+using ::testing::Return;
+
+// For mocking purpose, we allow tests to provide a custom Filesystem.
+class TestIcingSearchEngine : public IcingSearchEngine {
+ public:
+ TestIcingSearchEngine(const IcingSearchEngineOptions& options,
+ std::unique_ptr<const Filesystem> filesystem,
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
+ std::unique_ptr<Clock> clock,
+ std::unique_ptr<JniCache> jni_cache)
+ : IcingSearchEngine(options, std::move(filesystem),
+ std::move(icing_filesystem), std::move(clock),
+ std::move(jni_cache)) {}
+};
+
+std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; }
+
+// This test is meant to cover all tests relating to
+// IcingSearchEngine::GetSchema and IcingSearchEngine::SetSchema.
+class IcingSearchEngineSchemaTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ // If we've specified using the reverse-JNI method for segmentation (i.e.
+ // not ICU), then we won't have the ICU data file included to set up.
+ // Technically, we could choose to use reverse-JNI for segmentation AND
+ // include an ICU data file, but that seems unlikely and our current BUILD
+ // setup doesn't do this.
+ // File generated via icu_data_file rule in //icing/BUILD.
+ std::string icu_data_file_path =
+ GetTestFilePath("icing/icu.dat");
+ ICING_ASSERT_OK(
+ icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
+ }
+ filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ const Filesystem* filesystem() const { return &filesystem_; }
+
+ private:
+ Filesystem filesystem_;
+};
+
+// Non-zero value so we don't override it to be the current time
+constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;
+
+std::string GetSchemaDir() { return GetTestBaseDir() + "/schema_dir"; }
+
+IcingSearchEngineOptions GetDefaultIcingOptions() {
+ IcingSearchEngineOptions icing_options;
+ icing_options.set_base_dir(GetTestBaseDir());
+ return icing_options;
+}
+
+DocumentProto CreateMessageDocument(std::string name_space, std::string uri) {
+ return DocumentBuilder()
+ .SetKey(std::move(name_space), std::move(uri))
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+}
+
+SchemaTypeConfigProto CreateMessageSchemaTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+}
+
+SchemaProto CreateMessageSchema() {
+ return SchemaBuilder().AddType(CreateMessageSchemaTypeConfig()).Build();
+}
+
+ScoringSpecProto GetDefaultScoringSpec() {
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ return scoring_spec;
+}
+
+// TODO(b/272145329): create SearchSpecBuilder, JoinSpecBuilder,
+// SearchResultProtoBuilder and ResultProtoBuilder for unit tests and build all
+// instances by them.
+
+TEST_F(IcingSearchEngineSchemaTest,
+ CircularReferenceCreateSectionManagerReturnsInvalidArgument) {
+ // Create a type config with a circular reference.
+ SchemaProto schema;
+ auto* type = schema.add_types();
+ type->set_schema_type("Message");
+
+ auto* body = type->add_properties();
+ body->set_property_name("recipient");
+ body->set_schema_type("Person");
+ body->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ body->mutable_document_indexing_config()->set_index_nested_properties(true);
+
+ type = schema.add_types();
+ type->set_schema_type("Person");
+
+ body = type->add_properties();
+ body->set_property_name("recipient");
+ body->set_schema_type("Message");
+ body->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ body->mutable_document_indexing_config()->set_index_nested_properties(true);
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, FailToReadSchema) {
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
+ {
+ // Successfully initialize and set a schema
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ }
+
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+
+ // This fails FileBackedProto::Read() when we try to check the schema we
+ // had previously set
+ ON_CALL(*mock_filesystem,
+ OpenForRead(Eq(icing_options.base_dir() + "/schema_dir/schema.pb")))
+ .WillByDefault(Return(-1));
+
+ TestIcingSearchEngine test_icing(icing_options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ InitializeResultProto initialize_result_proto = test_icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(),
+ ProtoStatusIs(StatusProto::INTERNAL));
+ EXPECT_THAT(initialize_result_proto.status().message(),
+ HasSubstr("Unable to open file for read"));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, FailToWriteSchema) {
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ // This fails FileBackedProto::Write()
+ ON_CALL(*mock_filesystem, OpenForWrite(HasSubstr("schema.pb")))
+ .WillByDefault(Return(-1));
+
+ TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SetSchemaResultProto set_schema_result_proto =
+ icing.SetSchema(CreateMessageSchema());
+ EXPECT_THAT(set_schema_result_proto.status(),
+ ProtoStatusIs(StatusProto::INTERNAL));
+ EXPECT_THAT(set_schema_result_proto.status().message(),
+ HasSubstr("Unable to open file for write"));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, SetSchemaIncompatibleFails) {
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 1. Create a schema with an Email type with properties { "title", "body"}
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ // 2. Add an email document
+ DocumentProto doc = DocumentBuilder()
+ .SetKey("emails", "email#1")
+ .SetSchema("Email")
+ .AddStringProperty("title", "Hello world.")
+ .AddStringProperty("body", "Goodnight Moon.")
+ .Build();
+ EXPECT_THAT(icing.Put(std::move(doc)).status(), ProtoIsOk());
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 3. Set a schema that deletes email. This should fail.
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_schema_type("Message");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(
+ icing.SetSchema(schema, /*ignore_errors_and_delete_documents=*/false)
+ .status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+
+ // 4. Try to delete by email type. This should succeed because email wasn't
+ // deleted in step 3.
+ EXPECT_THAT(icing.DeleteBySchemaType("Email").status(), ProtoIsOk());
+ }
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaIncompatibleForceOverrideSucceeds) {
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 1. Create a schema with an Email type with properties { "title", "body"}
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ // 2. Add an email document
+ DocumentProto doc = DocumentBuilder()
+ .SetKey("emails", "email#1")
+ .SetSchema("Email")
+ .AddStringProperty("title", "Hello world.")
+ .AddStringProperty("body", "Goodnight Moon.")
+ .Build();
+ EXPECT_THAT(icing.Put(std::move(doc)).status(), ProtoIsOk());
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 3. Set a schema that deletes email with force override. This should
+ // succeed and delete the email type.
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_schema_type("Message");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema, true).status(), ProtoIsOk());
+
+ // 4. Try to delete by email type. This should fail because email was
+ // already deleted.
+ EXPECT_THAT(icing.DeleteBySchemaType("Email").status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
+ }
+}
+
+TEST_F(IcingSearchEngineSchemaTest, SetSchemaUnsetVersionIsZero) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 1. Create a schema with an Email type with version 1
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(0));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, SetSchemaCompatibleVersionUpdateSucceeds) {
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 1. Create a schema with an Email type with version 1
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_version(1);
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ SetSchemaResultProto set_schema_result = icing.SetSchema(schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 2. Create schema that adds a new optional property and updates version.
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_version(2);
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ // 3. SetSchema should succeed and the version number should be updated.
+ SetSchemaResultProto set_schema_result = icing.SetSchema(schema, true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_fully_compatible_changed_schema_types()
+ ->Add("Email");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2));
+ }
+}
+
+TEST_F(IcingSearchEngineSchemaTest, SetSchemaIncompatibleVersionUpdateFails) {
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 1. Create a schema with an Email type with version 1
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_version(1);
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 2. Create schema that makes an incompatible change (OPTIONAL -> REQUIRED)
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_version(2);
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+ // 3. SetSchema should fail and the version number should NOT be updated.
+ EXPECT_THAT(icing.SetSchema(schema).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
+ }
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaIncompatibleVersionUpdateForceOverrideSucceeds) {
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 1. Create a schema with an Email type with version 1
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_version(1);
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 2. Create schema that makes an incompatible change (OPTIONAL -> REQUIRED)
+ // with force override to true.
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_version(2);
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+ // 3. SetSchema should succeed and the version number should be updated.
+ EXPECT_THAT(icing.SetSchema(schema, true).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2));
+ }
+}
+
+TEST_F(IcingSearchEngineSchemaTest, SetSchemaNoChangeVersionUpdateSucceeds) {
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 1. Create a schema with an Email type with version 1
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_version(1);
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 2. Create schema that only changes the version.
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_version(2);
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ // 3. SetSchema should succeed and the version number should be updated.
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2));
+ }
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaDuplicateTypesReturnsAlreadyExists) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create a schema with types { "Email", "Message" and "Email" }
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ type = schema.add_types();
+ type->set_schema_type("Message");
+ property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ *schema.add_types() = schema.types(0);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(),
+ ProtoStatusIs(StatusProto::ALREADY_EXISTS));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaDuplicatePropertiesReturnsAlreadyExists) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create a schema with an Email type with properties { "title", "body" and
+ // "title" }
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(),
+ ProtoStatusIs(StatusProto::ALREADY_EXISTS));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, SetSchema) {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(1000);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ auto message_document = CreateMessageDocument("namespace", "uri");
+
+ auto schema_with_message = CreateMessageSchema();
+
+ SchemaProto schema_with_email;
+ SchemaTypeConfigProto* type = schema_with_email.add_types();
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ SchemaProto schema_with_email_and_message = schema_with_email;
+ *schema_with_email_and_message.add_types() = CreateMessageSchemaTypeConfig();
+
+ // Create an arbitrary invalid schema
+ SchemaProto invalid_schema;
+ SchemaTypeConfigProto* empty_type = invalid_schema.add_types();
+ empty_type->set_schema_type("");
+
+ // Make sure we can't set invalid schemas
+ SetSchemaResultProto set_schema_result = icing.SetSchema(invalid_schema);
+ EXPECT_THAT(set_schema_result.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+ EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000));
+
+ // Can add an document of a set schema
+ set_schema_result = icing.SetSchema(schema_with_message);
+ EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::OK));
+ EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000));
+ EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk());
+
+ // Schema with Email doesn't have Message, so would result incompatible
+ // data
+ set_schema_result = icing.SetSchema(schema_with_email);
+ EXPECT_THAT(set_schema_result.status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000));
+
+ // Can expand the set of schema types and add an document of a new
+ // schema type
+ set_schema_result = icing.SetSchema(schema_with_email_and_message);
+ EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::OK));
+ EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000));
+
+ EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk());
+ // Can't add an document whose schema isn't set
+ auto photo_document = DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Photo")
+ .AddStringProperty("creator", "icing")
+ .Build();
+ PutResultProto put_result_proto = icing.Put(photo_document);
+ EXPECT_THAT(put_result_proto.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
+ EXPECT_THAT(put_result_proto.status().message(),
+ HasSubstr("'Photo' not found"));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaNewIndexedStringPropertyTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create a schema with 2 properties:
+ // - 'a': string type, unindexed. No section id assigned.
+ // - 'b': int64 type, indexed. Section id = 0.
+ SchemaProto schema_one =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Schema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN,
+ TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result = icing.SetSchema(schema_one);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_new_schema_types()->Add("Schema");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Schema")
+ .AddStringProperty("a", "message body")
+ .AddInt64Property("b", 123)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ // Only 'b' will be indexed.
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ SearchResultProto empty_result;
+ empty_result.mutable_status()->set_code(StatusProto::OK);
+
+ // Verify term search: won't get anything.
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("a:message");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ // Verify numeric (integer) search: will get document.
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("b == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Change the schema to:
+ // - 'a': string type, indexed. Section id = 0.
+ // - 'b': int64 type, indexed. Section id = 1.
+ SchemaProto schema_two = schema_one;
+ schema_two.mutable_types(0)
+ ->mutable_properties(0)
+ ->mutable_string_indexing_config()
+ ->set_term_match_type(TERM_MATCH_PREFIX);
+ schema_two.mutable_types(0)
+ ->mutable_properties(0)
+ ->mutable_string_indexing_config()
+ ->set_tokenizer_type(TOKENIZER_PLAIN);
+ // Index restoration should be triggered here because new schema requires more
+ // properties to be indexed. Also new section ids will be reassigned and index
+ // restoration should use new section ids to rebuild.
+ set_schema_result = icing.SetSchema(schema_two);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Schema");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Verify term search: will get document now.
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search: will still get document.
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaNewIndexedIntegerPropertyTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create a schema with 2 properties:
+ // - 'a': int64 type, unindexed. No section id assigned.
+ // - 'b': string type, indexed. Section id = 0.
+ SchemaProto schema_one =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Schema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+
+ .Build();
+
+ SetSchemaResultProto set_schema_result = icing.SetSchema(schema_one);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_new_schema_types()->Add("Schema");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Schema")
+ .AddInt64Property("a", 123)
+ .AddStringProperty("b", "message body")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ // Only 'b' will be indexed.
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ SearchResultProto empty_result;
+ empty_result.mutable_status()->set_code(StatusProto::OK);
+
+ // Verify term search: will get document.
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("b:message");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search: won't get anything.
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("a == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ // Change the schema to:
+ // - 'a': int64 type, indexed. Section id = 0.
+ // - 'b': string type, indexed. Section id = 1.
+ SchemaProto schema_two = schema_one;
+ schema_two.mutable_types(0)
+ ->mutable_properties(0)
+ ->mutable_integer_indexing_config()
+ ->set_numeric_match_type(NUMERIC_MATCH_RANGE);
+ // Index restoration should be triggered here because new schema requires more
+ // properties to be indexed. Also new section ids will be reassigned and index
+ // restoration should use new section ids to rebuild.
+ set_schema_result = icing.SetSchema(schema_two);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Schema");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Verify term search: will still get document.
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search: will get document now.
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(
+ IcingSearchEngineSchemaTest,
+ SetSchemaNewIndexedDocumentPropertyTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create a schema with a nested document type:
+ //
+ // Section id assignment for 'Person':
+ // - "age": integer type, indexed. Section id = 0
+ // - "name": string type, indexed. Section id = 1.
+ // - "worksFor.name": string type, (nested) indexed. Section id = 2.
+ //
+ // Joinable property id assignment for 'Person':
+ // - "worksFor.listRef": string type, Qualified Id type joinable. Joinable
+ // property id = 0.
+ SchemaProto schema_one =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("List").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("title")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("age")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("worksFor")
+ .SetDataTypeDocument(
+ "Organization",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Organization")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("listRef")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema_one).status(), ProtoIsOk());
+
+ DocumentProto list_document = DocumentBuilder()
+ .SetKey("namespace", "list/1")
+ .SetSchema("List")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("title", "title")
+ .Build();
+ DocumentProto person_document =
+ DocumentBuilder()
+ .SetKey("namespace", "person/2")
+ .SetSchema("Person")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("name", "John")
+ .AddInt64Property("age", 20)
+ .AddDocumentProperty(
+ "worksFor", DocumentBuilder()
+ .SetKey("namespace", "org/1")
+ .SetSchema("Organization")
+ .AddStringProperty("name", "Google")
+ .AddStringProperty("listRef", "namespace#list/1")
+ .Build())
+ .Build();
+ EXPECT_THAT(icing.Put(list_document).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person_document).status(), ProtoIsOk());
+
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ person_document;
+
+ SearchResultProto empty_result;
+ empty_result.mutable_status()->set_code(StatusProto::OK);
+
+ // Verify term search
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("worksFor.name:Google");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec1, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("age == 20");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results =
+ icing.Search(search_spec2, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify join search: join a query for `title:title` (which will get
+ // list_document) with a child query for `name:John` (which will get
+ // person_document) based on the child's `worksFor.listRef` field.
+ SearchSpecProto search_spec_with_join;
+ search_spec_with_join.set_query("title:title");
+ search_spec_with_join.set_term_match_type(TermMatchType::EXACT_ONLY);
+ JoinSpecProto* join_spec = search_spec_with_join.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("worksFor.listRef");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("name:John");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = result_spec;
+
+ SearchResultProto expected_join_search_result_proto;
+ expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_join_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = list_document;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() =
+ person_document;
+
+ actual_results =
+ icing.Search(search_spec_with_join, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_search_result_proto));
+
+ // Change the schema to add another nested document property to 'Person'
+ //
+ // New section id assignment for 'Person':
+ // - "age": integer type, indexed. Section id = 0
+ // - "almaMater.name", string type, indexed. Section id = 1
+ // - "name": string type, indexed. Section id = 2
+ // - "worksFor.name": string type, (nested) indexed. Section id = 3
+ //
+ // New joinable property id assignment for 'Person':
+ // - "almaMater.listRef": string type, Qualified Id type joinable. Joinable
+ // property id = 0.
+ // - "worksFor.listRef": string type, Qualified Id type joinable. Joinable
+ // property id = 1.
+ SchemaProto schema_two =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("List").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("title")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("age")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("worksFor")
+ .SetDataTypeDocument(
+ "Organization",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("almaMater")
+ .SetDataTypeDocument(
+ "Organization",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Organization")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("listRef")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ // This schema change is compatible since the added 'almaMater' property has
+ // CARDINALITY_OPTIONAL.
+ //
+ // Index restoration should be triggered here because new schema requires more
+ // properties to be indexed. Also new section ids will be reassigned and index
+ // restoration should use new section ids to rebuild.
+ SetSchemaResultProto set_schema_result = icing.SetSchema(schema_two);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Person");
+ expected_set_schema_result.mutable_join_incompatible_changed_schema_types()
+ ->Add("Person");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Verify term search:
+ // Searching for "worksFor.name:Google" should still match document
+ actual_results =
+ icing.Search(search_spec1, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // In new_schema the 'name' property is now indexed at section id 2. If
+ // searching for "name:Google" matched the document, this means that index
+ // rebuild was not triggered and Icing is still searching the old index, where
+ // 'worksFor.name' was indexed at section id 2.
+ search_spec1.set_query("name:Google");
+ actual_results =
+ icing.Search(search_spec1, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ // Verify numeric (integer) search: should still match document
+ actual_results =
+ icing.Search(search_spec2, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify join search: should still able to join by `worksFor.listRef`
+ actual_results =
+ icing.Search(search_spec_with_join, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaChangeNestedPropertiesTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SchemaTypeConfigProto person_proto =
+ SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("age")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ // Create a schema with nested properties:
+ // - "sender.age": int64 type, (nested) indexed. Section id = 0.
+ // - "sender.name": string type, (nested) indexed. Section id = 1.
+ // - "subject": string type, indexed. Section id = 2.
+ // - "timestamp": int64 type, indexed. Section id = 3.
+ SchemaProto nested_schema =
+ SchemaBuilder()
+ .AddType(person_proto)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_new_schema_types()->Add("Person");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Bill Lundbergh")
+ .AddInt64Property("age", 20)
+ .Build())
+ .AddInt64Property("timestamp", 1234)
+ .Build();
+
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ SearchResultProto empty_result;
+ empty_result.mutable_status()->set_code(StatusProto::OK);
+
+ // Verify term search
+ // document should match a query for 'Bill' in 'sender.name', but not in
+ // 'subject'
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("sender.name:Bill");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ search_spec1.set_query("subject:Bill");
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ // Verify numeric (integer) search
+ // document should match a query for 20 in 'sender.age', but not in
+ // 'timestamp'
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("sender.age == 20");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ search_spec2.set_query("timestamp == 20");
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ // Now update the schema with index_nested_properties=false. This should
+ // reassign section ids, lead to an index rebuild and ensure that nothing
+ // match a query for "Bill" or 20.
+ // - "sender.age": int64 type, (nested) unindexed. No section id assigned.
+ // - "sender.name": string type, (nested) unindexed. No section id assigned.
+ // - "subject": string type, indexed. Section id = 0.
+ // - "timestamp": int64 type, indexed. Section id = 1.
+ SchemaProto no_nested_schema =
+ SchemaBuilder()
+ .AddType(person_proto)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ set_schema_result = icing.SetSchema(no_nested_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Email");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Verify term search
+ // document shouldn't match a query for 'Bill' in either 'sender.name' or
+ // 'subject'
+ search_spec1.set_query("sender.name:Bill");
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ search_spec1.set_query("subject:Bill");
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ // Verify numeric (integer) search
+ // document shouldn't match a query for 20 in either 'sender.age' or
+ // 'timestamp'
+ search_spec2.set_query("sender.age == 20");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ search_spec2.set_query("timestamp == 20");
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+}
+
+TEST_F(
+ IcingSearchEngineSchemaTest,
+ SetSchemaChangeNestedPropertiesListTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SchemaTypeConfigProto person_proto =
+ SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("address")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("age")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("birthday")
+ .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ // Create a schema with nested properties:
+ // - "sender.address": string type, (nested) non-indexable. Section id = 0.
+ // - "sender.age": int64 type, (nested) indexed. Section id = 1.
+ // - "sender.birthday": int64 type, (nested) non-indexable. Section id = 2.
+ // - "sender.lastName": int64 type, (nested) indexed. Section id = 3.
+ // - "sender.name": string type, (nested) indexed. Section id = 4.
+ // - "subject": string type, indexed. Section id = 5.
+ // - "timestamp": int64 type, indexed. Section id = 6.
+ // - "sender.foo": unknown type, (nested) non-indexable. Section id = 7.
+ //
+ // "sender.address" and "sender.birthday" are assigned a section id because
+ // they are listed in the indexable_nested_properties_list for 'Email.sender'.
+ // They are assigned a sectionId but are not indexed since their indexing
+ // configs are non-indexable.
+ //
+ // "sender.foo" is also assigned a section id, but is also not undefined by
+ // the schema definition. Trying to index a document with this nested property
+ // should fail.
+ SchemaProto nested_schema =
+ SchemaBuilder()
+ .AddType(person_proto)
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person", /*indexable_nested_properties_list=*/
+ {"age", "lastName", "address", "name", "birthday",
+ "foo"})
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_new_schema_types()->Add("Person");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Bill")
+ .AddStringProperty("lastName", "Lundbergh")
+ .AddStringProperty("address", "1600 Amphitheatre Pkwy")
+ .AddInt64Property("age", 20)
+ .AddInt64Property("birthday", 20)
+ .Build())
+ .AddInt64Property("timestamp", 1234)
+ .Build();
+
+ // Indexing this doc should fail, since the 'sender.foo' property is not found
+ DocumentProto invalid_document =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Bill")
+ .AddStringProperty("lastName", "Lundbergh")
+ .AddStringProperty("address", "1600 Amphitheatre Pkwy")
+ .AddInt64Property("age", 20)
+ .AddInt64Property("birthday", 20)
+ .AddBytesProperty("foo", "bar bytes")
+ .Build())
+ .AddInt64Property("timestamp", 1234)
+ .Build();
+
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(invalid_document).status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ SearchResultProto empty_result;
+ empty_result.mutable_status()->set_code(StatusProto::OK);
+
+ // Verify term search
+ // document should match a query for 'Bill' in 'sender.name', but not in
+ // 'sender.lastName'
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("sender.name:Bill");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ search_spec1.set_query("sender.lastName:Bill");
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ // document should match a query for 'Lundber' in 'sender.lastName', but not
+ // in 'sender.name'.
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("sender.lastName:Lundber");
+ search_spec2.set_term_match_type(TermMatchType::PREFIX);
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ search_spec2.set_query("sender.name:Lundber");
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ // document should not match a query for 'Amphitheatre' because the
+ // 'sender.address' field is not indexed.
+ search_spec2.set_query("Amphitheatre");
+ search_spec2.set_term_match_type(TermMatchType::PREFIX);
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ // Verify numeric (integer) search
+ // document should match a query for 20 in 'sender.age', but not in
+ // 'timestamp' or 'sender.birthday'
+ SearchSpecProto search_spec3;
+ search_spec3.set_query("sender.age == 20");
+ search_spec3.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec3.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec3, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ search_spec3.set_query("timestamp == 20");
+ actual_results = icing.Search(search_spec3, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ search_spec3.set_query("birthday == 20");
+ actual_results = icing.Search(search_spec3, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ // Now update the schema and don't index "sender.name", "sender.birthday" and
+ // "sender.foo".
+ // This should reassign section ids, lead to an index rebuild and ensure that
+ // nothing match a query for "Bill".
+ //
+ // Section id assignment:
+ // - "sender.address": string type, (nested) non-indexable. Section id = 0.
+ // - "sender.age": int64 type, (nested) indexed. Section id = 1.
+ // - "sender.birthday": int64 type, (nested) unindexed. No section id.
+ // - "sender.lastName": int64 type, (nested) indexed. Section id = 2.
+ // - "sender.name": string type, (nested) unindexed. No section id.
+ // - "subject": string type, indexed. Section id = 3.
+ // - "timestamp": int64 type, indexed. Section id = 4.
+ // - "sender.foo": unknown type, invalid. No section id.
+ SchemaProto nested_schema_with_less_props =
+ SchemaBuilder()
+ .AddType(person_proto)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person", /*indexable_nested_properties=*/
+ {"age", "lastName", "address"})
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ set_schema_result = icing.SetSchema(nested_schema_with_less_props);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Email");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Verify term search
+ // document shouldn't match a query for 'Bill' in either 'sender.name' or
+ // 'subject'
+ search_spec1.set_query("sender.name:Bill");
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ search_spec1.set_query("subject:Bill");
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaNewJoinablePropertyTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create "Message" schema with 3 properties:
+ // - "subject": string type, non-joinable. No joinable property id assigned.
+ // It is indexed and used for searching only.
+ // - "receiverQualifiedId": string type, non-joinable. No joinable property id
+ // assigned.
+ // - "senderQualifiedId": string type, Qualified Id type joinable. Joinable
+ // property id = 0.
+ SchemaProto schema_one =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("receiverQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_NONE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result = icing.SetSchema(schema_one);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_new_schema_types()->Add("Message");
+ expected_set_schema_result.mutable_new_schema_types()->Add("Person");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person two")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message1")
+ .SetSchema("Message")
+ .AddStringProperty("subject", "message")
+ .AddStringProperty("receiverQualifiedId", "namespace#person1")
+ .AddStringProperty("senderQualifiedId", "namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ EXPECT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `subject:message` based on the child's `receiverQualifiedId` field.
+ // Since "receiverQualifiedId" is not JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ // joining on that property should only return the "left-side" (`name:person`)
+ // of the join.
+ SearchSpecProto search_spec_join_by_receiver;
+ search_spec_join_by_receiver.set_query("name:person");
+ search_spec_join_by_receiver.set_term_match_type(TermMatchType::EXACT_ONLY);
+ JoinSpecProto* join_spec = search_spec_join_by_receiver.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("receiverQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("subject:message");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ SearchResultProto expected_empty_child_search_result_proto;
+ expected_empty_child_search_result_proto.mutable_status()->set_code(
+ StatusProto::OK);
+ *expected_empty_child_search_result_proto.mutable_results()
+ ->Add()
+ ->mutable_document() = person2;
+ *expected_empty_child_search_result_proto.mutable_results()
+ ->Add()
+ ->mutable_document() = person1;
+ SearchResultProto actual_results = icing.Search(
+ search_spec_join_by_receiver, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_empty_child_search_result_proto));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `subject:message` based on the child's `senderQualifiedId` field.
+ // Since "senderQualifiedId" is JOINABLE_VALUE_TYPE_QUALIFIED_ID, joining on
+ // that property should return both "left-side" (`name:person`) and
+ // "right-side" (`subject:message`) of the join.
+ SearchSpecProto search_spec_join_by_sender = search_spec_join_by_receiver;
+ join_spec = search_spec_join_by_sender.mutable_join_spec();
+ join_spec->set_child_property_expression("senderQualifiedId");
+
+ SearchResultProto expected_join_by_sender_search_result_proto;
+ expected_join_by_sender_search_result_proto.mutable_status()->set_code(
+ StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_join_by_sender_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person2;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message;
+ *expected_join_by_sender_search_result_proto.mutable_results()
+ ->Add()
+ ->mutable_document() = person1;
+ actual_results = icing.Search(search_spec_join_by_sender,
+ GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_by_sender_search_result_proto));
+
+ // Change "Message" schema to:
+ // - "subject": string type, non-joinable. No joinable property id assigned.
+ // - "receiverQualifiedId": string type, Qualified Id joinable. Joinable
+ // property id = 0.
+ // - "senderQualifiedId": string type, Qualified Id joinable. Joinable
+ // property id = 1.
+ SchemaProto schema_two = schema_one;
+ schema_two.mutable_types(1)
+ ->mutable_properties(1)
+ ->mutable_joinable_config()
+ ->set_value_type(JOINABLE_VALUE_TYPE_QUALIFIED_ID);
+ // Index restoration should be triggered here because new schema requires more
+ // joinable properties. Also new joinable property ids will be reassigned and
+ // index restoration should use new joinable property ids to rebuild.
+ set_schema_result = icing.SetSchema(schema_two);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_join_incompatible_changed_schema_types()
+ ->Add("Message");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `subject:message` based on the child's `receiverQualifiedId` field.
+ // Since we've changed "receiverQualifiedId" to be
+ // JOINABLE_VALUE_TYPE_QUALIFIED_ID, joining on that property should return
+ // should return both "left-side" (`name:person`) and "right-side"
+ // (`subject:message`) of the join now.
+ SearchResultProto expected_join_by_receiver_search_result_proto;
+ expected_join_by_receiver_search_result_proto.mutable_status()->set_code(
+ StatusProto::OK);
+ result_proto =
+ expected_join_by_receiver_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person1;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message;
+ *expected_join_by_receiver_search_result_proto.mutable_results()
+ ->Add()
+ ->mutable_document() = person2;
+ actual_results = icing.Search(search_spec_join_by_receiver,
+ GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_by_receiver_search_result_proto));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `subject:message` based on the child's `senderQualifiedId` field. We should
+ // get the same set of result since `senderQualifiedId` is unchanged.
+ actual_results = icing.Search(search_spec_join_by_sender,
+ GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_by_sender_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaWithValidCycle_circularSchemaDefinitionNotAllowedFails) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_allow_circular_schema_definitions(false);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create schema with circular type definitions: A <-> B
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false)))
+ .Build();
+
+ EXPECT_THAT(
+ icing.SetSchema(schema, /*ignore_errors_and_delete_documents=*/false)
+ .status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaWithValidCycle_allowCircularSchemaDefinitionsOK) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_allow_circular_schema_definitions(true);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create schema with valid circular type definitions: A <-> B, B->A sets
+ // index_nested_properties=false
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false)))
+ .Build();
+
+ EXPECT_THAT(
+ icing.SetSchema(schema, /*ignore_errors_and_delete_documents=*/false)
+ .status(),
+ ProtoStatusIs(StatusProto::OK));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaWithInvalidCycle_allowCircularSchemaDefinitionsFails) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_allow_circular_schema_definitions(true);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create schema with invalid circular type definitions: A <-> B, all edges
+ // set index_nested_properties=true
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true)))
+ .Build();
+
+ EXPECT_THAT(
+ icing.SetSchema(schema, /*ignore_errors_and_delete_documents=*/false)
+ .status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(
+ IcingSearchEngineSchemaTest,
+ ForceSetSchemaIndexedPropertyDeletionTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create a schema with 4 properties:
+ // - "body": string type, indexed. Section id = 0.
+ // - "subject": string type, indexed. Section id = 1.
+ // - "timestamp1": int64 type, indexed. Section id = 2.
+ // - "timestamp2": int64 type, indexed. Section id = 3.
+ SchemaProto email_with_body_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp1")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp2")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result =
+ icing.SetSchema(email_with_body_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Create a document with only subject and timestamp2 property.
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .AddInt64Property("timestamp2", 1234)
+ .Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ // Verify term search
+ // We should be able to retrieve the document by searching for 'tps' in
+ // 'subject'.
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("subject:tps");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search
+ // We should be able to retrieve the document by searching for 1234 in
+ // 'timestamp2'.
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("timestamp2 == 1234");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Now update the schema to remove the 'body' and 'timestamp1' field. This is
+ // backwards incompatible, but document should be preserved because it doesn't
+ // contain a 'body' or 'timestamp1' field.
+ // - "subject": string type, indexed. Section id = 0.
+ // - "timestamp2": int64 type, indexed. Section id = 1.
+ //
+ // If the index is not correctly rebuilt, then the hits of 'subject' and
+ // 'timestamp2' in the index will still have old section ids of 1, 3 and
+ // therefore they won't be found.
+ SchemaProto email_no_body_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp2")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ set_schema_result = icing.SetSchema(
+ email_no_body_schema, /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Email");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Verify term search
+ // We should be able to retrieve the document by searching for 'tps' in
+ // 'subject'.
+ search_spec1.set_query("subject:tps");
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search
+ // We should be able to retrieve the document by searching for 1234 in
+ // 'timestamp'.
+ search_spec2.set_query("timestamp2 == 1234");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ ForceSetSchemaJoinablePropertyDeletionTriggersIndexRestoration) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create "Email" schema with 2 joinable properties:
+ // - "receiverQualifiedId": qualified id joinable. Joinable property id = 0.
+ // - "senderQualifiedId": qualified id joinable. Joinable property id = 1.
+ SchemaProto email_with_receiver_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("receiverQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result =
+ icing.SetSchema(email_with_receiver_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_new_schema_types()->Add("Person");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ DocumentProto person = DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("name", "person")
+ .Build();
+ // Create an email document with only "senderQualifiedId" joinable property.
+ DocumentProto email =
+ DocumentBuilder()
+ .SetKey("namespace", "email")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .Build();
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(email).status(), ProtoIsOk());
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `subject:tps` based on the child's `senderQualifiedId` field. We should be
+ // able to join person and email documents by this property.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = email;
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("name:person");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("subject:tps");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Now update the schema to remove "receiverQualifiedId" fields. This is
+ // backwards incompatible, but document should be preserved because it doesn't
+ // contain "receiverQualifiedId" field. Also since it is join incompatible, we
+ // have to rebuild join index.
+ // - "senderQualifiedId": qualified id joinable. Joinable property id = 0.
+ //
+ // If the index is not correctly rebuilt, then the joinable data of
+ // "senderQualifiedId" in the join index will still have old joinable property
+ // id of 1 and therefore won't take effect for join search query.
+ SchemaProto email_without_receiver_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Although we've just deleted an existing property "receiverQualifiedId" from
+ // schema "Email", some email documents will still be preserved because they
+ // don't have "receiverQualifiedId" property.
+ set_schema_result =
+ icing.SetSchema(email_without_receiver_schema,
+ /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_join_incompatible_changed_schema_types()
+ ->Add("Email");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `subject:tps` based on the child's `senderQualifiedId` field. We should
+ // still be able to join person and email documents by this property.
+ actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(
+ IcingSearchEngineSchemaTest,
+ ForceSetSchemaIndexedPropertyDeletionAndAdditionTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create a schema with 3 properties:
+ // - "body": string type, indexed. Section id = 0.
+ // - "subject": string type, indexed. Section id = 1.
+ // - "timestamp": int64 type, indexed. Section id = 2.
+ SchemaProto email_with_body_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result =
+ icing.SetSchema(email_with_body_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Create a document with only subject and timestamp property.
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .AddInt64Property("timestamp", 1234)
+ .Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ // Verify term search
+ // We should be able to retrieve the document by searching for 'tps' in
+ // 'subject'.
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("subject:tps");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search
+ // We should be able to retrieve the document by searching for 1234 in
+ // 'timestamp'.
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("timestamp == 1234");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Now update the schema to remove the 'body' field. This is backwards
+ // incompatible, but document should be preserved because it doesn't contain a
+ // 'body' field.
+ // - "subject": string type, indexed. Section id = 0.
+ // - "timestamp": int64 type, indexed. Section id = 1.
+ // - "to": string type, indexed. Section id = 2.
+ //
+ // If the index is not correctly rebuilt, then the hits of 'subject' and
+ // 'timestamp' in the index will still have old section ids of 1, 2 and
+ // therefore they won't be found.
+ SchemaProto email_no_body_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("to")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ set_schema_result = icing.SetSchema(
+ email_no_body_schema, /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Email");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Verify term search
+ // We should be able to retrieve the document by searching for 'tps' in
+ // 'subject'.
+ search_spec1.set_query("subject:tps");
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search
+ // We should be able to retrieve the document by searching for 1234 in
+ // 'timestamp'.
+ search_spec2.set_query("timestamp == 1234");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(
+ IcingSearchEngineSchemaTest,
+ ForceSetSchemaJoinablePropertyDeletionAndAdditionTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create "Email" schema with 2 joinable properties:
+ // - "receiverQualifiedId": qualified id joinable. Joinable property id = 0.
+ // - "senderQualifiedId": qualified id joinable. Joinable property id = 1.
+ SchemaProto email_with_body_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("receiverQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result =
+ icing.SetSchema(email_with_body_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_new_schema_types()->Add("Person");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ DocumentProto person = DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("name", "person")
+ .Build();
+ // Create an email document with only subject and timestamp property.
+ DocumentProto email =
+ DocumentBuilder()
+ .SetKey("namespace", "email")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .Build();
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(email).status(), ProtoIsOk());
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `subject:tps` based on the child's `senderQualifiedId` field. We should be
+ // able to join person and email documents by this property.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = email;
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("name:person");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("subject:tps");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Now update the schema to remove the "receiverQualified" field and add
+ // "zQualifiedId". This is backwards incompatible, but document should
+ // be preserved because it doesn't contain a "receiverQualified" field and
+ // "zQualifiedId" is optional.
+ // - "senderQualifiedId": qualified id joinable. Joinable property id = 0.
+ // - "zQualifiedId": qualified id joinable. Joinable property id = 1.
+ //
+ // If the index is not correctly rebuilt, then the joinable data of
+ // "senderQualifiedId" in the join index will still have old joinable property
+ // id of 1 and therefore won't take effect for join search query.
+ SchemaProto email_no_body_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("zQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ set_schema_result = icing.SetSchema(
+ email_no_body_schema, /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_join_incompatible_changed_schema_types()
+ ->Add("Email");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `subject:tps` based on the child's `senderQualifiedId` field. We should
+ // still be able to join person and email documents by this property.
+ actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ ForceSetSchemaIncompatibleNestedDocsAreDeleted) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SchemaTypeConfigProto email_schema_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument("Person",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto nested_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("company")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(email_schema_type)
+ .Build();
+
+ SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_new_schema_types()->Add("Person");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Create two documents - a person document and an email document - both docs
+ // should be deleted when we remove the 'company' field from the person type.
+ DocumentProto person_document =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Person")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("name", "Bill Lundbergh")
+ .AddStringProperty("company", "Initech Corp.")
+ .Build();
+ EXPECT_THAT(icing.Put(person_document).status(), ProtoIsOk());
+
+ DocumentProto email_document =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .AddDocumentProperty("sender", person_document)
+ .Build();
+ EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk());
+
+ // We should be able to retrieve both documents.
+ GetResultProto get_result =
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance());
+ EXPECT_THAT(get_result.status(), ProtoIsOk());
+ EXPECT_THAT(get_result.document(), EqualsProto(person_document));
+
+ get_result =
+ icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance());
+ EXPECT_THAT(get_result.status(), ProtoIsOk());
+ EXPECT_THAT(get_result.document(), EqualsProto(email_document));
+
+ // Now update the schema to remove the 'company' field. This is backwards
+ // incompatible, *both* documents should be deleted because both fail
+ // validation (they each contain a 'Person' that has a non-existent property).
+ nested_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(email_schema_type)
+ .Build();
+
+ set_schema_result = icing.SetSchema(
+ nested_schema, /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_incompatible_schema_types()->Add("Person");
+ expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Email");
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Person");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Both documents should be deleted now.
+ get_result =
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance());
+ EXPECT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
+
+ get_result =
+ icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance());
+ EXPECT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, SetSchemaRevalidatesDocumentsAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SchemaProto schema_with_optional_subject;
+ auto type = schema_with_optional_subject.add_types();
+ type->set_schema_type("email");
+
+ // Add a OPTIONAL property
+ auto property = type->add_properties();
+ property->set_property_name("subject");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema_with_optional_subject).status(),
+ ProtoIsOk());
+
+ DocumentProto email_document_without_subject =
+ DocumentBuilder()
+ .SetKey("namespace", "without_subject")
+ .SetSchema("email")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto email_document_with_subject =
+ DocumentBuilder()
+ .SetKey("namespace", "with_subject")
+ .SetSchema("email")
+ .AddStringProperty("subject", "foo")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ EXPECT_THAT(icing.Put(email_document_without_subject).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(email_document_with_subject).status(), ProtoIsOk());
+
+ SchemaProto schema_with_required_subject;
+ type = schema_with_required_subject.add_types();
+ type->set_schema_type("email");
+
+ // Add a REQUIRED property
+ property = type->add_properties();
+ property->set_property_name("subject");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+ // Can't set the schema since it's incompatible
+ SetSchemaResultProto set_schema_result =
+ icing.SetSchema(schema_with_required_subject);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result_proto;
+ expected_set_schema_result_proto.mutable_status()->set_code(
+ StatusProto::FAILED_PRECONDITION);
+ expected_set_schema_result_proto.mutable_status()->set_message(
+ "Schema is incompatible.");
+ expected_set_schema_result_proto.add_incompatible_schema_types("email");
+
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result_proto));
+
+ // Force set it
+ set_schema_result =
+ icing.SetSchema(schema_with_required_subject,
+ /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result_proto.mutable_status()->clear_message();
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result_proto));
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = email_document_with_subject;
+
+ EXPECT_THAT(icing.Get("namespace", "with_subject",
+ GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // The document without a subject got deleted because it failed validation
+ // against the new schema
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace, without_subject) not found.");
+ expected_get_result_proto.clear_document();
+
+ EXPECT_THAT(icing.Get("namespace", "without_subject",
+ GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, SetSchemaDeletesDocumentsAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SchemaProto schema;
+ auto type = schema.add_types();
+ type->set_schema_type("email");
+ type = schema.add_types();
+ type->set_schema_type("message");
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto email_document =
+ DocumentBuilder()
+ .SetKey("namespace", "email_uri")
+ .SetSchema("email")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message_document =
+ DocumentBuilder()
+ .SetKey("namespace", "message_uri")
+ .SetSchema("message")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk());
+
+ // Clear the schema and only add the "email" type, essentially deleting the
+ // "message" type
+ SchemaProto new_schema;
+ type = new_schema.add_types();
+ type->set_schema_type("email");
+
+ // Can't set the schema since it's incompatible
+ SetSchemaResultProto set_schema_result = icing.SetSchema(new_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_result;
+ expected_result.mutable_status()->set_code(StatusProto::FAILED_PRECONDITION);
+ expected_result.mutable_status()->set_message("Schema is incompatible.");
+ expected_result.add_deleted_schema_types("message");
+
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_result));
+
+ // Force set it
+ set_schema_result =
+ icing.SetSchema(new_schema,
+ /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_result.mutable_status()->set_code(StatusProto::OK);
+ expected_result.mutable_status()->clear_message();
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_result));
+
+ // "email" document is still there
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = email_document;
+
+ EXPECT_THAT(icing.Get("namespace", "email_uri",
+ GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // "message" document got deleted
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace, message_uri) not found.");
+ expected_get_result_proto.clear_document();
+
+ EXPECT_THAT(icing.Get("namespace", "message_uri",
+ GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, GetSchemaNotFound) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.GetSchema().status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, GetSchemaOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ GetSchemaResultProto expected_get_schema_result_proto;
+ expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema();
+ EXPECT_THAT(icing.GetSchema(), EqualsProto(expected_get_schema_result_proto));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, GetSchemaTypeFailedPrecondition) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ GetSchemaTypeResultProto get_schema_type_result_proto =
+ icing.GetSchemaType("nonexistent_schema");
+ EXPECT_THAT(get_schema_type_result_proto.status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(get_schema_type_result_proto.status().message(),
+ HasSubstr("Schema not set"));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, GetSchemaTypeOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ GetSchemaTypeResultProto expected_get_schema_type_result_proto;
+ expected_get_schema_type_result_proto.mutable_status()->set_code(
+ StatusProto::OK);
+ *expected_get_schema_type_result_proto.mutable_schema_type_config() =
+ CreateMessageSchema().types(0);
+ EXPECT_THAT(icing.GetSchemaType(CreateMessageSchema().types(0).schema_type()),
+ EqualsProto(expected_get_schema_type_result_proto));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaCanNotDetectPreviousSchemaWasLostWithoutDocuments) {
+ SchemaProto schema;
+ auto type = schema.add_types();
+ type->set_schema_type("Message");
+
+ auto body = type->add_properties();
+ body->set_property_name("body");
+ body->set_data_type(PropertyConfigProto::DataType::STRING);
+ body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ // Make an incompatible schema, a previously OPTIONAL field is REQUIRED
+ SchemaProto incompatible_schema = schema;
+ incompatible_schema.mutable_types(0)->mutable_properties(0)->set_cardinality(
+ PropertyConfigProto::Cardinality::REQUIRED);
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(GetSchemaDir().c_str()));
+
+ // Since we don't have any documents yet, we can't detect this edge-case. But
+ // it should be fine since there aren't any documents to be invalidated.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(incompatible_schema).status(), ProtoIsOk());
+}
+
+TEST_F(IcingSearchEngineSchemaTest, SetSchemaCanDetectPreviousSchemaWasLost) {
+ SchemaTypeConfigProto message_schema_type_config =
+ CreateMessageSchemaTypeConfig();
+ message_schema_type_config.mutable_properties(0)->set_cardinality(
+ CARDINALITY_OPTIONAL);
+
+ SchemaProto schema;
+ *schema.add_types() = message_schema_type_config;
+
+ // Make an incompatible schema, a previously OPTIONAL field is REQUIRED
+ SchemaProto incompatible_schema = schema;
+ incompatible_schema.mutable_types(0)->mutable_properties(0)->set_cardinality(
+ PropertyConfigProto::Cardinality::REQUIRED);
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // Can retrieve by namespace/uri
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document;
+
+ ASSERT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Can search for it
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ CreateMessageDocument("namespace", "uri");
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(GetSchemaDir().c_str()));
+
+ // Setting the new, different schema will remove incompatible documents
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(incompatible_schema).status(), ProtoIsOk());
+
+ // Can't retrieve by namespace/uri
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace, uri) not found.");
+
+ EXPECT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Can't search for it
+ SearchResultProto empty_result;
+ empty_result.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, IcingShouldWorkFor64Sections) {
+ // Create a schema with 64 sections
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ // Person has 4 sections.
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("firstName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("phoneNumber")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ // Email has 16 sections.
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("date")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("time")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("receiver")
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("cc")
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder()
+ // EmailCollection has 64 sections.
+ .SetType("EmailCollection")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("email1")
+ .SetDataTypeDocument(
+ "Email", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("email2")
+ .SetDataTypeDocument(
+ "Email", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("email3")
+ .SetDataTypeDocument(
+ "Email", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("email4")
+ .SetDataTypeDocument(
+ "Email", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first1")
+ .AddStringProperty("lastName", "last1")
+ .AddStringProperty("emailAddress", "email1@gmail.com")
+ .AddStringProperty("phoneNumber", "000-000-001")
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first2")
+ .AddStringProperty("lastName", "last2")
+ .AddStringProperty("emailAddress", "email2@gmail.com")
+ .AddStringProperty("phoneNumber", "000-000-002")
+ .Build();
+ DocumentProto person3 =
+ DocumentBuilder()
+ .SetKey("namespace", "person3")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first3")
+ .AddStringProperty("lastName", "last3")
+ .AddStringProperty("emailAddress", "email3@gmail.com")
+ .AddStringProperty("phoneNumber", "000-000-003")
+ .Build();
+ DocumentProto email1 = DocumentBuilder()
+ .SetKey("namespace", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("body", "test body")
+ .AddStringProperty("subject", "test subject")
+ .AddStringProperty("date", "2022-08-01")
+ .AddStringProperty("time", "1:00 PM")
+ .AddDocumentProperty("sender", person1)
+ .AddDocumentProperty("receiver", person2)
+ .AddDocumentProperty("cc", person3)
+ .Build();
+ DocumentProto email2 = DocumentBuilder()
+ .SetKey("namespace", "email2")
+ .SetSchema("Email")
+ .AddStringProperty("body", "test body")
+ .AddStringProperty("subject", "test subject")
+ .AddStringProperty("date", "2022-08-02")
+ .AddStringProperty("time", "2:00 PM")
+ .AddDocumentProperty("sender", person2)
+ .AddDocumentProperty("receiver", person1)
+ .AddDocumentProperty("cc", person3)
+ .Build();
+ DocumentProto email3 = DocumentBuilder()
+ .SetKey("namespace", "email3")
+ .SetSchema("Email")
+ .AddStringProperty("body", "test body")
+ .AddStringProperty("subject", "test subject")
+ .AddStringProperty("date", "2022-08-03")
+ .AddStringProperty("time", "3:00 PM")
+ .AddDocumentProperty("sender", person3)
+ .AddDocumentProperty("receiver", person1)
+ .AddDocumentProperty("cc", person2)
+ .Build();
+ DocumentProto email4 = DocumentBuilder()
+ .SetKey("namespace", "email4")
+ .SetSchema("Email")
+ .AddStringProperty("body", "test body")
+ .AddStringProperty("subject", "test subject")
+ .AddStringProperty("date", "2022-08-04")
+ .AddStringProperty("time", "4:00 PM")
+ .AddDocumentProperty("sender", person3)
+ .AddDocumentProperty("receiver", person2)
+ .AddDocumentProperty("cc", person1)
+ .Build();
+ DocumentProto email_collection =
+ DocumentBuilder()
+ .SetKey("namespace", "email_collection")
+ .SetSchema("EmailCollection")
+ .AddDocumentProperty("email1", email1)
+ .AddDocumentProperty("email2", email2)
+ .AddDocumentProperty("email3", email3)
+ .AddDocumentProperty("email4", email4)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email_collection).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ const std::vector<std::string> query_terms = {
+ "first1", "last2", "email3@gmail.com", "000-000-001",
+ "body", "subject", "2022-08-02", "3\\:00"};
+ SearchResultProto expected_document;
+ expected_document.mutable_status()->set_code(StatusProto::OK);
+ *expected_document.mutable_results()->Add()->mutable_document() =
+ email_collection;
+ for (const std::string& query_term : query_terms) {
+ search_spec.set_query(query_term);
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(expected_document));
+ }
+
+ search_spec.set_query("foo");
+ SearchResultProto expected_no_documents;
+ expected_no_documents.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(expected_no_documents));
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine_search_test.cc b/icing/icing-search-engine_search_test.cc
new file mode 100644
index 0000000..451c9ce
--- /dev/null
+++ b/icing/icing-search-engine_search_test.cc
@@ -0,0 +1,6199 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/icing-search-engine.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/join/join-processor.h"
+#include "icing/portable/endian.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/reset.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/query/query-features.h"
+#include "icing/schema-builder.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/snippet-helpers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::Gt;
+using ::testing::IsEmpty;
+using ::testing::Ne;
+using ::testing::SizeIs;
+
+// For mocking purpose, we allow tests to provide a custom Filesystem.
+class TestIcingSearchEngine : public IcingSearchEngine {
+ public:
+ TestIcingSearchEngine(const IcingSearchEngineOptions& options,
+ std::unique_ptr<const Filesystem> filesystem,
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
+ std::unique_ptr<Clock> clock,
+ std::unique_ptr<JniCache> jni_cache)
+ : IcingSearchEngine(options, std::move(filesystem),
+ std::move(icing_filesystem), std::move(clock),
+ std::move(jni_cache)) {}
+};
+
+std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; }
+
+// This test is meant to cover all tests relating to IcingSearchEngine::Search
+// and IcingSearchEngine::GetNextPage.
+class IcingSearchEngineSearchTest
+ : public ::testing::TestWithParam<SearchSpecProto::SearchType::Code> {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ // If we've specified using the reverse-JNI method for segmentation (i.e.
+ // not ICU), then we won't have the ICU data file included to set up.
+ // Technically, we could choose to use reverse-JNI for segmentation AND
+ // include an ICU data file, but that seems unlikely and our current BUILD
+ // setup doesn't do this.
+ // File generated via icu_data_file rule in //icing/BUILD.
+ std::string icu_data_file_path =
+ GetTestFilePath("icing/icu.dat");
+ ICING_ASSERT_OK(
+ icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
+ }
+ filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ const Filesystem* filesystem() const { return &filesystem_; }
+
+ private:
+ Filesystem filesystem_;
+};
+
+// Non-zero value so we don't override it to be the current time
+constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;
+
+IcingSearchEngineOptions GetDefaultIcingOptions() {
+ IcingSearchEngineOptions icing_options;
+ icing_options.set_base_dir(GetTestBaseDir());
+ return icing_options;
+}
+
+DocumentProto CreateMessageDocument(std::string name_space, std::string uri) {
+ return DocumentBuilder()
+ .SetKey(std::move(name_space), std::move(uri))
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+}
+
+DocumentProto CreateEmailDocument(const std::string& name_space,
+ const std::string& uri, int score,
+ const std::string& subject_content,
+ const std::string& body_content) {
+ return DocumentBuilder()
+ .SetKey(name_space, uri)
+ .SetSchema("Email")
+ .SetScore(score)
+ .AddStringProperty("subject", subject_content)
+ .AddStringProperty("body", body_content)
+ .Build();
+}
+
+SchemaProto CreateMessageSchema() {
+ return SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+}
+
+SchemaProto CreateEmailSchema() {
+ return SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+}
+
+SchemaProto CreatePersonAndEmailSchema() {
+ return SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+}
+
+ScoringSpecProto GetDefaultScoringSpec() {
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ return scoring_spec;
+}
+
+UsageReport CreateUsageReport(std::string name_space, std::string uri,
+ int64_t timestamp_ms,
+ UsageReport::UsageType usage_type) {
+ UsageReport usage_report;
+ usage_report.set_document_namespace(name_space);
+ usage_report.set_document_uri(uri);
+ usage_report.set_usage_timestamp_ms(timestamp_ms);
+ usage_report.set_usage_type(usage_type);
+ return usage_report;
+}
+
+std::vector<std::string> GetUrisFromSearchResults(
+ SearchResultProto& search_result_proto) {
+ std::vector<std::string> result_uris;
+ result_uris.reserve(search_result_proto.results_size());
+ for (int i = 0; i < search_result_proto.results_size(); i++) {
+ result_uris.push_back(
+ search_result_proto.mutable_results(i)->document().uri());
+ }
+ return result_uris;
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchReturnsValidResults) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(1);
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(1);
+
+ SearchResultProto results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(2));
+
+ const DocumentProto& document = results.results(0).document();
+ EXPECT_THAT(document, EqualsProto(document_two));
+
+ const SnippetProto& snippet = results.results(0).snippet();
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("message body"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("message"));
+
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
+ EXPECT_THAT(results.results(1).snippet().entries(), IsEmpty());
+
+ search_spec.set_query("foo");
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchReturnsScoresDocumentScore) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
+ document_one.set_score(93);
+ document_one.set_creation_timestamp_ms(10000);
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
+ document_two.set_score(15);
+ document_two.set_creation_timestamp_ms(12000);
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+ // Rank by DOCUMENT_SCORE and ensure that the score field is populated with
+ // document score.
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ SearchResultProto results = icing.Search(search_spec, scoring_spec,
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(2));
+
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_one));
+ EXPECT_THAT(results.results(0).score(), 93);
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document_two));
+ EXPECT_THAT(results.results(1).score(), 15);
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchReturnsScoresCreationTimestamp) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
+ document_one.set_score(93);
+ document_one.set_creation_timestamp_ms(10000);
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
+ document_two.set_score(15);
+ document_two.set_creation_timestamp_ms(12000);
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+ // Rank by CREATION_TS and ensure that the score field is populated with
+ // creation ts.
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+
+ SearchResultProto results = icing.Search(search_spec, scoring_spec,
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(2));
+
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+ EXPECT_THAT(results.results(0).score(), 12000);
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
+ EXPECT_THAT(results.results(1).score(), 10000);
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchReturnsOneResult) {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(1000);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document_two;
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+
+ EXPECT_THAT(search_result_proto.query_stats().latency_ms(), Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().parse_query_latency_ms(),
+ Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().scoring_latency_ms(), Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().ranking_latency_ms(), Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().document_retrieval_latency_ms(),
+ Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().lock_acquisition_latency_ms(),
+ Eq(1000));
+
+ // The token is a random number so we don't verify it.
+ expected_search_result_proto.set_next_page_token(
+ search_result_proto.next_page_token());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchReturnsOneResult_readOnlyFalse) {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(1000);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+ search_spec.set_use_read_only_search(false);
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document_two;
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+
+ EXPECT_THAT(search_result_proto.query_stats().latency_ms(), Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().parse_query_latency_ms(),
+ Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().scoring_latency_ms(), Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().ranking_latency_ms(), Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().document_retrieval_latency_ms(),
+ Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().lock_acquisition_latency_ms(),
+ Eq(1000));
+
+ // The token is a random number so we don't verify it.
+ expected_search_result_proto.set_next_page_token(
+ search_result_proto.next_page_token());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchZeroResultLimitReturnsEmptyResults) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(0);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchZeroResultLimitReturnsEmptyResults_readOnlyFalse) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("");
+ search_spec.set_search_type(GetParam());
+ search_spec.set_use_read_only_search(false);
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(0);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithNumToScore) {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(1000);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
+ document_one.set_score(10);
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
+ document_two.set_score(5);
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(10);
+ result_spec.set_num_to_score(10);
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+
+ SearchResultProto expected_search_result_proto1;
+ expected_search_result_proto1.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto1.mutable_results()->Add()->mutable_document() =
+ document_one;
+ *expected_search_result_proto1.mutable_results()->Add()->mutable_document() =
+ document_two;
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto1));
+
+ result_spec.set_num_to_score(1);
+ // By setting num_to_score = 1, only document_two will be scored, ranked, and
+ // returned.
+ // - num_to_score cutoff is only affected by the reading order from posting
+ // list. IOW, since we read posting lists in doc id descending order,
+ // ScoringProcessor scores documents with higher doc ids first and cuts off
+ // if exceeding num_to_score.
+ // - Therefore, even though document_one has higher score, ScoringProcessor
+ // still skips document_one, because posting list reads document_two first
+ // and ScoringProcessor stops after document_two given that total # of
+ // scored document has already reached num_to_score.
+ SearchResultProto expected_search_result_google::protobuf;
+ expected_search_result_google::protobuf.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_google::protobuf.mutable_results()->Add()->mutable_document() =
+ document_two;
+
+ search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_google::protobuf));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchNegativeResultLimitReturnsInvalidArgument) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(-5);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(
+ StatusProto::INVALID_ARGUMENT);
+ expected_search_result_proto.mutable_status()->set_message(
+ "ResultSpecProto.num_per_page cannot be negative.");
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchNegativeResultLimitReturnsInvalidArgument_readOnlyFalse) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("");
+ search_spec.set_search_type(GetParam());
+ search_spec.set_use_read_only_search(false);
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(-5);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(
+ StatusProto::INVALID_ARGUMENT);
+ expected_search_result_proto.mutable_status()->set_message(
+ "ResultSpecProto.num_per_page cannot be negative.");
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchNonPositivePageTotalBytesLimitReturnsInvalidArgument) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_total_bytes_per_page_threshold(-1);
+
+ SearchResultProto actual_results1 =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results1.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+
+ result_spec.set_num_total_bytes_per_page_threshold(0);
+ SearchResultProto actual_results2 =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results2.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchNegativeMaxJoinedChildrenPerParentReturnsInvalidArgument) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.set_max_joined_children_per_parent_to_return(-1);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(
+ StatusProto::INVALID_ARGUMENT);
+ expected_search_result_proto.mutable_status()->set_message(
+ "ResultSpecProto.max_joined_children_per_parent_to_return cannot be "
+ "negative.");
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchNonPositiveNumToScoreReturnsInvalidArgument) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_to_score(-1);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(
+ StatusProto::INVALID_ARGUMENT);
+ expected_search_result_proto.mutable_status()->set_message(
+ "ResultSpecProto.num_to_score cannot be non-positive.");
+
+ SearchResultProto actual_results1 =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ result_spec.set_num_to_score(0);
+ SearchResultProto actual_results2 =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results2, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithPersistenceReturnsValidResults) {
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
+ {
+ // Set the schema up beforehand.
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ // Schema will be persisted to disk when icing goes out of scope.
+ }
+
+ {
+ // Ensure that icing initializes the schema and section_manager
+ // properly from the pre-existing file.
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
+ // The index and document store will be persisted to disk when icing goes
+ // out of scope.
+ }
+
+ {
+ // Ensure that the index is brought back up without problems and we
+ // can query for the content that we expect.
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ CreateMessageDocument("namespace", "uri");
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ search_spec.set_query("foo");
+
+ SearchResultProto empty_result;
+ empty_result.mutable_status()->set_code(StatusProto::OK);
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+ }
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchShouldReturnEmpty) {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(1000);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+ // Empty result, no next-page token
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+
+ EXPECT_THAT(search_result_proto.query_stats().latency_ms(), Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().parse_query_latency_ms(),
+ Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().scoring_latency_ms(), Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().ranking_latency_ms(), Eq(0));
+ EXPECT_THAT(search_result_proto.query_stats().document_retrieval_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(search_result_proto.query_stats().lock_acquisition_latency_ms(),
+ Eq(1000));
+
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchShouldReturnMultiplePages) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates and inserts 5 documents
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
+ DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
+ DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(2);
+
+ // Searches and gets the first page, 2 results
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document5;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document4;
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken));
+ uint64_t next_page_token = search_result_proto.next_page_token();
+ // Since the token is a random number, we don't need to verify
+ expected_search_result_proto.set_next_page_token(next_page_token);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Second page, 2 results
+ expected_search_result_proto.clear_results();
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Third page, 1 result
+ expected_search_result_proto.clear_results();
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+ // Because there are no more results, we should not return the next page
+ // token.
+ expected_search_result_proto.clear_next_page_token();
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // No more results
+ expected_search_result_proto.clear_results();
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchWithNoScoringShouldReturnMultiplePages) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates and inserts 5 documents
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
+ DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
+ DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE);
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(2);
+
+ // Searches and gets the first page, 2 results
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document5;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document4;
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken));
+ uint64_t next_page_token = search_result_proto.next_page_token();
+ // Since the token is a random number, we don't need to verify
+ expected_search_result_proto.set_next_page_token(next_page_token);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Second page, 2 results
+ expected_search_result_proto.clear_results();
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Third page, 1 result
+ expected_search_result_proto.clear_results();
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+ // Because there are no more results, we should not return the next page
+ // token.
+ expected_search_result_proto.clear_next_page_token();
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // No more results
+ expected_search_result_proto.clear_results();
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchWithUnknownEnabledFeatureShouldReturnError) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+ search_spec.add_enabled_features("BAD_FEATURE");
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_P(IcingSearchEngineSearchTest, ShouldReturnMultiplePagesWithSnippets) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates and inserts 5 documents
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
+ DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
+ DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(2);
+ result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(1);
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(3);
+
+ // Searches and gets the first page, 2 results with 2 snippets
+ SearchResultProto search_result =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), SizeIs(2));
+ ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
+
+ const DocumentProto& document_result_1 = search_result.results(0).document();
+ EXPECT_THAT(document_result_1, EqualsProto(document5));
+ const SnippetProto& snippet_result_1 = search_result.results(0).snippet();
+ EXPECT_THAT(snippet_result_1.entries(), SizeIs(1));
+ EXPECT_THAT(snippet_result_1.entries(0).property_name(), Eq("body"));
+ std::string_view content = GetString(
+ &document_result_1, snippet_result_1.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet_result_1.entries(0)),
+ ElementsAre("message body"));
+ EXPECT_THAT(GetMatches(content, snippet_result_1.entries(0)),
+ ElementsAre("message"));
+
+ const DocumentProto& document_result_2 = search_result.results(1).document();
+ EXPECT_THAT(document_result_2, EqualsProto(document4));
+ const SnippetProto& snippet_result_2 = search_result.results(1).snippet();
+ EXPECT_THAT(snippet_result_2.entries(0).property_name(), Eq("body"));
+ content = GetString(&document_result_2,
+ snippet_result_2.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet_result_2.entries(0)),
+ ElementsAre("message body"));
+ EXPECT_THAT(GetMatches(content, snippet_result_2.entries(0)),
+ ElementsAre("message"));
+
+ // Second page, 2 result with 1 snippet
+ search_result = icing.GetNextPage(search_result.next_page_token());
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), SizeIs(2));
+ ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
+
+ const DocumentProto& document_result_3 = search_result.results(0).document();
+ EXPECT_THAT(document_result_3, EqualsProto(document3));
+ const SnippetProto& snippet_result_3 = search_result.results(0).snippet();
+ EXPECT_THAT(snippet_result_3.entries(0).property_name(), Eq("body"));
+ content = GetString(&document_result_3,
+ snippet_result_3.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet_result_3.entries(0)),
+ ElementsAre("message body"));
+ EXPECT_THAT(GetMatches(content, snippet_result_3.entries(0)),
+ ElementsAre("message"));
+
+ EXPECT_THAT(search_result.results(1).document(), EqualsProto(document2));
+ EXPECT_THAT(search_result.results(1).snippet().entries(), IsEmpty());
+
+ // Third page, 1 result with 0 snippets
+ search_result = icing.GetNextPage(search_result.next_page_token());
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), SizeIs(1));
+ ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken));
+
+ EXPECT_THAT(search_result.results(0).document(), EqualsProto(document1));
+ EXPECT_THAT(search_result.results(0).snippet().entries(), IsEmpty());
+}
+
+TEST_P(IcingSearchEngineSearchTest, ShouldInvalidateNextPageToken) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+
+ // Searches and gets the first page, 1 result
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken));
+ uint64_t next_page_token = search_result_proto.next_page_token();
+ // Since the token is a random number, we don't need to verify
+ expected_search_result_proto.set_next_page_token(next_page_token);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ // Now document1 is still to be fetched.
+
+ // Invalidates token
+ icing.InvalidateNextPageToken(next_page_token);
+
+ // Tries to fetch the second page, no result since it's invalidated
+ expected_search_result_proto.clear_results();
+ expected_search_result_proto.clear_next_page_token();
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchIncludesDocumentsBeforeTtl) {
+ SchemaProto schema;
+ auto type = schema.add_types();
+ type->set_schema_type("Message");
+
+ auto body = type->add_properties();
+ body->set_property_name("body");
+ body->set_data_type(PropertyConfigProto::DataType::STRING);
+ body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ body->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ body->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(500)
+ .Build();
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_search_type(GetParam());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ // Time just has to be less than the document's creation timestamp (100) + the
+ // document's ttl (500)
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetSystemTimeMilliseconds(400);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // Check that the document is returned as part of search results
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchDoesntIncludeDocumentsPastTtl) {
+ SchemaProto schema;
+ auto type = schema.add_types();
+ type->set_schema_type("Message");
+
+ auto body = type->add_properties();
+ body->set_property_name("body");
+ body->set_data_type(PropertyConfigProto::DataType::STRING);
+ body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ body->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ body->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(500)
+ .Build();
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_search_type(GetParam());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+
+ // Time just has to be greater than the document's creation timestamp (100) +
+ // the document's ttl (500)
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetSystemTimeMilliseconds(700);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // Check that the document is not returned as part of search results
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchWorksAfterSchemaTypesCompatiblyModified) {
+ SchemaProto schema;
+ auto type_config = schema.add_types();
+ type_config->set_schema_type("message");
+
+ auto property = type_config->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ DocumentProto message_document =
+ DocumentBuilder()
+ .SetKey("namespace", "message_uri")
+ .SetSchema("message")
+ .AddStringProperty("body", "foo")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message_document).status(), ProtoIsOk());
+
+ // Make sure we can search for message document
+ SearchSpecProto search_spec;
+ search_spec.set_query("foo");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_search_type(GetParam());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+
+ // The message isn't indexed, so we get nothing
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // With just the schema type filter, we can search for the message
+ search_spec.Clear();
+ search_spec.add_schema_type_filters("message");
+
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ message_document;
+
+ search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Since SchemaTypeIds are assigned based on order in the SchemaProto, this
+ // will force a change in the DocumentStore's cached SchemaTypeIds
+ schema.clear_types();
+ type_config = schema.add_types();
+ type_config->set_schema_type("email");
+
+ // Adding a new indexed property will require reindexing
+ type_config = schema.add_types();
+ type_config->set_schema_type("message");
+
+ property = type_config->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ search_spec.Clear();
+ search_spec.set_query("foo");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.add_schema_type_filters("message");
+
+ // We can still search for the message document
+ search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchResultShouldBeRankedByDocumentScore) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 documents and ensures the relationship in terms of document
+ // score is: document1 < document2 < document3
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetScore(3)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // Intentionally inserts the documents in the order that is different than
+ // their score order
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ // "m" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ // Result should be in descending score order
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWorksForNestedSubtypeDocument) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Artist")
+ .AddParentType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("Company").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("employee")
+ .SetDataTypeDocument("Person",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ // Create a company with a person and an artist.
+ DocumentProto document_company =
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Company")
+ .AddDocumentProperty("employee",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "name_person")
+ .Build(),
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .AddStringProperty("name", "name_artist")
+ .AddStringProperty("emailAddress", "email")
+ .Build())
+ .Build();
+ ASSERT_THAT(icing.Put(document_company).status(), ProtoIsOk());
+
+ SearchResultProto company_search_result_proto;
+ company_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *company_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document_company;
+
+ SearchResultProto empty_search_result_proto;
+ empty_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_search_type(GetParam());
+
+ // "name_person" should match the company.
+ search_spec.set_query("name_person");
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ company_search_result_proto));
+
+ // "name_artist" should match the company.
+ search_spec.set_query("name_artist");
+ search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ company_search_result_proto));
+
+ // "email" should not match the company even though the artist has a matched
+ // property. This is because the "employee" property is defined as Person
+ // type, and indexing on document properties should be based on defined types,
+ // instead of subtypes.
+ search_spec.set_query("email");
+ search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ empty_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchShouldAllowNoScoring) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 documents and ensures the relationship of them is:
+ // document1 < document2 < document3
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(1571111111111)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(1572222222222)
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetScore(3)
+ .SetCreationTimestampMs(1573333333333)
+ .Build();
+
+ // Intentionally inserts the documents in the order that is different than
+ // their score order
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // "m" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+
+ // Results should not be ranked by score but returned in reverse insertion
+ // order.
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultShouldBeRankedByCreationTimestamp) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 documents and ensures the relationship in terms of creation
+ // timestamp score is: document1 < document2 < document3
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetCreationTimestampMs(1571111111111)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetCreationTimestampMs(1572222222222)
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetCreationTimestampMs(1573333333333)
+ .Build();
+
+ // Intentionally inserts the documents in the order that is different than
+ // their score order
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // "m" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ // Result should be in descending timestamp order
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchResultShouldBeRankedByUsageCount) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 test documents
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // Intentionally inserts the documents in a different order to eliminate the
+ // possibility that the following results are sorted in the default reverse
+ // insertion order.
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // Report usage for doc3 twice and doc2 once. The order will be doc3 > doc2 >
+ // doc1 when ranked by USAGE_TYPE1_COUNT.
+ UsageReport usage_report_doc3 = CreateUsageReport(
+ /*name_space=*/"namespace", /*uri=*/"uri/3", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ UsageReport usage_report_doc2 = CreateUsageReport(
+ /*name_space=*/"namespace", /*uri=*/"uri/2", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc2).status(), ProtoIsOk());
+
+ // "m" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ // Result should be in descending USAGE_TYPE1_COUNT order
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultShouldHaveDefaultOrderWithoutUsageCounts) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 test documents
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ // "m" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ // None of the documents have usage reports. Result should be in the default
+ // reverse insertion order.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultShouldBeRankedByUsageTimestamp) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 test documents
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // Intentionally inserts the documents in a different order to eliminate the
+ // possibility that the following results are sorted in the default reverse
+ // insertion order.
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // Report usage for doc2 and doc3. The order will be doc3 > doc2 > doc1 when
+ // ranked by USAGE_TYPE1_LAST_USED_TIMESTAMP.
+ UsageReport usage_report_doc2 = CreateUsageReport(
+ /*name_space=*/"namespace", /*uri=*/"uri/2", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE1);
+ UsageReport usage_report_doc3 = CreateUsageReport(
+ /*name_space=*/"namespace", /*uri=*/"uri/3", /*timestamp_ms=*/5000,
+ UsageReport::USAGE_TYPE1);
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc3).status(), ProtoIsOk());
+
+ // "m" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ // Result should be in descending USAGE_TYPE1_LAST_USED_TIMESTAMP order
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, Bm25fRelevanceScoringOneNamespace) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+ // Create and index documents in namespace "namespace1".
+ DocumentProto document = CreateEmailDocument(
+ "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
+ "fresh fish. inexpensive. good sushi.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
+ "indian food. buffet. spicy food. kadai chicken.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
+ "panda express",
+ "chinese food. cheap. inexpensive. kung pao.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
+ "speederia pizza",
+ "thin-crust pizza. good and fast.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
+ "whole foods",
+ "salads. pizza. organic food. expensive.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
+ "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri6", /*score=*/4, "costco",
+ "bulk. cheap whole beans. frozen fish. food samples.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
+ "starbucks coffee",
+ "habit. birthday rewards. good coffee");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("coffee OR food");
+ search_spec.set_search_type(GetParam());
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+
+ // Result should be in descending score order
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+ // Both doc5 and doc7 have "coffee" in name and text sections.
+ // However, doc5 has more matches in the text section.
+ // Documents with "food" are ranked lower as the term "food" is commonly
+ // present in this corpus, and thus, has a lower IDF.
+ EXPECT_THAT(GetUrisFromSearchResults(search_result_proto),
+ ElementsAre("namespace1/uri5", // 'coffee' 3 times
+ "namespace1/uri7", // 'coffee' 2 times
+ "namespace1/uri1", // 'food' 2 times
+ "namespace1/uri4", // 'food' 2 times
+ "namespace1/uri2", // 'food' 1 time
+ "namespace1/uri6")); // 'food' 1 time
+}
+
+TEST_P(IcingSearchEngineSearchTest, Bm25fRelevanceScoringOneNamespaceAdvanced) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+ // Create and index documents in namespace "namespace1".
+ DocumentProto document = CreateEmailDocument(
+ "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
+ "fresh fish. inexpensive. good sushi.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
+ "indian food. buffet. spicy food. kadai chicken.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
+ "panda express",
+ "chinese food. cheap. inexpensive. kung pao.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
+ "speederia pizza",
+ "thin-crust pizza. good and fast.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
+ "whole foods",
+ "salads. pizza. organic food. expensive.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
+ "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri6", /*score=*/4, "costco",
+ "bulk. cheap whole beans. frozen fish. food samples.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
+ "starbucks coffee",
+ "habit. birthday rewards. good coffee");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("coffee OR food");
+ search_spec.set_search_type(GetParam());
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_advanced_scoring_expression("this.relevanceScore() * 2 + 1");
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::ADVANCED_SCORING_EXPRESSION);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+
+ // Result should be in descending score order
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+ // Both doc5 and doc7 have "coffee" in name and text sections.
+ // However, doc5 has more matches in the text section.
+ // Documents with "food" are ranked lower as the term "food" is commonly
+ // present in this corpus, and thus, has a lower IDF.
+ EXPECT_THAT(GetUrisFromSearchResults(search_result_proto),
+ ElementsAre("namespace1/uri5", // 'coffee' 3 times
+ "namespace1/uri7", // 'coffee' 2 times
+ "namespace1/uri1", // 'food' 2 times
+ "namespace1/uri4", // 'food' 2 times
+ "namespace1/uri2", // 'food' 1 time
+ "namespace1/uri6")); // 'food' 1 time
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ Bm25fRelevanceScoringOneNamespaceNotOperator) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+ // Create and index documents in namespace "namespace1".
+ DocumentProto document = CreateEmailDocument(
+ "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
+ "fresh fish. inexpensive. good sushi.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
+ "indian food. buffet. spicy food. kadai chicken.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
+ "panda express",
+ "chinese food. cheap. inexpensive. kung pao.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri3", /*score=*/23, "speederia pizza",
+ "thin-crust pizza. good and fast. nice coffee");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
+ "whole foods",
+ "salads. pizza. organic food. expensive.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
+ "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri6", /*score=*/4, "costco",
+ "bulk. cheap whole beans. frozen fish. food samples.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
+ "starbucks coffee",
+ "habit. birthday rewards. good coffee");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("coffee -starbucks");
+ search_spec.set_search_type(GetParam());
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+
+ // Result should be in descending score order
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ GetUrisFromSearchResults(search_result_proto),
+ ElementsAre("namespace1/uri5", // 'coffee' 3 times, 'starbucks' 0 times
+ "namespace1/uri3")); // 'coffee' 1 times, 'starbucks' 0 times
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ Bm25fRelevanceScoringOneNamespaceSectionRestrict) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+ // Create and index documents in namespace "namespace1".
+ DocumentProto document = CreateEmailDocument(
+ "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
+ "fresh fish. inexpensive. good sushi.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
+ "indian food. buffet. spicy food. kadai chicken.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
+ "panda express",
+ "chinese food. cheap. inexpensive. kung pao.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
+ "speederia pizza",
+ "thin-crust pizza. good and fast.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
+ "whole foods",
+ "salads. pizza. organic food. expensive.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document =
+ CreateEmailDocument("namespace1", "namespace1/uri5", /*score=*/18,
+ "peets coffee, best coffee",
+ "espresso. decaf. whole beans. excellent coffee.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri6", /*score=*/4, "costco",
+ "bulk. cheap whole beans. frozen fish. food samples.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri7", /*score=*/4, "starbucks",
+ "habit. birthday rewards. good coffee. brewed coffee");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("subject:coffee OR body:food");
+ search_spec.set_search_type(GetParam());
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+
+ // Result should be in descending score order
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+ // The term frequencies of "coffee" and "food" are calculated respectively
+ // from the subject section and the body section.
+ // Documents with "food" are ranked lower as the term "food" is commonly
+ // present in this corpus, and thus, has a lower IDF.
+ EXPECT_THAT(
+ GetUrisFromSearchResults(search_result_proto),
+ ElementsAre("namespace1/uri5", // 'coffee' 2 times in section subject
+ "namespace1/uri1", // 'food' 2 times in section body
+ "namespace1/uri4", // 'food' 2 times in section body
+ "namespace1/uri2", // 'food' 1 time in section body
+ "namespace1/uri6")); // 'food' 1 time in section body
+}
+
+TEST_P(IcingSearchEngineSearchTest, Bm25fRelevanceScoringTwoNamespaces) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+ // Create and index documents in namespace "namespace1".
+ DocumentProto document = CreateEmailDocument(
+ "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
+ "fresh fish. inexpensive. good sushi.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
+ "indian food. buffet. spicy food. kadai chicken.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
+ "panda express",
+ "chinese food. cheap. inexpensive. kung pao.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
+ "speederia pizza",
+ "thin-crust pizza. good and fast.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
+ "whole foods",
+ "salads. pizza. organic food. expensive.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
+ "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri6", /*score=*/4, "costco",
+ "bulk. cheap whole beans. frozen fish. food samples.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
+ "starbucks coffee",
+ "habit. birthday rewards. good coffee");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // Create and index documents in namespace "namespace2".
+ document = CreateEmailDocument("namespace2", "namespace2/uri0", /*score=*/10,
+ "sushi belmont",
+ "fresh fish. inexpensive. good sushi.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace2", "namespace2/uri1", /*score=*/13, "peacock koriander",
+ "indian food. buffet. spicy food. kadai chicken.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace2", "namespace2/uri2", /*score=*/4,
+ "panda express",
+ "chinese food. cheap. inexpensive. kung pao.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace2", "namespace2/uri3", /*score=*/23,
+ "speederia pizza",
+ "thin-crust pizza. good and fast.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace2", "namespace2/uri4", /*score=*/8,
+ "whole foods",
+ "salads. pizza. organic food. expensive.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace2", "namespace2/uri5", /*score=*/18, "peets coffee",
+ "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace2", "namespace2/uri6", /*score=*/4, "costco",
+ "bulk. cheap whole beans. frozen fish. food samples.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace2", "namespace2/uri7", /*score=*/4,
+ "starbucks coffee", "good coffee");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("coffee OR food");
+ search_spec.set_search_type(GetParam());
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+ ResultSpecProto result_spec_proto;
+ result_spec_proto.set_num_per_page(16);
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec_proto);
+
+ // Result should be in descending score order
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+ // The two corpora have the same documents except for document 7, which in
+ // "namespace2" is much shorter than the average dcoument length, so it is
+ // boosted.
+ EXPECT_THAT(GetUrisFromSearchResults(search_result_proto),
+ ElementsAre("namespace2/uri7", // 'coffee' 2 times, short doc
+ "namespace1/uri5", // 'coffee' 3 times
+ "namespace2/uri5", // 'coffee' 3 times
+ "namespace1/uri7", // 'coffee' 2 times
+ "namespace1/uri1", // 'food' 2 times
+ "namespace2/uri1", // 'food' 2 times
+ "namespace1/uri4", // 'food' 2 times
+ "namespace2/uri4", // 'food' 2 times
+ "namespace1/uri2", // 'food' 1 time
+ "namespace2/uri2", // 'food' 1 time
+ "namespace1/uri6", // 'food' 1 time
+ "namespace2/uri6")); // 'food' 1 time
+}
+
+TEST_P(IcingSearchEngineSearchTest, Bm25fRelevanceScoringWithNamespaceFilter) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+ // Create and index documents in namespace "namespace1".
+ DocumentProto document = CreateEmailDocument(
+ "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
+ "fresh fish. inexpensive. good sushi.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
+ "indian food. buffet. spicy food. kadai chicken.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
+ "panda express",
+ "chinese food. cheap. inexpensive. kung pao.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
+ "speederia pizza",
+ "thin-crust pizza. good and fast.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
+ "whole foods",
+ "salads. pizza. organic food. expensive.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
+ "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri6", /*score=*/4, "costco",
+ "bulk. cheap whole beans. frozen fish. food samples.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
+ "starbucks coffee",
+ "habit. birthday rewards. good coffee");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // Create and index documents in namespace "namespace2".
+ document = CreateEmailDocument("namespace2", "namespace2/uri0", /*score=*/10,
+ "sushi belmont",
+ "fresh fish. inexpensive. good sushi.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace2", "namespace2/uri1", /*score=*/13, "peacock koriander",
+ "indian food. buffet. spicy food. kadai chicken.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace2", "namespace2/uri2", /*score=*/4,
+ "panda express",
+ "chinese food. cheap. inexpensive. kung pao.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace2", "namespace2/uri3", /*score=*/23,
+ "speederia pizza",
+ "thin-crust pizza. good and fast.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace2", "namespace2/uri4", /*score=*/8,
+ "whole foods",
+ "salads. pizza. organic food. expensive.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace2", "namespace2/uri5", /*score=*/18, "peets coffee",
+ "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace2", "namespace2/uri6", /*score=*/4, "costco",
+ "bulk. cheap whole beans. frozen fish. food samples.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace2", "namespace2/uri7", /*score=*/4,
+ "starbucks coffee", "good coffee");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("coffee OR food");
+ search_spec.set_search_type(GetParam());
+ // Now query only corpus 2
+ search_spec.add_namespace_filters("namespace2");
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ search_result_proto = icing.Search(search_spec, scoring_spec,
+ ResultSpecProto::default_instance());
+
+ // Result from namespace "namespace2" should be in descending score order
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+ // Both doc5 and doc7 have "coffee" in name and text sections.
+ // Even though doc5 has more matches in the text section, doc7's length is
+ // much shorter than the average corpus's length, so it's being boosted.
+ // Documents with "food" are ranked lower as the term "food" is commonly
+ // present in this corpus, and thus, has a lower IDF.
+ EXPECT_THAT(GetUrisFromSearchResults(search_result_proto),
+ ElementsAre("namespace2/uri7", // 'coffee' 2 times, short doc
+ "namespace2/uri5", // 'coffee' 3 times
+ "namespace2/uri1", // 'food' 2 times
+ "namespace2/uri4", // 'food' 2 times
+ "namespace2/uri2", // 'food' 1 time
+ "namespace2/uri6")); // 'food' 1 time
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultShouldHaveDefaultOrderWithoutUsageTimestamp) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 test documents
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ // "m" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ // None of the documents have usage reports. Result should be in the default
+ // reverse insertion order.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchResultShouldBeRankedAscendingly) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 documents and ensures the relationship in terms of document
+ // score is: document1 < document2 < document3
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetScore(3)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // Intentionally inserts the documents in the order that is different than
+ // their score order
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ // "m" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ // Result should be in ascending score order
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ scoring_spec.set_order_by(ScoringSpecProto::Order::ASC);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultGroupingDuplicateNamespaceShouldReturnError) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 2 documents and ensures the relationship in terms of document
+ // score is: document1 < document2
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // "m" will match all 2 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ // Specify "namespace1" twice. This should result in an error.
+ ResultSpecProto result_spec;
+ result_spec.set_result_group_type(ResultSpecProto::NAMESPACE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_namespace_("namespace1");
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace2");
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace1");
+ result_grouping = result_spec.add_result_groupings();
+ entry = result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_namespace_("namespace1");
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(search_result_proto.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultGroupingDuplicateSchemaShouldReturnError) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 2 documents and ensures the relationship in terms of document
+ // score is: document1 < document2
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // "m" will match all 2 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ // Specify "Message" twice. This should result in an error.
+ ResultSpecProto result_spec;
+ result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_schema("Message");
+ entry = result_grouping->add_entry_groupings();
+ entry->set_schema("nonexistentMessage");
+ result_grouping = result_spec.add_result_groupings();
+ result_grouping->set_max_results(1);
+ entry = result_grouping->add_entry_groupings();
+ entry->set_schema("Message");
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(search_result_proto.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultGroupingDuplicateNamespaceAndSchemaSchemaShouldReturnError) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 2 documents and ensures the relationship in terms of document
+ // score is: document1 < document2
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // "m" will match all 2 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ // Specify "namespace1xMessage" twice. This should result in an error.
+ ResultSpecProto result_spec;
+ result_spec.set_result_group_type(ResultSpecProto::NAMESPACE_AND_SCHEMA_TYPE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_namespace_("namespace1");
+ entry->set_schema("Message");
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace2");
+ entry->set_schema("Message");
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace1");
+ entry->set_schema("Message");
+ result_grouping = result_spec.add_result_groupings();
+ result_grouping->set_max_results(1);
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace1");
+ entry->set_schema("Message");
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(search_result_proto.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultGroupingNonPositiveMaxResultsShouldReturnError) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 2 documents and ensures the relationship in terms of document
+ // score is: document1 < document2
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // "m" will match all 2 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ // Specify zero results. This should result in an error.
+ ResultSpecProto result_spec;
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(0);
+ entry->set_namespace_("namespace1");
+ entry->set_schema("Message");
+ result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace2");
+ entry->set_schema("Message");
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(search_result_proto.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+
+ // Specify negative results. This should result in an error.
+ result_spec.mutable_result_groupings(0)->set_max_results(-1);
+ EXPECT_THAT(search_result_proto.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultGroupingMultiNamespaceGrouping) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 documents and ensures the relationship in terms of document
+ // score is: document1 < document2 < document3 < document4 < document5 <
+ // document6
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetScore(3)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document4 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri/4")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(4)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document5 =
+ DocumentBuilder()
+ .SetKey("namespace3", "uri/5")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetScore(5)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document6 =
+ DocumentBuilder()
+ .SetKey("namespace3", "uri/6")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(6)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document6).status(), ProtoIsOk());
+
+ // "m" will match all 6 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ ResultSpecProto result_spec;
+ result_spec.set_result_group_type(ResultSpecProto::NAMESPACE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_namespace_("namespace1");
+ result_grouping = result_spec.add_result_groupings();
+ result_grouping->set_max_results(2);
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace2");
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace3");
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec);
+
+ // The last result (document1) in namespace "namespace1" should not be
+ // included. "namespace2" and "namespace3" are grouped together. So only the
+ // two highest scored documents between the two (both of which are in
+ // "namespace3") should be returned.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document6;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document5;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchResultGroupingMultiSchemaGrouping) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetScore(1)
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .AddDocumentProperty("sender", DocumentBuilder()
+ .SetKey("namespace", "uri1-sender")
+ .SetSchema("Person")
+ .AddStringProperty("name", "foo")
+ .Build())
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Message")
+ .SetScore(2)
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("body", "fo")
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace2", "uri3")
+ .SetSchema("Message")
+ .SetScore(3)
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("body", "fo")
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ // "f" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("f");
+ search_spec.set_search_type(GetParam());
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ ResultSpecProto result_spec;
+ result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_schema("Message");
+ result_grouping = result_spec.add_result_groupings();
+ result_grouping->set_max_results(1);
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("Email");
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec);
+
+ // Each of the highest scored documents of schema type "Message" (document3)
+ // and "Email" (document1) should be returned.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultGroupingMultiNamespaceAndSchemaGrouping) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 documents and ensures the relationship in terms of document
+ // score is: document1 < document2 < document3 < document4 < document5 <
+ // document6
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetScore(3)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document4 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri/4")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(4)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document5 =
+ DocumentBuilder()
+ .SetKey("namespace3", "uri/5")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetScore(5)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document6 =
+ DocumentBuilder()
+ .SetKey("namespace3", "uri/6")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(6)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document6).status(), ProtoIsOk());
+
+ // "m" will match all 6 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ ResultSpecProto result_spec;
+ result_spec.set_result_group_type(ResultSpecProto::NAMESPACE_AND_SCHEMA_TYPE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_namespace_("namespace1");
+ entry->set_schema("Message");
+ result_grouping = result_spec.add_result_groupings();
+ result_grouping->set_max_results(1);
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace2");
+ entry->set_schema("Message");
+ result_grouping = result_spec.add_result_groupings();
+ result_grouping->set_max_results(1);
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace3");
+ entry->set_schema("Message");
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec);
+
+ // The three highest scored documents that fit the criteria of
+ // "namespace1xMessage" (document2), "namespace2xMessage" (document4),
+ // and "namespace3xMessage" (document6) should be returned.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document6;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document4;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultGroupingNonexistentNamespaceShouldBeIgnored) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 2 documents and ensures the relationship in terms of document
+ // score is: document1 < document2
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // "m" will match all 2 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ ResultSpecProto result_spec;
+ result_spec.set_result_group_type(ResultSpecProto::NAMESPACE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_namespace_("namespace1");
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("nonexistentNamespace");
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec);
+
+ // Only the top ranked document in "namespace" (document2), should be
+ // returned. The presence of "nonexistentNamespace" in the same result
+ // grouping should have no effect.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultGroupingNonexistentSchemaShouldBeIgnored) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 2 documents and ensures the relationship in terms of document
+ // score is: document1 < document2
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // "m" will match all 2 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ ResultSpecProto result_spec;
+ result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_schema("Message");
+ entry = result_grouping->add_entry_groupings();
+ entry->set_schema("nonexistentMessage");
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec);
+
+ // Only the top ranked document in "Message" (document2), should be
+ // returned. The presence of "nonexistentMessage" in the same result
+ // grouping should have no effect.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultGroupingNonexistentNamespaceAndSchemaShouldBeIgnored) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 2 documents and ensures the relationship in terms of document
+ // score is: document1 < document2
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetScore(3)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ DocumentProto document4 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri/4")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message4")
+ .SetScore(4)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+
+ // "m" will match all 2 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ ResultSpecProto result_spec;
+ result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_namespace_("namespace2");
+ entry->set_schema("Message");
+ entry = result_grouping->add_entry_groupings();
+ entry->set_schema("namespace1");
+ entry->set_schema("nonexistentMessage");
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec);
+
+ // Only the top ranked document in "namespace2xMessage" (document4), should be
+ // returned. The presence of "namespace1xnonexistentMessage" in the same
+ // result grouping should have no effect. If either the namespace or the
+ // schema type is nonexistent, the entire entry will be ignored.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document4;
+
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SnippetNormalization) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "MDI zurich Team Meeting")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "mdi Zürich Team Meeting")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("mdi Zürich");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(2);
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(2);
+
+ SearchResultProto results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ ASSERT_THAT(results.results(), SizeIs(2));
+ const DocumentProto& result_document_1 = results.results(0).document();
+ const SnippetProto& result_snippet_1 = results.results(0).snippet();
+ EXPECT_THAT(result_document_1, EqualsProto(document_two));
+ EXPECT_THAT(result_snippet_1.entries(), SizeIs(1));
+ EXPECT_THAT(result_snippet_1.entries(0).property_name(), Eq("body"));
+ std::string_view content = GetString(
+ &result_document_1, result_snippet_1.entries(0).property_name());
+ EXPECT_THAT(
+ GetWindows(content, result_snippet_1.entries(0)),
+ ElementsAre("mdi Zürich Team Meeting", "mdi Zürich Team Meeting"));
+ EXPECT_THAT(GetMatches(content, result_snippet_1.entries(0)),
+ ElementsAre("mdi", "Zürich"));
+
+ const DocumentProto& result_document_2 = results.results(1).document();
+ const SnippetProto& result_snippet_2 = results.results(1).snippet();
+ EXPECT_THAT(result_document_2, EqualsProto(document_one));
+ EXPECT_THAT(result_snippet_2.entries(), SizeIs(1));
+ EXPECT_THAT(result_snippet_2.entries(0).property_name(), Eq("body"));
+ content = GetString(&result_document_2,
+ result_snippet_2.entries(0).property_name());
+ EXPECT_THAT(
+ GetWindows(content, result_snippet_2.entries(0)),
+ ElementsAre("MDI zurich Team Meeting", "MDI zurich Team Meeting"));
+ EXPECT_THAT(GetMatches(content, result_snippet_2.entries(0)),
+ ElementsAre("MDI", "zurich"));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SnippetNormalizationPrefix) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "MDI zurich Team Meeting")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "mdi Zürich Team Meeting")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("md Zür");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(2);
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(2);
+
+ SearchResultProto results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ ASSERT_THAT(results.results(), SizeIs(2));
+ const DocumentProto& result_document_1 = results.results(0).document();
+ const SnippetProto& result_snippet_1 = results.results(0).snippet();
+ EXPECT_THAT(result_document_1, EqualsProto(document_two));
+ EXPECT_THAT(result_snippet_1.entries(), SizeIs(1));
+ EXPECT_THAT(result_snippet_1.entries(0).property_name(), Eq("body"));
+ std::string_view content = GetString(
+ &result_document_1, result_snippet_1.entries(0).property_name());
+ EXPECT_THAT(
+ GetWindows(content, result_snippet_1.entries(0)),
+ ElementsAre("mdi Zürich Team Meeting", "mdi Zürich Team Meeting"));
+ EXPECT_THAT(GetMatches(content, result_snippet_1.entries(0)),
+ ElementsAre("mdi", "Zürich"));
+
+ const DocumentProto& result_document_2 = results.results(1).document();
+ const SnippetProto& result_snippet_2 = results.results(1).snippet();
+ EXPECT_THAT(result_document_2, EqualsProto(document_one));
+ EXPECT_THAT(result_snippet_2.entries(), SizeIs(1));
+ EXPECT_THAT(result_snippet_2.entries(0).property_name(), Eq("body"));
+ content = GetString(&result_document_2,
+ result_snippet_2.entries(0).property_name());
+ EXPECT_THAT(
+ GetWindows(content, result_snippet_2.entries(0)),
+ ElementsAre("MDI zurich Team Meeting", "MDI zurich Team Meeting"));
+ EXPECT_THAT(GetMatches(content, result_snippet_2.entries(0)),
+ ElementsAre("MDI", "zurich"));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SnippetSectionRestrict) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "MDI zurich Team Meeting")
+ .AddStringProperty("body", "MDI zurich Team Meeting")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "MDI zurich trip")
+ .AddStringProperty("body", "Let's travel to zurich")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("body:Zür");
+ search_spec->set_search_type(GetParam());
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+ result_spec->set_num_per_page(1);
+ result_spec->mutable_snippet_spec()->set_max_window_utf32_length(64);
+ result_spec->mutable_snippet_spec()->set_num_matches_per_property(10);
+ result_spec->mutable_snippet_spec()->set_num_to_snippet(10);
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ ASSERT_THAT(results.results(), SizeIs(1));
+
+ const DocumentProto& result_document_two = results.results(0).document();
+ const SnippetProto& result_snippet_two = results.results(0).snippet();
+ EXPECT_THAT(result_document_two, EqualsProto(document_two));
+ EXPECT_THAT(result_snippet_two.entries(), SizeIs(1));
+ EXPECT_THAT(result_snippet_two.entries(0).property_name(), Eq("body"));
+ std::string_view content = GetString(
+ &result_document_two, result_snippet_two.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_two.entries(0)),
+ ElementsAre("Let's travel to zurich"));
+ EXPECT_THAT(GetMatches(content, result_snippet_two.entries(0)),
+ ElementsAre("zurich"));
+
+ search_spec.reset();
+ scoring_spec.reset();
+ result_spec.reset();
+
+ results = icing.GetNextPage(results.next_page_token());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ ASSERT_THAT(results.results(), SizeIs(1));
+
+ const DocumentProto& result_document_one = results.results(0).document();
+ const SnippetProto& result_snippet_one = results.results(0).snippet();
+ EXPECT_THAT(result_document_one, EqualsProto(document_one));
+ EXPECT_THAT(result_snippet_one.entries(), SizeIs(1));
+ EXPECT_THAT(result_snippet_one.entries(0).property_name(), Eq("body"));
+ content = GetString(&result_document_one,
+ result_snippet_one.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_one.entries(0)),
+ ElementsAre("MDI zurich Team Meeting"));
+ EXPECT_THAT(GetMatches(content, result_snippet_one.entries(0)),
+ ElementsAre("zurich"));
+}
+
+TEST_P(IcingSearchEngineSearchTest, Hyphens) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_schema_type("MyType");
+ PropertyConfigProto* prop = type->add_properties();
+ prop->set_property_name("foo");
+ prop->set_data_type(PropertyConfigProto::DataType::STRING);
+ prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ prop->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ prop->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("MyType")
+ .AddStringProperty("foo", "foo bar-baz bat")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("MyType")
+ .AddStringProperty("foo", "bar for baz bat-man")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("foo:bar-baz");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ SearchResultProto results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ ASSERT_THAT(results.results(), SizeIs(2));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithProjectionEmptyFieldPath) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query that will match those documents and use an empty field
+ // mask to request NO properties.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("hello");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ // Retrieve only one result at a time to make sure that projection works when
+ // retrieving all pages.
+ result_spec.set_num_per_page(1);
+ TypePropertyMask* email_field_mask = result_spec.add_type_property_masks();
+ email_field_mask->set_schema_type("Email");
+ email_field_mask->add_paths("");
+
+ SearchResultProto results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that the returned results contain no properties.
+ DocumentProto projected_document_two = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .Build();
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(projected_document_two));
+
+ results = icing.GetNextPage(results.next_page_token());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+ DocumentProto projected_document_one = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .Build();
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(projected_document_one));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithProjectionMultipleFieldPaths) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query that will match those documents and request only
+ // 'sender.name' and 'subject' properties.
+ // Create all of search_spec, result_spec and scoring_spec as objects with
+ // scope that will end before the call to GetNextPage to ensure that the
+ // implementation isn't relying on references to any of them.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+ // Retrieve only one result at a time to make sure that projection works when
+ // retrieving all pages.
+ result_spec->set_num_per_page(1);
+ TypePropertyMask* email_field_mask = result_spec->add_type_property_masks();
+ email_field_mask->set_schema_type("Email");
+ email_field_mask->add_paths("sender.name");
+ email_field_mask->add_paths("subject");
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that the first returned result only contains the 'sender.name'
+ // property.
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .Build();
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(projected_document_two));
+
+ // 4. Now, delete all of the specs used in the search. GetNextPage should have
+ // no problem because it shouldn't be keeping any references to them.
+ search_spec.reset();
+ result_spec.reset();
+ scoring_spec.reset();
+
+ // 5. Verify that the second returned result only contains the 'sender.name'
+ // property.
+ results = icing.GetNextPage(results.next_page_token());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .Build();
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(projected_document_one));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithPropertyFilters) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filters of sender.name and subject for the
+ // Email schema type.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* email_property_filters =
+ search_spec->add_type_property_filters();
+ email_property_filters->set_schema_type("Email");
+ email_property_filters->add_paths("sender.name");
+ email_property_filters->add_paths("subject");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that only the first document is returned. Although 'hello' is
+ // present in document_two, it shouldn't be in the result since 'hello' is not
+ // in the specified property filter.
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(document_one));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithPropertyFiltersOnMultipleSchema) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ // Add Person and Organization schema with a property 'name' in both.
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Organization")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("address")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(),
+ ProtoIsOk());
+
+ // 1. Add person document
+ DocumentProto person_document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build();
+ ASSERT_THAT(icing.Put(person_document).status(), ProtoIsOk());
+
+ // 1. Add organization document
+ DocumentProto organization_document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Organization")
+ .AddStringProperty("name", "Meg Corp")
+ .AddStringProperty("address", "Universal street")
+ .Build();
+ ASSERT_THAT(icing.Put(organization_document).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filters. Person schema has name in it's
+ // property filter but Organization schema doesn't.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("Meg");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* person_property_filters =
+ search_spec->add_type_property_filters();
+ person_property_filters->set_schema_type("Person");
+ person_property_filters->add_paths("name");
+ TypePropertyMask* organization_property_filters =
+ search_spec->add_type_property_filters();
+ organization_property_filters->set_schema_type("Organization");
+ organization_property_filters->add_paths("address");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that only the person document is returned. Although 'Meg' is
+ // present in organization document, it shouldn't be in the result since
+ // the name field is not specified in the Organization property filter.
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(person_document));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithWildcardPropertyFilters) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filters of sender.name and subject for the
+ // wildcard(*) schema type.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* wildcard_property_filters =
+ search_spec->add_type_property_filters();
+ wildcard_property_filters->set_schema_type("*");
+ wildcard_property_filters->add_paths("sender.name");
+ wildcard_property_filters->add_paths("subject");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that only the first document is returned since the second
+ // document doesn't contain the word 'hello' in either of fields specified in
+ // the property filter. This confirms that the property filters for the
+ // wildcard entry have been applied to the Email schema as well.
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(document_one));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithMixedPropertyFilters) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filters of sender.name and subject for the
+ // wildcard(*) schema type plus property filters of sender.name and body for
+ // the Email schema type.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* wildcard_property_filters =
+ search_spec->add_type_property_filters();
+ wildcard_property_filters->set_schema_type("*");
+ wildcard_property_filters->add_paths("sender.name");
+ wildcard_property_filters->add_paths("subject");
+ TypePropertyMask* email_property_filters =
+ search_spec->add_type_property_filters();
+ email_property_filters->set_schema_type("Email");
+ email_property_filters->add_paths("sender.name");
+ email_property_filters->add_paths("body");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that only the second document is returned since the first
+ // document doesn't contain the word 'hello' in either of fields sender.name
+ // or body. This confirms that the property filters specified for Email schema
+ // have been applied and the ones specified for wildcard entry have been
+ // ignored.
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(document_two));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithNonApplicablePropertyFilters) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filters of sender.name and subject for an
+ // unknown schema type.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* email_property_filters =
+ search_spec->add_type_property_filters();
+ email_property_filters->set_schema_type("unknown");
+ email_property_filters->add_paths("sender.name");
+ email_property_filters->add_paths("subject");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(2));
+
+ // 3. Verify that both the documents are returned since each of them have the
+ // word 'hello' in at least 1 property. The second document being returned
+ // confirms that the body field was searched and the specified property
+ // filters were not applied to the Email schema type.
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(document_two));
+ EXPECT_THAT(results.results(1).document(),
+ EqualsProto(document_one));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithEmptyPropertyFilter) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Message")
+ .AddStringProperty("body", "Hello World!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ // 2. Issue a query with empty property filter for Message schema.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* message_property_filters =
+ search_spec->add_type_property_filters();
+ message_property_filters->set_schema_type("Message");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+
+ // 3. Verify that no documents are returned. Although 'hello' is present in
+ // the indexed document, it shouldn't be returned since the Message property
+ // filter doesn't allow any properties to be searched.
+ ASSERT_THAT(results.results(), IsEmpty());
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchWithPropertyFilterHavingInvalidProperty) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Message")
+ .AddStringProperty("body", "Hello World!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filter having invalid/unknown property for
+ // Message schema.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* message_property_filters =
+ search_spec->add_type_property_filters();
+ message_property_filters->set_schema_type("Message");
+ message_property_filters->add_paths("unknown");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+
+ // 3. Verify that no documents are returned. Although 'hello' is present in
+ // the indexed document, it shouldn't be returned since the Message property
+ // filter doesn't allow any valid properties to be searched. Any
+ // invalid/unknown properties specified in the property filters will be
+ // ignored while searching.
+ ASSERT_THAT(results.results(), IsEmpty());
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithPropertyFiltersWithNesting) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filter of sender.emailAddress for the Email
+ // schema type.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* email_property_filters =
+ search_spec->add_type_property_filters();
+ email_property_filters->set_schema_type("Email");
+ email_property_filters->add_paths("sender.emailAddress");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that only the first document is returned since the second
+ // document doesn't contain the word 'hello' in sender.emailAddress. The first
+ // document being returned confirms that the nested property
+ // sender.emailAddress was actually searched.
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(document_one));
+}
+
+TEST_P(IcingSearchEngineSearchTest, QueryStatsProtoTest) {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(5);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates and inserts 5 documents
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
+ DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
+ DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.add_namespace_filters("namespace");
+ search_spec.add_schema_type_filters(document1.schema());
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(2);
+ result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(1);
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(3);
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+
+ // Searches and gets the first page, 2 results with 2 snippets
+ SearchResultProto search_result =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), SizeIs(2));
+ ASSERT_THAT(search_result.next_page_token(), Ne(kInvalidNextPageToken));
+
+ // Check the stats
+ QueryStatsProto exp_stats;
+ exp_stats.set_query_length(7);
+ exp_stats.set_num_terms(1);
+ exp_stats.set_num_namespaces_filtered(1);
+ exp_stats.set_num_schema_types_filtered(1);
+ exp_stats.set_ranking_strategy(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+ exp_stats.set_is_first_page(true);
+ exp_stats.set_requested_page_size(2);
+ exp_stats.set_num_results_returned_current_page(2);
+ exp_stats.set_num_documents_scored(5);
+ exp_stats.set_num_results_with_snippets(2);
+ exp_stats.set_latency_ms(5);
+ exp_stats.set_parse_query_latency_ms(5);
+ exp_stats.set_scoring_latency_ms(5);
+ exp_stats.set_ranking_latency_ms(5);
+ exp_stats.set_document_retrieval_latency_ms(5);
+ exp_stats.set_lock_acquisition_latency_ms(5);
+ exp_stats.set_num_joined_results_returned_current_page(0);
+ EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
+
+ // Second page, 2 result with 1 snippet
+ search_result = icing.GetNextPage(search_result.next_page_token());
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), SizeIs(2));
+ ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
+
+ exp_stats = QueryStatsProto();
+ exp_stats.set_is_first_page(false);
+ exp_stats.set_requested_page_size(2);
+ exp_stats.set_num_results_returned_current_page(2);
+ exp_stats.set_num_results_with_snippets(1);
+ exp_stats.set_latency_ms(5);
+ exp_stats.set_document_retrieval_latency_ms(5);
+ exp_stats.set_lock_acquisition_latency_ms(5);
+ exp_stats.set_num_joined_results_returned_current_page(0);
+ EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
+
+ // Third page, 1 result with 0 snippets
+ search_result = icing.GetNextPage(search_result.next_page_token());
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), SizeIs(1));
+ ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken));
+
+ exp_stats = QueryStatsProto();
+ exp_stats.set_is_first_page(false);
+ exp_stats.set_requested_page_size(2);
+ exp_stats.set_num_results_returned_current_page(1);
+ exp_stats.set_num_results_with_snippets(0);
+ exp_stats.set_latency_ms(5);
+ exp_stats.set_document_retrieval_latency_ms(5);
+ exp_stats.set_lock_acquisition_latency_ms(5);
+ exp_stats.set_num_joined_results_returned_current_page(0);
+ EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
+}
+
+TEST_P(IcingSearchEngineSearchTest, JoinQueryStatsProtoTest) {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(5);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("firstName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("personQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first1")
+ .AddStringProperty("lastName", "last1")
+ .AddStringProperty("emailAddress", "email1@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first2")
+ .AddStringProperty("lastName", "last2")
+ .AddStringProperty("emailAddress", "email2@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(2)
+ .Build();
+ DocumentProto person3 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person3")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first3")
+ .AddStringProperty("lastName", "last3")
+ .AddStringProperty("emailAddress", "email3@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(3)
+ .Build();
+
+ DocumentProto email1 =
+ DocumentBuilder()
+ .SetKey("namespace", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 1")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(3)
+ .Build();
+ DocumentProto email2 =
+ DocumentBuilder()
+ .SetKey("namespace", "email2")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 2")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(2)
+ .Build();
+ DocumentProto email3 =
+ DocumentBuilder()
+ .SetKey("namespace", "email3")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 3")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email3).status(), ProtoIsOk());
+
+ // Parent SearchSpec
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("firstName:first");
+ search_spec.set_search_type(GetParam());
+
+ // JoinSpec
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("personQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
+ nested_search_spec->set_query("subject:test");
+ nested_search_spec->set_search_type(GetParam());
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ // Parent ScoringSpec
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::JOIN_AGGREGATE_SCORE);
+ scoring_spec.set_order_by(ScoringSpecProto::Order::DESC);
+
+ // Parent ResultSpec
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ // Since we:
+ // - Use MAX for aggregation scoring strategy.
+ // - (Default) use DOCUMENT_SCORE to score child documents.
+ // - (Default) use DESC as the ranking order.
+ //
+ // person1 + email1 should have the highest aggregated score (3) and be
+ // returned first. person2 + email2 (aggregated score = 2) should be the
+ // second, and person3 + email3 (aggregated score = 1) should be the last.
+ SearchResultProto expected_result1;
+ expected_result1.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto1 =
+ expected_result1.mutable_results()->Add();
+ *result_proto1->mutable_document() = person1;
+ *result_proto1->mutable_joined_results()->Add()->mutable_document() = email1;
+ *result_proto1->mutable_joined_results()->Add()->mutable_document() = email2;
+
+ SearchResultProto expected_result2;
+ expected_result2.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_google::protobuf =
+ expected_result2.mutable_results()->Add();
+ *result_google::protobuf->mutable_document() = person2;
+ *result_google::protobuf->mutable_joined_results()->Add()->mutable_document() = email3;
+
+ SearchResultProto expected_result3;
+ expected_result3.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto3 =
+ expected_result3.mutable_results()->Add();
+ *result_proto3->mutable_document() = person3;
+
+ SearchResultProto search_result =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ uint64_t next_page_token = search_result.next_page_token();
+ EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+ expected_result1.set_next_page_token(next_page_token);
+ ASSERT_THAT(search_result,
+ EqualsSearchResultIgnoreStatsAndScores(expected_result1));
+
+ // Check the stats
+ QueryStatsProto exp_stats;
+ exp_stats.set_query_length(15);
+ exp_stats.set_num_terms(1);
+ exp_stats.set_num_namespaces_filtered(0);
+ exp_stats.set_num_schema_types_filtered(0);
+ exp_stats.set_ranking_strategy(
+ ScoringSpecProto::RankingStrategy::JOIN_AGGREGATE_SCORE);
+ exp_stats.set_is_first_page(true);
+ exp_stats.set_requested_page_size(1);
+ exp_stats.set_num_results_returned_current_page(1);
+ exp_stats.set_num_documents_scored(3);
+ exp_stats.set_num_results_with_snippets(0);
+ exp_stats.set_latency_ms(5);
+ exp_stats.set_parse_query_latency_ms(5);
+ exp_stats.set_scoring_latency_ms(5);
+ exp_stats.set_ranking_latency_ms(5);
+ exp_stats.set_document_retrieval_latency_ms(5);
+ exp_stats.set_lock_acquisition_latency_ms(5);
+ exp_stats.set_num_joined_results_returned_current_page(2);
+ exp_stats.set_join_latency_ms(5);
+ EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
+
+ // Second page, 1 child doc.
+ search_result = icing.GetNextPage(next_page_token);
+ next_page_token = search_result.next_page_token();
+ EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+ expected_result2.set_next_page_token(next_page_token);
+ EXPECT_THAT(search_result,
+ EqualsSearchResultIgnoreStatsAndScores(expected_result2));
+
+ exp_stats = QueryStatsProto();
+ exp_stats.set_is_first_page(false);
+ exp_stats.set_requested_page_size(1);
+ exp_stats.set_num_results_returned_current_page(1);
+ exp_stats.set_num_results_with_snippets(0);
+ exp_stats.set_latency_ms(5);
+ exp_stats.set_document_retrieval_latency_ms(5);
+ exp_stats.set_lock_acquisition_latency_ms(5);
+ exp_stats.set_num_joined_results_returned_current_page(1);
+ EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
+
+ // Third page, 0 child docs.
+ search_result = icing.GetNextPage(next_page_token);
+ next_page_token = search_result.next_page_token();
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), SizeIs(1));
+ ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken));
+
+ exp_stats = QueryStatsProto();
+ exp_stats.set_is_first_page(false);
+ exp_stats.set_requested_page_size(1);
+ exp_stats.set_num_results_returned_current_page(1);
+ exp_stats.set_num_joined_results_returned_current_page(0);
+ exp_stats.set_latency_ms(5);
+ exp_stats.set_document_retrieval_latency_ms(5);
+ exp_stats.set_lock_acquisition_latency_ms(5);
+ exp_stats.set_num_results_with_snippets(0);
+ ASSERT_THAT(search_result,
+ EqualsSearchResultIgnoreStatsAndScores(expected_result3));
+ EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
+
+ ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken));
+
+ search_result = icing.GetNextPage(search_result.next_page_token());
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), IsEmpty());
+ ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken));
+
+ exp_stats = QueryStatsProto();
+ exp_stats.set_is_first_page(false);
+ exp_stats.set_lock_acquisition_latency_ms(5);
+ EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SnippetErrorTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Generic").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetScore(10)
+ .SetSchema("Generic")
+ .AddStringProperty("subject", "I like cats", "I like dogs",
+ "I like birds", "I like fish")
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetScore(20)
+ .SetSchema("Generic")
+ .AddStringProperty("subject", "I like red", "I like green",
+ "I like blue", "I like yellow")
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetScore(5)
+ .SetSchema("Generic")
+ .AddStringProperty("subject", "I like cupcakes", "I like donuts",
+ "I like eclairs", "I like froyo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.add_schema_type_filters("Generic");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("like");
+ search_spec.set_search_type(GetParam());
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ ResultSpecProto result_spec;
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(2);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(3);
+ result_spec.mutable_snippet_spec()->set_max_window_utf32_length(4);
+ SearchResultProto search_results =
+ icing.Search(search_spec, scoring_spec, result_spec);
+
+ ASSERT_THAT(search_results.results(), SizeIs(3));
+ const SearchResultProto::ResultProto* result = &search_results.results(0);
+ EXPECT_THAT(result->document().uri(), Eq("uri2"));
+ ASSERT_THAT(result->snippet().entries(), SizeIs(3));
+ const SnippetProto::EntryProto* entry = &result->snippet().entries(0);
+ EXPECT_THAT(entry->property_name(), "subject[0]");
+ std::string_view content = GetString(&result->document(), "subject[0]");
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
+
+ entry = &result->snippet().entries(1);
+ EXPECT_THAT(entry->property_name(), "subject[1]");
+ content = GetString(&result->document(), "subject[1]");
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
+
+ entry = &result->snippet().entries(2);
+ EXPECT_THAT(entry->property_name(), "subject[2]");
+ content = GetString(&result->document(), "subject[2]");
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
+
+ result = &search_results.results(1);
+ EXPECT_THAT(result->document().uri(), Eq("uri1"));
+ ASSERT_THAT(result->snippet().entries(), SizeIs(3));
+ entry = &result->snippet().entries(0);
+ EXPECT_THAT(entry->property_name(), "subject[0]");
+ content = GetString(&result->document(), "subject[0]");
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
+
+ entry = &result->snippet().entries(1);
+ ASSERT_THAT(entry->property_name(), "subject[1]");
+ content = GetString(&result->document(), "subject[1]");
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
+
+ entry = &result->snippet().entries(2);
+ ASSERT_THAT(entry->property_name(), "subject[2]");
+ content = GetString(&result->document(), "subject[2]");
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
+
+ result = &search_results.results(2);
+ ASSERT_THAT(result->document().uri(), Eq("uri3"));
+ ASSERT_THAT(result->snippet().entries(), IsEmpty());
+}
+
+TEST_P(IcingSearchEngineSearchTest, CJKSnippetTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // String: "我每天走路去上班。"
+ // ^ ^ ^ ^^
+ // UTF8 idx: 0 3 9 15 18
+ // UTF16 idx: 0 1 3 5 6
+ // Breaks into segments: "我", "每天", "走路", "去", "上班"
+ constexpr std::string_view kChinese = "我每天走路去上班。";
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kChinese)
+ .Build();
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // Search and request snippet matching but no windowing.
+ SearchSpecProto search_spec;
+ search_spec.set_query("走");
+ search_spec.set_term_match_type(TERM_MATCH_PREFIX);
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(
+ std::numeric_limits<int>::max());
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(
+ std::numeric_limits<int>::max());
+
+ // Search and make sure that we got a single successful result
+ SearchResultProto search_results = icing.Search(
+ search_spec, ScoringSpecProto::default_instance(), result_spec);
+ ASSERT_THAT(search_results.status(), ProtoIsOk());
+ ASSERT_THAT(search_results.results(), SizeIs(1));
+ const SearchResultProto::ResultProto* result = &search_results.results(0);
+ EXPECT_THAT(result->document().uri(), Eq("uri1"));
+
+ // Ensure that one and only one property was matched and it was "body"
+ ASSERT_THAT(result->snippet().entries(), SizeIs(1));
+ const SnippetProto::EntryProto* entry = &result->snippet().entries(0);
+ EXPECT_THAT(entry->property_name(), Eq("body"));
+
+ // Get the content for "subject" and see what the match is.
+ std::string_view content = GetString(&result->document(), "body");
+ ASSERT_THAT(content, Eq(kChinese));
+
+ // Ensure that there is one and only one match within "subject"
+ ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
+ const SnippetMatchProto& match_proto = entry->snippet_matches(0);
+
+ EXPECT_THAT(match_proto.exact_match_byte_position(), Eq(9));
+ EXPECT_THAT(match_proto.exact_match_byte_length(), Eq(6));
+ std::string_view match =
+ content.substr(match_proto.exact_match_byte_position(),
+ match_proto.exact_match_byte_length());
+ ASSERT_THAT(match, Eq("走路"));
+
+ // Ensure that the utf-16 values are also as expected
+ EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(3));
+ EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2));
+}
+
+TEST_P(IcingSearchEngineSearchTest, InvalidToEmptyQueryTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // String: "Luca Brasi sleeps with the 🐟🐟🐟."
+ // ^ ^ ^ ^ ^ ^ ^ ^ ^
+ // UTF8 idx: 0 5 11 18 23 27 3135 39
+ // UTF16 idx: 0 5 11 18 23 27 2931 33
+ // Breaks into segments: "Luca", "Brasi", "sleeps", "with", "the", "🐟", "🐟"
+ // and "🐟".
+ constexpr std::string_view kSicilianMessage =
+ "Luca Brasi sleeps with the 🐟🐟🐟.";
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kSicilianMessage)
+ .Build();
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "Some other content.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // Search and request snippet matching but no windowing.
+ SearchSpecProto search_spec;
+ search_spec.set_query("?");
+ search_spec.set_term_match_type(TERM_MATCH_PREFIX);
+ search_spec.set_search_type(GetParam());
+ ScoringSpecProto scoring_spec;
+ ResultSpecProto result_spec;
+
+ // Search and make sure that we got a single successful result
+ SearchResultProto search_results =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(search_results.status(), ProtoIsOk());
+ if (GetParam() ==
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ // This is the actual correct behavior.
+ EXPECT_THAT(search_results.results(), IsEmpty());
+ } else {
+ EXPECT_THAT(search_results.results(), SizeIs(2));
+ }
+
+ search_spec.set_query("。");
+ search_results = icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(search_results.status(), ProtoIsOk());
+ if (GetParam() ==
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ // This is the actual correct behavior.
+ EXPECT_THAT(search_results.results(), IsEmpty());
+ } else {
+ EXPECT_THAT(search_results.results(), SizeIs(2));
+ }
+
+ search_spec.set_query("-");
+ search_results = icing.Search(search_spec, scoring_spec, result_spec);
+ if (GetParam() ==
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ // This is the actual correct behavior.
+ EXPECT_THAT(search_results.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+ } else {
+ EXPECT_THAT(search_results.status(), ProtoIsOk());
+ EXPECT_THAT(search_results.results(), SizeIs(2));
+ }
+
+ search_spec.set_query(":");
+ search_results = icing.Search(search_spec, scoring_spec, result_spec);
+ if (GetParam() ==
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ // This is the actual correct behavior.
+ EXPECT_THAT(search_results.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+ } else {
+ EXPECT_THAT(search_results.status(), ProtoIsOk());
+ EXPECT_THAT(search_results.results(), SizeIs(2));
+ }
+
+ search_spec.set_query("OR");
+ search_results = icing.Search(search_spec, scoring_spec, result_spec);
+ if (GetParam() ==
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ EXPECT_THAT(search_results.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+ } else {
+ EXPECT_THAT(search_results.status(), ProtoIsOk());
+ EXPECT_THAT(search_results.results(), SizeIs(2));
+ }
+
+ search_spec.set_query(" ");
+ search_results = icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(search_results.status(), ProtoIsOk());
+ EXPECT_THAT(search_results.results(), SizeIs(2));
+}
+
+TEST_P(IcingSearchEngineSearchTest, EmojiSnippetTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // String: "Luca Brasi sleeps with the 🐟🐟🐟."
+ // ^ ^ ^ ^ ^ ^ ^ ^ ^
+ // UTF8 idx: 0 5 11 18 23 27 3135 39
+ // UTF16 idx: 0 5 11 18 23 27 2931 33
+ // Breaks into segments: "Luca", "Brasi", "sleeps", "with", "the", "🐟", "🐟"
+ // and "🐟".
+ constexpr std::string_view kSicilianMessage =
+ "Luca Brasi sleeps with the 🐟🐟🐟.";
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kSicilianMessage)
+ .Build();
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "Some other content.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // Search and request snippet matching but no windowing.
+ SearchSpecProto search_spec;
+ search_spec.set_query("🐟");
+ search_spec.set_term_match_type(TERM_MATCH_PREFIX);
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(1);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(1);
+
+ // Search and make sure that we got a single successful result
+ SearchResultProto search_results = icing.Search(
+ search_spec, ScoringSpecProto::default_instance(), result_spec);
+ ASSERT_THAT(search_results.status(), ProtoIsOk());
+ ASSERT_THAT(search_results.results(), SizeIs(1));
+ const SearchResultProto::ResultProto* result = &search_results.results(0);
+ EXPECT_THAT(result->document().uri(), Eq("uri1"));
+
+ // Ensure that one and only one property was matched and it was "body"
+ ASSERT_THAT(result->snippet().entries(), SizeIs(1));
+ const SnippetProto::EntryProto* entry = &result->snippet().entries(0);
+ EXPECT_THAT(entry->property_name(), Eq("body"));
+
+ // Get the content for "subject" and see what the match is.
+ std::string_view content = GetString(&result->document(), "body");
+ ASSERT_THAT(content, Eq(kSicilianMessage));
+
+ // Ensure that there is one and only one match within "subject"
+ ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
+ const SnippetMatchProto& match_proto = entry->snippet_matches(0);
+
+ EXPECT_THAT(match_proto.exact_match_byte_position(), Eq(27));
+ EXPECT_THAT(match_proto.exact_match_byte_length(), Eq(4));
+ std::string_view match =
+ content.substr(match_proto.exact_match_byte_position(),
+ match_proto.exact_match_byte_length());
+ ASSERT_THAT(match, Eq("🐟"));
+
+ // Ensure that the utf-16 values are also as expected
+ EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(27));
+ EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2));
+}
+
+TEST_P(IcingSearchEngineSearchTest, JoinByQualifiedId) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("firstName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("personQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first1")
+ .AddStringProperty("lastName", "last1")
+ .AddStringProperty("emailAddress", "email1@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first2")
+ .AddStringProperty("lastName", "last2")
+ .AddStringProperty("emailAddress", "email2@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(2)
+ .Build();
+ DocumentProto person3 =
+ DocumentBuilder()
+ .SetKey(R"(pkg$db/name#space\\)", "person3")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first3")
+ .AddStringProperty("lastName", "last3")
+ .AddStringProperty("emailAddress", "email3@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(3)
+ .Build();
+
+ DocumentProto email1 =
+ DocumentBuilder()
+ .SetKey("namespace", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 1")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(3)
+ .Build();
+ DocumentProto email2 =
+ DocumentBuilder()
+ .SetKey("namespace", "email2")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 2")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(2)
+ .Build();
+ DocumentProto email3 =
+ DocumentBuilder()
+ .SetKey("namespace", "email3")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 3")
+ .AddStringProperty("personQualifiedId",
+ R"(pkg$db/name\#space\\\\#person3)") // escaped
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email3).status(), ProtoIsOk());
+
+ // Parent SearchSpec
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("firstName:first");
+ search_spec.set_search_type(GetParam());
+
+ // JoinSpec
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("personQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::MAX);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
+ nested_search_spec->set_query("subject:test");
+ nested_search_spec->set_search_type(GetParam());
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ // Parent ScoringSpec
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+
+ // Parent ResultSpec
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ // Since we:
+ // - Use MAX for aggregation scoring strategy.
+ // - (Default) use DOCUMENT_SCORE to score child documents.
+ // - (Default) use DESC as the ranking order.
+ //
+ // person1 + email1 should have the highest aggregated score (3) and be
+ // returned first. person2 + email2 (aggregated score = 2) should be the
+ // second, and person3 + email3 (aggregated score = 1) should be the last.
+ SearchResultProto expected_result1;
+ expected_result1.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto1 =
+ expected_result1.mutable_results()->Add();
+ *result_proto1->mutable_document() = person1;
+ *result_proto1->mutable_joined_results()->Add()->mutable_document() = email1;
+
+ SearchResultProto expected_result2;
+ expected_result2.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_google::protobuf =
+ expected_result2.mutable_results()->Add();
+ *result_google::protobuf->mutable_document() = person2;
+ *result_google::protobuf->mutable_joined_results()->Add()->mutable_document() = email2;
+
+ SearchResultProto expected_result3;
+ expected_result3.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto3 =
+ expected_result3.mutable_results()->Add();
+ *result_proto3->mutable_document() = person3;
+ *result_proto3->mutable_joined_results()->Add()->mutable_document() = email3;
+
+ SearchResultProto result1 =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ uint64_t next_page_token = result1.next_page_token();
+ EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+ expected_result1.set_next_page_token(next_page_token);
+ EXPECT_THAT(result1,
+ EqualsSearchResultIgnoreStatsAndScores(expected_result1));
+
+ SearchResultProto result2 = icing.GetNextPage(next_page_token);
+ next_page_token = result2.next_page_token();
+ EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+ expected_result2.set_next_page_token(next_page_token);
+ EXPECT_THAT(result2,
+ EqualsSearchResultIgnoreStatsAndScores(expected_result2));
+
+ SearchResultProto result3 = icing.GetNextPage(next_page_token);
+ next_page_token = result3.next_page_token();
+ EXPECT_THAT(next_page_token, Eq(kInvalidNextPageToken));
+ EXPECT_THAT(result3,
+ EqualsSearchResultIgnoreStatsAndScores(expected_result3));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ JoinShouldLimitNumChildDocumentsByMaxJoinedChildPerParent) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("firstName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("personQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first1")
+ .AddStringProperty("lastName", "last1")
+ .AddStringProperty("emailAddress", "email1@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first2")
+ .AddStringProperty("lastName", "last2")
+ .AddStringProperty("emailAddress", "email2@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(2)
+ .Build();
+
+ DocumentProto email1 =
+ DocumentBuilder()
+ .SetKey("namespace", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 1")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(100)
+ .Build();
+ DocumentProto email2 =
+ DocumentBuilder()
+ .SetKey("namespace", "email2")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 2")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(99)
+ .Build();
+ DocumentProto email3 =
+ DocumentBuilder()
+ .SetKey("namespace", "email3")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 3")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(98)
+ .Build();
+ DocumentProto email4 =
+ DocumentBuilder()
+ .SetKey("namespace", "email4")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 4")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(97)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email4).status(), ProtoIsOk());
+
+ // Parent SearchSpec
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("firstName:first");
+ search_spec.set_search_type(GetParam());
+
+ // JoinSpec
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("personQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
+ nested_search_spec->set_query("subject:test");
+ nested_search_spec->set_search_type(GetParam());
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ // Parent ScoringSpec
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+
+ // Parent ResultSpec with max_joined_children_per_parent_to_return = 2
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+ result_spec.set_max_joined_children_per_parent_to_return(2);
+
+ // - Use COUNT for aggregation scoring strategy.
+ // - max_joined_children_per_parent_to_return = 2.
+ // - (Default) use DESC as the ranking order.
+ //
+ // person2 should have the highest aggregated score (3) since email2, email3,
+ // email4 are joined to it and the COUNT aggregated score is 3. However, only
+ // email2 and email3 should be attached to person2 due to
+ // max_joined_children_per_parent_to_return limitation in result_spec.
+ // person1 should be the second (aggregated score = 1).
+ SearchResultProto::ResultProto expected_result_proto1;
+ *expected_result_proto1.mutable_document() = person2;
+ expected_result_proto1.set_score(3);
+ SearchResultProto::ResultProto* child_result_proto1 =
+ expected_result_proto1.mutable_joined_results()->Add();
+ *child_result_proto1->mutable_document() = email2;
+ child_result_proto1->set_score(99);
+ SearchResultProto::ResultProto* child_result_google::protobuf =
+ expected_result_proto1.mutable_joined_results()->Add();
+ *child_result_google::protobuf->mutable_document() = email3;
+ child_result_google::protobuf->set_score(98);
+
+ SearchResultProto::ResultProto expected_result_google::protobuf;
+ *expected_result_google::protobuf.mutable_document() = person1;
+ expected_result_google::protobuf.set_score(1);
+ SearchResultProto::ResultProto* child_result_proto3 =
+ expected_result_google::protobuf.mutable_joined_results()->Add();
+ *child_result_proto3->mutable_document() = email1;
+ child_result_proto3->set_score(100);
+
+ SearchResultProto result1 =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ uint64_t next_page_token = result1.next_page_token();
+ EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+ EXPECT_THAT(result1.results(),
+ ElementsAre(EqualsProto(expected_result_proto1)));
+
+ SearchResultProto result2 = icing.GetNextPage(next_page_token);
+ next_page_token = result2.next_page_token();
+ EXPECT_THAT(next_page_token, Eq(kInvalidNextPageToken));
+ EXPECT_THAT(result2.results(),
+ ElementsAre(EqualsProto(expected_result_google::protobuf)));
+}
+
+TEST_P(IcingSearchEngineSearchTest, JoinWithZeroMaxJoinedChildPerParent) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("firstName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("personQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first1")
+ .AddStringProperty("lastName", "last1")
+ .AddStringProperty("emailAddress", "email1@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first2")
+ .AddStringProperty("lastName", "last2")
+ .AddStringProperty("emailAddress", "email2@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(2)
+ .Build();
+
+ DocumentProto email1 =
+ DocumentBuilder()
+ .SetKey("namespace", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 1")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(100)
+ .Build();
+ DocumentProto email2 =
+ DocumentBuilder()
+ .SetKey("namespace", "email2")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 2")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(99)
+ .Build();
+ DocumentProto email3 =
+ DocumentBuilder()
+ .SetKey("namespace", "email3")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 3")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(98)
+ .Build();
+ DocumentProto email4 =
+ DocumentBuilder()
+ .SetKey("namespace", "email4")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 4")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(97)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email4).status(), ProtoIsOk());
+
+ // Parent SearchSpec
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("firstName:first");
+ search_spec.set_search_type(GetParam());
+
+ // JoinSpec
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("personQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
+ nested_search_spec->set_query("subject:test");
+ nested_search_spec->set_search_type(GetParam());
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ // Parent ScoringSpec
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+
+ // Parent ResultSpec with max_joined_children_per_parent_to_return = 0
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+ result_spec.set_max_joined_children_per_parent_to_return(0);
+
+ // - Use COUNT for aggregation scoring strategy.
+ // - max_joined_children_per_parent_to_return = 0.
+ // - (Default) use DESC as the ranking order.
+ //
+ // person2 should have the highest aggregated score (3) since email2, email3,
+ // email4 are joined to it and the COUNT aggregated score is 3. However, no
+ // child documents should be attached to person2 due to
+ // max_joined_children_per_parent_to_return limitation in result_spec.
+ // person1 should be the second (aggregated score = 1) with no attached child
+ // documents.
+ SearchResultProto::ResultProto expected_result_proto1;
+ *expected_result_proto1.mutable_document() = person2;
+ expected_result_proto1.set_score(3);
+
+ SearchResultProto::ResultProto expected_result_google::protobuf;
+ *expected_result_google::protobuf.mutable_document() = person1;
+ expected_result_google::protobuf.set_score(1);
+
+ SearchResultProto result1 =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ uint64_t next_page_token = result1.next_page_token();
+ EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+ EXPECT_THAT(result1.results(),
+ ElementsAre(EqualsProto(expected_result_proto1)));
+
+ SearchResultProto result2 = icing.GetNextPage(next_page_token);
+ next_page_token = result2.next_page_token();
+ EXPECT_THAT(next_page_token, Eq(kInvalidNextPageToken));
+ EXPECT_THAT(result2.results(),
+ ElementsAre(EqualsProto(expected_result_google::protobuf)));
+}
+
+TEST_P(IcingSearchEngineSearchTest, JoinSnippet) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("firstName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("personQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first")
+ .AddStringProperty("lastName", "last")
+ .AddStringProperty("emailAddress", "email@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
+
+ DocumentProto email =
+ DocumentBuilder()
+ .SetKey("namespace", "email")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(3)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email).status(), ProtoIsOk());
+
+ // Parent SearchSpec
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("firstName:first");
+ search_spec.set_search_type(GetParam());
+
+ // JoinSpec
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("personQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::MAX);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
+ nested_search_spec->set_query("subject:test");
+ nested_search_spec->set_search_type(GetParam());
+ // Child ResultSpec (with snippet)
+ ResultSpecProto* nested_result_spec = nested_spec->mutable_result_spec();
+ nested_result_spec->mutable_snippet_spec()->set_max_window_utf32_length(64);
+ nested_result_spec->mutable_snippet_spec()->set_num_matches_per_property(1);
+ nested_result_spec->mutable_snippet_spec()->set_num_to_snippet(1);
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+
+ // Parent ScoringSpec
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+
+ // Parent ResultSpec (without snippet)
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto result =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(result.status(), ProtoIsOk());
+ EXPECT_THAT(result.next_page_token(), Eq(kInvalidNextPageToken));
+
+ ASSERT_THAT(result.results(), SizeIs(1));
+ // Check parent doc (person).
+ const DocumentProto& result_parent_document = result.results(0).document();
+ EXPECT_THAT(result_parent_document, EqualsProto(person));
+ EXPECT_THAT(result.results(0).snippet().entries(), IsEmpty());
+
+ // Check child doc (email).
+ ASSERT_THAT(result.results(0).joined_results(), SizeIs(1));
+ const DocumentProto& result_child_document =
+ result.results(0).joined_results(0).document();
+ const SnippetProto& result_child_snippet =
+ result.results(0).joined_results(0).snippet();
+ EXPECT_THAT(result_child_document, EqualsProto(email));
+ ASSERT_THAT(result_child_snippet.entries(), SizeIs(1));
+ EXPECT_THAT(result_child_snippet.entries(0).property_name(), Eq("subject"));
+ std::string_view content = GetString(
+ &result_child_document, result_child_snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_child_snippet.entries(0)),
+ ElementsAre("test subject"));
+ EXPECT_THAT(GetMatches(content, result_child_snippet.entries(0)),
+ ElementsAre("test"));
+}
+
+TEST_P(IcingSearchEngineSearchTest, JoinProjection) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("firstName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("personQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first")
+ .AddStringProperty("lastName", "last")
+ .AddStringProperty("emailAddress", "email@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
+
+ DocumentProto email =
+ DocumentBuilder()
+ .SetKey("namespace", "email")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(3)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email).status(), ProtoIsOk());
+
+ // Parent SearchSpec
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("firstName:first");
+ search_spec.set_search_type(GetParam());
+
+ // JoinSpec
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("personQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::MAX);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
+ nested_search_spec->set_query("subject:test");
+ nested_search_spec->set_search_type(GetParam());
+ // Child ResultSpec (with projection)
+ ResultSpecProto* nested_result_spec = nested_spec->mutable_result_spec();
+ TypePropertyMask* type_property_mask =
+ nested_result_spec->add_type_property_masks();
+ type_property_mask->set_schema_type("Email");
+ type_property_mask->add_paths("subject");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+
+ // Parent ScoringSpec
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+
+ // Parent ResultSpec (with projection)
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+ type_property_mask = result_spec.add_type_property_masks();
+ type_property_mask->set_schema_type("Person");
+ type_property_mask->add_paths("emailAddress");
+
+ SearchResultProto result =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(result.status(), ProtoIsOk());
+ EXPECT_THAT(result.next_page_token(), Eq(kInvalidNextPageToken));
+
+ ASSERT_THAT(result.results(), SizeIs(1));
+ // Check parent doc (person): should contain only the "emailAddress" property.
+ DocumentProto projected_person_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("emailAddress", "email@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
+ EXPECT_THAT(result.results().at(0).document(),
+ EqualsProto(projected_person_document));
+
+ // Check child doc (email): should contain only the "subject" property.
+ ASSERT_THAT(result.results(0).joined_results(), SizeIs(1));
+ DocumentProto projected_email_document =
+ DocumentBuilder()
+ .SetKey("namespace", "email")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(3)
+ .Build();
+ EXPECT_THAT(result.results(0).joined_results(0).document(),
+ EqualsProto(projected_email_document));
+}
+
+TEST_F(IcingSearchEngineSearchTest, JoinWithAdvancedScoring) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("firstName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("personQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ const int32_t person1_doc_score = 10;
+ const int32_t person2_doc_score = 25;
+ const int32_t person3_doc_score = 123;
+ const int32_t email1_doc_score = 10;
+ const int32_t email2_doc_score = 15;
+ const int32_t email3_doc_score = 40;
+
+ // person1 has children email1 and email2.
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first1")
+ .AddStringProperty("lastName", "last1")
+ .AddStringProperty("emailAddress", "email1@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(person1_doc_score)
+ .Build();
+ // person2 has a single child email3
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first2")
+ .AddStringProperty("lastName", "last2")
+ .AddStringProperty("emailAddress", "email2@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(person2_doc_score)
+ .Build();
+ // person3 has no child.
+ DocumentProto person3 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person3")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first3")
+ .AddStringProperty("lastName", "last3")
+ .AddStringProperty("emailAddress", "email3@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(person3_doc_score)
+ .Build();
+
+ DocumentProto email1 =
+ DocumentBuilder()
+ .SetKey("namespace", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 1")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(email1_doc_score)
+ .Build();
+ DocumentProto email2 =
+ DocumentBuilder()
+ .SetKey("namespace", "email2")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 2")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(email2_doc_score)
+ .Build();
+ DocumentProto email3 =
+ DocumentBuilder()
+ .SetKey("namespace", "email3")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 3")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(email3_doc_score)
+ .Build();
+
+ // Set children scoring expression and their expected value.
+ ScoringSpecProto child_scoring_spec = GetDefaultScoringSpec();
+ child_scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::ADVANCED_SCORING_EXPRESSION);
+ child_scoring_spec.set_advanced_scoring_expression(
+ "this.documentScore() * 2 + 1");
+ const int32_t exp_email1_score = email1_doc_score * 2 + 1;
+ const int32_t exp_email2_score = email2_doc_score * 2 + 1;
+ const int32_t exp_email3_score = email3_doc_score * 2 + 1;
+
+ // Set parent scoring expression and their expected value.
+ ScoringSpecProto parent_scoring_spec = GetDefaultScoringSpec();
+ parent_scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::ADVANCED_SCORING_EXPRESSION);
+ parent_scoring_spec.set_advanced_scoring_expression(
+ "this.documentScore() * sum(this.childrenRankingSignals())");
+ const int32_t exp_person1_score =
+ person1_doc_score * (exp_email1_score + exp_email2_score);
+ const int32_t exp_person2_score = person2_doc_score * exp_email3_score;
+ const int32_t exp_person3_score = person3_doc_score * 0;
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email3).status(), ProtoIsOk());
+
+ // Parent SearchSpec
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("firstName:first");
+
+ // JoinSpec
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("personQualifiedId");
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
+ nested_search_spec->set_query("subject:test");
+ *nested_spec->mutable_scoring_spec() = child_scoring_spec;
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ // Parent ResultSpec
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results =
+ icing.Search(search_spec, parent_scoring_spec, result_spec);
+ uint64_t next_page_token = results.next_page_token();
+ EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document().uri(), Eq("person2"));
+ // exp_person2_score = 2025
+ EXPECT_THAT(results.results(0).score(), Eq(exp_person2_score));
+
+ results = icing.GetNextPage(next_page_token);
+ next_page_token = results.next_page_token();
+ EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document().uri(), Eq("person1"));
+ // exp_person1_score = 520
+ EXPECT_THAT(results.results(0).score(), Eq(exp_person1_score));
+
+ results = icing.GetNextPage(next_page_token);
+ next_page_token = results.next_page_token();
+ EXPECT_THAT(next_page_token, Eq(kInvalidNextPageToken));
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document().uri(), Eq("person3"));
+ // exp_person3_score = 0
+ EXPECT_THAT(results.results(0).score(), Eq(exp_person3_score));
+}
+
+TEST_F(IcingSearchEngineSearchTest, NumericFilterAdvancedQuerySucceeds) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create the schema and document store
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("transaction")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("price")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("cost")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document_one = DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("price", 10)
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two = DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("price", 25)
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ DocumentProto document_three = DocumentBuilder()
+ .SetKey("namespace", "3")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("cost", 2)
+ .Build();
+ ASSERT_THAT(icing.Put(document_three).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("price < 20");
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_one));
+
+ search_spec.set_query("price == 25");
+ results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+
+ search_spec.set_query("cost > 2");
+ results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.results(), IsEmpty());
+
+ search_spec.set_query("cost >= 2");
+ results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_three));
+
+ search_spec.set_query("price <= 25");
+ results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(2));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
+}
+
+TEST_F(IcingSearchEngineSearchTest,
+ NumericFilterAdvancedQueryWithPersistenceSucceeds) {
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
+ {
+ // Create the schema and document store
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("transaction")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("price")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("cost")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ // Schema will be persisted to disk when icing goes out of scope.
+ }
+
+ DocumentProto document_one = DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("price", 10)
+ .Build();
+ DocumentProto document_two = DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("price", 25)
+ .AddInt64Property("cost", 2)
+ .Build();
+ {
+ // Ensure that icing initializes the schema and section_manager
+ // properly from the pre-existing file.
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+ // The index and document store will be persisted to disk when icing goes
+ // out of scope.
+ }
+
+ {
+ // Ensure that the index is brought back up without problems and we
+ // can query for the content that we expect.
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("price < 20");
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_one));
+
+ search_spec.set_query("price == 25");
+ results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+
+ search_spec.set_query("cost > 2");
+ results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.results(), IsEmpty());
+
+ search_spec.set_query("cost >= 2");
+ results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+
+ search_spec.set_query("price <= 25");
+ results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(2));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
+ }
+}
+
+TEST_F(IcingSearchEngineSearchTest, NumericFilterOldQueryFails) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create the schema and document store
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("transaction")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("price")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("cost")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document_one = DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("price", 10)
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two = DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("price", 25)
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ DocumentProto document_three = DocumentBuilder()
+ .SetKey("namespace", "3")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("cost", 2)
+ .Build();
+ ASSERT_THAT(icing.Put(document_three).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("price < 20");
+ search_spec.set_search_type(SearchSpecProto::SearchType::ICING_RAW_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_P(IcingSearchEngineSearchTest, BarisNormalizationTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Person")
+ .SetCreationTimestampMs(1)
+ .AddStringProperty("name", "Barış")
+ .Build();
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ DocumentProto document_two = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .SetCreationTimestampMs(1)
+ .AddStringProperty("name", "ıbar")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TERM_MATCH_PREFIX);
+ search_spec.set_search_type(GetParam());
+
+ ScoringSpecProto scoring_spec;
+ ResultSpecProto result_spec;
+
+ SearchResultProto exp_results;
+ exp_results.mutable_status()->set_code(StatusProto::OK);
+ *exp_results.add_results()->mutable_document() = document;
+
+ search_spec.set_query("barış");
+ SearchResultProto results =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(results, EqualsSearchResultIgnoreStatsAndScores(exp_results));
+
+ search_spec.set_query("barıs");
+ results = icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(results, EqualsSearchResultIgnoreStatsAndScores(exp_results));
+
+ search_spec.set_query("baris");
+ results = icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(results, EqualsSearchResultIgnoreStatsAndScores(exp_results));
+
+ SearchResultProto exp_results2;
+ exp_results2.mutable_status()->set_code(StatusProto::OK);
+ *exp_results2.add_results()->mutable_document() = document_two;
+ search_spec.set_query("ı");
+ results = icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(results, EqualsSearchResultIgnoreStatsAndScores(exp_results2));
+}
+
+TEST_P(IcingSearchEngineSearchTest, LatinSnippetTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ constexpr std::string_view kLatin = "test ḞÖÖḸĬŞĤ test";
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kLatin)
+ .Build();
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("foo");
+ search_spec.set_term_match_type(TERM_MATCH_PREFIX);
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(
+ std::numeric_limits<int>::max());
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(
+ std::numeric_limits<int>::max());
+
+ // Search and make sure that we got a single successful result
+ SearchResultProto search_results = icing.Search(
+ search_spec, ScoringSpecProto::default_instance(), result_spec);
+ ASSERT_THAT(search_results.status(), ProtoIsOk());
+ ASSERT_THAT(search_results.results(), SizeIs(1));
+ const SearchResultProto::ResultProto* result = &search_results.results(0);
+ EXPECT_THAT(result->document().uri(), Eq("uri1"));
+
+ // Ensure that one and only one property was matched and it was "body"
+ ASSERT_THAT(result->snippet().entries(), SizeIs(1));
+ const SnippetProto::EntryProto* entry = &result->snippet().entries(0);
+ EXPECT_THAT(entry->property_name(), Eq("body"));
+
+ // Ensure that there is one and only one match within "body"
+ ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
+
+ // Check that the match is "ḞÖÖḸĬŞĤ".
+ const SnippetMatchProto& match_proto = entry->snippet_matches(0);
+ std::string_view match =
+ kLatin.substr(match_proto.exact_match_byte_position(),
+ match_proto.submatch_byte_length());
+ ASSERT_THAT(match, Eq("ḞÖÖ"));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ DocumentStoreNamespaceIdFingerprintCompatible) {
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
+
+ // Initialize with some documents with document_store_namespace_id_fingerprint
+ // being false.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_document_store_namespace_id_fingerprint(false);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates and inserts 3 documents
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ }
+
+ // Reinitializate with document_store_namespace_id_fingerprint being true,
+ // and test that we are still able to read/query docs.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_document_store_namespace_id_fingerprint(true);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ ASSERT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoIsOk());
+ ASSERT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoIsOk());
+ ASSERT_THAT(
+ icing.Get("namespace", "uri3", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(3));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document3));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document2));
+ EXPECT_THAT(results.results(2).document(), EqualsProto(document1));
+ }
+
+ // Reinitializate with document_store_namespace_id_fingerprint being false,
+ // and test that we are still able to read/query docs.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_document_store_namespace_id_fingerprint(false);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ ASSERT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoIsOk());
+ ASSERT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoIsOk());
+ ASSERT_THAT(
+ icing.Get("namespace", "uri3", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(3));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document3));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document2));
+ EXPECT_THAT(results.results(2).document(), EqualsProto(document1));
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ IcingSearchEngineSearchTest, IcingSearchEngineSearchTest,
+ testing::Values(
+ SearchSpecProto::SearchType::ICING_RAW_QUERY,
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY));
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine_suggest_test.cc b/icing/icing-search-engine_suggest_test.cc
new file mode 100644
index 0000000..b3aeafc
--- /dev/null
+++ b/icing/icing-search-engine_suggest_test.cc
@@ -0,0 +1,1601 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/icing-search-engine.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/portable/endian.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/reset.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::UnorderedElementsAre;
+
+// For mocking purpose, we allow tests to provide a custom Filesystem.
+class TestIcingSearchEngine : public IcingSearchEngine {
+ public:
+ TestIcingSearchEngine(const IcingSearchEngineOptions& options,
+ std::unique_ptr<const Filesystem> filesystem,
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
+ std::unique_ptr<Clock> clock,
+ std::unique_ptr<JniCache> jni_cache)
+ : IcingSearchEngine(options, std::move(filesystem),
+ std::move(icing_filesystem), std::move(clock),
+ std::move(jni_cache)) {}
+};
+
+std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; }
+
+// This test is meant to cover all tests relating to IcingSearchEngine::Search
+// and IcingSearchEngine::SearchSuggestions.
+class IcingSearchEngineSuggestTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ // If we've specified using the reverse-JNI method for segmentation (i.e.
+ // not ICU), then we won't have the ICU data file included to set up.
+ // Technically, we could choose to use reverse-JNI for segmentation AND
+ // include an ICU data file, but that seems unlikely and our current BUILD
+ // setup doesn't do this.
+ // File generated via icu_data_file rule in //icing/BUILD.
+ std::string icu_data_file_path =
+ GetTestFilePath("icing/icu.dat");
+ ICING_ASSERT_OK(
+ icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
+ }
+ filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ const Filesystem* filesystem() const { return &filesystem_; }
+
+ private:
+ Filesystem filesystem_;
+};
+
+IcingSearchEngineOptions GetDefaultIcingOptions() {
+ IcingSearchEngineOptions icing_options;
+ icing_options.set_base_dir(GetTestBaseDir());
+ return icing_options;
+}
+
+SchemaProto CreatePersonAndEmailSchema() {
+ return SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+}
+
+TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // Creates and inserts 6 documents, and index 6 termSix, 5 termFive, 4
+ // termFour, 3 termThree, 2 termTwo and one termOne.
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty(
+ "subject", "termOne termTwo termThree termFour termFive termSix")
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject",
+ "termTwo termThree termFour termFive termSix")
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "termThree termFour termFive termSix")
+ .Build();
+ DocumentProto document4 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri4")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "termFour termFive termSix")
+ .Build();
+ DocumentProto document5 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri5")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "termFive termSix")
+ .Build();
+ DocumentProto document6 = DocumentBuilder()
+ .SetKey("namespace", "uri6")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "termSix")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document6).status(), ProtoIsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("t");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ // Query all suggestions, and they will be ranked.
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions().at(0).query(), "termsix");
+ ASSERT_THAT(response.suggestions().at(1).query(), "termfive");
+ ASSERT_THAT(response.suggestions().at(2).query(), "termfour");
+ ASSERT_THAT(response.suggestions().at(3).query(), "termthree");
+ ASSERT_THAT(response.suggestions().at(4).query(), "termtwo");
+ ASSERT_THAT(response.suggestions().at(5).query(), "termone");
+
+ // Query first three suggestions, and they will be ranked.
+ suggestion_spec.set_num_to_return(3);
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions().at(0).query(), "termsix");
+ ASSERT_THAT(response.suggestions().at(1).query(), "termfive");
+ ASSERT_THAT(response.suggestions().at(2).query(), "termfour");
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_ShouldReturnInOneNamespace) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo fool")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionFoo;
+ suggestionFoo.set_query("foo");
+ SuggestionResponse::Suggestion suggestionFool;
+ suggestionFool.set_query("fool");
+
+ // namespace1 has 2 results.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.add_namespace_filters("namespace1");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFoo),
+ EqualsProto(suggestionFool)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_ShouldReturnInMultipleNamespace) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fo")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace3", "uri3")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionFoo;
+ suggestionFoo.set_query("foo");
+ SuggestionResponse::Suggestion suggestionFool;
+ suggestionFool.set_query("fool");
+
+ // namespace2 and namespace3 has 2 results.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.add_namespace_filters("namespace2");
+ suggestion_spec.add_namespace_filters("namespace3");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFoo),
+ EqualsProto(suggestionFool)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_NamespaceNotFound) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fo")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // Search for non-exist namespace3
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.add_namespace_filters("namespace3");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ EXPECT_THAT(response.status().code(), Eq(StatusProto::OK));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_OtherNamespaceDontContributeToHitCount) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // Index 4 documents,
+ // namespace1 has 2 hit2 for term one
+ // namespace2 has 2 hit2 for term two and 1 hit for term one.
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "termone")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "termone")
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "termone termtwo")
+ .Build();
+ DocumentProto document4 = DocumentBuilder()
+ .SetKey("namespace2", "uri3")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "termtwo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionTermOne;
+ suggestionTermOne.set_query("termone");
+ SuggestionResponse::Suggestion suggestionTermTwo;
+ suggestionTermTwo.set_query("termtwo");
+
+ // only search suggestion for namespace2. The correctly order should be
+ // {"termtwo", "termone"}. If we're not filtering out namespace1 when
+ // calculating our score, then it will be {"termone", "termtwo"}.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("t");
+ suggestion_spec.add_namespace_filters("namespace2");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ ElementsAre(EqualsProto(suggestionTermTwo),
+ EqualsProto(suggestionTermOne)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_DeletionTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionFool;
+ suggestionFool.set_query("fool");
+
+ // namespace1 has this suggestion
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.add_namespace_filters("namespace1");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFool)));
+
+ // namespace2 has this suggestion
+ suggestion_spec.clear_namespace_filters();
+ suggestion_spec.add_namespace_filters("namespace2");
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFool)));
+
+ // delete document from namespace 1
+ EXPECT_THAT(icing.Delete("namespace1", "uri1").status(), ProtoIsOk());
+
+ // Now namespace1 will return empty
+ suggestion_spec.clear_namespace_filters();
+ suggestion_spec.add_namespace_filters("namespace1");
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(), IsEmpty());
+
+ // namespace2 still has this suggestion, so we can prove the reason of
+ // namespace 1 cannot find it is we filter it out, not it doesn't exist.
+ suggestion_spec.add_namespace_filters("namespace2");
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFool)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_ShouldReturnInOneDocument) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionFool;
+ suggestionFool.set_query("fool");
+ SuggestionResponse::Suggestion suggestionFoo;
+ suggestionFoo.set_query("foo");
+
+ // Only search in namespace1,uri1
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+ NamespaceDocumentUriGroup* namespace1_uri1 =
+ suggestion_spec.add_document_uri_filters();
+ namespace1_uri1->set_namespace_("namespace1");
+ namespace1_uri1->add_document_uris("uri1");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFool)));
+
+ // Only search in namespace1,uri2
+ suggestion_spec.clear_document_uri_filters();
+ NamespaceDocumentUriGroup* namespace1_uri2 =
+ suggestion_spec.add_document_uri_filters();
+ namespace1_uri2->set_namespace_("namespace1");
+ namespace1_uri2->add_document_uris("uri2");
+
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFoo)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_ShouldReturnInMultipleDocument) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace1", "uri3")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionFool;
+ suggestionFool.set_query("fool");
+ SuggestionResponse::Suggestion suggestionFoo;
+ suggestionFoo.set_query("foo");
+
+ // Only search document in namespace1,uri1 and namespace2,uri2
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+ NamespaceDocumentUriGroup* namespace1_uri1_uri2 =
+ suggestion_spec.add_document_uri_filters();
+ namespace1_uri1_uri2->set_namespace_("namespace1");
+ namespace1_uri1_uri2->add_document_uris("uri1");
+ namespace1_uri1_uri2->add_document_uris("uri2");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFool),
+ EqualsProto(suggestionFoo)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_ShouldReturnInDesiredDocumentAndNamespace) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace3", "uri3")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionFool;
+ suggestionFool.set_query("fool");
+ SuggestionResponse::Suggestion suggestionFoo;
+ suggestionFoo.set_query("foo");
+
+ // Only search document in namespace1,uri1 and all documents under namespace2
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+ suggestion_spec.add_namespace_filters("namespace1");
+ suggestion_spec.add_namespace_filters("namespace2");
+ NamespaceDocumentUriGroup* namespace1_uri1 =
+ suggestion_spec.add_document_uri_filters();
+ namespace1_uri1->set_namespace_("namespace1");
+ namespace1_uri1->add_document_uris("uri1");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFool),
+ EqualsProto(suggestionFoo)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_DocumentIdDoesntExist) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // Search for a non-exist document id : namespace3,uri3
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+ suggestion_spec.add_namespace_filters("namespace3");
+ NamespaceDocumentUriGroup* namespace3_uri3 =
+ suggestion_spec.add_document_uri_filters();
+ namespace3_uri3->set_namespace_("namespace3");
+ namespace3_uri3->add_document_uris("uri3");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(), IsEmpty());
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_DocumentIdFilterDoesntMatchNamespaceFilter) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // Search for the document namespace1,uri1 with namespace filter in
+ // namespace2.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+ NamespaceDocumentUriGroup* namespace1_uri1 =
+ suggestion_spec.add_document_uri_filters();
+ namespace1_uri1->set_namespace_("namespace1");
+ namespace1_uri1->add_document_uris("uri1");
+ suggestion_spec.add_namespace_filters("namespace2");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ EXPECT_THAT(response.status().code(), Eq(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_EmptyDocumentIdInNamespace) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ // Give empty document uris in namespace 1
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+ NamespaceDocumentUriGroup* namespace1_uri1 =
+ suggestion_spec.add_document_uri_filters();
+ namespace1_uri1->set_namespace_("namespace1");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ EXPECT_THAT(response.status().code(), Eq(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_ShouldReturnInDesiredSchemaType) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .AddDocumentProperty("sender", DocumentBuilder()
+ .SetKey("namespace", "uri1-sender")
+ .SetSchema("Person")
+ .AddStringProperty("name", "foo")
+ .Build())
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Message")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("body", "fo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionFool;
+ suggestionFool.set_query("fool");
+ SuggestionResponse::Suggestion suggestionFoo;
+ suggestionFoo.set_query("foo");
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+ suggestion_spec.add_schema_type_filters("Email");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFoo),
+ EqualsProto(suggestionFool)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_SchemaTypeNotFound) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Message")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("body", "fo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+ suggestion_spec.add_schema_type_filters("Email");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(), IsEmpty());
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_ShouldReturnInDesiredProperty) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1-sender")
+ .SetSchema("Person")
+ .AddStringProperty("name", "foo")
+ .AddStringProperty("emailAddress", "fo")
+ .Build())
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionFool;
+ suggestionFool.set_query("fool");
+ SuggestionResponse::Suggestion suggestionFoo;
+ suggestionFoo.set_query("foo");
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ // Only search in subject.
+ TypePropertyMask* mask = suggestion_spec.add_type_property_filters();
+ mask->set_schema_type("Email");
+ mask->add_paths("subject");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFool)));
+
+ // Search in subject and sender.name
+ suggestion_spec.clear_type_property_filters();
+ mask = suggestion_spec.add_type_property_filters();
+ mask->set_schema_type("Email");
+ mask->add_paths("subject");
+ mask->add_paths("sender.name");
+
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFoo),
+ EqualsProto(suggestionFool)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_NestedPropertyReturnNothing) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .AddDocumentProperty("sender", DocumentBuilder()
+ .SetKey("namespace", "uri1-sender")
+ .SetSchema("Person")
+ .AddStringProperty("name", "foo")
+ .Build())
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ // Only search in Person.name.
+ suggestion_spec.add_schema_type_filters("Person");
+ TypePropertyMask* mask = suggestion_spec.add_type_property_filters();
+ mask->set_schema_type("Person");
+ mask->add_paths("name");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(), IsEmpty());
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_PropertyFilterAndSchemaFilter) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .AddDocumentProperty("sender", DocumentBuilder()
+ .SetKey("namespace", "uri1-sender")
+ .SetSchema("Person")
+ .AddStringProperty("name", "foo")
+ .Build())
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Message")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("body", "fo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionFoo;
+ suggestionFoo.set_query("foo");
+ SuggestionResponse::Suggestion suggestionFo;
+ suggestionFo.set_query("fo");
+
+ // Search in sender.name of Email and everything in Message.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+ suggestion_spec.add_schema_type_filters("Email");
+ suggestion_spec.add_schema_type_filters("Message");
+ TypePropertyMask* mask1 = suggestion_spec.add_type_property_filters();
+ mask1->set_schema_type("Email");
+ mask1->add_paths("sender.name");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFoo),
+ EqualsProto(suggestionFo)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_PropertyFilterNotMatchSchemaFilter) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Message")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("body", "fo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ // Search in sender.name of Email but schema type is Message.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+ suggestion_spec.add_schema_type_filters("Message");
+ TypePropertyMask* mask1 = suggestion_spec.add_type_property_filters();
+ mask1->set_schema_type("Email");
+ mask1->add_paths("sender.name");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ EXPECT_THAT(response.status().code(), Eq(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_OrderByTermFrequency) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Message")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty(
+ "body", "termthree termthree termthree termtwo termtwo termone")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ // Search in sender.name of Email but schema type is Message.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("t");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::EXACT_ONLY);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::TERM_FREQUENCY);
+
+ SuggestionResponse::Suggestion suggestionTermOne;
+ suggestionTermOne.set_query("termone");
+ SuggestionResponse::Suggestion suggestionTermTwo;
+ suggestionTermTwo.set_query("termtwo");
+ SuggestionResponse::Suggestion suggestionTermThree;
+ suggestionTermThree.set_query("termthree");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ ElementsAre(EqualsProto(suggestionTermThree),
+ EqualsProto(suggestionTermTwo),
+ EqualsProto(suggestionTermOne)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_ExpiredTest) {
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(500)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(1000)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetSystemTimeMilliseconds(400);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionFool;
+ suggestionFool.set_query("fool");
+
+ // namespace1 has this suggestion
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.add_namespace_filters("namespace1");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFool)));
+
+ // namespace2 has this suggestion
+ suggestion_spec.clear_namespace_filters();
+ suggestion_spec.add_namespace_filters("namespace2");
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFool)));
+ }
+ // We reinitialize here so we can feed in a fake clock this time
+ {
+ // Time needs to be past document1 creation time (100) + ttl (500) for it
+ // to count as "expired". document2 is not expired since its ttl is 1000.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetSystemTimeMilliseconds(800);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.add_namespace_filters("namespace1");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ // Now namespace1 will return empty
+ suggestion_spec.clear_namespace_filters();
+ suggestion_spec.add_namespace_filters("namespace1");
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(), IsEmpty());
+
+ // namespace2 still has this suggestion
+ SuggestionResponse::Suggestion suggestionFool;
+ suggestionFool.set_query("fool");
+
+ suggestion_spec.add_namespace_filters("namespace2");
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFool)));
+ }
+}
+
+TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_emptyPrefix) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ ASSERT_THAT(icing.SearchSuggestions(suggestion_spec).status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_NonPositiveNumToReturn) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("prefix");
+ suggestion_spec.set_num_to_return(0);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ ASSERT_THAT(icing.SearchSuggestions(suggestion_spec).status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_MultipleTerms_And) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "bar fo")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionBarFo;
+ suggestionBarFo.set_query("bar fo");
+
+ // Search "bar AND f" only document 1 should match the search.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("bar f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionBarFo)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_MultipleTerms_Or) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "bar fo")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "cat foo")
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionBarCatFo;
+ suggestionBarCatFo.set_query("bar OR cat fo");
+ SuggestionResponse::Suggestion suggestionBarCatFoo;
+ suggestionBarCatFoo.set_query("bar OR cat foo");
+
+ // Search for "(bar OR cat) AND f" both document1 "bar fo" and document2 "cat
+ // foo" could match.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("bar OR cat f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionBarCatFo),
+ EqualsProto(suggestionBarCatFoo)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_PropertyRestriction) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1-sender")
+ .SetSchema("Person")
+ .AddStringProperty("name", "foo")
+ .AddStringProperty("emailAddress", "fo")
+ .Build())
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ // Add property restriction, only search for subject.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("subject:f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse::Suggestion suggestionSubjectFool;
+ suggestionSubjectFool.set_query("subject:fool");
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionSubjectFool)));
+
+ // Add property restriction, only search for nested sender.name
+ suggestion_spec.set_prefix("sender.name:f");
+ SuggestionResponse::Suggestion suggestionSenderNameFoo;
+ suggestionSenderNameFoo.set_query("sender.name:foo");
+
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionSenderNameFoo)));
+
+ // Add property restriction, only search for nonExist section
+ suggestion_spec.set_prefix("none:f");
+
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(), IsEmpty());
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_AndOperatorPlusPropertyRestriction) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "bar fo") // "bar fo"
+ .AddStringProperty("body", "fool")
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "bar cat foo") // "bar cat fool"
+ .AddStringProperty("body", "fool")
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace1", "uri3")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool") // "fool"
+ .AddStringProperty("body", "fool")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ // Search for "bar AND subject:f"
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("bar subject:f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse::Suggestion suggestionBarSubjectFo;
+ suggestionBarSubjectFo.set_query("bar subject:fo");
+ SuggestionResponse::Suggestion suggestionBarSubjectFoo;
+ suggestionBarSubjectFoo.set_query("bar subject:foo");
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionBarSubjectFo),
+ EqualsProto(suggestionBarSubjectFoo)));
+
+ // Search for "bar AND cat AND subject:f"
+ suggestion_spec.set_prefix("bar cat subject:f");
+ SuggestionResponse::Suggestion suggestionBarCatSubjectFoo;
+ suggestionBarCatSubjectFoo.set_query("bar cat subject:foo");
+
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionBarCatSubjectFoo)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_InvalidPrefixTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "bar fo") // "bar fo"
+ .AddStringProperty("body", "fool")
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "bar cat foo") // "bar cat fool"
+ .AddStringProperty("body", "fool")
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace1", "uri3")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool") // "fool"
+ .AddStringProperty("body", "fool")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ // Search for "f OR"
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f OR");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ if (SearchSpecProto::default_instance().search_type() ==
+ SearchSpecProto::SearchType::ICING_RAW_QUERY) {
+ EXPECT_THAT(response.status(), ProtoIsOk());
+ EXPECT_THAT(response.suggestions(), IsEmpty());
+ } else {
+ EXPECT_THAT(response.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+ EXPECT_THAT(response.suggestions(), IsEmpty());
+ }
+
+ // TODO(b/208654892): Update handling for hyphens to only consider it a hyphen
+ // within a TEXT token (rather than a MINUS token) when surrounded on both
+ // sides by TEXT rather than just preceded by TEXT.
+ // Search for "f-"
+ suggestion_spec.set_prefix("f-");
+ response = icing.SearchSuggestions(suggestion_spec);
+ EXPECT_THAT(response.status(), ProtoIsOk());
+ EXPECT_THAT(response.suggestions(), IsEmpty());
+
+ // Search for "f:"
+ suggestion_spec.set_prefix("f:");
+ response = icing.SearchSuggestions(suggestion_spec);
+ if (SearchSpecProto::default_instance().search_type() ==
+ SearchSpecProto::SearchType::ICING_RAW_QUERY) {
+ EXPECT_THAT(response.status(), ProtoIsOk());
+ EXPECT_THAT(response.suggestions(), IsEmpty());
+ } else {
+ EXPECT_THAT(response.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+ EXPECT_THAT(response.suggestions(), IsEmpty());
+ }
+
+ // Search for "OR OR - :"
+ suggestion_spec.set_prefix("OR OR - :");
+ response = icing.SearchSuggestions(suggestion_spec);
+ if (SearchSpecProto::default_instance().search_type() ==
+ SearchSpecProto::SearchType::ICING_RAW_QUERY) {
+ EXPECT_THAT(response.status(), ProtoIsOk());
+ EXPECT_THAT(response.suggestions(), IsEmpty());
+ } else {
+ EXPECT_THAT(response.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+ EXPECT_THAT(response.suggestions(), IsEmpty());
+ }
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc
index dff38fb..ddb83a8 100644
--- a/icing/icing-search-engine_test.cc
+++ b/icing/icing-search-engine_test.cc
@@ -27,8 +27,6 @@
#include "icing/file/filesystem.h"
#include "icing/file/mock-filesystem.h"
#include "icing/jni/jni-cache.h"
-#include "icing/join/join-processor.h"
-#include "icing/legacy/index/icing-mock-filesystem.h"
#include "icing/portable/endian.h"
#include "icing/portable/equals-proto.h"
#include "icing/portable/platform.h"
@@ -47,19 +45,13 @@
#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/proto/usage.pb.h"
-#include "icing/query/query-features.h"
#include "icing/schema-builder.h"
-#include "icing/schema/schema-store.h"
-#include "icing/schema/section.h"
-#include "icing/store/document-log-creator.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/icu-data-file-helper.h"
#include "icing/testing/jni-test-helpers.h"
-#include "icing/testing/random-string.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
-#include "icing/util/snippet-helpers.h"
namespace icing {
namespace lib {
@@ -67,54 +59,16 @@ namespace lib {
namespace {
using ::icing::lib::portable_equals_proto::EqualsProto;
-using ::testing::_;
-using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::Ge;
using ::testing::Gt;
using ::testing::HasSubstr;
using ::testing::IsEmpty;
-using ::testing::Le;
-using ::testing::Lt;
-using ::testing::Matcher;
-using ::testing::Ne;
using ::testing::Return;
using ::testing::SizeIs;
using ::testing::StrEq;
using ::testing::UnorderedElementsAre;
-constexpr std::string_view kIpsumText =
- "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla convallis "
- "scelerisque orci quis hendrerit. Sed augue turpis, sodales eu gravida "
- "nec, scelerisque nec leo. Maecenas accumsan interdum commodo. Aliquam "
- "mattis sapien est, sit amet interdum risus dapibus sed. Maecenas leo "
- "erat, fringilla in nisl a, venenatis gravida metus. Phasellus venenatis, "
- "orci in aliquet mattis, lectus sapien volutpat arcu, sed hendrerit ligula "
- "arcu nec mauris. Integer dolor mi, rhoncus eget gravida et, pulvinar et "
- "nunc. Aliquam ac sollicitudin nisi. Vivamus sit amet urna vestibulum, "
- "tincidunt eros sed, efficitur nisl. Fusce non neque accumsan, sagittis "
- "nisi eget, sagittis turpis. Ut pulvinar nibh eu purus feugiat faucibus. "
- "Donec tellus nulla, tincidunt vel lacus id, bibendum fermentum turpis. "
- "Nullam ultrices sed nibh vitae aliquet. Ut risus neque, consectetur "
- "vehicula posuere vitae, convallis eu lorem. Donec semper augue eu nibh "
- "placerat semper.";
-
-PortableFileBackedProtoLog<DocumentWrapper>::Header ReadDocumentLogHeader(
- Filesystem filesystem, const std::string& file_path) {
- PortableFileBackedProtoLog<DocumentWrapper>::Header header;
- filesystem.PRead(file_path.c_str(), &header,
- sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header),
- /*offset=*/0);
- return header;
-}
-
-void WriteDocumentLogHeader(
- Filesystem filesystem, const std::string& file_path,
- PortableFileBackedProtoLog<DocumentWrapper>::Header& header) {
- filesystem.Write(file_path.c_str(), &header,
- sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header));
-}
-
// For mocking purpose, we allow tests to provide a custom Filesystem.
class TestIcingSearchEngine : public IcingSearchEngine {
public:
@@ -130,6 +84,8 @@ class TestIcingSearchEngine : public IcingSearchEngine {
std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; }
+// This test is meant to cover all tests relating to IcingSearchEngine apis not
+// specifically covered by the other IcingSearchEngine*Test.
class IcingSearchEngineTest : public testing::Test {
protected:
void SetUp() override {
@@ -158,21 +114,9 @@ class IcingSearchEngineTest : public testing::Test {
Filesystem filesystem_;
};
-constexpr int kMaxSupportedDocumentSize = (1u << 24) - 1;
-
// Non-zero value so we don't override it to be the current time
constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;
-std::string GetDocumentDir() { return GetTestBaseDir() + "/document_dir"; }
-
-std::string GetIndexDir() { return GetTestBaseDir() + "/index_dir"; }
-
-std::string GetSchemaDir() { return GetTestBaseDir() + "/schema_dir"; }
-
-std::string GetHeaderFilename() {
- return GetTestBaseDir() + "/icing_search_engine_header";
-}
-
IcingSearchEngineOptions GetDefaultIcingOptions() {
IcingSearchEngineOptions icing_options;
icing_options.set_base_dir(GetTestBaseDir());
@@ -188,19 +132,6 @@ DocumentProto CreateMessageDocument(std::string name_space, std::string uri) {
.Build();
}
-DocumentProto CreateEmailDocument(const std::string& name_space,
- const std::string& uri, int score,
- const std::string& subject_content,
- const std::string& body_content) {
- return DocumentBuilder()
- .SetKey(name_space, uri)
- .SetSchema("Email")
- .SetScore(score)
- .AddStringProperty("subject", subject_content)
- .AddStringProperty("body", body_content)
- .Build();
-}
-
SchemaProto CreateMessageSchema() {
return SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
@@ -211,23 +142,6 @@ SchemaProto CreateMessageSchema() {
.Build();
}
-SchemaProto CreateEmailSchema() {
- return SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED))
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED)))
- .Build();
-}
-
SchemaProto CreatePersonAndEmailSchema() {
return SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -270,7 +184,7 @@ ScoringSpecProto GetDefaultScoringSpec() {
}
UsageReport CreateUsageReport(std::string name_space, std::string uri,
- int64 timestamp_ms,
+ int64_t timestamp_ms,
UsageReport::UsageType usage_type) {
UsageReport usage_report;
usage_report.set_document_namespace(name_space);
@@ -280,1639 +194,6 @@ UsageReport CreateUsageReport(std::string name_space, std::string uri,
return usage_report;
}
-std::vector<std::string> GetUrisFromSearchResults(
- SearchResultProto& search_result_proto) {
- std::vector<std::string> result_uris;
- result_uris.reserve(search_result_proto.results_size());
- for (int i = 0; i < search_result_proto.results_size(); i++) {
- result_uris.push_back(
- search_result_proto.mutable_results(i)->document().uri());
- }
- return result_uris;
-}
-
-TEST_F(IcingSearchEngineTest, SimpleInitialization) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- DocumentProto document = CreateMessageDocument("namespace", "uri");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(DocumentProto(document)).status(), ProtoIsOk());
-}
-
-TEST_F(IcingSearchEngineTest, InitializingAgainSavesNonPersistedData) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- DocumentProto document = CreateMessageDocument("namespace", "uri");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document;
-
- ASSERT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, MaxIndexMergeSizeReturnsInvalidArgument) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(std::numeric_limits<int32_t>::max());
- IcingSearchEngine icing(options, GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, NegativeMergeSizeReturnsInvalidArgument) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(-1);
- IcingSearchEngine icing(options, GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, ZeroMergeSizeReturnsInvalidArgument) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(0);
- IcingSearchEngine icing(options, GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, GoodIndexMergeSizeReturnsOk) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- // One is fine, if a bit weird. It just means that the lite index will be
- // smaller and will request a merge any time content is added to it.
- options.set_index_merge_size(1);
- IcingSearchEngine icing(options, GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
-}
-
-TEST_F(IcingSearchEngineTest, NegativeMaxTokenLenReturnsInvalidArgument) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_max_token_length(-1);
- IcingSearchEngine icing(options, GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, ZeroMaxTokenLenReturnsInvalidArgument) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_max_token_length(0);
- IcingSearchEngine icing(options, GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, MaxTokenLenReturnsOkAndTruncatesTokens) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- // A length of 1 is allowed - even though it would be strange to want
- // this.
- options.set_max_token_length(1);
- IcingSearchEngine icing(options, GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- DocumentProto document = CreateMessageDocument("namespace", "uri");
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- // "message" should have been truncated to "m"
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- // The indexed tokens were truncated to length of 1, so "m" will match
- search_spec.set_query("m");
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document;
-
- SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- // The query token is also truncated to length of 1, so "me"->"m" matches "m"
- search_spec.set_query("me");
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- // The query token is still truncated to length of 1, so "massage"->"m"
- // matches "m"
- search_spec.set_query("massage");
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- MaxIntMaxTokenLenReturnsOkTooLargeTokenReturnsResourceExhausted) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- // Set token length to max. This is allowed (it just means never to
- // truncate tokens). However, this does mean that tokens that exceed the
- // size of the lexicon will cause indexing to fail.
- options.set_max_token_length(std::numeric_limits<int32_t>::max());
- IcingSearchEngine icing(options, GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Add a document that just barely fits under the max document limit.
- // This will still fail to index because we won't actually have enough
- // room in the lexicon to fit this content.
- std::string enormous_string(kMaxSupportedDocumentSize - 256, 'p');
- DocumentProto document =
- DocumentBuilder()
- .SetKey("namespace", "uri")
- .SetSchema("Message")
- .AddStringProperty("body", std::move(enormous_string))
- .Build();
- EXPECT_THAT(icing.Put(document).status(),
- ProtoStatusIs(StatusProto::OUT_OF_SPACE));
-
- SearchSpecProto search_spec;
- search_spec.set_query("p");
- search_spec.set_term_match_type(TermMatchType::PREFIX);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, FailToCreateDocStore) {
- auto mock_filesystem = std::make_unique<MockFilesystem>();
- // This fails DocumentStore::Create()
- ON_CALL(*mock_filesystem, CreateDirectoryRecursively(_))
- .WillByDefault(Return(false));
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::move(mock_filesystem),
- std::make_unique<IcingFilesystem>(),
- std::make_unique<FakeClock>(), GetTestJniCache());
-
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(),
- ProtoStatusIs(StatusProto::INTERNAL));
- EXPECT_THAT(initialize_result_proto.status().message(),
- HasSubstr("Could not create directory"));
-}
-
-TEST_F(IcingSearchEngineTest, InitMarkerFilePreviousFailuresAtThreshold) {
- Filesystem filesystem;
- DocumentProto email1 =
- CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1");
- email1.set_creation_timestamp_ms(10000);
- DocumentProto email2 =
- CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2");
- email2.set_creation_timestamp_ms(10000);
-
- {
- // Create an index with a few documents.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto init_result = icing.Initialize();
- ASSERT_THAT(init_result.status(), ProtoIsOk());
- ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
- Eq(0));
- ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
- }
-
- // Write an init marker file with 5 previously failed attempts.
- std::string marker_filepath = GetTestBaseDir() + "/init_marker";
-
- {
- ScopedFd marker_file_fd(filesystem.OpenForWrite(marker_filepath.c_str()));
- int network_init_attempts = GHostToNetworkL(5);
- // Write the updated number of attempts before we get started.
- ASSERT_TRUE(filesystem.PWrite(marker_file_fd.get(), 0,
- &network_init_attempts,
- sizeof(network_init_attempts)));
- ASSERT_TRUE(filesystem.DataSync(marker_file_fd.get()));
- }
-
- {
- // Create the index again and verify that initialization succeeds and no
- // data is thrown out.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto init_result = icing.Initialize();
- ASSERT_THAT(init_result.status(), ProtoIsOk());
- ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
- Eq(5));
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance())
- .document(),
- EqualsProto(email1));
- EXPECT_THAT(
- icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
- .document(),
- EqualsProto(email2));
- }
-
- // The successful init should have thrown out the marker file.
- ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str()));
-}
-
-TEST_F(IcingSearchEngineTest, InitMarkerFilePreviousFailuresBeyondThreshold) {
- Filesystem filesystem;
- DocumentProto email1 =
- CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1");
- DocumentProto email2 =
- CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2");
-
- {
- // Create an index with a few documents.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto init_result = icing.Initialize();
- ASSERT_THAT(init_result.status(), ProtoIsOk());
- ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
- Eq(0));
- ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
- }
-
- // Write an init marker file with 6 previously failed attempts.
- std::string marker_filepath = GetTestBaseDir() + "/init_marker";
-
- {
- ScopedFd marker_file_fd(filesystem.OpenForWrite(marker_filepath.c_str()));
- int network_init_attempts = GHostToNetworkL(6);
- // Write the updated number of attempts before we get started.
- ASSERT_TRUE(filesystem.PWrite(marker_file_fd.get(), 0,
- &network_init_attempts,
- sizeof(network_init_attempts)));
- ASSERT_TRUE(filesystem.DataSync(marker_file_fd.get()));
- }
-
- {
- // Create the index again and verify that initialization succeeds and all
- // data is thrown out.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto init_result = icing.Initialize();
- ASSERT_THAT(init_result.status(),
- ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
- ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
- Eq(6));
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance())
- .status(),
- ProtoStatusIs(StatusProto::NOT_FOUND));
- EXPECT_THAT(
- icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
- .status(),
- ProtoStatusIs(StatusProto::NOT_FOUND));
- }
-
- // The successful init should have thrown out the marker file.
- ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str()));
-}
-
-TEST_F(IcingSearchEngineTest, SuccessiveInitFailuresIncrementsInitMarker) {
- Filesystem filesystem;
- DocumentProto email1 =
- CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1");
- DocumentProto email2 =
- CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2");
-
- {
- // 1. Create an index with a few documents.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto init_result = icing.Initialize();
- ASSERT_THAT(init_result.status(), ProtoIsOk());
- ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
- Eq(0));
- ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
- }
-
- {
- // 2. Create an index that will encounter an IO failure when trying to
- // create the document log.
- IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
-
- auto mock_filesystem = std::make_unique<MockFilesystem>();
- std::string document_log_filepath =
- icing_options.base_dir() + "/document_dir/document_log_v1";
- auto get_filesize_lambda = [this,
- &document_log_filepath](const char* filename) {
- if (strncmp(document_log_filepath.c_str(), filename,
- document_log_filepath.length()) == 0) {
- return Filesystem::kBadFileSize;
- }
- return this->filesystem()->GetFileSize(filename);
- };
- ON_CALL(*mock_filesystem, GetFileSize(A<const char*>()))
- .WillByDefault(get_filesize_lambda);
-
- TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem),
- std::make_unique<IcingFilesystem>(),
- std::make_unique<FakeClock>(),
- GetTestJniCache());
-
- // Fail to initialize six times in a row.
- InitializeResultProto init_result = icing.Initialize();
- ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
- ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
- Eq(0));
-
- init_result = icing.Initialize();
- ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
- ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
- Eq(1));
-
- init_result = icing.Initialize();
- ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
- ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
- Eq(2));
-
- init_result = icing.Initialize();
- ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
- ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
- Eq(3));
-
- init_result = icing.Initialize();
- ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
- ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
- Eq(4));
-
- init_result = icing.Initialize();
- ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
- ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
- Eq(5));
- }
-
- {
- // 3. Create the index again and verify that initialization succeeds and all
- // data is thrown out.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto init_result = icing.Initialize();
- ASSERT_THAT(init_result.status(),
- ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
- ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
- Eq(6));
-
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance())
- .status(),
- ProtoStatusIs(StatusProto::NOT_FOUND));
- EXPECT_THAT(
- icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
- .status(),
- ProtoStatusIs(StatusProto::NOT_FOUND));
- }
-
- // The successful init should have thrown out the marker file.
- std::string marker_filepath = GetTestBaseDir() + "/init_marker";
- ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str()));
-}
-
-TEST_F(IcingSearchEngineTest,
- CircularReferenceCreateSectionManagerReturnsInvalidArgument) {
- // Create a type config with a circular reference.
- SchemaProto schema;
- auto* type = schema.add_types();
- type->set_schema_type("Message");
-
- auto* body = type->add_properties();
- body->set_property_name("recipient");
- body->set_schema_type("Person");
- body->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_document_indexing_config()->set_index_nested_properties(true);
-
- type = schema.add_types();
- type->set_schema_type("Person");
-
- body = type->add_properties();
- body->set_property_name("recipient");
- body->set_schema_type("Message");
- body->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_document_indexing_config()->set_index_nested_properties(true);
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(schema).status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, PutWithoutSchemaFailedPrecondition) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- DocumentProto document = CreateMessageDocument("namespace", "uri");
- PutResultProto put_result_proto = icing.Put(document);
- EXPECT_THAT(put_result_proto.status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(put_result_proto.status().message(), HasSubstr("Schema not set"));
-}
-
-TEST_F(IcingSearchEngineTest, FailToReadSchema) {
- IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
-
- {
- // Successfully initialize and set a schema
- IcingSearchEngine icing(icing_options, GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- }
-
- auto mock_filesystem = std::make_unique<MockFilesystem>();
-
- // This fails FileBackedProto::Read() when we try to check the schema we
- // had previously set
- ON_CALL(*mock_filesystem,
- OpenForRead(Eq(icing_options.base_dir() + "/schema_dir/schema.pb")))
- .WillByDefault(Return(-1));
-
- TestIcingSearchEngine test_icing(icing_options, std::move(mock_filesystem),
- std::make_unique<IcingFilesystem>(),
- std::make_unique<FakeClock>(),
- GetTestJniCache());
-
- InitializeResultProto initialize_result_proto = test_icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(),
- ProtoStatusIs(StatusProto::INTERNAL));
- EXPECT_THAT(initialize_result_proto.status().message(),
- HasSubstr("Unable to open file for read"));
-}
-
-TEST_F(IcingSearchEngineTest, FailToWriteSchema) {
- IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
-
- auto mock_filesystem = std::make_unique<MockFilesystem>();
- // This fails FileBackedProto::Write()
- ON_CALL(*mock_filesystem, OpenForWrite(HasSubstr("schema.pb")))
- .WillByDefault(Return(-1));
-
- TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem),
- std::make_unique<IcingFilesystem>(),
- std::make_unique<FakeClock>(), GetTestJniCache());
-
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SetSchemaResultProto set_schema_result_proto =
- icing.SetSchema(CreateMessageSchema());
- EXPECT_THAT(set_schema_result_proto.status(),
- ProtoStatusIs(StatusProto::INTERNAL));
- EXPECT_THAT(set_schema_result_proto.status().message(),
- HasSubstr("Unable to open file for write"));
-}
-
-TEST_F(IcingSearchEngineTest, SetSchemaIncompatibleFails) {
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 1. Create a schema with an Email type with properties { "title", "body"}
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- // 2. Add an email document
- DocumentProto doc = DocumentBuilder()
- .SetKey("emails", "email#1")
- .SetSchema("Email")
- .AddStringProperty("title", "Hello world.")
- .AddStringProperty("body", "Goodnight Moon.")
- .Build();
- EXPECT_THAT(icing.Put(std::move(doc)).status(), ProtoIsOk());
- }
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 3. Set a schema that deletes email. This should fail.
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_schema_type("Message");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- EXPECT_THAT(
- icing.SetSchema(schema, /*ignore_errors_and_delete_documents=*/false)
- .status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
-
- // 4. Try to delete by email type. This should succeed because email wasn't
- // deleted in step 3.
- EXPECT_THAT(icing.DeleteBySchemaType("Email").status(), ProtoIsOk());
- }
-}
-
-TEST_F(IcingSearchEngineTest, SetSchemaIncompatibleForceOverrideSucceeds) {
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 1. Create a schema with an Email type with properties { "title", "body"}
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- // 2. Add an email document
- DocumentProto doc = DocumentBuilder()
- .SetKey("emails", "email#1")
- .SetSchema("Email")
- .AddStringProperty("title", "Hello world.")
- .AddStringProperty("body", "Goodnight Moon.")
- .Build();
- EXPECT_THAT(icing.Put(std::move(doc)).status(), ProtoIsOk());
- }
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 3. Set a schema that deletes email with force override. This should
- // succeed and delete the email type.
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_schema_type("Message");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- EXPECT_THAT(icing.SetSchema(schema, true).status(), ProtoIsOk());
-
- // 4. Try to delete by email type. This should fail because email was
- // already deleted.
- EXPECT_THAT(icing.DeleteBySchemaType("Email").status(),
- ProtoStatusIs(StatusProto::NOT_FOUND));
- }
-}
-
-TEST_F(IcingSearchEngineTest, SetSchemaUnsetVersionIsZero) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 1. Create a schema with an Email type with version 1
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(0));
-}
-
-TEST_F(IcingSearchEngineTest, SetSchemaCompatibleVersionUpdateSucceeds) {
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 1. Create a schema with an Email type with version 1
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_version(1);
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- SetSchemaResultProto set_schema_result = icing.SetSchema(schema);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- SetSchemaResultProto expected_set_schema_result;
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- expected_set_schema_result.mutable_new_schema_types()->Add("Email");
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
- }
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 2. Create schema that adds a new optional property and updates version.
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_version(2);
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- // 3. SetSchema should succeed and the version number should be updated.
- SetSchemaResultProto set_schema_result = icing.SetSchema(schema, true);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- SetSchemaResultProto expected_set_schema_result;
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- expected_set_schema_result.mutable_fully_compatible_changed_schema_types()
- ->Add("Email");
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2));
- }
-}
-
-TEST_F(IcingSearchEngineTest, SetSchemaIncompatibleVersionUpdateFails) {
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 1. Create a schema with an Email type with version 1
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_version(1);
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
- }
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 2. Create schema that makes an incompatible change (OPTIONAL -> REQUIRED)
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_version(2);
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- // 3. SetSchema should fail and the version number should NOT be updated.
- EXPECT_THAT(icing.SetSchema(schema).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
-
- EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
- }
-}
-
-TEST_F(IcingSearchEngineTest,
- SetSchemaIncompatibleVersionUpdateForceOverrideSucceeds) {
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 1. Create a schema with an Email type with version 1
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_version(1);
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
- }
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 2. Create schema that makes an incompatible change (OPTIONAL -> REQUIRED)
- // with force override to true.
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_version(2);
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- // 3. SetSchema should succeed and the version number should be updated.
- EXPECT_THAT(icing.SetSchema(schema, true).status(), ProtoIsOk());
-
- EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2));
- }
-}
-
-TEST_F(IcingSearchEngineTest, SetSchemaNoChangeVersionUpdateSucceeds) {
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 1. Create a schema with an Email type with version 1
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_version(1);
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
- }
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 2. Create schema that only changes the version.
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_version(2);
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- // 3. SetSchema should succeed and the version number should be updated.
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2));
- }
-}
-
-TEST_F(IcingSearchEngineTest, SetSchemaDuplicateTypesReturnsAlreadyExists) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Create a schema with types { "Email", "Message" and "Email" }
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- type = schema.add_types();
- type->set_schema_type("Message");
- property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- *schema.add_types() = schema.types(0);
-
- EXPECT_THAT(icing.SetSchema(schema).status(),
- ProtoStatusIs(StatusProto::ALREADY_EXISTS));
-}
-
-TEST_F(IcingSearchEngineTest,
- SetSchemaDuplicatePropertiesReturnsAlreadyExists) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Create a schema with an Email type with properties { "title", "body" and
- // "title" }
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- EXPECT_THAT(icing.SetSchema(schema).status(),
- ProtoStatusIs(StatusProto::ALREADY_EXISTS));
-}
-
-TEST_F(IcingSearchEngineTest, SetSchema) {
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(1000);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- auto message_document = CreateMessageDocument("namespace", "uri");
-
- auto schema_with_message = CreateMessageSchema();
-
- SchemaProto schema_with_email;
- SchemaTypeConfigProto* type = schema_with_email.add_types();
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- SchemaProto schema_with_email_and_message = schema_with_email;
- type = schema_with_email_and_message.add_types();
- type->set_schema_type("Message");
- property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- // Create an arbitrary invalid schema
- SchemaProto invalid_schema;
- SchemaTypeConfigProto* empty_type = invalid_schema.add_types();
- empty_type->set_schema_type("");
-
- // Make sure we can't set invalid schemas
- SetSchemaResultProto set_schema_result = icing.SetSchema(invalid_schema);
- EXPECT_THAT(set_schema_result.status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
- EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000));
-
- // Can add an document of a set schema
- set_schema_result = icing.SetSchema(schema_with_message);
- EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::OK));
- EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000));
- EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk());
-
- // Schema with Email doesn't have Message, so would result incompatible
- // data
- set_schema_result = icing.SetSchema(schema_with_email);
- EXPECT_THAT(set_schema_result.status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000));
-
- // Can expand the set of schema types and add an document of a new
- // schema type
- set_schema_result = icing.SetSchema(schema_with_email_and_message);
- EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::OK));
- EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000));
-
- EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk());
- // Can't add an document whose schema isn't set
- auto photo_document = DocumentBuilder()
- .SetKey("namespace", "uri")
- .SetSchema("Photo")
- .AddStringProperty("creator", "icing")
- .Build();
- PutResultProto put_result_proto = icing.Put(photo_document);
- EXPECT_THAT(put_result_proto.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
- EXPECT_THAT(put_result_proto.status().message(),
- HasSubstr("'Photo' not found"));
-}
-
-TEST_F(IcingSearchEngineTest,
- SetSchemaNewIndexedPropertyTriggersIndexRestorationAndReturnsOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SchemaProto schema_with_no_indexed_property = CreateMessageSchema();
- schema_with_no_indexed_property.mutable_types(0)
- ->mutable_properties(0)
- ->clear_string_indexing_config();
-
- SetSchemaResultProto set_schema_result =
- icing.SetSchema(schema_with_no_indexed_property);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- SetSchemaResultProto expected_set_schema_result;
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- expected_set_schema_result.mutable_new_schema_types()->Add("Message");
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- // Nothing will be index and Search() won't return anything.
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto empty_result;
- empty_result.mutable_status()->set_code(StatusProto::OK);
-
- SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results,
- EqualsSearchResultIgnoreStatsAndScores(empty_result));
-
- SchemaProto schema_with_indexed_property = CreateMessageSchema();
- // Index restoration should be triggered here because new schema requires more
- // properties to be indexed.
- set_schema_result = icing.SetSchema(schema_with_indexed_property);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- expected_set_schema_result = SetSchemaResultProto();
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
- ->Add("Message");
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- SetSchemaChangeNestedPropertiesTriggersIndexRestorationAndReturnsOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SchemaTypeConfigProto person_proto =
- SchemaTypeConfigBuilder()
- .SetType("Person")
- .AddProperty(
- PropertyConfigBuilder()
- .SetName("name")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .Build();
- SchemaProto nested_schema =
- SchemaBuilder()
- .AddType(person_proto)
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("sender")
- .SetDataTypeDocument(
- "Person",
- /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
-
- SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- SetSchemaResultProto expected_set_schema_result;
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- expected_set_schema_result.mutable_new_schema_types()->Add("Email");
- expected_set_schema_result.mutable_new_schema_types()->Add("Person");
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- DocumentProto document =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(1000)
- .AddStringProperty("subject",
- "Did you get the memo about TPS reports?")
- .AddDocumentProperty("sender",
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Person")
- .AddStringProperty("name", "Bill Lundbergh")
- .Build())
- .Build();
-
- // "sender.name" should get assigned property id 0 and subject should get
- // property id 1.
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- // document should match a query for 'Bill' in 'sender.name', but not in
- // 'subject'
- SearchSpecProto search_spec;
- search_spec.set_query("sender.name:Bill");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto result;
- result.mutable_status()->set_code(StatusProto::OK);
- *result.mutable_results()->Add()->mutable_document() = document;
-
- SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result));
-
- SearchResultProto empty_result;
- empty_result.mutable_status()->set_code(StatusProto::OK);
- search_spec.set_query("subject:Bill");
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results,
- EqualsSearchResultIgnoreStatsAndScores(empty_result));
-
- // Now update the schema with index_nested_properties=false. This should
- // reassign property ids, lead to an index rebuild and ensure that nothing
- // match a query for "Bill".
- SchemaProto no_nested_schema =
- SchemaBuilder()
- .AddType(person_proto)
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("sender")
- .SetDataTypeDocument(
- "Person",
- /*index_nested_properties=*/false)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
-
- set_schema_result = icing.SetSchema(no_nested_schema);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- expected_set_schema_result = SetSchemaResultProto();
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
- ->Add("Email");
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- // document shouldn't match a query for 'Bill' in either 'sender.name' or
- // 'subject'
- search_spec.set_query("sender.name:Bill");
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results,
- EqualsSearchResultIgnoreStatsAndScores(empty_result));
-
- search_spec.set_query("subject:Bill");
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results,
- EqualsSearchResultIgnoreStatsAndScores(empty_result));
-}
-
-TEST_F(IcingSearchEngineTest,
- ForceSetSchemaPropertyDeletionTriggersIndexRestorationAndReturnsOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 'body' should have a property id of 0 and 'subject' should have a property
- // id of 1.
- SchemaProto email_with_body_schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
-
- SetSchemaResultProto set_schema_result =
- icing.SetSchema(email_with_body_schema);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- SetSchemaResultProto expected_set_schema_result;
- expected_set_schema_result.mutable_new_schema_types()->Add("Email");
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- // Create a document with only a subject property.
- DocumentProto document =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(1000)
- .AddStringProperty("subject",
- "Did you get the memo about TPS reports?")
- .Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- // We should be able to retrieve the document by searching for 'tps' in
- // 'subject'.
- SearchSpecProto search_spec;
- search_spec.set_query("subject:tps");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto result;
- result.mutable_status()->set_code(StatusProto::OK);
- *result.mutable_results()->Add()->mutable_document() = document;
-
- SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result));
-
- // Now update the schema to remove the 'body' field. This is backwards
- // incompatible, but document should be preserved because it doesn't contain a
- // 'body' field. If the index is correctly rebuilt, then 'subject' will now
- // have a property id of 0. If not, then the hits in the index will still have
- // have a property id of 1 and therefore it won't be found.
- SchemaProto email_no_body_schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("Email").AddProperty(
- PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
-
- set_schema_result = icing.SetSchema(
- email_no_body_schema, /*ignore_errors_and_delete_documents=*/true);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- expected_set_schema_result = SetSchemaResultProto();
- expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email");
- expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
- ->Add("Email");
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- // We should be able to retrieve the document by searching for 'tps' in
- // 'subject'.
- search_spec.set_query("subject:tps");
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result));
-}
-
-TEST_F(
- IcingSearchEngineTest,
- ForceSetSchemaPropertyDeletionAndAdditionTriggersIndexRestorationAndReturnsOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 'body' should have a property id of 0 and 'subject' should have a property
- // id of 1.
- SchemaProto email_with_body_schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
-
- SetSchemaResultProto set_schema_result =
- icing.SetSchema(email_with_body_schema);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- SetSchemaResultProto expected_set_schema_result;
- expected_set_schema_result.mutable_new_schema_types()->Add("Email");
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- // Create a document with only a subject property.
- DocumentProto document =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(1000)
- .AddStringProperty("subject",
- "Did you get the memo about TPS reports?")
- .Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- // We should be able to retrieve the document by searching for 'tps' in
- // 'subject'.
- SearchSpecProto search_spec;
- search_spec.set_query("subject:tps");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto result;
- result.mutable_status()->set_code(StatusProto::OK);
- *result.mutable_results()->Add()->mutable_document() = document;
-
- SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result));
-
- // Now update the schema to remove the 'body' field. This is backwards
- // incompatible, but document should be preserved because it doesn't contain a
- // 'body' field. If the index is correctly rebuilt, then 'subject' and 'to'
- // will now have property ids of 0 and 1 respectively. If not, then the hits
- // in the index will still have have a property id of 1 and therefore it won't
- // be found.
- SchemaProto email_no_body_schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("to")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
-
- set_schema_result = icing.SetSchema(
- email_no_body_schema, /*ignore_errors_and_delete_documents=*/true);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- expected_set_schema_result = SetSchemaResultProto();
- expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email");
- expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
- ->Add("Email");
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- // We should be able to retrieve the document by searching for 'tps' in
- // 'subject'.
- search_spec.set_query("subject:tps");
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result));
-}
-
-TEST_F(IcingSearchEngineTest, ForceSetSchemaIncompatibleNestedDocsAreDeleted) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SchemaTypeConfigProto email_schema_type =
- SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(
- PropertyConfigBuilder()
- .SetName("sender")
- .SetDataTypeDocument("Person",
- /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(
- PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .Build();
- SchemaProto nested_schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Person")
- .AddProperty(PropertyConfigBuilder()
- .SetName("name")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("company")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .AddType(email_schema_type)
- .Build();
-
- SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- SetSchemaResultProto expected_set_schema_result;
- expected_set_schema_result.mutable_new_schema_types()->Add("Email");
- expected_set_schema_result.mutable_new_schema_types()->Add("Person");
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- // Create two documents - a person document and an email document - both docs
- // should be deleted when we remove the 'company' field from the person type.
- DocumentProto person_document =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Person")
- .SetCreationTimestampMs(1000)
- .AddStringProperty("name", "Bill Lundbergh")
- .AddStringProperty("company", "Initech Corp.")
- .Build();
- EXPECT_THAT(icing.Put(person_document).status(), ProtoIsOk());
-
- DocumentProto email_document =
- DocumentBuilder()
- .SetKey("namespace1", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(1000)
- .AddStringProperty("subject",
- "Did you get the memo about TPS reports?")
- .AddDocumentProperty("sender", person_document)
- .Build();
- EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk());
-
- // We should be able to retrieve both documents.
- GetResultProto get_result =
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance());
- EXPECT_THAT(get_result.status(), ProtoIsOk());
- EXPECT_THAT(get_result.document(), EqualsProto(person_document));
-
- get_result =
- icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance());
- EXPECT_THAT(get_result.status(), ProtoIsOk());
- EXPECT_THAT(get_result.document(), EqualsProto(email_document));
-
- // Now update the schema to remove the 'company' field. This is backwards
- // incompatible, *both* documents should be deleted because both fail
- // validation (they each contain a 'Person' that has a non-existent property).
- nested_schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
- PropertyConfigBuilder()
- .SetName("name")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .AddType(email_schema_type)
- .Build();
-
- set_schema_result = icing.SetSchema(
- nested_schema, /*ignore_errors_and_delete_documents=*/true);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- expected_set_schema_result = SetSchemaResultProto();
- expected_set_schema_result.mutable_incompatible_schema_types()->Add("Person");
- expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email");
- expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
- ->Add("Email");
- expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
- ->Add("Person");
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- // Both documents should be deleted now.
- get_result =
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance());
- EXPECT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
-
- get_result =
- icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance());
- EXPECT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
-}
-
-TEST_F(IcingSearchEngineTest, SetSchemaRevalidatesDocumentsAndReturnsOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SchemaProto schema_with_optional_subject;
- auto type = schema_with_optional_subject.add_types();
- type->set_schema_type("email");
-
- // Add a OPTIONAL property
- auto property = type->add_properties();
- property->set_property_name("subject");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- EXPECT_THAT(icing.SetSchema(schema_with_optional_subject).status(),
- ProtoIsOk());
-
- DocumentProto email_document_without_subject =
- DocumentBuilder()
- .SetKey("namespace", "without_subject")
- .SetSchema("email")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto email_document_with_subject =
- DocumentBuilder()
- .SetKey("namespace", "with_subject")
- .SetSchema("email")
- .AddStringProperty("subject", "foo")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- EXPECT_THAT(icing.Put(email_document_without_subject).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(email_document_with_subject).status(), ProtoIsOk());
-
- SchemaProto schema_with_required_subject;
- type = schema_with_required_subject.add_types();
- type->set_schema_type("email");
-
- // Add a REQUIRED property
- property = type->add_properties();
- property->set_property_name("subject");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- // Can't set the schema since it's incompatible
- SetSchemaResultProto set_schema_result =
- icing.SetSchema(schema_with_required_subject);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- SetSchemaResultProto expected_set_schema_result_proto;
- expected_set_schema_result_proto.mutable_status()->set_code(
- StatusProto::FAILED_PRECONDITION);
- expected_set_schema_result_proto.mutable_status()->set_message(
- "Schema is incompatible.");
- expected_set_schema_result_proto.add_incompatible_schema_types("email");
-
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result_proto));
-
- // Force set it
- set_schema_result =
- icing.SetSchema(schema_with_required_subject,
- /*ignore_errors_and_delete_documents=*/true);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- expected_set_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
- expected_set_schema_result_proto.mutable_status()->clear_message();
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result_proto));
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = email_document_with_subject;
-
- EXPECT_THAT(icing.Get("namespace", "with_subject",
- GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // The document without a subject got deleted because it failed validation
- // against the new schema
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, without_subject) not found.");
- expected_get_result_proto.clear_document();
-
- EXPECT_THAT(icing.Get("namespace", "without_subject",
- GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SetSchemaDeletesDocumentsAndReturnsOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("email");
- type = schema.add_types();
- type->set_schema_type("message");
-
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- DocumentProto email_document =
- DocumentBuilder()
- .SetKey("namespace", "email_uri")
- .SetSchema("email")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto message_document =
- DocumentBuilder()
- .SetKey("namespace", "message_uri")
- .SetSchema("message")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk());
-
- // Clear the schema and only add the "email" type, essentially deleting the
- // "message" type
- SchemaProto new_schema;
- type = new_schema.add_types();
- type->set_schema_type("email");
-
- // Can't set the schema since it's incompatible
- SetSchemaResultProto set_schema_result = icing.SetSchema(new_schema);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- SetSchemaResultProto expected_result;
- expected_result.mutable_status()->set_code(StatusProto::FAILED_PRECONDITION);
- expected_result.mutable_status()->set_message("Schema is incompatible.");
- expected_result.add_deleted_schema_types("message");
-
- EXPECT_THAT(set_schema_result, EqualsProto(expected_result));
-
- // Force set it
- set_schema_result =
- icing.SetSchema(new_schema,
- /*ignore_errors_and_delete_documents=*/true);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- expected_result.mutable_status()->set_code(StatusProto::OK);
- expected_result.mutable_status()->clear_message();
- EXPECT_THAT(set_schema_result, EqualsProto(expected_result));
-
- // "email" document is still there
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = email_document;
-
- EXPECT_THAT(icing.Get("namespace", "email_uri",
- GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // "message" document got deleted
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, message_uri) not found.");
- expected_get_result_proto.clear_document();
-
- EXPECT_THAT(icing.Get("namespace", "message_uri",
- GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, GetSchemaNotFound) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- EXPECT_THAT(icing.GetSchema().status(),
- ProtoStatusIs(StatusProto::NOT_FOUND));
-}
-
-TEST_F(IcingSearchEngineTest, GetSchemaOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- GetSchemaResultProto expected_get_schema_result_proto;
- expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema();
- EXPECT_THAT(icing.GetSchema(), EqualsProto(expected_get_schema_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, GetSchemaTypeFailedPrecondition) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- GetSchemaTypeResultProto get_schema_type_result_proto =
- icing.GetSchemaType("nonexistent_schema");
- EXPECT_THAT(get_schema_type_result_proto.status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(get_schema_type_result_proto.status().message(),
- HasSubstr("Schema not set"));
-}
-
-TEST_F(IcingSearchEngineTest, GetSchemaTypeOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- GetSchemaTypeResultProto expected_get_schema_type_result_proto;
- expected_get_schema_type_result_proto.mutable_status()->set_code(
- StatusProto::OK);
- *expected_get_schema_type_result_proto.mutable_schema_type_config() =
- CreateMessageSchema().types(0);
- EXPECT_THAT(icing.GetSchemaType(CreateMessageSchema().types(0).schema_type()),
- EqualsProto(expected_get_schema_type_result_proto));
-}
-
TEST_F(IcingSearchEngineTest, GetDocument) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
@@ -2147,3423 +428,406 @@ TEST_F(IcingSearchEngineTest,
EqualsProto(expected_get_result_proto));
}
-TEST_F(IcingSearchEngineTest, SearchReturnsValidResults) {
+TEST_F(IcingSearchEngineTest, GetDocumentProjectionPolymorphism) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
- ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
-
- DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- ResultSpecProto result_spec;
- result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(1);
- result_spec.mutable_snippet_spec()->set_num_to_snippet(1);
-
- SearchResultProto results =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(results.status(), ProtoIsOk());
- EXPECT_THAT(results.results(), SizeIs(2));
-
- const DocumentProto& document = results.results(0).document();
- EXPECT_THAT(document, EqualsProto(document_two));
-
- const SnippetProto& snippet = results.results(0).snippet();
- EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
- std::string_view content =
- GetString(&document, snippet.entries(0).property_name());
- EXPECT_THAT(GetWindows(content, snippet.entries(0)),
- ElementsAre("message body"));
- EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("message"));
-
- EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
- EXPECT_THAT(results.results(1).snippet().entries(), IsEmpty());
-
- search_spec.set_query("foo");
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchReturnsScoresDocumentScore) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
- document_one.set_score(93);
- document_one.set_creation_timestamp_ms(10000);
- ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
-
- DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
- document_two.set_score(15);
- document_two.set_creation_timestamp_ms(12000);
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- // Rank by DOCUMENT_SCORE and ensure that the score field is populated with
- // document score.
- ScoringSpecProto scoring_spec;
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
-
- SearchResultProto results = icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance());
- EXPECT_THAT(results.status(), ProtoIsOk());
- EXPECT_THAT(results.results(), SizeIs(2));
-
- EXPECT_THAT(results.results(0).document(), EqualsProto(document_one));
- EXPECT_THAT(results.results(0).score(), 93);
- EXPECT_THAT(results.results(1).document(), EqualsProto(document_two));
- EXPECT_THAT(results.results(1).score(), 15);
-}
-
-TEST_F(IcingSearchEngineTest, SearchReturnsScoresCreationTimestamp) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
- document_one.set_score(93);
- document_one.set_creation_timestamp_ms(10000);
- ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
-
- DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
- document_two.set_score(15);
- document_two.set_creation_timestamp_ms(12000);
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- // Rank by CREATION_TS and ensure that the score field is populated with
- // creation ts.
- ScoringSpecProto scoring_spec;
- scoring_spec.set_rank_by(
- ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
-
- SearchResultProto results = icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance());
- EXPECT_THAT(results.status(), ProtoIsOk());
- EXPECT_THAT(results.results(), SizeIs(2));
-
- EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
- EXPECT_THAT(results.results(0).score(), 12000);
- EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
- EXPECT_THAT(results.results(1).score(), 10000);
-}
-
-TEST_F(IcingSearchEngineTest, SearchReturnsOneResult) {
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(1000);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
- ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
-
- DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(1);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document_two;
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
-
- EXPECT_THAT(search_result_proto.query_stats().latency_ms(), Eq(1000));
- EXPECT_THAT(search_result_proto.query_stats().parse_query_latency_ms(),
- Eq(1000));
- EXPECT_THAT(search_result_proto.query_stats().scoring_latency_ms(), Eq(1000));
- EXPECT_THAT(search_result_proto.query_stats().ranking_latency_ms(), Eq(1000));
- EXPECT_THAT(search_result_proto.query_stats().document_retrieval_latency_ms(),
- Eq(1000));
- EXPECT_THAT(search_result_proto.query_stats().lock_acquisition_latency_ms(),
- Eq(1000));
-
- // The token is a random number so we don't verify it.
- expected_search_result_proto.set_next_page_token(
- search_result_proto.next_page_token());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchZeroResultLimitReturnsEmptyResults) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("");
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(0);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchNegativeResultLimitReturnsInvalidArgument) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("");
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(-5);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(
- StatusProto::INVALID_ARGUMENT);
- expected_search_result_proto.mutable_status()->set_message(
- "ResultSpecProto.num_per_page cannot be negative.");
- SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchNonPositivePageTotalBytesLimitReturnsInvalidArgument) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("");
-
- ResultSpecProto result_spec;
- result_spec.set_num_total_bytes_per_page_threshold(-1);
-
- SearchResultProto actual_results1 =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(actual_results1.status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-
- result_spec.set_num_total_bytes_per_page_threshold(0);
- SearchResultProto actual_results2 =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(actual_results2.status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, SearchWithPersistenceReturnsValidResults) {
- IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
-
- {
- // Set the schema up beforehand.
- IcingSearchEngine icing(icing_options, GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- // Schema will be persisted to disk when icing goes out of scope.
- }
-
- {
- // Ensure that icing initializes the schema and section_manager
- // properly from the pre-existing file.
- IcingSearchEngine icing(icing_options, GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
- // The index and document store will be persisted to disk when icing goes
- // out of scope.
- }
-
- {
- // Ensure that the index is brought back up without problems and we
- // can query for the content that we expect.
- IcingSearchEngine icing(icing_options, GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- search_spec.set_query("foo");
-
- SearchResultProto empty_result;
- empty_result.mutable_status()->set_code(StatusProto::OK);
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results,
- EqualsSearchResultIgnoreStatsAndScores(empty_result));
- }
-}
-
-TEST_F(IcingSearchEngineTest, SearchShouldReturnEmpty) {
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(1000);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- // Empty result, no next-page token
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
-
- EXPECT_THAT(search_result_proto.query_stats().latency_ms(), Eq(1000));
- EXPECT_THAT(search_result_proto.query_stats().parse_query_latency_ms(),
- Eq(1000));
- EXPECT_THAT(search_result_proto.query_stats().scoring_latency_ms(), Eq(1000));
- EXPECT_THAT(search_result_proto.query_stats().ranking_latency_ms(), Eq(0));
- EXPECT_THAT(search_result_proto.query_stats().document_retrieval_latency_ms(),
- Eq(0));
- EXPECT_THAT(search_result_proto.query_stats().lock_acquisition_latency_ms(),
- Eq(1000));
-
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchShouldReturnMultiplePages) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates and inserts 5 documents
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
- DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
- DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(2);
-
- // Searches and gets the first page, 2 results
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document5;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document4;
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken));
- uint64_t next_page_token = search_result_proto.next_page_token();
- // Since the token is a random number, we don't need to verify
- expected_search_result_proto.set_next_page_token(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- // Second page, 2 results
- expected_search_result_proto.clear_results();
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- search_result_proto = icing.GetNextPage(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- // Third page, 1 result
- expected_search_result_proto.clear_results();
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
- // Because there are no more results, we should not return the next page
- // token.
- expected_search_result_proto.clear_next_page_token();
- search_result_proto = icing.GetNextPage(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- // No more results
- expected_search_result_proto.clear_results();
- search_result_proto = icing.GetNextPage(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchWithNoScoringShouldReturnMultiplePages) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates and inserts 5 documents
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
- DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
- DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- ScoringSpecProto scoring_spec;
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE);
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(2);
-
- // Searches and gets the first page, 2 results
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document5;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document4;
- SearchResultProto search_result_proto =
- icing.Search(search_spec, scoring_spec, result_spec);
- EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken));
- uint64_t next_page_token = search_result_proto.next_page_token();
- // Since the token is a random number, we don't need to verify
- expected_search_result_proto.set_next_page_token(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- // Second page, 2 results
- expected_search_result_proto.clear_results();
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- search_result_proto = icing.GetNextPage(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- // Third page, 1 result
- expected_search_result_proto.clear_results();
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
- // Because there are no more results, we should not return the next page
- // token.
- expected_search_result_proto.clear_next_page_token();
- search_result_proto = icing.GetNextPage(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- // No more results
- expected_search_result_proto.clear_results();
- search_result_proto = icing.GetNextPage(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchWithUnknownEnabledFeatureShouldReturnError) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
- search_spec.add_enabled_features("BAD_FEATURE");
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto.status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, ShouldReturnMultiplePagesWithSnippets) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates and inserts 5 documents
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
- DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
- DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(2);
- result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(1);
- result_spec.mutable_snippet_spec()->set_num_to_snippet(3);
-
- // Searches and gets the first page, 2 results with 2 snippets
- SearchResultProto search_result =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- ASSERT_THAT(search_result.status(), ProtoIsOk());
- ASSERT_THAT(search_result.results(), SizeIs(2));
- ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
-
- const DocumentProto& document_result_1 = search_result.results(0).document();
- EXPECT_THAT(document_result_1, EqualsProto(document5));
- const SnippetProto& snippet_result_1 = search_result.results(0).snippet();
- EXPECT_THAT(snippet_result_1.entries(), SizeIs(1));
- EXPECT_THAT(snippet_result_1.entries(0).property_name(), Eq("body"));
- std::string_view content = GetString(
- &document_result_1, snippet_result_1.entries(0).property_name());
- EXPECT_THAT(GetWindows(content, snippet_result_1.entries(0)),
- ElementsAre("message body"));
- EXPECT_THAT(GetMatches(content, snippet_result_1.entries(0)),
- ElementsAre("message"));
-
- const DocumentProto& document_result_2 = search_result.results(1).document();
- EXPECT_THAT(document_result_2, EqualsProto(document4));
- const SnippetProto& snippet_result_2 = search_result.results(1).snippet();
- EXPECT_THAT(snippet_result_2.entries(0).property_name(), Eq("body"));
- content = GetString(&document_result_2,
- snippet_result_2.entries(0).property_name());
- EXPECT_THAT(GetWindows(content, snippet_result_2.entries(0)),
- ElementsAre("message body"));
- EXPECT_THAT(GetMatches(content, snippet_result_2.entries(0)),
- ElementsAre("message"));
-
- // Second page, 2 result with 1 snippet
- search_result = icing.GetNextPage(search_result.next_page_token());
- ASSERT_THAT(search_result.status(), ProtoIsOk());
- ASSERT_THAT(search_result.results(), SizeIs(2));
- ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
-
- const DocumentProto& document_result_3 = search_result.results(0).document();
- EXPECT_THAT(document_result_3, EqualsProto(document3));
- const SnippetProto& snippet_result_3 = search_result.results(0).snippet();
- EXPECT_THAT(snippet_result_3.entries(0).property_name(), Eq("body"));
- content = GetString(&document_result_3,
- snippet_result_3.entries(0).property_name());
- EXPECT_THAT(GetWindows(content, snippet_result_3.entries(0)),
- ElementsAre("message body"));
- EXPECT_THAT(GetMatches(content, snippet_result_3.entries(0)),
- ElementsAre("message"));
-
- EXPECT_THAT(search_result.results(1).document(), EqualsProto(document2));
- EXPECT_THAT(search_result.results(1).snippet().entries(), IsEmpty());
-
- // Third page, 1 result with 0 snippets
- search_result = icing.GetNextPage(search_result.next_page_token());
- ASSERT_THAT(search_result.status(), ProtoIsOk());
- ASSERT_THAT(search_result.results(), SizeIs(1));
- ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken));
-
- EXPECT_THAT(search_result.results(0).document(), EqualsProto(document1));
- EXPECT_THAT(search_result.results(0).snippet().entries(), IsEmpty());
-}
-
-TEST_F(IcingSearchEngineTest, ShouldInvalidateNextPageToken) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(1);
-
- // Searches and gets the first page, 1 result
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken));
- uint64_t next_page_token = search_result_proto.next_page_token();
- // Since the token is a random number, we don't need to verify
- expected_search_result_proto.set_next_page_token(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
- // Now document1 is still to be fetched.
-
- // Invalidates token
- icing.InvalidateNextPageToken(next_page_token);
-
- // Tries to fetch the second page, no result since it's invalidated
- expected_search_result_proto.clear_results();
- expected_search_result_proto.clear_next_page_token();
- search_result_proto = icing.GetNextPage(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- AllPageTokensShouldBeInvalidatedAfterOptimization) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(1);
-
- // Searches and gets the first page, 1 result
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken));
- uint64_t next_page_token = search_result_proto.next_page_token();
- // Since the token is a random number, we don't need to verify
- expected_search_result_proto.set_next_page_token(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
- // Now document1 is still to be fetched.
-
- OptimizeResultProto optimize_result_proto;
- optimize_result_proto.mutable_status()->set_code(StatusProto::OK);
- optimize_result_proto.mutable_status()->set_message("");
- OptimizeResultProto actual_result = icing.Optimize();
- actual_result.clear_optimize_stats();
- ASSERT_THAT(actual_result, EqualsProto(optimize_result_proto));
-
- // Tries to fetch the second page, no results since all tokens have been
- // invalidated during Optimize()
- expected_search_result_proto.clear_results();
- expected_search_result_proto.clear_next_page_token();
- search_result_proto = icing.GetNextPage(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, OptimizationShouldRemoveDeletedDocs) {
- IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
-
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, uri1) not found.");
- {
- IcingSearchEngine icing(icing_options, GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
-
- // Deletes document1
- ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk());
- const std::string document_log_path =
- icing_options.base_dir() + "/document_dir/" +
- DocumentLogCreator::GetDocumentLogFilename();
- int64_t document_log_size_before =
- filesystem()->GetFileSize(document_log_path.c_str());
- ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
- int64_t document_log_size_after =
- filesystem()->GetFileSize(document_log_path.c_str());
-
- // Validates that document can't be found right after Optimize()
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- // Validates that document is actually removed from document log
- EXPECT_THAT(document_log_size_after, Lt(document_log_size_before));
- } // Destroys IcingSearchEngine to make sure nothing is cached.
-
- IcingSearchEngine icing(icing_options, GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, OptimizationShouldDeleteTemporaryDirectory) {
- IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
- IcingSearchEngine icing(icing_options, GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Create a tmp dir that will be used in Optimize() to swap files,
- // this validates that any tmp dirs will be deleted before using.
- const std::string tmp_dir =
- icing_options.base_dir() + "/document_dir_optimize_tmp";
-
- const std::string tmp_file = tmp_dir + "/file";
- ASSERT_TRUE(filesystem()->CreateDirectory(tmp_dir.c_str()));
- ScopedFd fd(filesystem()->OpenForWrite(tmp_file.c_str()));
- ASSERT_TRUE(fd.is_valid());
- ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
- fd.reset();
-
- EXPECT_THAT(icing.Optimize().status(), ProtoIsOk());
-
- EXPECT_FALSE(filesystem()->DirectoryExists(tmp_dir.c_str()));
- EXPECT_FALSE(filesystem()->FileExists(tmp_file.c_str()));
-}
-
-TEST_F(IcingSearchEngineTest, GetOptimizeInfoHasCorrectStats) {
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .SetCreationTimestampMs(100)
- .SetTtlMs(500)
- .Build();
-
- {
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetSystemTimeMilliseconds(1000);
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Just initialized, nothing is optimizable yet.
- GetOptimizeInfoResultProto optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status(), ProtoIsOk());
- EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
- EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
- EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0));
-
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
-
- // Only have active documents, nothing is optimizable yet.
- optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status(), ProtoIsOk());
- EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
- EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
- EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0));
-
- // Deletes document1
- ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk());
-
- optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status(), ProtoIsOk());
- EXPECT_THAT(optimize_info.optimizable_docs(), Eq(1));
- EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Gt(0));
- EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0));
- int64_t first_estimated_optimizable_bytes =
- optimize_info.estimated_optimizable_bytes();
-
- // Add a second document, but it'll be expired since the time (1000) is
- // greater than the document's creation timestamp (100) + the document's ttl
- // (500)
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status(), ProtoIsOk());
- EXPECT_THAT(optimize_info.optimizable_docs(), Eq(2));
- EXPECT_THAT(optimize_info.estimated_optimizable_bytes(),
- Gt(first_estimated_optimizable_bytes));
- EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0));
-
- // Optimize
- ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
- }
-
- {
- // Recreate with new time
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetSystemTimeMilliseconds(5000);
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Nothing is optimizable now that everything has been optimized away.
- GetOptimizeInfoResultProto optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status(), ProtoIsOk());
- EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
- EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
- EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(4000));
- }
-}
-
-TEST_F(IcingSearchEngineTest, GetAndPutShouldWorkAfterOptimization) {
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
- DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
- DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Delete("namespace", "uri2").status(), ProtoIsOk());
- ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
-
- // Validates that Get() and Put() are good right after Optimize()
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- EXPECT_THAT(
- icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
- .status()
- .code(),
- Eq(StatusProto::NOT_FOUND));
- *expected_get_result_proto.mutable_document() = document3;
- EXPECT_THAT(
- icing.Get("namespace", "uri3", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- EXPECT_THAT(icing.Put(document4).status(), ProtoIsOk());
- } // Destroys IcingSearchEngine to make sure nothing is cached.
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- EXPECT_THAT(
- icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
- .status()
- .code(),
- Eq(StatusProto::NOT_FOUND));
- *expected_get_result_proto.mutable_document() = document3;
- EXPECT_THAT(
- icing.Get("namespace", "uri3", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- *expected_get_result_proto.mutable_document() = document4;
- EXPECT_THAT(
- icing.Get("namespace", "uri4", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- EXPECT_THAT(icing.Put(document5).status(), ProtoIsOk());
-}
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Artist")
+ .AddParentType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("company")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-TEST_F(IcingSearchEngineTest,
- GetAndPutShouldWorkAfterOptimizationWithEmptyDocuments) {
- DocumentProto empty_document1 =
+ // Add a person document and an artist document
+ DocumentProto document_person =
DocumentBuilder()
.SetKey("namespace", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", "")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
.Build();
- DocumentProto empty_document2 =
+ DocumentProto document_artist =
DocumentBuilder()
.SetKey("namespace", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto empty_document3 =
- DocumentBuilder()
- .SetKey("namespace", "uri3")
- .SetSchema("Message")
- .AddStringProperty("body", "")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- ASSERT_THAT(icing.Put(empty_document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(empty_document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Delete("namespace", "uri2").status(), ProtoIsOk());
- ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
-
- // Validates that Get() and Put() are good right after Optimize()
- *expected_get_result_proto.mutable_document() = empty_document1;
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- EXPECT_THAT(
- icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
- .status()
- .code(),
- Eq(StatusProto::NOT_FOUND));
- EXPECT_THAT(icing.Put(empty_document3).status(), ProtoIsOk());
-}
-
-TEST_F(IcingSearchEngineTest, DeleteShouldWorkAfterOptimization) {
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
-
- // Validates that Delete() works right after Optimize()
- EXPECT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk());
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(
- StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, uri1) not found.");
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- expected_get_result_proto.mutable_status()->clear_message();
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- } // Destroys IcingSearchEngine to make sure nothing is cached.
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.Delete("namespace", "uri2").status(), ProtoIsOk());
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, uri1) not found.");
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, uri2) not found.");
- EXPECT_THAT(
- icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, OptimizationFailureUninitializesIcing) {
- // Setup filesystem to fail
- auto mock_filesystem = std::make_unique<MockFilesystem>();
- bool just_swapped_files = false;
- auto create_dir_lambda = [this, &just_swapped_files](const char* dir_name) {
- if (just_swapped_files) {
- // We should fail the first call immediately after swapping files.
- just_swapped_files = false;
- return false;
- }
- return filesystem()->CreateDirectoryRecursively(dir_name);
- };
- ON_CALL(*mock_filesystem, CreateDirectoryRecursively)
- .WillByDefault(create_dir_lambda);
-
- auto swap_lambda = [&just_swapped_files](const char* first_dir,
- const char* second_dir) {
- just_swapped_files = true;
- return false;
- };
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- ON_CALL(*mock_filesystem, SwapFiles(HasSubstr("document_dir_optimize_tmp"),
- HasSubstr("document_dir")))
- .WillByDefault(swap_lambda);
- TestIcingSearchEngine icing(options, std::move(mock_filesystem),
- std::make_unique<IcingFilesystem>(),
- std::make_unique<FakeClock>(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // The mocks should cause an unrecoverable error during Optimize - returning
- // INTERNAL.
- ASSERT_THAT(icing.Optimize().status(), ProtoStatusIs(StatusProto::INTERNAL));
-
- // Ordinary operations should fail safely.
- SchemaProto simple_schema;
- auto type = simple_schema.add_types();
- type->set_schema_type("type0");
- auto property = type->add_properties();
- property->set_property_name("prop0");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- DocumentProto simple_doc = DocumentBuilder()
- .SetKey("namespace0", "uri0")
- .SetSchema("type0")
- .AddStringProperty("prop0", "foo")
- .Build();
-
- SearchSpecProto search_spec;
- search_spec.set_query("foo");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- ResultSpecProto result_spec;
- ScoringSpecProto scoring_spec;
- scoring_spec.set_rank_by(
- ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
-
- EXPECT_THAT(icing.SetSchema(simple_schema).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.Put(simple_doc).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing
- .Get(simple_doc.namespace_(), simple_doc.uri(),
- GetResultSpecProto::default_instance())
- .status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
-
- // Reset should get icing back to a safe (empty) and working state.
- EXPECT_THAT(icing.Reset().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(simple_schema).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(simple_doc).status(), ProtoIsOk());
- EXPECT_THAT(icing
- .Get(simple_doc.namespace_(), simple_doc.uri(),
- GetResultSpecProto::default_instance())
- .status(),
- ProtoIsOk());
- EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(),
- ProtoIsOk());
-}
-
-TEST_F(IcingSearchEngineTest, DeleteBySchemaType) {
- SchemaProto schema;
- // Add an email type
- auto type = schema.add_types();
- type->set_schema_type("email");
- auto property = type->add_properties();
- property->set_property_name("subject");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- property->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- // Add an message type
- type = schema.add_types();
- type->set_schema_type("message");
- property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- property->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("message")
- .AddStringProperty("body", "message body1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("email")
- .AddStringProperty("subject", "message body2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .AddStringProperty("name", "Meg Artist")
+ .AddStringProperty("emailAddress", "artist@aol.com")
+ .AddStringProperty("company", "aol")
.Build();
+ ASSERT_THAT(icing.Put(document_person).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document_artist).status(), ProtoIsOk());
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(7);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
+ // Add type property masks
+ GetResultSpecProto result_spec;
+ TypePropertyMask* person_type_property_mask =
+ result_spec.add_type_property_masks();
+ person_type_property_mask->set_schema_type("Person");
+ person_type_property_mask->add_paths("name");
+ // Since Artist is a child type of Person, the TypePropertyMask for Person
+ // will be merged to Artist's TypePropertyMask by polymorphism, so that 'name'
+ // will also show in Artist's projection results.
+ TypePropertyMask* artist_type_property_mask =
+ result_spec.add_type_property_masks();
+ artist_type_property_mask->set_schema_type("Artist");
+ artist_type_property_mask->add_paths("emailAddress");
+
+ // Verify that the returned person document only contains the 'name' property,
+ // and the returned artist document contain both the 'name' and 'emailAddress'
+ // properties.
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Delete the first type. The first doc should be irretrievable. The
- // second should still be present.
- DeleteBySchemaTypeResultProto result_proto =
- icing.DeleteBySchemaType("message");
- EXPECT_THAT(result_proto.status(), ProtoIsOk());
- DeleteStatsProto exp_stats;
- exp_stats.set_delete_type(DeleteStatsProto::DeleteType::SCHEMA_TYPE);
- exp_stats.set_latency_ms(7);
- exp_stats.set_num_documents_deleted(1);
- EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace1, uri1) not found.");
- expected_get_result_proto.clear_document();
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- expected_get_result_proto.mutable_status()->clear_message();
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Search for "message", only document2 should show up.
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.set_query("message");
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, DeleteSchemaTypeByQuery) {
- SchemaProto schema = CreateMessageSchema();
- // Add an email type
- SchemaProto tmp = CreateEmailSchema();
- *schema.add_types() = tmp.types(0);
-
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema(schema.types(0).schema_type())
- .AddStringProperty("body", "message body1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
+ *expected_get_result_proto.mutable_document() =
DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema(schema.types(1).schema_type())
- .AddStringProperty("subject", "subject subject2")
- .AddStringProperty("body", "message body2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
.Build();
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Delete the first type. The first doc should be irretrievable. The
- // second should still be present.
- SearchSpecProto search_spec;
- search_spec.add_schema_type_filters(schema.types(0).schema_type());
- EXPECT_THAT(icing.DeleteByQuery(search_spec).status(), ProtoIsOk());
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace1, uri1) not found.");
- expected_get_result_proto.clear_document();
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- expected_get_result_proto.mutable_status()->clear_message();
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- search_spec = SearchSpecProto::default_instance();
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
+ ASSERT_THAT(icing.Get("namespace", "uri1", result_spec),
+ EqualsProto(expected_get_result_proto));
-TEST_F(IcingSearchEngineTest, DeleteByNamespace) {
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", "message body1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace1", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "message body2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document3 =
+ *expected_get_result_proto.mutable_document() =
DocumentBuilder()
- .SetKey("namespace3", "uri3")
- .SetSchema("Message")
- .AddStringProperty("body", "message body2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .AddStringProperty("name", "Meg Artist")
+ .AddStringProperty("emailAddress", "artist@aol.com")
.Build();
-
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(7);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- *expected_get_result_proto.mutable_document() = document3;
- EXPECT_THAT(
- icing.Get("namespace3", "uri3", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Delete namespace1. Document1 and document2 should be irretrievable.
- // Document3 should still be present.
- DeleteByNamespaceResultProto result_proto =
- icing.DeleteByNamespace("namespace1");
- EXPECT_THAT(result_proto.status(), ProtoIsOk());
- DeleteStatsProto exp_stats;
- exp_stats.set_delete_type(DeleteStatsProto::DeleteType::NAMESPACE);
- exp_stats.set_latency_ms(7);
- exp_stats.set_num_documents_deleted(2);
- EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace1, uri1) not found.");
- expected_get_result_proto.clear_document();
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace1, uri2) not found.");
- expected_get_result_proto.clear_document();
- EXPECT_THAT(
- icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- expected_get_result_proto.mutable_status()->clear_message();
- *expected_get_result_proto.mutable_document() = document3;
- EXPECT_THAT(
- icing.Get("namespace3", "uri3", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Search for "message", only document3 should show up.
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.set_query("message");
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
+ ASSERT_THAT(icing.Get("namespace", "uri2", result_spec),
+ EqualsProto(expected_get_result_proto));
}
-TEST_F(IcingSearchEngineTest, DeleteNamespaceByQuery) {
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", "message body1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "message body2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
+TEST_F(IcingSearchEngineTest, GetDocumentProjectionMultipleParentPolymorphism) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Delete the first namespace. The first doc should be irretrievable. The
- // second should still be present.
- SearchSpecProto search_spec;
- search_spec.add_namespace_filters("namespace1");
- EXPECT_THAT(icing.DeleteByQuery(search_spec).status(), ProtoIsOk());
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace1, uri1) not found.");
- expected_get_result_proto.clear_document();
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- expected_get_result_proto.mutable_status()->clear_message();
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- search_spec = SearchSpecProto::default_instance();
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, DeleteByQuery) {
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", "message body1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "message body2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("content")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("note")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("EmailMessage")
+ .AddParentType("Email")
+ .AddParentType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("content")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("note")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(7);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Delete all docs containing 'body1'. The first doc should be irretrievable.
- // The second should still be present.
- SearchSpecProto search_spec;
- search_spec.set_query("body1");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- DeleteByQueryResultProto result_proto = icing.DeleteByQuery(search_spec);
- EXPECT_THAT(result_proto.status(), ProtoIsOk());
- DeleteByQueryStatsProto exp_stats;
- exp_stats.set_latency_ms(7);
- exp_stats.set_num_documents_deleted(1);
- exp_stats.set_query_length(search_spec.query().length());
- exp_stats.set_num_terms(1);
- exp_stats.set_num_namespaces_filtered(0);
- exp_stats.set_num_schema_types_filtered(0);
- exp_stats.set_parse_query_latency_ms(7);
- exp_stats.set_document_removal_latency_ms(7);
- EXPECT_THAT(result_proto.delete_by_query_stats(), EqualsProto(exp_stats));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace1, uri1) not found.");
- expected_get_result_proto.clear_document();
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- expected_get_result_proto.mutable_status()->clear_message();
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- search_spec = SearchSpecProto::default_instance();
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, DeleteByQueryReturnInfo) {
- DocumentProto document1 =
+ // Add an email document and a message document
+ DocumentProto document_email =
DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", "message body1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "message body2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document3 =
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("sender", "sender1")
+ .AddStringProperty("recipient", "recipient1")
+ .Build();
+ DocumentProto document_message = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Message")
+ .AddStringProperty("content", "content1")
+ .AddStringProperty("note", "note1")
+ .Build();
+ // Add an emailMessage document
+ DocumentProto document_email_message =
DocumentBuilder()
- .SetKey("namespace2", "uri3")
- .SetSchema("Message")
- .AddStringProperty("body", "message body3")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetKey("namespace", "uri3")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("EmailMessage")
+ .AddStringProperty("sender", "sender2")
+ .AddStringProperty("recipient", "recipient2")
+ .AddStringProperty("content", "content2")
+ .AddStringProperty("note", "note2")
.Build();
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(7);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document_email).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document_message).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document_email_message).status(), ProtoIsOk());
+ // Add type property masks for Email and Message, and both of them will apply
+ // to EmailMessage.
+ GetResultSpecProto result_spec;
+ TypePropertyMask* email_type_property_mask =
+ result_spec.add_type_property_masks();
+ email_type_property_mask->set_schema_type("Email");
+ email_type_property_mask->add_paths("sender");
+
+ TypePropertyMask* message_type_property_mask =
+ result_spec.add_type_property_masks();
+ message_type_property_mask->set_schema_type("Message");
+ message_type_property_mask->add_paths("content");
+
+ // Verify that
+ // - The returned email document only contains the 'sender' property.
+ // - The returned message document only contains the 'content' property.
+ // - The returned email message document contains both the 'sender' and
+ // 'content' properties,
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- *expected_get_result_proto.mutable_document() = document3;
- EXPECT_THAT(
- icing.Get("namespace2", "uri3", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Delete all docs to test the information is correctly grouped.
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- DeleteByQueryResultProto result_proto =
- icing.DeleteByQuery(search_spec, true);
- EXPECT_THAT(result_proto.status(), ProtoIsOk());
- DeleteByQueryStatsProto exp_stats;
- exp_stats.set_latency_ms(7);
- exp_stats.set_num_documents_deleted(3);
- exp_stats.set_query_length(search_spec.query().length());
- exp_stats.set_num_terms(1);
- exp_stats.set_num_namespaces_filtered(0);
- exp_stats.set_num_schema_types_filtered(0);
- exp_stats.set_parse_query_latency_ms(7);
- exp_stats.set_document_removal_latency_ms(7);
- EXPECT_THAT(result_proto.delete_by_query_stats(), EqualsProto(exp_stats));
-
- // Check that DeleteByQuery can return information for deleted documents.
- DeleteByQueryResultProto::DocumentGroupInfo info1, info2;
- info1.set_namespace_("namespace1");
- info1.set_schema("Message");
- info1.add_uris("uri1");
- info2.set_namespace_("namespace2");
- info2.set_schema("Message");
- info2.add_uris("uri3");
- info2.add_uris("uri2");
- EXPECT_THAT(result_proto.deleted_documents(),
- UnorderedElementsAre(EqualsProto(info1), EqualsProto(info2)));
-
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance())
- .status()
- .code(),
- Eq(StatusProto::NOT_FOUND));
- EXPECT_THAT(
- icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance())
- .status()
- .code(),
- Eq(StatusProto::NOT_FOUND));
- EXPECT_THAT(
- icing.Get("namespace2", "uri3", GetResultSpecProto::default_instance())
- .status()
- .code(),
- Eq(StatusProto::NOT_FOUND));
-}
-
-TEST_F(IcingSearchEngineTest, DeleteByQueryNotFound) {
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", "message body1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
+ *expected_get_result_proto.mutable_document() =
DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "message body2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("sender", "sender1")
.Build();
+ ASSERT_THAT(icing.Get("namespace", "uri1", result_spec),
+ EqualsProto(expected_get_result_proto));
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Delete all docs containing 'foo', which should be none of them. Both docs
- // should still be present.
- SearchSpecProto search_spec;
- search_spec.set_query("foo");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- EXPECT_THAT(icing.DeleteByQuery(search_spec).status(),
- ProtoStatusIs(StatusProto::NOT_FOUND));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- expected_get_result_proto.mutable_status()->clear_message();
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- expected_get_result_proto.mutable_status()->clear_message();
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- search_spec = SearchSpecProto::default_instance();
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SetSchemaShouldWorkAfterOptimization) {
- // Creates 3 test schemas
- SchemaProto schema1 = SchemaProto(CreateMessageSchema());
-
- SchemaProto schema2 = SchemaProto(schema1);
- auto new_property2 = schema2.mutable_types(0)->add_properties();
- new_property2->set_property_name("property2");
- new_property2->set_data_type(PropertyConfigProto::DataType::STRING);
- new_property2->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- new_property2->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- new_property2->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- SchemaProto schema3 = SchemaProto(schema2);
- auto new_property3 = schema3.mutable_types(0)->add_properties();
- new_property3->set_property_name("property3");
- new_property3->set_data_type(PropertyConfigProto::DataType::STRING);
- new_property3->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- new_property3->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- new_property3->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(schema1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
-
- // Validates that SetSchema() works right after Optimize()
- EXPECT_THAT(icing.SetSchema(schema2).status(), ProtoIsOk());
- } // Destroys IcingSearchEngine to make sure nothing is cached.
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(schema3).status(), ProtoIsOk());
-}
-
-TEST_F(IcingSearchEngineTest, SearchShouldWorkAfterOptimization) {
- DocumentProto document = CreateMessageDocument("namespace", "uri");
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document;
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
-
- // Validates that Search() works right after Optimize()
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
- } // Destroys IcingSearchEngine to make sure nothing is cached.
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, IcingShouldWorkFineIfOptimizationIsAborted) {
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- {
- // Initializes a normal icing to create files needed
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- }
-
- // Creates a mock filesystem in which DeleteDirectoryRecursively() always
- // fails. This will fail IcingSearchEngine::OptimizeDocumentStore() and makes
- // it return ABORTED_ERROR.
- auto mock_filesystem = std::make_unique<MockFilesystem>();
- ON_CALL(*mock_filesystem,
- DeleteDirectoryRecursively(HasSubstr("_optimize_tmp")))
- .WillByDefault(Return(false));
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::move(mock_filesystem),
- std::make_unique<IcingFilesystem>(),
- std::make_unique<FakeClock>(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.Optimize().status(), ProtoStatusIs(StatusProto::ABORTED));
-
- // Now optimization is aborted, we verify that document-related functions
- // still work as expected.
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
-
- EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_query("m");
- search_spec.set_term_match_type(TermMatchType::PREFIX);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- OptimizationShouldRecoverIfFileDirectoriesAreMissing) {
- // Creates a mock filesystem in which SwapFiles() always fails and deletes the
- // directories. This will fail IcingSearchEngine::OptimizeDocumentStore().
- auto mock_filesystem = std::make_unique<MockFilesystem>();
- ON_CALL(*mock_filesystem, SwapFiles(HasSubstr("document_dir_optimize_tmp"),
- HasSubstr("document_dir")))
- .WillByDefault([this](const char* one, const char* two) {
- filesystem()->DeleteDirectoryRecursively(one);
- filesystem()->DeleteDirectoryRecursively(two);
- return false;
- });
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::move(mock_filesystem),
- std::make_unique<IcingFilesystem>(),
- std::make_unique<FakeClock>(), GetTestJniCache());
-
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
-
- // Optimize() fails due to filesystem error
- OptimizeResultProto result = icing.Optimize();
- EXPECT_THAT(result.status(), ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
- // Should rebuild the index for data loss.
- EXPECT_THAT(result.optimize_stats().index_restoration_mode(),
- Eq(OptimizeStatsProto::FULL_INDEX_REBUILD));
-
- // Document is not found because original file directory is missing
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, uri) not found.");
- EXPECT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- DocumentProto new_document =
+ *expected_get_result_proto.mutable_document() =
DocumentBuilder()
.SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
.SetSchema("Message")
- .AddStringProperty("body", "new body")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .AddStringProperty("content", "content1")
.Build();
+ ASSERT_THAT(icing.Get("namespace", "uri2", result_spec),
+ EqualsProto(expected_get_result_proto));
- EXPECT_THAT(icing.Put(new_document).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_query("m");
- search_spec.set_term_match_type(TermMatchType::PREFIX);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
-
- // Searching old content returns nothing because original file directory is
- // missing
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- search_spec.set_query("n");
-
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- new_document;
-
- // Searching new content returns the new document
- search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, OptimizationShouldRecoverIfDataFilesAreMissing) {
- // Creates a mock filesystem in which SwapFiles() always fails and empties the
- // directories. This will fail IcingSearchEngine::OptimizeDocumentStore().
- auto mock_filesystem = std::make_unique<MockFilesystem>();
- ON_CALL(*mock_filesystem, SwapFiles(HasSubstr("document_dir_optimize_tmp"),
- HasSubstr("document_dir")))
- .WillByDefault([this](const char* one, const char* two) {
- filesystem()->DeleteDirectoryRecursively(one);
- filesystem()->CreateDirectoryRecursively(one);
- filesystem()->DeleteDirectoryRecursively(two);
- filesystem()->CreateDirectoryRecursively(two);
- return false;
- });
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::move(mock_filesystem),
- std::make_unique<IcingFilesystem>(),
- std::make_unique<FakeClock>(), GetTestJniCache());
-
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
-
- // Optimize() fails due to filesystem error
- OptimizeResultProto result = icing.Optimize();
- EXPECT_THAT(result.status(), ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
- // Should rebuild the index for data loss.
- EXPECT_THAT(result.optimize_stats().index_restoration_mode(),
- Eq(OptimizeStatsProto::FULL_INDEX_REBUILD));
-
- // Document is not found because original files are missing
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, uri) not found.");
- EXPECT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- DocumentProto new_document =
+ *expected_get_result_proto.mutable_document() =
DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "new body")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetKey("namespace", "uri3")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("EmailMessage")
+ .AddStringProperty("sender", "sender2")
+ .AddStringProperty("content", "content2")
.Build();
-
- EXPECT_THAT(icing.Put(new_document).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_query("m");
- search_spec.set_term_match_type(TermMatchType::PREFIX);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
-
- // Searching old content returns nothing because original files are missing
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- search_spec.set_query("n");
-
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- new_document;
-
- // Searching new content returns the new document
- search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchIncludesDocumentsBeforeTtl) {
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("Message");
-
- auto body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- body->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- DocumentProto document = DocumentBuilder()
- .SetKey("namespace", "uri")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .SetCreationTimestampMs(100)
- .SetTtlMs(500)
- .Build();
-
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document;
-
- // Time just has to be less than the document's creation timestamp (100) + the
- // document's ttl (500)
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetSystemTimeMilliseconds(400);
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
-
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- // Check that the document is returned as part of search results
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchDoesntIncludeDocumentsPastTtl) {
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("Message");
-
- auto body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- body->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- DocumentProto document = DocumentBuilder()
- .SetKey("namespace", "uri")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .SetCreationTimestampMs(100)
- .SetTtlMs(500)
- .Build();
-
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
-
- // Time just has to be greater than the document's creation timestamp (100) +
- // the document's ttl (500)
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetSystemTimeMilliseconds(700);
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
-
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- // Check that the document is not returned as part of search results
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
+ ASSERT_THAT(icing.Get("namespace", "uri3", result_spec),
+ EqualsProto(expected_get_result_proto));
}
-TEST_F(IcingSearchEngineTest, SearchWorksAfterSchemaTypesCompatiblyModified) {
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("message");
-
- auto property = type_config->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- DocumentProto message_document =
- DocumentBuilder()
- .SetKey("namespace", "message_uri")
- .SetSchema("message")
- .AddStringProperty("body", "foo")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
+TEST_F(IcingSearchEngineTest, GetDocumentProjectionDiamondPolymorphism) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(message_document).status(), ProtoIsOk());
-
- // Make sure we can search for message document
- SearchSpecProto search_spec;
- search_spec.set_query("foo");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
-
- // The message isn't indexed, so we get nothing
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- // With just the schema type filter, we can search for the message
- search_spec.Clear();
- search_spec.add_schema_type_filters("message");
-
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- message_document;
-
- search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- // Since SchemaTypeIds are assigned based on order in the SchemaProto, this
- // will force a change in the DocumentStore's cached SchemaTypeIds
- schema.clear_types();
- type_config = schema.add_types();
- type_config->set_schema_type("email");
-
- // Adding a new indexed property will require reindexing
- type_config = schema.add_types();
- type_config->set_schema_type("message");
-
- property = type_config->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- property->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- search_spec.Clear();
- search_spec.set_query("foo");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.add_schema_type_filters("message");
-
- // We can still search for the message document
- search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, RecoverFromMissingHeaderFile) {
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- {
- // Basic initialization/setup
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
- EXPECT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- EXPECT_TRUE(filesystem()->DeleteFile(GetHeaderFilename().c_str()));
-
- // We should be able to recover from this and access all our previous data
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Checks that DocumentLog is still ok
- EXPECT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- // Checks that the index is still ok so we can search over it
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- // Checks that Schema is still since it'll be needed to validate the document
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
-}
-
-TEST_F(IcingSearchEngineTest, UnableToRecoverFromCorruptSchema) {
- {
- // Basic initialization/setup
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- EXPECT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- const std::string schema_file =
- absl_ports::StrCat(GetSchemaDir(), "/schema.pb");
- const std::string corrupt_data = "1234";
- EXPECT_TRUE(filesystem()->Write(schema_file.c_str(), corrupt_data.data(),
- corrupt_data.size()));
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(),
- ProtoStatusIs(StatusProto::INTERNAL));
-}
-
-TEST_F(IcingSearchEngineTest, UnableToRecoverFromCorruptDocumentLog) {
- {
- // Basic initialization/setup
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- EXPECT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- const std::string document_log_file = absl_ports::StrCat(
- GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
- const std::string corrupt_data = "1234";
- EXPECT_TRUE(filesystem()->Write(document_log_file.c_str(),
- corrupt_data.data(), corrupt_data.size()));
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(),
- ProtoStatusIs(StatusProto::INTERNAL));
-}
-
-TEST_F(IcingSearchEngineTest, RecoverFromInconsistentSchemaStore) {
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2_with_additional_property =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Message")
- .AddStringProperty("additional", "content")
- .AddStringProperty("body", "message body")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ // Create a schema with a diamond inheritance relation.
+ // Object
+ // / \
+ // Email Message
+ // \ /
+ // EmailMessage
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Object").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("objectId")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddParentType("Object")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("objectId")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddParentType("Object")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("objectId")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("content")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("note")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("EmailMessage")
+ .AddParentType("Email")
+ .AddParentType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("objectId")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("content")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("note")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- {
- // Initializes folder and schema
- IcingSearchEngine icing(options, GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("Message");
-
- auto property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- property->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- property = type->add_properties();
- property->set_property_name("additional");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document2_with_additional_property).status(),
- ProtoIsOk());
-
- // Won't get us anything because "additional" isn't marked as an indexed
- // property in the schema
- SearchSpecProto search_spec;
- search_spec.set_query("additional:content");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- {
- // This schema will change the SchemaTypeIds from the previous schema_
- // (since SchemaTypeIds are assigned based on order of the types, and this
- // new schema changes the ordering of previous types)
- SchemaProto new_schema;
- auto type = new_schema.add_types();
- type->set_schema_type("Email");
-
- type = new_schema.add_types();
- type->set_schema_type("Message");
-
- // Adding a new property changes the SectionIds (since SectionIds are
- // assigned based on alphabetical order of indexed sections, marking
- // "additional" as an indexed property will push the "body" property to a
- // different SectionId)
- auto property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- property->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- property = type->add_properties();
- property->set_property_name("additional");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- property->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- // Write the marker file
- std::string marker_filepath =
- absl_ports::StrCat(options.base_dir(), "/set_schema_marker");
- ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str()));
- ASSERT_TRUE(sfd.is_valid());
-
- // Write the new schema
- FakeClock fake_clock;
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
- ICING_EXPECT_OK(schema_store->SetSchema(new_schema));
- } // Will persist new schema
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // We can insert a Email document since we kept the new schema
- DocumentProto email_document =
+ // Add an email document and a message document
+ DocumentProto document_email =
DocumentBuilder()
- .SetKey("namespace", "email_uri")
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
.SetSchema("Email")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .AddStringProperty("objectId", "object1")
+ .AddStringProperty("sender", "sender1")
+ .AddStringProperty("recipient", "recipient1")
+ .Build();
+ DocumentProto document_message = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Message")
+ .AddStringProperty("objectId", "object2")
+ .AddStringProperty("content", "content1")
+ .AddStringProperty("note", "note1")
+ .Build();
+ // Add an emailMessage document
+ DocumentProto document_email_message =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("EmailMessage")
+ .AddStringProperty("objectId", "object3")
+ .AddStringProperty("sender", "sender2")
+ .AddStringProperty("recipient", "recipient2")
+ .AddStringProperty("content", "content2")
+ .AddStringProperty("note", "note2")
.Build();
- EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk());
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = email_document;
-
- EXPECT_THAT(icing.Get("namespace", "email_uri",
- GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- SearchSpecProto search_spec;
-
- // The section restrict will ensure we are using the correct, updated
- // SectionId in the Index
- search_spec.set_query("additional:content");
-
- // Schema type filter will ensure we're using the correct, updated
- // SchemaTypeId in the DocumentStore
- search_spec.add_schema_type_filters("Message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2_with_additional_property;
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, RecoverFromInconsistentDocumentStore) {
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
-
- {
- // Initializes folder and schema, index one document
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
- } // This should shut down IcingSearchEngine and persist anything it needs to
- {
- FakeClock fake_clock;
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
- ICING_EXPECT_OK(schema_store->SetSchema(CreateMessageSchema()));
-
- // Puts a second document into DocumentStore but doesn't index it.
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(filesystem(), GetDocumentDir(), &fake_clock,
- schema_store.get()));
- std::unique_ptr<DocumentStore> document_store =
- std::move(create_result.document_store);
-
- ICING_EXPECT_OK(document_store->Put(document2));
- }
+ ASSERT_THAT(icing.Put(document_email).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document_message).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document_email_message).status(), ProtoIsOk());
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- // Index Restoration should be triggered here and document2 should be
- // indexed.
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ // Add type property masks for Object, which should apply to Email, Message
+ // and EmailMessage.
+ GetResultSpecProto result_spec;
+ TypePropertyMask* email_type_property_mask =
+ result_spec.add_type_property_masks();
+ email_type_property_mask->set_schema_type("Object");
+ email_type_property_mask->add_paths("objectId");
+ // Verify that all the documents only contain the 'objectId' property.
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document1;
-
- // DocumentStore kept the additional document
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // We indexed the additional document
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
-
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, RecoverFromInconsistentIndex) {
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- {
- // Initializes folder and schema, index one document
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- // Pretend we lost the entire index
- EXPECT_TRUE(filesystem()->DeleteDirectoryRecursively(
- absl_ports::StrCat(GetIndexDir(), "/idx/lite.").c_str()));
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Check that our index is ok by searching over the restored index
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, RecoverFromCorruptIndex) {
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- {
- // Initializes folder and schema, index one document
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- // Pretend index is corrupted
- const std::string index_hit_buffer_file = GetIndexDir() + "/idx/lite.hb";
- ScopedFd fd(filesystem()->OpenForWrite(index_hit_buffer_file.c_str()));
- ASSERT_TRUE(fd.is_valid());
- ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Check that our index is ok by searching over the restored index
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByDocumentScore) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 3 documents and ensures the relationship in terms of document
- // score is: document1 < document2 < document3
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document3 =
- DocumentBuilder()
- .SetKey("namespace", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetScore(3)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- // Intentionally inserts the documents in the order that is different than
- // their score order
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
-
- // "m" will match all 3 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- // Result should be in descending score order
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchShouldAllowNoScoring) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 3 documents and ensures the relationship of them is:
- // document1 < document2 < document3
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(1571111111111)
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(1572222222222)
- .Build();
- DocumentProto document3 = DocumentBuilder()
- .SetKey("namespace", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetScore(3)
- .SetCreationTimestampMs(1573333333333)
- .Build();
-
- // Intentionally inserts the documents in the order that is different than
- // their score order
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // "m" will match all 3 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
-
- // Results should not be ranked by score but returned in reverse insertion
- // order.
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByCreationTimestamp) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 3 documents and ensures the relationship in terms of creation
- // timestamp score is: document1 < document2 < document3
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetCreationTimestampMs(1571111111111)
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetCreationTimestampMs(1572222222222)
- .Build();
- DocumentProto document3 = DocumentBuilder()
- .SetKey("namespace", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetCreationTimestampMs(1573333333333)
- .Build();
-
- // Intentionally inserts the documents in the order that is different than
- // their score order
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // "m" will match all 3 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- // Result should be in descending timestamp order
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(
- ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByUsageCount) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 3 test documents
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document3 =
- DocumentBuilder()
- .SetKey("namespace", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- // Intentionally inserts the documents in a different order to eliminate the
- // possibility that the following results are sorted in the default reverse
- // insertion order.
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // Report usage for doc3 twice and doc2 once. The order will be doc3 > doc2 >
- // doc1 when ranked by USAGE_TYPE1_COUNT.
- UsageReport usage_report_doc3 = CreateUsageReport(
- /*name_space=*/"namespace", /*uri=*/"uri/3", /*timestamp_ms=*/0,
- UsageReport::USAGE_TYPE1);
- UsageReport usage_report_doc2 = CreateUsageReport(
- /*name_space=*/"namespace", /*uri=*/"uri/2", /*timestamp_ms=*/0,
- UsageReport::USAGE_TYPE1);
- ASSERT_THAT(icing.ReportUsage(usage_report_doc3).status(), ProtoIsOk());
- ASSERT_THAT(icing.ReportUsage(usage_report_doc3).status(), ProtoIsOk());
- ASSERT_THAT(icing.ReportUsage(usage_report_doc2).status(), ProtoIsOk());
-
- // "m" will match all 3 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- // Result should be in descending USAGE_TYPE1_COUNT order
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
-
- ScoringSpecProto scoring_spec;
- scoring_spec.set_rank_by(
- ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchResultShouldHaveDefaultOrderWithoutUsageCounts) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 3 test documents
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document3 =
+ *expected_get_result_proto.mutable_document() =
DocumentBuilder()
- .SetKey("namespace", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("objectId", "object1")
.Build();
+ ASSERT_THAT(icing.Get("namespace", "uri1", result_spec),
+ EqualsProto(expected_get_result_proto));
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
-
- // "m" will match all 3 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- // None of the documents have usage reports. Result should be in the default
- // reverse insertion order.
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
-
- ScoringSpecProto scoring_spec;
- scoring_spec.set_rank_by(
- ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByUsageTimestamp) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 3 test documents
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document3 =
+ *expected_get_result_proto.mutable_document() =
DocumentBuilder()
- .SetKey("namespace", "uri/3")
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
.SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .AddStringProperty("objectId", "object2")
.Build();
+ ASSERT_THAT(icing.Get("namespace", "uri2", result_spec),
+ EqualsProto(expected_get_result_proto));
- // Intentionally inserts the documents in a different order to eliminate the
- // possibility that the following results are sorted in the default reverse
- // insertion order.
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // Report usage for doc2 and doc3. The order will be doc3 > doc2 > doc1 when
- // ranked by USAGE_TYPE1_LAST_USED_TIMESTAMP.
- UsageReport usage_report_doc2 = CreateUsageReport(
- /*name_space=*/"namespace", /*uri=*/"uri/2", /*timestamp_ms=*/1000,
- UsageReport::USAGE_TYPE1);
- UsageReport usage_report_doc3 = CreateUsageReport(
- /*name_space=*/"namespace", /*uri=*/"uri/3", /*timestamp_ms=*/5000,
- UsageReport::USAGE_TYPE1);
- ASSERT_THAT(icing.ReportUsage(usage_report_doc2).status(), ProtoIsOk());
- ASSERT_THAT(icing.ReportUsage(usage_report_doc3).status(), ProtoIsOk());
-
- // "m" will match all 3 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- // Result should be in descending USAGE_TYPE1_LAST_USED_TIMESTAMP order
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
-
- ScoringSpecProto scoring_spec;
- scoring_spec.set_rank_by(
- ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, Bm25fRelevanceScoringOneNamespace) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
-
- // Create and index documents in namespace "namespace1".
- DocumentProto document = CreateEmailDocument(
- "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
- "fresh fish. inexpensive. good sushi.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
- "indian food. buffet. spicy food. kadai chicken.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
- "panda express",
- "chinese food. cheap. inexpensive. kung pao.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
- "speederia pizza",
- "thin-crust pizza. good and fast.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
- "whole foods",
- "salads. pizza. organic food. expensive.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
- "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri6", /*score=*/4, "costco",
- "bulk. cheap whole beans. frozen fish. food samples.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
- "starbucks coffee",
- "habit. birthday rewards. good coffee");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.set_query("coffee OR food");
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
-
- // Result should be in descending score order
- EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
- // Both doc5 and doc7 have "coffee" in name and text sections.
- // However, doc5 has more matches in the text section.
- // Documents with "food" are ranked lower as the term "food" is commonly
- // present in this corpus, and thus, has a lower IDF.
- EXPECT_THAT(GetUrisFromSearchResults(search_result_proto),
- ElementsAre("namespace1/uri5", // 'coffee' 3 times
- "namespace1/uri7", // 'coffee' 2 times
- "namespace1/uri1", // 'food' 2 times
- "namespace1/uri4", // 'food' 2 times
- "namespace1/uri2", // 'food' 1 time
- "namespace1/uri6")); // 'food' 1 time
-}
-
-TEST_F(IcingSearchEngineTest, Bm25fRelevanceScoringOneNamespaceAdvanced) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
-
- // Create and index documents in namespace "namespace1".
- DocumentProto document = CreateEmailDocument(
- "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
- "fresh fish. inexpensive. good sushi.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
- "indian food. buffet. spicy food. kadai chicken.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
- "panda express",
- "chinese food. cheap. inexpensive. kung pao.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
- "speederia pizza",
- "thin-crust pizza. good and fast.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
- "whole foods",
- "salads. pizza. organic food. expensive.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
- "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri6", /*score=*/4, "costco",
- "bulk. cheap whole beans. frozen fish. food samples.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
- "starbucks coffee",
- "habit. birthday rewards. good coffee");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.set_query("coffee OR food");
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_advanced_scoring_expression("this.relevanceScore() * 2 + 1");
- scoring_spec.set_rank_by(
- ScoringSpecProto::RankingStrategy::ADVANCED_SCORING_EXPRESSION);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
-
- // Result should be in descending score order
- EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
- // Both doc5 and doc7 have "coffee" in name and text sections.
- // However, doc5 has more matches in the text section.
- // Documents with "food" are ranked lower as the term "food" is commonly
- // present in this corpus, and thus, has a lower IDF.
- EXPECT_THAT(GetUrisFromSearchResults(search_result_proto),
- ElementsAre("namespace1/uri5", // 'coffee' 3 times
- "namespace1/uri7", // 'coffee' 2 times
- "namespace1/uri1", // 'food' 2 times
- "namespace1/uri4", // 'food' 2 times
- "namespace1/uri2", // 'food' 1 time
- "namespace1/uri6")); // 'food' 1 time
-}
-
-TEST_F(IcingSearchEngineTest, Bm25fRelevanceScoringOneNamespaceNotOperator) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
-
- // Create and index documents in namespace "namespace1".
- DocumentProto document = CreateEmailDocument(
- "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
- "fresh fish. inexpensive. good sushi.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
- "indian food. buffet. spicy food. kadai chicken.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
- "panda express",
- "chinese food. cheap. inexpensive. kung pao.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri3", /*score=*/23, "speederia pizza",
- "thin-crust pizza. good and fast. nice coffee");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
- "whole foods",
- "salads. pizza. organic food. expensive.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
- "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri6", /*score=*/4, "costco",
- "bulk. cheap whole beans. frozen fish. food samples.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
- "starbucks coffee",
- "habit. birthday rewards. good coffee");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.set_query("coffee -starbucks");
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
-
- // Result should be in descending score order
- EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(
- GetUrisFromSearchResults(search_result_proto),
- ElementsAre("namespace1/uri5", // 'coffee' 3 times, 'starbucks' 0 times
- "namespace1/uri3")); // 'coffee' 1 times, 'starbucks' 0 times
-}
-
-TEST_F(IcingSearchEngineTest,
- Bm25fRelevanceScoringOneNamespaceSectionRestrict) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
-
- // Create and index documents in namespace "namespace1".
- DocumentProto document = CreateEmailDocument(
- "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
- "fresh fish. inexpensive. good sushi.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
- "indian food. buffet. spicy food. kadai chicken.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
- "panda express",
- "chinese food. cheap. inexpensive. kung pao.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
- "speederia pizza",
- "thin-crust pizza. good and fast.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
- "whole foods",
- "salads. pizza. organic food. expensive.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document =
- CreateEmailDocument("namespace1", "namespace1/uri5", /*score=*/18,
- "peets coffee, best coffee",
- "espresso. decaf. whole beans. excellent coffee.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri6", /*score=*/4, "costco",
- "bulk. cheap whole beans. frozen fish. food samples.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri7", /*score=*/4, "starbucks",
- "habit. birthday rewards. good coffee. brewed coffee");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.set_query("subject:coffee OR body:food");
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
-
- // Result should be in descending score order
- EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
- // The term frequencies of "coffee" and "food" are calculated respectively
- // from the subject section and the body section.
- // Documents with "food" are ranked lower as the term "food" is commonly
- // present in this corpus, and thus, has a lower IDF.
- EXPECT_THAT(
- GetUrisFromSearchResults(search_result_proto),
- ElementsAre("namespace1/uri5", // 'coffee' 2 times in section subject
- "namespace1/uri1", // 'food' 2 times in section body
- "namespace1/uri4", // 'food' 2 times in section body
- "namespace1/uri2", // 'food' 1 time in section body
- "namespace1/uri6")); // 'food' 1 time in section body
-}
-
-TEST_F(IcingSearchEngineTest, Bm25fRelevanceScoringTwoNamespaces) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
-
- // Create and index documents in namespace "namespace1".
- DocumentProto document = CreateEmailDocument(
- "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
- "fresh fish. inexpensive. good sushi.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
- "indian food. buffet. spicy food. kadai chicken.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
- "panda express",
- "chinese food. cheap. inexpensive. kung pao.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
- "speederia pizza",
- "thin-crust pizza. good and fast.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
- "whole foods",
- "salads. pizza. organic food. expensive.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
- "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri6", /*score=*/4, "costco",
- "bulk. cheap whole beans. frozen fish. food samples.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
- "starbucks coffee",
- "habit. birthday rewards. good coffee");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- // Create and index documents in namespace "namespace2".
- document = CreateEmailDocument("namespace2", "namespace2/uri0", /*score=*/10,
- "sushi belmont",
- "fresh fish. inexpensive. good sushi.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace2", "namespace2/uri1", /*score=*/13, "peacock koriander",
- "indian food. buffet. spicy food. kadai chicken.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace2", "namespace2/uri2", /*score=*/4,
- "panda express",
- "chinese food. cheap. inexpensive. kung pao.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace2", "namespace2/uri3", /*score=*/23,
- "speederia pizza",
- "thin-crust pizza. good and fast.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace2", "namespace2/uri4", /*score=*/8,
- "whole foods",
- "salads. pizza. organic food. expensive.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace2", "namespace2/uri5", /*score=*/18, "peets coffee",
- "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace2", "namespace2/uri6", /*score=*/4, "costco",
- "bulk. cheap whole beans. frozen fish. food samples.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace2", "namespace2/uri7", /*score=*/4,
- "starbucks coffee", "good coffee");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.set_query("coffee OR food");
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
- ResultSpecProto result_spec_proto;
- result_spec_proto.set_num_per_page(16);
- SearchResultProto search_result_proto =
- icing.Search(search_spec, scoring_spec, result_spec_proto);
-
- // Result should be in descending score order
- EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
- // The two corpora have the same documents except for document 7, which in
- // "namespace2" is much shorter than the average dcoument length, so it is
- // boosted.
- EXPECT_THAT(GetUrisFromSearchResults(search_result_proto),
- ElementsAre("namespace2/uri7", // 'coffee' 2 times, short doc
- "namespace1/uri5", // 'coffee' 3 times
- "namespace2/uri5", // 'coffee' 3 times
- "namespace1/uri7", // 'coffee' 2 times
- "namespace1/uri1", // 'food' 2 times
- "namespace2/uri1", // 'food' 2 times
- "namespace1/uri4", // 'food' 2 times
- "namespace2/uri4", // 'food' 2 times
- "namespace1/uri2", // 'food' 1 time
- "namespace2/uri2", // 'food' 1 time
- "namespace1/uri6", // 'food' 1 time
- "namespace2/uri6")); // 'food' 1 time
-}
-
-TEST_F(IcingSearchEngineTest, Bm25fRelevanceScoringWithNamespaceFilter) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
-
- // Create and index documents in namespace "namespace1".
- DocumentProto document = CreateEmailDocument(
- "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
- "fresh fish. inexpensive. good sushi.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
- "indian food. buffet. spicy food. kadai chicken.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
- "panda express",
- "chinese food. cheap. inexpensive. kung pao.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
- "speederia pizza",
- "thin-crust pizza. good and fast.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
- "whole foods",
- "salads. pizza. organic food. expensive.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
- "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri6", /*score=*/4, "costco",
- "bulk. cheap whole beans. frozen fish. food samples.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
- "starbucks coffee",
- "habit. birthday rewards. good coffee");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- // Create and index documents in namespace "namespace2".
- document = CreateEmailDocument("namespace2", "namespace2/uri0", /*score=*/10,
- "sushi belmont",
- "fresh fish. inexpensive. good sushi.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace2", "namespace2/uri1", /*score=*/13, "peacock koriander",
- "indian food. buffet. spicy food. kadai chicken.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace2", "namespace2/uri2", /*score=*/4,
- "panda express",
- "chinese food. cheap. inexpensive. kung pao.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace2", "namespace2/uri3", /*score=*/23,
- "speederia pizza",
- "thin-crust pizza. good and fast.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace2", "namespace2/uri4", /*score=*/8,
- "whole foods",
- "salads. pizza. organic food. expensive.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace2", "namespace2/uri5", /*score=*/18, "peets coffee",
- "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace2", "namespace2/uri6", /*score=*/4, "costco",
- "bulk. cheap whole beans. frozen fish. food samples.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace2", "namespace2/uri7", /*score=*/4,
- "starbucks coffee", "good coffee");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.set_query("coffee OR food");
- // Now query only corpus 2
- search_spec.add_namespace_filters("namespace2");
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
- search_result_proto = icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance());
-
- // Result from namespace "namespace2" should be in descending score order
- EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
- // Both doc5 and doc7 have "coffee" in name and text sections.
- // Even though doc5 has more matches in the text section, doc7's length is
- // much shorter than the average corpus's length, so it's being boosted.
- // Documents with "food" are ranked lower as the term "food" is commonly
- // present in this corpus, and thus, has a lower IDF.
- EXPECT_THAT(GetUrisFromSearchResults(search_result_proto),
- ElementsAre("namespace2/uri7", // 'coffee' 2 times, short doc
- "namespace2/uri5", // 'coffee' 3 times
- "namespace2/uri1", // 'food' 2 times
- "namespace2/uri4", // 'food' 2 times
- "namespace2/uri2", // 'food' 1 time
- "namespace2/uri6")); // 'food' 1 time
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchResultShouldHaveDefaultOrderWithoutUsageTimestamp) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 3 test documents
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document3 =
+ *expected_get_result_proto.mutable_document() =
DocumentBuilder()
- .SetKey("namespace", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetKey("namespace", "uri3")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("EmailMessage")
+ .AddStringProperty("objectId", "object3")
.Build();
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
-
- // "m" will match all 3 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- // None of the documents have usage reports. Result should be in the default
- // reverse insertion order.
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
-
- ScoringSpecProto scoring_spec;
- scoring_spec.set_rank_by(
- ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
+ ASSERT_THAT(icing.Get("namespace", "uri3", result_spec),
+ EqualsProto(expected_get_result_proto));
}
TEST_F(IcingSearchEngineTest, OlderUsageTimestampShouldNotOverrideNewerOnes) {
@@ -5586,13 +850,6 @@ TEST_F(IcingSearchEngineTest, OlderUsageTimestampShouldNotOverrideNewerOnes) {
.AddStringProperty("body", "message2")
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- DocumentProto document3 =
- DocumentBuilder()
- .SetKey("namespace", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
@@ -5635,948 +892,6 @@ TEST_F(IcingSearchEngineTest, OlderUsageTimestampShouldNotOverrideNewerOnes) {
expected_search_result_proto));
}
-TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedAscendingly) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 3 documents and ensures the relationship in terms of document
- // score is: document1 < document2 < document3
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document3 =
- DocumentBuilder()
- .SetKey("namespace", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetScore(3)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- // Intentionally inserts the documents in the order that is different than
- // their score order
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
-
- // "m" will match all 3 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- // Result should be in ascending score order
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
- scoring_spec.set_order_by(ScoringSpecProto::Order::ASC);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchResultGroupingDuplicateNamespaceShouldReturnError) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 2 documents and ensures the relationship in terms of document
- // score is: document1 < document2
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace2", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // "m" will match all 2 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
-
- // Specify "namespace1" twice. This should result in an error.
- ResultSpecProto result_spec;
- result_spec.set_result_group_type(ResultSpecProto::NAMESPACE);
- ResultSpecProto::ResultGrouping* result_grouping =
- result_spec.add_result_groupings();
- ResultSpecProto::ResultGrouping::Entry* entry =
- result_grouping->add_entry_groupings();
- result_grouping->set_max_results(1);
- entry->set_namespace_("namespace1");
- entry = result_grouping->add_entry_groupings();
- entry->set_namespace_("namespace2");
- entry = result_grouping->add_entry_groupings();
- entry->set_namespace_("namespace1");
- result_grouping = result_spec.add_result_groupings();
- entry = result_grouping->add_entry_groupings();
- result_grouping->set_max_results(1);
- entry->set_namespace_("namespace1");
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, scoring_spec, result_spec);
- EXPECT_THAT(search_result_proto.status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchResultGroupingDuplicateSchemaShouldReturnError) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 2 documents and ensures the relationship in terms of document
- // score is: document1 < document2
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace2", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // "m" will match all 2 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
-
- // Specify "Message" twice. This should result in an error.
- ResultSpecProto result_spec;
- result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE);
- ResultSpecProto::ResultGrouping* result_grouping =
- result_spec.add_result_groupings();
- ResultSpecProto::ResultGrouping::Entry* entry =
- result_grouping->add_entry_groupings();
- result_grouping->set_max_results(1);
- entry->set_schema("Message");
- entry = result_grouping->add_entry_groupings();
- entry->set_schema("nonexistentMessage");
- result_grouping = result_spec.add_result_groupings();
- result_grouping->set_max_results(1);
- entry = result_grouping->add_entry_groupings();
- entry->set_schema("Message");
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, scoring_spec, result_spec);
- EXPECT_THAT(search_result_proto.status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchResultGroupingDuplicateNamespaceAndSchemaSchemaShouldReturnError) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 2 documents and ensures the relationship in terms of document
- // score is: document1 < document2
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace2", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // "m" will match all 2 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
-
- // Specify "namespace1xMessage" twice. This should result in an error.
- ResultSpecProto result_spec;
- result_spec.set_result_group_type(ResultSpecProto::NAMESPACE_AND_SCHEMA_TYPE);
- ResultSpecProto::ResultGrouping* result_grouping =
- result_spec.add_result_groupings();
- ResultSpecProto::ResultGrouping::Entry* entry =
- result_grouping->add_entry_groupings();
- result_grouping->set_max_results(1);
- entry->set_namespace_("namespace1");
- entry->set_schema("Message");
- entry = result_grouping->add_entry_groupings();
- entry->set_namespace_("namespace2");
- entry->set_schema("Message");
- entry = result_grouping->add_entry_groupings();
- entry->set_namespace_("namespace1");
- entry->set_schema("Message");
- result_grouping = result_spec.add_result_groupings();
- result_grouping->set_max_results(1);
- entry = result_grouping->add_entry_groupings();
- entry->set_namespace_("namespace1");
- entry->set_schema("Message");
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, scoring_spec, result_spec);
- EXPECT_THAT(search_result_proto.status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchResultGroupingNonPositiveMaxResultsShouldReturnError) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 2 documents and ensures the relationship in terms of document
- // score is: document1 < document2
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace2", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // "m" will match all 2 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
-
- // Specify zero results. This should result in an error.
- ResultSpecProto result_spec;
- ResultSpecProto::ResultGrouping* result_grouping =
- result_spec.add_result_groupings();
- ResultSpecProto::ResultGrouping::Entry* entry =
- result_grouping->add_entry_groupings();
- result_grouping->set_max_results(0);
- entry->set_namespace_("namespace1");
- entry->set_schema("Message");
- result_grouping->add_entry_groupings();
- entry->set_namespace_("namespace2");
- entry->set_schema("Message");
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, scoring_spec, result_spec);
- EXPECT_THAT(search_result_proto.status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-
- // Specify negative results. This should result in an error.
- result_spec.mutable_result_groupings(0)->set_max_results(-1);
- EXPECT_THAT(search_result_proto.status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, SearchResultGroupingMultiNamespaceGrouping) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 3 documents and ensures the relationship in terms of document
- // score is: document1 < document2 < document3 < document4 < document5 <
- // document6
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document3 =
- DocumentBuilder()
- .SetKey("namespace2", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetScore(3)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document4 =
- DocumentBuilder()
- .SetKey("namespace2", "uri/4")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(4)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document5 =
- DocumentBuilder()
- .SetKey("namespace3", "uri/5")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetScore(5)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document6 =
- DocumentBuilder()
- .SetKey("namespace3", "uri/6")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(6)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document6).status(), ProtoIsOk());
-
- // "m" will match all 6 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
-
- ResultSpecProto result_spec;
- result_spec.set_result_group_type(ResultSpecProto::NAMESPACE);
- ResultSpecProto::ResultGrouping* result_grouping =
- result_spec.add_result_groupings();
- ResultSpecProto::ResultGrouping::Entry* entry =
- result_grouping->add_entry_groupings();
- result_grouping->set_max_results(1);
- entry->set_namespace_("namespace1");
- result_grouping = result_spec.add_result_groupings();
- result_grouping->set_max_results(2);
- entry = result_grouping->add_entry_groupings();
- entry->set_namespace_("namespace2");
- entry = result_grouping->add_entry_groupings();
- entry->set_namespace_("namespace3");
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, scoring_spec, result_spec);
-
- // The last result (document1) in namespace "namespace1" should not be
- // included. "namespace2" and "namespace3" are grouped together. So only the
- // two highest scored documents between the two (both of which are in
- // "namespace3") should be returned.
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document6;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document5;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
-
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchResultGroupingMultiSchemaGrouping) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
- PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED)))
- .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
- PropertyConfigBuilder()
- .SetName("name")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("sender")
- .SetDataTypeDocument(
- "Person",
- /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetScore(1)
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "foo")
- .AddDocumentProperty("sender", DocumentBuilder()
- .SetKey("namespace", "uri1-sender")
- .SetSchema("Person")
- .AddStringProperty("name", "foo")
- .Build())
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace1", "uri2")
- .SetSchema("Message")
- .SetScore(2)
- .SetCreationTimestampMs(10)
- .AddStringProperty("body", "fo")
- .Build();
- DocumentProto document3 = DocumentBuilder()
- .SetKey("namespace2", "uri3")
- .SetSchema("Message")
- .SetScore(3)
- .SetCreationTimestampMs(10)
- .AddStringProperty("body", "fo")
- .Build();
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
-
- // "f" will match all 3 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("f");
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
-
- ResultSpecProto result_spec;
- result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE);
- ResultSpecProto::ResultGrouping* result_grouping =
- result_spec.add_result_groupings();
- ResultSpecProto::ResultGrouping::Entry* entry =
- result_grouping->add_entry_groupings();
- result_grouping->set_max_results(1);
- entry->set_schema("Message");
- result_grouping = result_spec.add_result_groupings();
- result_grouping->set_max_results(1);
- entry = result_grouping->add_entry_groupings();
- entry->set_namespace_("Email");
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, scoring_spec, result_spec);
-
- // Each of the highest scored documents of schema type "Message" (document3)
- // and "Email" (document1) should be returned.
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
-
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchResultGroupingMultiNamespaceAndSchemaGrouping) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 3 documents and ensures the relationship in terms of document
- // score is: document1 < document2 < document3 < document4 < document5 <
- // document6
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document3 =
- DocumentBuilder()
- .SetKey("namespace2", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetScore(3)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document4 =
- DocumentBuilder()
- .SetKey("namespace2", "uri/4")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(4)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document5 =
- DocumentBuilder()
- .SetKey("namespace3", "uri/5")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetScore(5)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document6 =
- DocumentBuilder()
- .SetKey("namespace3", "uri/6")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(6)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document6).status(), ProtoIsOk());
-
- // "m" will match all 6 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
-
- ResultSpecProto result_spec;
- result_spec.set_result_group_type(ResultSpecProto::NAMESPACE_AND_SCHEMA_TYPE);
- ResultSpecProto::ResultGrouping* result_grouping =
- result_spec.add_result_groupings();
- ResultSpecProto::ResultGrouping::Entry* entry =
- result_grouping->add_entry_groupings();
- result_grouping->set_max_results(1);
- entry->set_namespace_("namespace1");
- entry->set_schema("Message");
- result_grouping = result_spec.add_result_groupings();
- result_grouping->set_max_results(1);
- entry = result_grouping->add_entry_groupings();
- entry->set_namespace_("namespace2");
- entry->set_schema("Message");
- result_grouping = result_spec.add_result_groupings();
- result_grouping->set_max_results(1);
- entry = result_grouping->add_entry_groupings();
- entry->set_namespace_("namespace3");
- entry->set_schema("Message");
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, scoring_spec, result_spec);
-
- // The three highest scored documents that fit the criteria of
- // "namespace1xMessage" (document2), "namespace2xMessage" (document4),
- // and "namespace3xMessage" (document6) should be returned.
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document6;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document4;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
-
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchResultGroupingNonexistentNamespaceShouldBeIgnored) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 2 documents and ensures the relationship in terms of document
- // score is: document1 < document2
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // "m" will match all 2 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
-
- ResultSpecProto result_spec;
- result_spec.set_result_group_type(ResultSpecProto::NAMESPACE);
- ResultSpecProto::ResultGrouping* result_grouping =
- result_spec.add_result_groupings();
- ResultSpecProto::ResultGrouping::Entry* entry =
- result_grouping->add_entry_groupings();
- result_grouping->set_max_results(1);
- entry->set_namespace_("namespace1");
- entry = result_grouping->add_entry_groupings();
- entry->set_namespace_("nonexistentNamespace");
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, scoring_spec, result_spec);
-
- // Only the top ranked document in "namespace" (document2), should be
- // returned. The presence of "nonexistentNamespace" in the same result
- // grouping should have no effect.
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
-
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchResultGroupingNonexistentSchemaShouldBeIgnored) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 2 documents and ensures the relationship in terms of document
- // score is: document1 < document2
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // "m" will match all 2 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
-
- ResultSpecProto result_spec;
- result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE);
- ResultSpecProto::ResultGrouping* result_grouping =
- result_spec.add_result_groupings();
- ResultSpecProto::ResultGrouping::Entry* entry =
- result_grouping->add_entry_groupings();
- result_grouping->set_max_results(1);
- entry->set_schema("Message");
- entry = result_grouping->add_entry_groupings();
- entry->set_schema("nonexistentMessage");
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, scoring_spec, result_spec);
-
- // Only the top ranked document in "Message" (document2), should be
- // returned. The presence of "nonexistentMessage" in the same result
- // grouping should have no effect.
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
-
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchResultGroupingNonexistentNamespaceAndSchemaShouldBeIgnored) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 2 documents and ensures the relationship in terms of document
- // score is: document1 < document2
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- DocumentProto document3 =
- DocumentBuilder()
- .SetKey("namespace2", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetScore(3)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- DocumentProto document4 =
- DocumentBuilder()
- .SetKey("namespace2", "uri/4")
- .SetSchema("Message")
- .AddStringProperty("body", "message4")
- .SetScore(4)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
-
- // "m" will match all 2 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
-
- ResultSpecProto result_spec;
- result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE);
- ResultSpecProto::ResultGrouping* result_grouping =
- result_spec.add_result_groupings();
- ResultSpecProto::ResultGrouping::Entry* entry =
- result_grouping->add_entry_groupings();
- result_grouping->set_max_results(1);
- entry->set_namespace_("namespace2");
- entry->set_schema("Message");
- entry = result_grouping->add_entry_groupings();
- entry->set_schema("namespace1");
- entry->set_schema("nonexistentMessage");
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, scoring_spec, result_spec);
-
- // Only the top ranked document in "namespace2xMessage" (document4), should be
- // returned. The presence of "namespace1xnonexistentMessage" in the same
- // result grouping should have no effect. If either the namespace or the
- // schema type is nonexistent, the entire entry will be ignored.
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document4;
-
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- SetSchemaCanNotDetectPreviousSchemaWasLostWithoutDocuments) {
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("Message");
-
- auto body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- // Make an incompatible schema, a previously OPTIONAL field is REQUIRED
- SchemaProto incompatible_schema = schema;
- incompatible_schema.mutable_types(0)->mutable_properties(0)->set_cardinality(
- PropertyConfigProto::Cardinality::REQUIRED);
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(GetSchemaDir().c_str()));
-
- // Since we don't have any documents yet, we can't detect this edge-case. But
- // it should be fine since there aren't any documents to be invalidated.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(incompatible_schema).status(), ProtoIsOk());
-}
-
-TEST_F(IcingSearchEngineTest, SetSchemaCanDetectPreviousSchemaWasLost) {
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("Message");
-
- auto body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- body->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- body->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- // Make an incompatible schema, a previously OPTIONAL field is REQUIRED
- SchemaProto incompatible_schema = schema;
- incompatible_schema.mutable_types(0)->mutable_properties(0)->set_cardinality(
- PropertyConfigProto::Cardinality::REQUIRED);
-
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- DocumentProto document = CreateMessageDocument("namespace", "uri");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- // Can retrieve by namespace/uri
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document;
-
- ASSERT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Can search for it
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(GetSchemaDir().c_str()));
-
- // Setting the new, different schema will remove incompatible documents
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(incompatible_schema).status(), ProtoIsOk());
-
- // Can't retrieve by namespace/uri
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, uri) not found.");
-
- EXPECT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Can't search for it
- SearchResultProto empty_result;
- empty_result.mutable_status()->set_code(StatusProto::OK);
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStatsAndScores(empty_result));
-}
-
TEST_F(IcingSearchEngineTest, ImplicitPersistToDiskFullSavesEverything) {
DocumentProto document = CreateMessageDocument("namespace", "uri");
{
@@ -6865,252 +1180,6 @@ TEST_F(IcingSearchEngineTest, ResetDeleteFailureCausesInternalError) {
ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
}
-TEST_F(IcingSearchEngineTest, SnippetNormalization) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", "MDI zurich Team Meeting")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "mdi Zürich Team Meeting")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.set_query("mdi Zürich");
-
- ResultSpecProto result_spec;
- result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(2);
- result_spec.mutable_snippet_spec()->set_num_to_snippet(2);
-
- SearchResultProto results =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(results.status(), ProtoIsOk());
- ASSERT_THAT(results.results(), SizeIs(2));
- const DocumentProto& result_document_1 = results.results(0).document();
- const SnippetProto& result_snippet_1 = results.results(0).snippet();
- EXPECT_THAT(result_document_1, EqualsProto(document_two));
- EXPECT_THAT(result_snippet_1.entries(), SizeIs(1));
- EXPECT_THAT(result_snippet_1.entries(0).property_name(), Eq("body"));
- std::string_view content = GetString(
- &result_document_1, result_snippet_1.entries(0).property_name());
- EXPECT_THAT(
- GetWindows(content, result_snippet_1.entries(0)),
- ElementsAre("mdi Zürich Team Meeting", "mdi Zürich Team Meeting"));
- EXPECT_THAT(GetMatches(content, result_snippet_1.entries(0)),
- ElementsAre("mdi", "Zürich"));
-
- const DocumentProto& result_document_2 = results.results(1).document();
- const SnippetProto& result_snippet_2 = results.results(1).snippet();
- EXPECT_THAT(result_document_2, EqualsProto(document_one));
- EXPECT_THAT(result_snippet_2.entries(), SizeIs(1));
- EXPECT_THAT(result_snippet_2.entries(0).property_name(), Eq("body"));
- content = GetString(&result_document_2,
- result_snippet_2.entries(0).property_name());
- EXPECT_THAT(
- GetWindows(content, result_snippet_2.entries(0)),
- ElementsAre("MDI zurich Team Meeting", "MDI zurich Team Meeting"));
- EXPECT_THAT(GetMatches(content, result_snippet_2.entries(0)),
- ElementsAre("MDI", "zurich"));
-}
-
-TEST_F(IcingSearchEngineTest, SnippetNormalizationPrefix) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", "MDI zurich Team Meeting")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "mdi Zürich Team Meeting")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("md Zür");
-
- ResultSpecProto result_spec;
- result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(2);
- result_spec.mutable_snippet_spec()->set_num_to_snippet(2);
-
- SearchResultProto results =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(results.status(), ProtoIsOk());
- ASSERT_THAT(results.results(), SizeIs(2));
- const DocumentProto& result_document_1 = results.results(0).document();
- const SnippetProto& result_snippet_1 = results.results(0).snippet();
- EXPECT_THAT(result_document_1, EqualsProto(document_two));
- EXPECT_THAT(result_snippet_1.entries(), SizeIs(1));
- EXPECT_THAT(result_snippet_1.entries(0).property_name(), Eq("body"));
- std::string_view content = GetString(
- &result_document_1, result_snippet_1.entries(0).property_name());
- EXPECT_THAT(
- GetWindows(content, result_snippet_1.entries(0)),
- ElementsAre("mdi Zürich Team Meeting", "mdi Zürich Team Meeting"));
- EXPECT_THAT(GetMatches(content, result_snippet_1.entries(0)),
- ElementsAre("mdi", "Zürich"));
-
- const DocumentProto& result_document_2 = results.results(1).document();
- const SnippetProto& result_snippet_2 = results.results(1).snippet();
- EXPECT_THAT(result_document_2, EqualsProto(document_one));
- EXPECT_THAT(result_snippet_2.entries(), SizeIs(1));
- EXPECT_THAT(result_snippet_2.entries(0).property_name(), Eq("body"));
- content = GetString(&result_document_2,
- result_snippet_2.entries(0).property_name());
- EXPECT_THAT(
- GetWindows(content, result_snippet_2.entries(0)),
- ElementsAre("MDI zurich Team Meeting", "MDI zurich Team Meeting"));
- EXPECT_THAT(GetMatches(content, result_snippet_2.entries(0)),
- ElementsAre("MDI", "zurich"));
-}
-
-TEST_F(IcingSearchEngineTest, SnippetSectionRestrict) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
-
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Email")
- .AddStringProperty("subject", "MDI zurich Team Meeting")
- .AddStringProperty("body", "MDI zurich Team Meeting")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Email")
- .AddStringProperty("subject", "MDI zurich trip")
- .AddStringProperty("body", "Let's travel to zurich")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- auto search_spec = std::make_unique<SearchSpecProto>();
- search_spec->set_term_match_type(TermMatchType::PREFIX);
- search_spec->set_query("body:Zür");
-
- auto result_spec = std::make_unique<ResultSpecProto>();
- result_spec->set_num_per_page(1);
- result_spec->mutable_snippet_spec()->set_max_window_utf32_length(64);
- result_spec->mutable_snippet_spec()->set_num_matches_per_property(10);
- result_spec->mutable_snippet_spec()->set_num_to_snippet(10);
-
- auto scoring_spec = std::make_unique<ScoringSpecProto>();
- *scoring_spec = GetDefaultScoringSpec();
-
- SearchResultProto results =
- icing.Search(*search_spec, *scoring_spec, *result_spec);
- EXPECT_THAT(results.status(), ProtoIsOk());
- ASSERT_THAT(results.results(), SizeIs(1));
-
- const DocumentProto& result_document_two = results.results(0).document();
- const SnippetProto& result_snippet_two = results.results(0).snippet();
- EXPECT_THAT(result_document_two, EqualsProto(document_two));
- EXPECT_THAT(result_snippet_two.entries(), SizeIs(1));
- EXPECT_THAT(result_snippet_two.entries(0).property_name(), Eq("body"));
- std::string_view content = GetString(
- &result_document_two, result_snippet_two.entries(0).property_name());
- EXPECT_THAT(GetWindows(content, result_snippet_two.entries(0)),
- ElementsAre("Let's travel to zurich"));
- EXPECT_THAT(GetMatches(content, result_snippet_two.entries(0)),
- ElementsAre("zurich"));
-
- search_spec.reset();
- scoring_spec.reset();
- result_spec.reset();
-
- results = icing.GetNextPage(results.next_page_token());
- EXPECT_THAT(results.status(), ProtoIsOk());
- ASSERT_THAT(results.results(), SizeIs(1));
-
- const DocumentProto& result_document_one = results.results(0).document();
- const SnippetProto& result_snippet_one = results.results(0).snippet();
- EXPECT_THAT(result_document_one, EqualsProto(document_one));
- EXPECT_THAT(result_snippet_one.entries(), SizeIs(1));
- EXPECT_THAT(result_snippet_one.entries(0).property_name(), Eq("body"));
- content = GetString(&result_document_one,
- result_snippet_one.entries(0).property_name());
- EXPECT_THAT(GetWindows(content, result_snippet_one.entries(0)),
- ElementsAre("MDI zurich Team Meeting"));
- EXPECT_THAT(GetMatches(content, result_snippet_one.entries(0)),
- ElementsAre("zurich"));
-}
-
-TEST_F(IcingSearchEngineTest, UninitializedInstanceFailsSafely) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
-
- SchemaProto email_schema = CreateMessageSchema();
- EXPECT_THAT(icing.SetSchema(email_schema).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.GetSchema().status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.GetSchemaType(email_schema.types(0).schema_type()).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
-
- DocumentProto doc = CreateMessageDocument("namespace", "uri");
- EXPECT_THAT(icing.Put(doc).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing
- .Get(doc.namespace_(), doc.uri(),
- GetResultSpecProto::default_instance())
- .status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.Delete(doc.namespace_(), doc.uri()).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.DeleteByNamespace(doc.namespace_()).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.DeleteBySchemaType(email_schema.types(0).schema_type())
- .status()
- .code(),
- Eq(StatusProto::FAILED_PRECONDITION));
-
- SearchSpecProto search_spec = SearchSpecProto::default_instance();
- ScoringSpecProto scoring_spec = ScoringSpecProto::default_instance();
- ResultSpecProto result_spec = ResultSpecProto::default_instance();
- EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- constexpr int kSomePageToken = 12;
- EXPECT_THAT(icing.GetNextPage(kSomePageToken).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- icing.InvalidateNextPageToken(kSomePageToken); // Verify this doesn't crash.
-
- EXPECT_THAT(icing.PersistToDisk(PersistType::FULL).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.Optimize().status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
-}
-
TEST_F(IcingSearchEngineTest, GetAllNamespaces) {
DocumentProto namespace1 = DocumentBuilder()
.SetKey("namespace1", "uri")
@@ -7210,1551 +1279,6 @@ TEST_F(IcingSearchEngineTest, GetAllNamespaces) {
}
}
-TEST_F(IcingSearchEngineTest, Hyphens) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_schema_type("MyType");
- PropertyConfigProto* prop = type->add_properties();
- prop->set_property_name("foo");
- prop->set_data_type(PropertyConfigProto::DataType::STRING);
- prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- prop->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- prop->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("MyType")
- .AddStringProperty("foo", "foo bar-baz bat")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("MyType")
- .AddStringProperty("foo", "bar for baz bat-man")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.set_query("foo:bar-baz");
-
- ResultSpecProto result_spec;
- SearchResultProto results =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
-
- EXPECT_THAT(results.status(), ProtoIsOk());
- ASSERT_THAT(results.results(), SizeIs(2));
- EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
- EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
-}
-
-TEST_F(IcingSearchEngineTest, RestoreIndex) {
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", kIpsumText)
- .Build();
- // 1. Create an index with a LiteIndex that will only allow one document
- // before needing a merge.
- {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(document.ByteSizeLong());
- IcingSearchEngine icing(options, GetTestJniCache());
-
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Add two documents. These should get merged into the main index.
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = DocumentBuilder(document).SetUri("fake_type/1").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- // Add one document. This one should get remain in the lite index.
- document = DocumentBuilder(document).SetUri("fake_type/2").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- }
-
- // 2. Delete the index file to trigger RestoreIndexIfNeeded.
- std::string idx_subdir = GetIndexDir() + "/idx";
- filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str());
-
- // 3. Create the index again. This should trigger index restoration.
- {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(document.ByteSizeLong());
- IcingSearchEngine icing(options, GetTestJniCache());
-
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_query("consectetur");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- SearchResultProto results =
- icing.Search(search_spec, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(results.status(), ProtoIsOk());
- EXPECT_THAT(results.next_page_token(), Eq(0));
- // All documents should be retrievable.
- ASSERT_THAT(results.results(), SizeIs(3));
- EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/2"));
- EXPECT_THAT(results.results(1).document().uri(), Eq("fake_type/1"));
- EXPECT_THAT(results.results(2).document().uri(), Eq("fake_type/0"));
- }
-}
-
-TEST_F(IcingSearchEngineTest, RestoreIndexLoseLiteIndex) {
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", kIpsumText)
- .Build();
- // 1. Create an index with a LiteIndex that will only allow one document
- // before needing a merge.
- {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(document.ByteSizeLong());
- IcingSearchEngine icing(options, GetTestJniCache());
-
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Add two documents. These should get merged into the main index.
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = DocumentBuilder(document).SetUri("fake_type/1").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- // Add one document. This one should get remain in the lite index.
- document = DocumentBuilder(document).SetUri("fake_type/2").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- }
-
- // 2. Delete the last document from the document log
- {
- const std::string document_log_file = absl_ports::StrCat(
- GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
- filesystem()->DeleteFile(document_log_file.c_str());
- ICING_ASSERT_OK_AND_ASSIGN(
- auto create_result,
- PortableFileBackedProtoLog<DocumentWrapper>::Create(
- filesystem(), document_log_file.c_str(),
- PortableFileBackedProtoLog<DocumentWrapper>::Options(
- /*compress_in=*/true)));
- std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log =
- std::move(create_result.proto_log);
-
- document = DocumentBuilder(document).SetUri("fake_type/0").Build();
- DocumentWrapper wrapper;
- *wrapper.mutable_document() = document;
- ASSERT_THAT(document_log->WriteProto(wrapper), IsOk());
-
- document = DocumentBuilder(document).SetUri("fake_type/1").Build();
- *wrapper.mutable_document() = document;
- ASSERT_THAT(document_log->WriteProto(wrapper), IsOk());
- }
-
- // 3. Create the index again. This should throw out the lite index and trigger
- // index restoration which will only restore the two documents in the main
- // index.
- {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(document.ByteSizeLong());
- IcingSearchEngine icing(options, GetTestJniCache());
-
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_query("consectetur");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- SearchResultProto results =
- icing.Search(search_spec, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(results.status(), ProtoIsOk());
- EXPECT_THAT(results.next_page_token(), Eq(0));
- // Only the documents that were in the main index should be retrievable.
- ASSERT_THAT(results.results(), SizeIs(2));
- EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/1"));
- EXPECT_THAT(results.results(1).document().uri(), Eq("fake_type/0"));
- }
-}
-
-TEST_F(IcingSearchEngineTest, RestoreIndexLoseIndex) {
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", kIpsumText)
- .Build();
- // 1. Create an index with a LiteIndex that will only allow one document
- // before needing a merge.
- {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(document.ByteSizeLong());
- IcingSearchEngine icing(options, GetTestJniCache());
-
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Add two documents. These should get merged into the main index.
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = DocumentBuilder(document).SetUri("fake_type/1").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- // Add one document. This one should get remain in the lite index.
- document = DocumentBuilder(document).SetUri("fake_type/2").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- }
-
- // 2. Delete the last two documents from the document log.
- {
- const std::string document_log_file = absl_ports::StrCat(
- GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
- filesystem()->DeleteFile(document_log_file.c_str());
- ICING_ASSERT_OK_AND_ASSIGN(
- auto create_result,
- PortableFileBackedProtoLog<DocumentWrapper>::Create(
- filesystem(), document_log_file.c_str(),
- PortableFileBackedProtoLog<DocumentWrapper>::Options(
- /*compress_in=*/true)));
- std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log =
- std::move(create_result.proto_log);
-
- document = DocumentBuilder(document).SetUri("fake_type/0").Build();
- DocumentWrapper wrapper;
- *wrapper.mutable_document() = document;
- ASSERT_THAT(document_log->WriteProto(wrapper), IsOk());
- }
-
- // 3. Create the index again. This should throw out the lite and main index
- // and trigger index restoration.
- {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(document.ByteSizeLong());
- IcingSearchEngine icing(options, GetTestJniCache());
-
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_query("consectetur");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- SearchResultProto results =
- icing.Search(search_spec, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(results.status(), ProtoIsOk());
- EXPECT_THAT(results.next_page_token(), Eq(0));
- // Only the first document should be retrievable.
- ASSERT_THAT(results.results(), SizeIs(1));
- EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/0"));
- }
-}
-
-TEST_F(IcingSearchEngineTest,
- DocumentWithNoIndexedContentDoesntCauseRestoreIndex) {
- // 1. Create an index with a single document in it that has no indexed
- // content.
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Set a schema for a single type that has no indexed properties.
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
- PropertyConfigBuilder()
- .SetName("unindexedField")
- .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
- .SetCardinality(CARDINALITY_REQUIRED)))
- .Build();
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- // Add a document that contains no indexed content.
- DocumentProto document =
- DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("unindexedField",
- "Don't you dare search over this!")
- .Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- }
-
- // 2. Create the index again. This should NOT trigger a recovery of any kind.
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto init_result = icing.Initialize();
- EXPECT_THAT(init_result.status(), ProtoIsOk());
- EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
- Eq(InitializeStatsProto::NO_DATA_LOSS));
- EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::NONE));
- }
-}
-
-TEST_F(IcingSearchEngineTest,
- DocumentWithNoValidIndexedContentDoesntCauseRestoreIndex) {
- // 1. Create an index with a single document in it that has no valid indexed
- // tokens in its content.
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Set a schema for a single type that has no indexed properties.
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Add a document that contains no valid indexed content - just punctuation.
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", "?...!")
- .Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- }
-
- // 2. Create the index again. This should NOT trigger a recovery of any kind.
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto init_result = icing.Initialize();
- EXPECT_THAT(init_result.status(), ProtoIsOk());
- EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
- Eq(InitializeStatsProto::NO_DATA_LOSS));
- EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::NONE));
- }
-}
-
-TEST_F(IcingSearchEngineTest, IndexingDocMergeFailureResets) {
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", kIpsumText)
- .Build();
- // 1. Create an index with a LiteIndex that will only allow one document
- // before needing a merge.
- {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(document.ByteSizeLong());
- IcingSearchEngine icing(options, GetTestJniCache());
-
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Add two documents. These should get merged into the main index.
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = DocumentBuilder(document).SetUri("fake_type/1").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- // Add one document. This one should get remain in the lite index.
- document = DocumentBuilder(document).SetUri("fake_type/2").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- }
-
- // 2. Delete the index file to trigger RestoreIndexIfNeeded.
- std::string idx_subdir = GetIndexDir() + "/idx";
- filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str());
-
- // 3. Setup a mock filesystem to fail to grow the main index once.
- bool has_failed_already = false;
- auto open_write_lambda = [this, &has_failed_already](const char* filename) {
- std::string main_lexicon_suffix = "/main-lexicon.prop.2";
- std::string filename_string(filename);
- if (!has_failed_already &&
- filename_string.length() >= main_lexicon_suffix.length() &&
- filename_string.substr(
- filename_string.length() - main_lexicon_suffix.length(),
- main_lexicon_suffix.length()) == main_lexicon_suffix) {
- has_failed_already = true;
- return -1;
- }
- return this->filesystem()->OpenForWrite(filename);
- };
- auto mock_icing_filesystem = std::make_unique<IcingMockFilesystem>();
- ON_CALL(*mock_icing_filesystem, OpenForWrite)
- .WillByDefault(open_write_lambda);
-
- // 4. Create the index again. This should trigger index restoration.
- {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(document.ByteSizeLong());
- TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
- std::move(mock_icing_filesystem),
- std::make_unique<FakeClock>(),
- GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(),
- ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
-
- SearchSpecProto search_spec;
- search_spec.set_query("consectetur");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- SearchResultProto results =
- icing.Search(search_spec, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(results.status(), ProtoIsOk());
- EXPECT_THAT(results.next_page_token(), Eq(0));
- // Only the last document that was added should still be retrievable.
- ASSERT_THAT(results.results(), SizeIs(1));
- EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/2"));
- }
-}
-
-TEST_F(IcingSearchEngineTest, InitializeShouldLogFunctionLatency) {
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.initialize_stats().latency_ms(), Eq(10));
-}
-
-TEST_F(IcingSearchEngineTest, InitializeShouldLogNumberOfDocuments) {
- DocumentProto document1 = DocumentBuilder()
- .SetKey("icing", "fake_type/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("icing", "fake_type/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .Build();
-
- {
- // Initialize and put a document.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(),
- Eq(0));
-
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- }
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(),
- Eq(1));
-
- // Put another document.
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- }
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(),
- Eq(2));
- }
-}
-
-TEST_F(IcingSearchEngineTest,
- InitializeShouldNotLogRecoveryCauseForFirstTimeInitialize) {
- // Even though the fake timer will return 10, all the latency numbers related
- // to recovery / restoration should be 0 during the first-time initialization.
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_latency_ms(),
- Eq(0));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().document_store_data_status(),
- Eq(InitializeStatsProto::NO_DATA_LOSS));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
- Eq(0));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_latency_ms(),
- Eq(0));
-}
-
-TEST_F(IcingSearchEngineTest, InitializeShouldLogRecoveryCausePartialDataLoss) {
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .Build();
-
- {
- // Initialize and put a document.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- }
-
- {
- // Append a non-checksummed document. This will mess up the checksum of the
- // proto log, forcing it to rewind and later return a DATA_LOSS error.
- const std::string serialized_document = document.SerializeAsString();
- const std::string document_log_file = absl_ports::StrCat(
- GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
-
- int64_t file_size = filesystem()->GetFileSize(document_log_file.c_str());
- filesystem()->PWrite(document_log_file.c_str(), file_size,
- serialized_document.data(),
- serialized_document.size());
- }
-
- {
- // Document store will rewind to previous checkpoint. The cause should be
- // DATA_LOSS and the data status should be PARTIAL_LOSS.
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_cause(),
- Eq(InitializeStatsProto::DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_latency_ms(),
- Eq(10));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().document_store_data_status(),
- Eq(InitializeStatsProto::PARTIAL_LOSS));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .index_restoration_latency_ms(),
- Eq(0));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_latency_ms(),
- Eq(0));
- }
-}
-
-TEST_F(IcingSearchEngineTest,
- InitializeShouldLogRecoveryCauseCompleteDataLoss) {
- DocumentProto document1 = DocumentBuilder()
- .SetKey("icing", "fake_type/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .Build();
-
- const std::string document_log_file = absl_ports::StrCat(
- GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
- int64_t corruptible_offset;
-
- {
- // Initialize and put a document.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
-
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // There's some space at the beginning of the file (e.g. header, kmagic,
- // etc) that is necessary to initialize the FileBackedProtoLog. We can't
- // corrupt that region, so we need to figure out the offset at which
- // documents will be written to - which is the file size after
- // initialization.
- corruptible_offset = filesystem()->GetFileSize(document_log_file.c_str());
-
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
- }
-
- {
- // "Corrupt" the content written in the log. Make the corrupt document
- // smaller than our original one so we don't accidentally write past our
- // file.
- DocumentProto document =
- DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build();
- std::string serialized_document = document.SerializeAsString();
- ASSERT_TRUE(filesystem()->PWrite(
- document_log_file.c_str(), corruptible_offset,
- serialized_document.data(), serialized_document.size()));
-
- PortableFileBackedProtoLog<DocumentWrapper>::Header header =
- ReadDocumentLogHeader(*filesystem(), document_log_file);
-
- // Set dirty bit to true to reflect that something changed in the log.
- header.SetDirtyFlag(true);
- header.SetHeaderChecksum(header.CalculateHeaderChecksum());
-
- WriteDocumentLogHeader(*filesystem(), document_log_file, header);
- }
-
- {
- // Document store will completely rewind. The cause should be DATA_LOSS and
- // the data status should be COMPLETE_LOSS.
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_cause(),
- Eq(InitializeStatsProto::DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_latency_ms(),
- Eq(10));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().document_store_data_status(),
- Eq(InitializeStatsProto::COMPLETE_LOSS));
- // The complete rewind of ground truth causes us to clear the index, but
- // that's not considered a restoration.
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .index_restoration_latency_ms(),
- Eq(0));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_latency_ms(),
- Eq(0));
- }
-}
-
-TEST_F(IcingSearchEngineTest,
- InitializeShouldLogRecoveryCauseInconsistentWithGroundTruth) {
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .Build();
- {
- // Initialize and put a document.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- }
-
- {
- // Delete the index file to trigger RestoreIndexIfNeeded.
- std::string idx_subdir = GetIndexDir() + "/idx";
- filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str());
- }
-
- {
- // Index is empty but ground truth is not. Index should be restored due to
- // the inconsistency.
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .index_restoration_latency_ms(),
- Eq(10));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_latency_ms(),
- Eq(0));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().document_store_data_status(),
- Eq(InitializeStatsProto::NO_DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_latency_ms(),
- Eq(0));
- }
-}
-
-TEST_F(IcingSearchEngineTest,
- InitializeShouldLogRecoveryCauseSchemaChangesOutofSync) {
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .Build();
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- {
- // Initialize and put one document.
- IcingSearchEngine icing(options, GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- }
-
- {
- // Simulate a schema change where power is lost after the schema is written.
- SchemaProto new_schema =
- SchemaBuilder()
- .AddType(
- SchemaTypeConfigBuilder()
- .SetType("Message")
- .AddProperty(PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED))
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
- // Write the marker file
- std::string marker_filepath =
- absl_ports::StrCat(options.base_dir(), "/set_schema_marker");
- ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str()));
- ASSERT_TRUE(sfd.is_valid());
-
- // Write the new schema
- FakeClock fake_clock;
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
- ICING_EXPECT_OK(schema_store->SetSchema(new_schema));
- }
-
- {
- // Both document store and index should be recovered from checksum mismatch.
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .index_restoration_latency_ms(),
- Eq(10));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_cause(),
- Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_latency_ms(),
- Eq(10));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().document_store_data_status(),
- Eq(InitializeStatsProto::NO_DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_latency_ms(),
- Eq(0));
- }
-
- {
- // No recovery should be needed.
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .index_restoration_latency_ms(),
- Eq(0));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_latency_ms(),
- Eq(0));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().document_store_data_status(),
- Eq(InitializeStatsProto::NO_DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_latency_ms(),
- Eq(0));
- }
-}
-
-TEST_F(IcingSearchEngineTest, InitializeShouldLogRecoveryCauseIndexIOError) {
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .Build();
- {
- // Initialize and put one document.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- }
-
- // lambda to fail OpenForWrite on lite index hit buffer once.
- bool has_failed_already = false;
- auto open_write_lambda = [this, &has_failed_already](const char* filename) {
- std::string lite_index_buffer_file_path =
- absl_ports::StrCat(GetIndexDir(), "/idx/lite.hb");
- std::string filename_string(filename);
- if (!has_failed_already && filename_string == lite_index_buffer_file_path) {
- has_failed_already = true;
- return -1;
- }
- return this->filesystem()->OpenForWrite(filename);
- };
-
- auto mock_icing_filesystem = std::make_unique<IcingMockFilesystem>();
- // This fails Index::Create() once.
- ON_CALL(*mock_icing_filesystem, OpenForWrite)
- .WillByDefault(open_write_lambda);
-
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::move(mock_icing_filesystem),
- std::move(fake_clock), GetTestJniCache());
-
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::IO_ERROR));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
- Eq(10));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_latency_ms(),
- Eq(0));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().document_store_data_status(),
- Eq(InitializeStatsProto::NO_DATA_LOSS));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_latency_ms(),
- Eq(0));
-}
-
-TEST_F(IcingSearchEngineTest, InitializeShouldLogRecoveryCauseDocStoreIOError) {
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .Build();
- {
- // Initialize and put one document.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- }
-
- // lambda to fail Read on document store header once.
- bool has_failed_already = false;
- auto read_lambda = [this, &has_failed_already](const char* filename,
- void* buf, size_t buf_size) {
- std::string document_store_header_file_path =
- absl_ports::StrCat(GetDocumentDir(), "/document_store_header");
- std::string filename_string(filename);
- if (!has_failed_already &&
- filename_string == document_store_header_file_path) {
- has_failed_already = true;
- return false;
- }
- return this->filesystem()->Read(filename, buf, buf_size);
- };
-
- auto mock_filesystem = std::make_unique<MockFilesystem>();
- // This fails DocumentStore::InitializeDerivedFiles() once.
- ON_CALL(*mock_filesystem, Read(A<const char*>(), _, _))
- .WillByDefault(read_lambda);
-
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::move(mock_filesystem),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
-
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_cause(),
- Eq(InitializeStatsProto::IO_ERROR));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_latency_ms(),
- Eq(10));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().document_store_data_status(),
- Eq(InitializeStatsProto::NO_DATA_LOSS));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
- Eq(0));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_latency_ms(),
- Eq(0));
-}
-
-TEST_F(IcingSearchEngineTest,
- InitializeShouldLogRecoveryCauseSchemaStoreIOError) {
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- }
-
- {
- // Delete the schema store header file to trigger an I/O error.
- std::string schema_store_header_file_path =
- GetSchemaDir() + "/schema_store_header";
- filesystem()->DeleteFile(schema_store_header_file_path.c_str());
- }
-
- {
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_cause(),
- Eq(InitializeStatsProto::IO_ERROR));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_latency_ms(),
- Eq(10));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_latency_ms(),
- Eq(0));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().document_store_data_status(),
- Eq(InitializeStatsProto::NO_DATA_LOSS));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .index_restoration_latency_ms(),
- Eq(0));
- }
-}
-
-TEST_F(IcingSearchEngineTest, InitializeShouldLogNumberOfSchemaTypes) {
- {
- // Initialize an empty storage.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- // There should be 0 schema types.
- EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(),
- Eq(0));
-
- // Set a schema with one type config.
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- }
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- // There should be 1 schema type.
- EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(),
- Eq(1));
-
- // Create and set a schema with two type configs: Email and Message.
- SchemaProto schema = CreateEmailSchema();
-
- auto type = schema.add_types();
- type->set_schema_type("Message");
- auto body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- body->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- }
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(),
- Eq(2));
- }
-}
-
-TEST_F(IcingSearchEngineTest, PutDocumentShouldLogFunctionLatency) {
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .Build();
-
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- PutResultProto put_result_proto = icing.Put(document);
- EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(put_result_proto.put_document_stats().latency_ms(), Eq(10));
-}
-
-TEST_F(IcingSearchEngineTest, PutDocumentShouldLogDocumentStoreStats) {
- DocumentProto document =
- DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .AddStringProperty("body", "message body")
- .Build();
-
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- PutResultProto put_result_proto = icing.Put(document);
- EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(put_result_proto.put_document_stats().document_store_latency_ms(),
- Eq(10));
- size_t document_size = put_result_proto.put_document_stats().document_size();
- EXPECT_THAT(document_size, Ge(document.ByteSizeLong()));
- EXPECT_THAT(document_size, Le(document.ByteSizeLong() +
- sizeof(DocumentProto::InternalFields)));
-}
-
-TEST_F(IcingSearchEngineTest, PutDocumentShouldLogIndexingStats) {
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .Build();
-
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- PutResultProto put_result_proto = icing.Put(document);
- EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(put_result_proto.put_document_stats().index_latency_ms(), Eq(10));
- // No merge should happen.
- EXPECT_THAT(put_result_proto.put_document_stats().index_merge_latency_ms(),
- Eq(0));
- // The input document has 2 tokens.
- EXPECT_THAT(put_result_proto.put_document_stats()
- .tokenization_stats()
- .num_tokens_indexed(),
- Eq(2));
-}
-
-TEST_F(IcingSearchEngineTest, PutDocumentShouldLogIndexMergeLatency) {
- DocumentProto document1 = DocumentBuilder()
- .SetKey("icing", "fake_type/1")
- .SetSchema("Message")
- .AddStringProperty("body", kIpsumText)
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("icing", "fake_type/2")
- .SetSchema("Message")
- .AddStringProperty("body", kIpsumText)
- .Build();
-
- // Create an icing instance with index_merge_size = document1's size.
- IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
- icing_options.set_index_merge_size(document1.ByteSizeLong());
-
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(icing_options, std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
-
- // Putting document2 should trigger an index merge.
- PutResultProto put_result_proto = icing.Put(document2);
- EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(put_result_proto.put_document_stats().index_merge_latency_ms(),
- Eq(10));
-}
-
-TEST_F(IcingSearchEngineTest, SearchWithProjectionEmptyFieldPath) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- // 1. Add two email documents
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty(
- "sender",
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Person")
- .AddStringProperty("name", "Meg Ryan")
- .AddStringProperty("emailAddress", "shopgirl@aol.com")
- .Build())
- .AddStringProperty("subject", "Hello World!")
- .AddStringProperty(
- "body", "Oh what a beautiful morning! Oh what a beautiful day!")
- .Build();
- ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty(
- "sender", DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Person")
- .AddStringProperty("name", "Tom Hanks")
- .AddStringProperty("emailAddress", "ny152@aol.com")
- .Build())
- .AddStringProperty("subject", "Goodnight Moon!")
- .AddStringProperty("body",
- "Count all the sheep and tell them 'Hello'.")
- .Build();
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- // 2. Issue a query that will match those documents and use an empty field
- // mask to request NO properties.
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("hello");
-
- ResultSpecProto result_spec;
- // Retrieve only one result at a time to make sure that projection works when
- // retrieving all pages.
- result_spec.set_num_per_page(1);
- TypePropertyMask* email_field_mask = result_spec.add_type_property_masks();
- email_field_mask->set_schema_type("Email");
- email_field_mask->add_paths("");
-
- SearchResultProto results =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(results.status(), ProtoIsOk());
- EXPECT_THAT(results.results(), SizeIs(1));
-
- // 3. Verify that the returned results contain no properties.
- DocumentProto projected_document_two = DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .Build();
- EXPECT_THAT(results.results(0).document(),
- EqualsProto(projected_document_two));
-
- results = icing.GetNextPage(results.next_page_token());
- EXPECT_THAT(results.status(), ProtoIsOk());
- EXPECT_THAT(results.results(), SizeIs(1));
- DocumentProto projected_document_one = DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .Build();
- EXPECT_THAT(results.results(0).document(),
- EqualsProto(projected_document_one));
-}
-
-TEST_F(IcingSearchEngineTest, SearchWithProjectionMultipleFieldPaths) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- // 1. Add two email documents
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty(
- "sender",
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Person")
- .AddStringProperty("name", "Meg Ryan")
- .AddStringProperty("emailAddress", "shopgirl@aol.com")
- .Build())
- .AddStringProperty("subject", "Hello World!")
- .AddStringProperty(
- "body", "Oh what a beautiful morning! Oh what a beautiful day!")
- .Build();
- ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty(
- "sender", DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Person")
- .AddStringProperty("name", "Tom Hanks")
- .AddStringProperty("emailAddress", "ny152@aol.com")
- .Build())
- .AddStringProperty("subject", "Goodnight Moon!")
- .AddStringProperty("body",
- "Count all the sheep and tell them 'Hello'.")
- .Build();
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- // 2. Issue a query that will match those documents and request only
- // 'sender.name' and 'subject' properties.
- // Create all of search_spec, result_spec and scoring_spec as objects with
- // scope that will end before the call to GetNextPage to ensure that the
- // implementation isn't relying on references to any of them.
- auto search_spec = std::make_unique<SearchSpecProto>();
- search_spec->set_term_match_type(TermMatchType::PREFIX);
- search_spec->set_query("hello");
-
- auto result_spec = std::make_unique<ResultSpecProto>();
- // Retrieve only one result at a time to make sure that projection works when
- // retrieving all pages.
- result_spec->set_num_per_page(1);
- TypePropertyMask* email_field_mask = result_spec->add_type_property_masks();
- email_field_mask->set_schema_type("Email");
- email_field_mask->add_paths("sender.name");
- email_field_mask->add_paths("subject");
-
- auto scoring_spec = std::make_unique<ScoringSpecProto>();
- *scoring_spec = GetDefaultScoringSpec();
- SearchResultProto results =
- icing.Search(*search_spec, *scoring_spec, *result_spec);
- EXPECT_THAT(results.status(), ProtoIsOk());
- EXPECT_THAT(results.results(), SizeIs(1));
-
- // 3. Verify that the first returned result only contains the 'sender.name'
- // property.
- DocumentProto projected_document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty("sender",
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Person")
- .AddStringProperty("name", "Tom Hanks")
- .Build())
- .AddStringProperty("subject", "Goodnight Moon!")
- .Build();
- EXPECT_THAT(results.results(0).document(),
- EqualsProto(projected_document_two));
-
- // 4. Now, delete all of the specs used in the search. GetNextPage should have
- // no problem because it shouldn't be keeping any references to them.
- search_spec.reset();
- result_spec.reset();
- scoring_spec.reset();
-
- // 5. Verify that the second returned result only contains the 'sender.name'
- // property.
- results = icing.GetNextPage(results.next_page_token());
- EXPECT_THAT(results.status(), ProtoIsOk());
- EXPECT_THAT(results.results(), SizeIs(1));
- DocumentProto projected_document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty("sender",
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Person")
- .AddStringProperty("name", "Meg Ryan")
- .Build())
- .AddStringProperty("subject", "Hello World!")
- .Build();
- EXPECT_THAT(results.results(0).document(),
- EqualsProto(projected_document_one));
-}
-
-TEST_F(IcingSearchEngineTest, QueryStatsProtoTest) {
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(5);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates and inserts 5 documents
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
- DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
- DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.add_namespace_filters("namespace");
- search_spec.add_schema_type_filters(document1.schema());
- search_spec.set_query("message");
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(2);
- result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(1);
- result_spec.mutable_snippet_spec()->set_num_to_snippet(3);
-
- ScoringSpecProto scoring_spec;
- scoring_spec.set_rank_by(
- ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
-
- // Searches and gets the first page, 2 results with 2 snippets
- SearchResultProto search_result =
- icing.Search(search_spec, scoring_spec, result_spec);
- ASSERT_THAT(search_result.status(), ProtoIsOk());
- ASSERT_THAT(search_result.results(), SizeIs(2));
- ASSERT_THAT(search_result.next_page_token(), Ne(kInvalidNextPageToken));
-
- // Check the stats
- QueryStatsProto exp_stats;
- exp_stats.set_query_length(7);
- exp_stats.set_num_terms(1);
- exp_stats.set_num_namespaces_filtered(1);
- exp_stats.set_num_schema_types_filtered(1);
- exp_stats.set_ranking_strategy(
- ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
- exp_stats.set_is_first_page(true);
- exp_stats.set_requested_page_size(2);
- exp_stats.set_num_results_returned_current_page(2);
- exp_stats.set_num_documents_scored(5);
- exp_stats.set_num_results_with_snippets(2);
- exp_stats.set_latency_ms(5);
- exp_stats.set_parse_query_latency_ms(5);
- exp_stats.set_scoring_latency_ms(5);
- exp_stats.set_ranking_latency_ms(5);
- exp_stats.set_document_retrieval_latency_ms(5);
- exp_stats.set_lock_acquisition_latency_ms(5);
- EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
-
- // Second page, 2 result with 1 snippet
- search_result = icing.GetNextPage(search_result.next_page_token());
- ASSERT_THAT(search_result.status(), ProtoIsOk());
- ASSERT_THAT(search_result.results(), SizeIs(2));
- ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
-
- exp_stats = QueryStatsProto();
- exp_stats.set_is_first_page(false);
- exp_stats.set_requested_page_size(2);
- exp_stats.set_num_results_returned_current_page(2);
- exp_stats.set_num_results_with_snippets(1);
- exp_stats.set_latency_ms(5);
- exp_stats.set_document_retrieval_latency_ms(5);
- exp_stats.set_lock_acquisition_latency_ms(5);
- EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
-
- // Third page, 1 result with 0 snippets
- search_result = icing.GetNextPage(search_result.next_page_token());
- ASSERT_THAT(search_result.status(), ProtoIsOk());
- ASSERT_THAT(search_result.results(), SizeIs(1));
- ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken));
-
- exp_stats = QueryStatsProto();
- exp_stats.set_is_first_page(false);
- exp_stats.set_requested_page_size(2);
- exp_stats.set_num_results_returned_current_page(1);
- exp_stats.set_num_results_with_snippets(0);
- exp_stats.set_latency_ms(5);
- exp_stats.set_document_retrieval_latency_ms(5);
- exp_stats.set_lock_acquisition_latency_ms(5);
- EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
-}
-
-TEST_F(IcingSearchEngineTest, OptimizeStatsProtoTest) {
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(5);
- fake_clock->SetSystemTimeMilliseconds(10000);
- auto icing = std::make_unique<TestIcingSearchEngine>(
- GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(), std::move(fake_clock),
- GetTestJniCache());
- ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing->SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Create three documents.
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- document2.set_creation_timestamp_ms(9000);
- document2.set_ttl_ms(500);
- DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
- ASSERT_THAT(icing->Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing->Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing->Put(document3).status(), ProtoIsOk());
-
- // Delete the first document.
- ASSERT_THAT(icing->Delete(document1.namespace_(), document1.uri()).status(),
- ProtoIsOk());
- ASSERT_THAT(icing->PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
-
- OptimizeStatsProto expected;
- expected.set_latency_ms(5);
- expected.set_document_store_optimize_latency_ms(5);
- expected.set_index_restoration_latency_ms(5);
- expected.set_num_original_documents(3);
- expected.set_num_deleted_documents(1);
- expected.set_num_expired_documents(1);
- expected.set_index_restoration_mode(OptimizeStatsProto::INDEX_TRANSLATION);
-
- // Run Optimize
- OptimizeResultProto result = icing->Optimize();
- // Depending on how many blocks the documents end up spread across, it's
- // possible that Optimize can remove documents without shrinking storage. The
- // first Optimize call will also write the OptimizeStatusProto for the first
- // time which will take up 1 block. So make sure that before_size is no less
- // than after_size - 1 block.
- uint32_t page_size = getpagesize();
- EXPECT_THAT(result.optimize_stats().storage_size_before(),
- Ge(result.optimize_stats().storage_size_after() - page_size));
- result.mutable_optimize_stats()->clear_storage_size_before();
- result.mutable_optimize_stats()->clear_storage_size_after();
- EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
-
- fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(5);
- fake_clock->SetSystemTimeMilliseconds(20000);
- icing = std::make_unique<TestIcingSearchEngine>(
- GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(), std::move(fake_clock),
- GetTestJniCache());
- ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
-
- expected = OptimizeStatsProto();
- expected.set_latency_ms(5);
- expected.set_document_store_optimize_latency_ms(5);
- expected.set_index_restoration_latency_ms(5);
- expected.set_num_original_documents(1);
- expected.set_num_deleted_documents(0);
- expected.set_num_expired_documents(0);
- expected.set_time_since_last_optimize_ms(10000);
- expected.set_index_restoration_mode(OptimizeStatsProto::INDEX_TRANSLATION);
-
- // Run Optimize
- result = icing->Optimize();
- EXPECT_THAT(result.optimize_stats().storage_size_before(),
- Eq(result.optimize_stats().storage_size_after()));
- result.mutable_optimize_stats()->clear_storage_size_before();
- result.mutable_optimize_stats()->clear_storage_size_after();
- EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
-
- // Delete the last document.
- ASSERT_THAT(icing->Delete(document3.namespace_(), document3.uri()).status(),
- ProtoIsOk());
-
- expected = OptimizeStatsProto();
- expected.set_latency_ms(5);
- expected.set_document_store_optimize_latency_ms(5);
- expected.set_index_restoration_latency_ms(5);
- expected.set_num_original_documents(1);
- expected.set_num_deleted_documents(1);
- expected.set_num_expired_documents(0);
- expected.set_time_since_last_optimize_ms(0);
- // Should rebuild the index since all documents are removed.
- expected.set_index_restoration_mode(OptimizeStatsProto::FULL_INDEX_REBUILD);
-
- // Run Optimize
- result = icing->Optimize();
- EXPECT_THAT(result.optimize_stats().storage_size_before(),
- Ge(result.optimize_stats().storage_size_after()));
- result.mutable_optimize_stats()->clear_storage_size_before();
- result.mutable_optimize_stats()->clear_storage_size_after();
- EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
-}
-
TEST_F(IcingSearchEngineTest, StorageInfoTest) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
@@ -8775,1481 +1299,6 @@ TEST_F(IcingSearchEngineTest, StorageInfoTest) {
EXPECT_THAT(result.storage_info().total_storage_size(), Ge(0));
}
-TEST_F(IcingSearchEngineTest, SnippetErrorTest) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("Generic").AddProperty(
- PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REPEATED)))
- .Build();
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetScore(10)
- .SetSchema("Generic")
- .AddStringProperty("subject", "I like cats", "I like dogs",
- "I like birds", "I like fish")
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetScore(20)
- .SetSchema("Generic")
- .AddStringProperty("subject", "I like red", "I like green",
- "I like blue", "I like yellow")
- .Build();
- DocumentProto document3 =
- DocumentBuilder()
- .SetKey("namespace", "uri3")
- .SetScore(5)
- .SetSchema("Generic")
- .AddStringProperty("subject", "I like cupcakes", "I like donuts",
- "I like eclairs", "I like froyo")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.add_schema_type_filters("Generic");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.set_query("like");
- ScoringSpecProto scoring_spec;
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
- ResultSpecProto result_spec;
- result_spec.mutable_snippet_spec()->set_num_to_snippet(2);
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(3);
- result_spec.mutable_snippet_spec()->set_max_window_utf32_length(4);
- SearchResultProto search_results =
- icing.Search(search_spec, scoring_spec, result_spec);
-
- ASSERT_THAT(search_results.results(), SizeIs(3));
- const SearchResultProto::ResultProto* result = &search_results.results(0);
- EXPECT_THAT(result->document().uri(), Eq("uri2"));
- ASSERT_THAT(result->snippet().entries(), SizeIs(3));
- const SnippetProto::EntryProto* entry = &result->snippet().entries(0);
- EXPECT_THAT(entry->property_name(), "subject[0]");
- std::string_view content = GetString(&result->document(), "subject[0]");
- EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
-
- entry = &result->snippet().entries(1);
- EXPECT_THAT(entry->property_name(), "subject[1]");
- content = GetString(&result->document(), "subject[1]");
- EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
-
- entry = &result->snippet().entries(2);
- EXPECT_THAT(entry->property_name(), "subject[2]");
- content = GetString(&result->document(), "subject[2]");
- EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
-
- result = &search_results.results(1);
- EXPECT_THAT(result->document().uri(), Eq("uri1"));
- ASSERT_THAT(result->snippet().entries(), SizeIs(3));
- entry = &result->snippet().entries(0);
- EXPECT_THAT(entry->property_name(), "subject[0]");
- content = GetString(&result->document(), "subject[0]");
- EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
-
- entry = &result->snippet().entries(1);
- ASSERT_THAT(entry->property_name(), "subject[1]");
- content = GetString(&result->document(), "subject[1]");
- EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
-
- entry = &result->snippet().entries(2);
- ASSERT_THAT(entry->property_name(), "subject[2]");
- content = GetString(&result->document(), "subject[2]");
- EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
-
- result = &search_results.results(2);
- ASSERT_THAT(result->document().uri(), Eq("uri3"));
- ASSERT_THAT(result->snippet().entries(), IsEmpty());
-}
-
-TEST_F(IcingSearchEngineTest, CJKSnippetTest) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // String: "我每天走路去上班。"
- // ^ ^ ^ ^^
- // UTF8 idx: 0 3 9 15 18
- // UTF16 idx: 0 1 3 5 6
- // Breaks into segments: "我", "每天", "走路", "去", "上班"
- constexpr std::string_view kChinese = "我每天走路去上班。";
- DocumentProto document = DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", kChinese)
- .Build();
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- // Search and request snippet matching but no windowing.
- SearchSpecProto search_spec;
- search_spec.set_query("走");
- search_spec.set_term_match_type(TERM_MATCH_PREFIX);
-
- ResultSpecProto result_spec;
- result_spec.mutable_snippet_spec()->set_num_to_snippet(
- std::numeric_limits<int>::max());
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(
- std::numeric_limits<int>::max());
-
- // Search and make sure that we got a single successful result
- SearchResultProto search_results = icing.Search(
- search_spec, ScoringSpecProto::default_instance(), result_spec);
- ASSERT_THAT(search_results.status(), ProtoIsOk());
- ASSERT_THAT(search_results.results(), SizeIs(1));
- const SearchResultProto::ResultProto* result = &search_results.results(0);
- EXPECT_THAT(result->document().uri(), Eq("uri1"));
-
- // Ensure that one and only one property was matched and it was "body"
- ASSERT_THAT(result->snippet().entries(), SizeIs(1));
- const SnippetProto::EntryProto* entry = &result->snippet().entries(0);
- EXPECT_THAT(entry->property_name(), Eq("body"));
-
- // Get the content for "subject" and see what the match is.
- std::string_view content = GetString(&result->document(), "body");
- ASSERT_THAT(content, Eq(kChinese));
-
- // Ensure that there is one and only one match within "subject"
- ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
- const SnippetMatchProto& match_proto = entry->snippet_matches(0);
-
- EXPECT_THAT(match_proto.exact_match_byte_position(), Eq(9));
- EXPECT_THAT(match_proto.exact_match_byte_length(), Eq(6));
- std::string_view match =
- content.substr(match_proto.exact_match_byte_position(),
- match_proto.exact_match_byte_length());
- ASSERT_THAT(match, Eq("走路"));
-
- // Ensure that the utf-16 values are also as expected
- EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(3));
- EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2));
-}
-
-TEST_F(IcingSearchEngineTest, InvalidToEmptyQueryTest) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // String: "Luca Brasi sleeps with the 🐟🐟🐟."
- // ^ ^ ^ ^ ^ ^ ^ ^ ^
- // UTF8 idx: 0 5 11 18 23 27 3135 39
- // UTF16 idx: 0 5 11 18 23 27 2931 33
- // Breaks into segments: "Luca", "Brasi", "sleeps", "with", "the", "🐟", "🐟"
- // and "🐟".
- constexpr std::string_view kSicilianMessage =
- "Luca Brasi sleeps with the 🐟🐟🐟.";
- DocumentProto document = DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", kSicilianMessage)
- .Build();
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "Some other content.")
- .Build();
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- // Search and request snippet matching but no windowing.
- SearchSpecProto search_spec;
- search_spec.set_query("?");
- search_spec.set_term_match_type(TERM_MATCH_PREFIX);
- ScoringSpecProto scoring_spec;
- ResultSpecProto result_spec;
-
- // Search and make sure that we got a single successful result
- SearchResultProto search_results =
- icing.Search(search_spec, scoring_spec, result_spec);
- EXPECT_THAT(search_results.status(), ProtoIsOk());
- EXPECT_THAT(search_results.results(), SizeIs(2));
-
- search_spec.set_query("。");
- search_results = icing.Search(search_spec, scoring_spec, result_spec);
- EXPECT_THAT(search_results.status(), ProtoIsOk());
- EXPECT_THAT(search_results.results(), SizeIs(2));
-
- search_spec.set_query("-");
- search_results = icing.Search(search_spec, scoring_spec, result_spec);
- EXPECT_THAT(search_results.status(), ProtoIsOk());
- EXPECT_THAT(search_results.results(), SizeIs(2));
-
- search_spec.set_query(":");
- search_results = icing.Search(search_spec, scoring_spec, result_spec);
- EXPECT_THAT(search_results.status(), ProtoIsOk());
- EXPECT_THAT(search_results.results(), SizeIs(2));
-
- search_spec.set_query("OR");
- search_results = icing.Search(search_spec, scoring_spec, result_spec);
- EXPECT_THAT(search_results.status(), ProtoIsOk());
- EXPECT_THAT(search_results.results(), SizeIs(2));
-
- search_spec.set_query(" ");
- search_results = icing.Search(search_spec, scoring_spec, result_spec);
- EXPECT_THAT(search_results.status(), ProtoIsOk());
- EXPECT_THAT(search_results.results(), SizeIs(2));
-}
-
-TEST_F(IcingSearchEngineTest, EmojiSnippetTest) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // String: "Luca Brasi sleeps with the 🐟🐟🐟."
- // ^ ^ ^ ^ ^ ^ ^ ^ ^
- // UTF8 idx: 0 5 11 18 23 27 3135 39
- // UTF16 idx: 0 5 11 18 23 27 2931 33
- // Breaks into segments: "Luca", "Brasi", "sleeps", "with", "the", "🐟", "🐟"
- // and "🐟".
- constexpr std::string_view kSicilianMessage =
- "Luca Brasi sleeps with the 🐟🐟🐟.";
- DocumentProto document = DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", kSicilianMessage)
- .Build();
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "Some other content.")
- .Build();
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- // Search and request snippet matching but no windowing.
- SearchSpecProto search_spec;
- search_spec.set_query("🐟");
- search_spec.set_term_match_type(TERM_MATCH_PREFIX);
-
- ResultSpecProto result_spec;
- result_spec.mutable_snippet_spec()->set_num_to_snippet(1);
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(1);
-
- // Search and make sure that we got a single successful result
- SearchResultProto search_results = icing.Search(
- search_spec, ScoringSpecProto::default_instance(), result_spec);
- ASSERT_THAT(search_results.status(), ProtoIsOk());
- ASSERT_THAT(search_results.results(), SizeIs(1));
- const SearchResultProto::ResultProto* result = &search_results.results(0);
- EXPECT_THAT(result->document().uri(), Eq("uri1"));
-
- // Ensure that one and only one property was matched and it was "body"
- ASSERT_THAT(result->snippet().entries(), SizeIs(1));
- const SnippetProto::EntryProto* entry = &result->snippet().entries(0);
- EXPECT_THAT(entry->property_name(), Eq("body"));
-
- // Get the content for "subject" and see what the match is.
- std::string_view content = GetString(&result->document(), "body");
- ASSERT_THAT(content, Eq(kSicilianMessage));
-
- // Ensure that there is one and only one match within "subject"
- ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
- const SnippetMatchProto& match_proto = entry->snippet_matches(0);
-
- EXPECT_THAT(match_proto.exact_match_byte_position(), Eq(27));
- EXPECT_THAT(match_proto.exact_match_byte_length(), Eq(4));
- std::string_view match =
- content.substr(match_proto.exact_match_byte_position(),
- match_proto.exact_match_byte_length());
- ASSERT_THAT(match, Eq("🐟"));
-
- // Ensure that the utf-16 values are also as expected
- EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(27));
- EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2));
-}
-
-TEST_F(IcingSearchEngineTest, PutDocumentIndexFailureDeletion) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Testing has shown that adding ~600,000 terms generated this way will
- // fill up the hit buffer.
- std::vector<std::string> terms = GenerateUniqueTerms(600000);
- std::string content = absl_ports::StrJoin(terms, " ");
- DocumentProto document = DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", "foo " + content)
- .Build();
- // We failed to add the document to the index fully. This means that we should
- // reject the document from Icing entirely.
- ASSERT_THAT(icing.Put(document).status(),
- ProtoStatusIs(StatusProto::OUT_OF_SPACE));
-
- // Make sure that the document isn't searchable.
- SearchSpecProto search_spec;
- search_spec.set_query("foo");
- search_spec.set_term_match_type(TERM_MATCH_PREFIX);
-
- SearchResultProto search_results =
- icing.Search(search_spec, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
- ASSERT_THAT(search_results.status(), ProtoIsOk());
- ASSERT_THAT(search_results.results(), IsEmpty());
-
- // Make sure that the document isn't retrievable.
- GetResultProto get_result =
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance());
- ASSERT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
-}
-
-TEST_F(IcingSearchEngineTest, SearchSuggestionsTest) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- // Creates and inserts 6 documents, and index 6 termSix, 5 termFive, 4
- // termFour, 3 termThree, 2 termTwo and one termOne.
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty(
- "subject", "termOne termTwo termThree termFour termFive termSix")
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject",
- "termTwo termThree termFour termFive termSix")
- .Build();
- DocumentProto document3 =
- DocumentBuilder()
- .SetKey("namespace", "uri3")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "termThree termFour termFive termSix")
- .Build();
- DocumentProto document4 =
- DocumentBuilder()
- .SetKey("namespace", "uri4")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "termFour termFive termSix")
- .Build();
- DocumentProto document5 =
- DocumentBuilder()
- .SetKey("namespace", "uri5")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "termFive termSix")
- .Build();
- DocumentProto document6 = DocumentBuilder()
- .SetKey("namespace", "uri6")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "termSix")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document6).status(), ProtoIsOk());
-
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("t");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
-
- // Query all suggestions, and they will be ranked.
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions().at(0).query(), "termsix");
- ASSERT_THAT(response.suggestions().at(1).query(), "termfive");
- ASSERT_THAT(response.suggestions().at(2).query(), "termfour");
- ASSERT_THAT(response.suggestions().at(3).query(), "termthree");
- ASSERT_THAT(response.suggestions().at(4).query(), "termtwo");
- ASSERT_THAT(response.suggestions().at(5).query(), "termone");
-
- // Query first three suggestions, and they will be ranked.
- suggestion_spec.set_num_to_return(3);
- response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions().at(0).query(), "termsix");
- ASSERT_THAT(response.suggestions().at(1).query(), "termfive");
- ASSERT_THAT(response.suggestions().at(2).query(), "termfour");
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchSuggestionsTest_ShouldReturnInOneNamespace) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "foo fool")
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- SuggestionResponse::Suggestion suggestionFoo;
- suggestionFoo.set_query("foo");
- SuggestionResponse::Suggestion suggestionFool;
- suggestionFool.set_query("fool");
-
- // namespace1 has 2 results.
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.add_namespace_filters("namespace1");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFoo),
- EqualsProto(suggestionFool)));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchSuggestionsTest_ShouldReturnInMultipleNamespace) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fo")
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "foo")
- .Build();
- DocumentProto document3 = DocumentBuilder()
- .SetKey("namespace3", "uri3")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
-
- SuggestionResponse::Suggestion suggestionFoo;
- suggestionFoo.set_query("foo");
- SuggestionResponse::Suggestion suggestionFool;
- suggestionFool.set_query("fool");
-
- // namespace2 and namespace3 has 2 results.
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.add_namespace_filters("namespace2");
- suggestion_spec.add_namespace_filters("namespace3");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFoo),
- EqualsProto(suggestionFool)));
-}
-
-TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_NamespaceNotFound) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fo")
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "foo")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // Search for non-exist namespace3
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.add_namespace_filters("namespace3");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- EXPECT_THAT(response.status().code(), Eq(StatusProto::OK));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchSuggestionsTest_OtherNamespaceDontContributeToHitCount) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- // Index 4 documents,
- // namespace1 has 2 hit2 for term one
- // namespace2 has 2 hit2 for term two and 1 hit for term one.
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "termone")
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace1", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "termone")
- .Build();
- DocumentProto document3 = DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "termone termtwo")
- .Build();
- DocumentProto document4 = DocumentBuilder()
- .SetKey("namespace2", "uri3")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "termtwo")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
-
- SuggestionResponse::Suggestion suggestionTermOne;
- suggestionTermOne.set_query("termone");
- SuggestionResponse::Suggestion suggestionTermTwo;
- suggestionTermTwo.set_query("termtwo");
-
- // only search suggestion for namespace2. The correctly order should be
- // {"termtwo", "termone"}. If we're not filtering out namespace1 when
- // calculating our score, then it will be {"termone", "termtwo"}.
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("t");
- suggestion_spec.add_namespace_filters("namespace2");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- ElementsAre(EqualsProto(suggestionTermTwo),
- EqualsProto(suggestionTermOne)));
-}
-
-TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_DeletionTest) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- SuggestionResponse::Suggestion suggestionFool;
- suggestionFool.set_query("fool");
-
- // namespace1 has this suggestion
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.add_namespace_filters("namespace1");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFool)));
-
- // namespace2 has this suggestion
- suggestion_spec.clear_namespace_filters();
- suggestion_spec.add_namespace_filters("namespace2");
- response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFool)));
-
- // delete document from namespace 1
- EXPECT_THAT(icing.Delete("namespace1", "uri1").status(), ProtoIsOk());
-
- // Now namespace1 will return empty
- suggestion_spec.clear_namespace_filters();
- suggestion_spec.add_namespace_filters("namespace1");
- response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(), IsEmpty());
-
- // namespace2 still has this suggestion, so we can prove the reason of
- // namespace 1 cannot find it is we filter it out, not it doesn't exist.
- suggestion_spec.add_namespace_filters("namespace2");
- response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFool)));
-}
-
-TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_ShouldReturnInOneDocument) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace1", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "foo")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- SuggestionResponse::Suggestion suggestionFool;
- suggestionFool.set_query("fool");
- SuggestionResponse::Suggestion suggestionFoo;
- suggestionFoo.set_query("foo");
-
- // Only search in namespace1,uri1
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
- NamespaceDocumentUriGroup* namespace1_uri1 =
- suggestion_spec.add_document_uri_filters();
- namespace1_uri1->set_namespace_("namespace1");
- namespace1_uri1->add_document_uris("uri1");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFool)));
-
- // Only search in namespace1,uri2
- suggestion_spec.clear_document_uri_filters();
- NamespaceDocumentUriGroup* namespace1_uri2 =
- suggestion_spec.add_document_uri_filters();
- namespace1_uri2->set_namespace_("namespace1");
- namespace1_uri2->add_document_uris("uri2");
-
- response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFoo)));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchSuggestionsTest_ShouldReturnInMultipleDocument) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace1", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "foo")
- .Build();
- DocumentProto document3 = DocumentBuilder()
- .SetKey("namespace1", "uri3")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fo")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
-
- SuggestionResponse::Suggestion suggestionFool;
- suggestionFool.set_query("fool");
- SuggestionResponse::Suggestion suggestionFoo;
- suggestionFoo.set_query("foo");
-
- // Only search document in namespace1,uri1 and namespace2,uri2
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
- NamespaceDocumentUriGroup* namespace1_uri1_uri2 =
- suggestion_spec.add_document_uri_filters();
- namespace1_uri1_uri2->set_namespace_("namespace1");
- namespace1_uri1_uri2->add_document_uris("uri1");
- namespace1_uri1_uri2->add_document_uris("uri2");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFool),
- EqualsProto(suggestionFoo)));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchSuggestionsTest_ShouldReturnInDesiredDocumentAndNamespace) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "foo")
- .Build();
- DocumentProto document3 = DocumentBuilder()
- .SetKey("namespace3", "uri3")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fo")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
-
- SuggestionResponse::Suggestion suggestionFool;
- suggestionFool.set_query("fool");
- SuggestionResponse::Suggestion suggestionFoo;
- suggestionFoo.set_query("foo");
-
- // Only search document in namespace1,uri1 and all documents under namespace2
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
- suggestion_spec.add_namespace_filters("namespace1");
- suggestion_spec.add_namespace_filters("namespace2");
- NamespaceDocumentUriGroup* namespace1_uri1 =
- suggestion_spec.add_document_uri_filters();
- namespace1_uri1->set_namespace_("namespace1");
- namespace1_uri1->add_document_uris("uri1");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFool),
- EqualsProto(suggestionFoo)));
-}
-
-TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_DocumentIdDoesntExist) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "foo")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // Search for a non-exist document id : namespace3,uri3
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
- suggestion_spec.add_namespace_filters("namespace3");
- NamespaceDocumentUriGroup* namespace3_uri3 =
- suggestion_spec.add_document_uri_filters();
- namespace3_uri3->set_namespace_("namespace3");
- namespace3_uri3->add_document_uris("uri3");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(), IsEmpty());
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchSuggestionsTest_DocumentIdFilterDoesntMatchNamespaceFilter) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "foo")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // Search for the document namespace1,uri1 with namespace filter in
- // namespace2.
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
- NamespaceDocumentUriGroup* namespace1_uri1 =
- suggestion_spec.add_document_uri_filters();
- namespace1_uri1->set_namespace_("namespace1");
- namespace1_uri1->add_document_uris("uri1");
- suggestion_spec.add_namespace_filters("namespace2");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- EXPECT_THAT(response.status().code(), Eq(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchSuggestionsTest_EmptyDocumentIdInNamespace) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
-
- // Give empty document uris in namespace 1
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
- NamespaceDocumentUriGroup* namespace1_uri1 =
- suggestion_spec.add_document_uri_filters();
- namespace1_uri1->set_namespace_("namespace1");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- EXPECT_THAT(response.status().code(), Eq(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchSuggestionsTest_ShouldReturnInDesiredSchemaType) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
- PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED)))
- .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
- PropertyConfigBuilder()
- .SetName("name")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("sender")
- .SetDataTypeDocument(
- "Person",
- /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .AddDocumentProperty("sender", DocumentBuilder()
- .SetKey("namespace", "uri1-sender")
- .SetSchema("Person")
- .AddStringProperty("name", "foo")
- .Build())
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace1", "uri2")
- .SetSchema("Message")
- .SetCreationTimestampMs(10)
- .AddStringProperty("body", "fo")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- SuggestionResponse::Suggestion suggestionFool;
- suggestionFool.set_query("fool");
- SuggestionResponse::Suggestion suggestionFoo;
- suggestionFoo.set_query("foo");
-
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
- suggestion_spec.add_schema_type_filters("Email");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFoo),
- EqualsProto(suggestionFool)));
-}
-
-TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_SchemaTypeNotFound) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
- PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED)))
- .Build();
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Message")
- .SetCreationTimestampMs(10)
- .AddStringProperty("body", "fo")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
-
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
- suggestion_spec.add_schema_type_filters("Email");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(), IsEmpty());
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchSuggestionsTest_ShouldReturnInDesiredProperty) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .AddDocumentProperty("sender",
- DocumentBuilder()
- .SetKey("namespace", "uri1-sender")
- .SetSchema("Person")
- .AddStringProperty("name", "foo")
- .AddStringProperty("emailAddress", "fo")
- .Build())
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
-
- SuggestionResponse::Suggestion suggestionFool;
- suggestionFool.set_query("fool");
- SuggestionResponse::Suggestion suggestionFoo;
- suggestionFoo.set_query("foo");
-
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
-
- // Only search in subject.
- TypePropertyMask* mask = suggestion_spec.add_type_property_filters();
- mask->set_schema_type("Email");
- mask->add_paths("subject");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFool)));
-
- // Search in subject and sender.name
- suggestion_spec.clear_type_property_filters();
- mask = suggestion_spec.add_type_property_filters();
- mask->set_schema_type("Email");
- mask->add_paths("subject");
- mask->add_paths("sender.name");
-
- response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFoo),
- EqualsProto(suggestionFool)));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchSuggestionsTest_NestedPropertyReturnNothing) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .AddDocumentProperty("sender", DocumentBuilder()
- .SetKey("namespace", "uri1-sender")
- .SetSchema("Person")
- .AddStringProperty("name", "foo")
- .Build())
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
-
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
-
- // Only search in Person.name.
- suggestion_spec.add_schema_type_filters("Person");
- TypePropertyMask* mask = suggestion_spec.add_type_property_filters();
- mask->set_schema_type("Person");
- mask->add_paths("name");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(), IsEmpty());
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchSuggestionsTest_PropertyFilterAndSchemaFilter) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
- PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED)))
- .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
- PropertyConfigBuilder()
- .SetName("name")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("sender")
- .SetDataTypeDocument(
- "Person",
- /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .AddDocumentProperty("sender", DocumentBuilder()
- .SetKey("namespace", "uri1-sender")
- .SetSchema("Person")
- .AddStringProperty("name", "foo")
- .Build())
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace1", "uri2")
- .SetSchema("Message")
- .SetCreationTimestampMs(10)
- .AddStringProperty("body", "fo")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- SuggestionResponse::Suggestion suggestionFoo;
- suggestionFoo.set_query("foo");
- SuggestionResponse::Suggestion suggestionFo;
- suggestionFo.set_query("fo");
-
- // Search in sender.name of Email and everything in Message.
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
- suggestion_spec.add_schema_type_filters("Email");
- suggestion_spec.add_schema_type_filters("Message");
- TypePropertyMask* mask1 = suggestion_spec.add_type_property_filters();
- mask1->set_schema_type("Email");
- mask1->add_paths("sender.name");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFoo),
- EqualsProto(suggestionFo)));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchSuggestionsTest_PropertyFilterNotMatchSchemaFilter) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
- PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED)))
- .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
- PropertyConfigBuilder()
- .SetName("name")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("sender")
- .SetDataTypeDocument(
- "Person",
- /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Message")
- .SetCreationTimestampMs(10)
- .AddStringProperty("body", "fo")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
-
- // Search in sender.name of Email but schema type is Message.
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
- suggestion_spec.add_schema_type_filters("Message");
- TypePropertyMask* mask1 = suggestion_spec.add_type_property_filters();
- mask1->set_schema_type("Email");
- mask1->add_paths("sender.name");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- EXPECT_THAT(response.status().code(), Eq(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_OrderByTermFrequency) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
- PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED)))
- .Build();
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Message")
- .SetCreationTimestampMs(10)
- .AddStringProperty(
- "body", "termthree termthree termthree termtwo termtwo termone")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
-
- // Search in sender.name of Email but schema type is Message.
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("t");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::EXACT_ONLY);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::TERM_FREQUENCY);
-
- SuggestionResponse::Suggestion suggestionTermOne;
- suggestionTermOne.set_query("termone");
- SuggestionResponse::Suggestion suggestionTermTwo;
- suggestionTermTwo.set_query("termtwo");
- SuggestionResponse::Suggestion suggestionTermThree;
- suggestionTermThree.set_query("termthree");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- ElementsAre(EqualsProto(suggestionTermThree),
- EqualsProto(suggestionTermTwo),
- EqualsProto(suggestionTermOne)));
-}
-
-TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_ExpiredTest) {
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(100)
- .SetTtlMs(500)
- .AddStringProperty("subject", "fool")
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(100)
- .SetTtlMs(1000)
- .AddStringProperty("subject", "fool")
- .Build();
- {
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetSystemTimeMilliseconds(400);
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- SuggestionResponse::Suggestion suggestionFool;
- suggestionFool.set_query("fool");
-
- // namespace1 has this suggestion
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.add_namespace_filters("namespace1");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFool)));
-
- // namespace2 has this suggestion
- suggestion_spec.clear_namespace_filters();
- suggestion_spec.add_namespace_filters("namespace2");
- response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFool)));
- }
- // We reinitialize here so we can feed in a fake clock this time
- {
- // Time needs to be past document1 creation time (100) + ttl (500) for it
- // to count as "expired". document2 is not expired since its ttl is 1000.
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetSystemTimeMilliseconds(800);
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.add_namespace_filters("namespace1");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
-
- // Now namespace1 will return empty
- suggestion_spec.clear_namespace_filters();
- suggestion_spec.add_namespace_filters("namespace1");
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(), IsEmpty());
-
- // namespace2 still has this suggestion
- SuggestionResponse::Suggestion suggestionFool;
- suggestionFool.set_query("fool");
-
- suggestion_spec.add_namespace_filters("namespace2");
- response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFool)));
- }
-}
-
-TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_emptyPrefix) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
-
- ASSERT_THAT(icing.SearchSuggestions(suggestion_spec).status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_NonPositiveNumToReturn) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("prefix");
- suggestion_spec.set_num_to_return(0);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
-
- ASSERT_THAT(icing.SearchSuggestions(suggestion_spec).status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
TEST_F(IcingSearchEngineTest, GetDebugInfoVerbosityBasicSucceeds) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
@@ -10325,539 +1374,6 @@ TEST_F(IcingSearchEngineTest, GetDebugInfoWithSchemaNoDocumentsSucceeds) {
ASSERT_THAT(result.status(), ProtoIsOk());
}
-TEST_F(IcingSearchEngineTest, IcingShouldWorkFor64Sections) {
- // Create a schema with 64 sections
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- // Person has 4 sections.
- .SetType("Person")
- .AddProperty(PropertyConfigBuilder()
- .SetName("firstName")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("lastName")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("emailAddress")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("phoneNumber")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .AddType(SchemaTypeConfigBuilder()
- // Email has 16 sections.
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("date")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("time")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(
- PropertyConfigBuilder()
- .SetName("sender")
- .SetDataTypeDocument(
- "Person", /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(
- PropertyConfigBuilder()
- .SetName("receiver")
- .SetDataTypeDocument(
- "Person", /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(
- PropertyConfigBuilder()
- .SetName("cc")
- .SetDataTypeDocument(
- "Person", /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_REPEATED)))
- .AddType(SchemaTypeConfigBuilder()
- // EmailCollection has 64 sections.
- .SetType("EmailCollection")
- .AddProperty(
- PropertyConfigBuilder()
- .SetName("email1")
- .SetDataTypeDocument(
- "Email", /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(
- PropertyConfigBuilder()
- .SetName("email2")
- .SetDataTypeDocument(
- "Email", /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(
- PropertyConfigBuilder()
- .SetName("email3")
- .SetDataTypeDocument(
- "Email", /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(
- PropertyConfigBuilder()
- .SetName("email4")
- .SetDataTypeDocument(
- "Email", /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
-
- DocumentProto person1 =
- DocumentBuilder()
- .SetKey("namespace", "person1")
- .SetSchema("Person")
- .AddStringProperty("firstName", "first1")
- .AddStringProperty("lastName", "last1")
- .AddStringProperty("emailAddress", "email1@gmail.com")
- .AddStringProperty("phoneNumber", "000-000-001")
- .Build();
- DocumentProto person2 =
- DocumentBuilder()
- .SetKey("namespace", "person2")
- .SetSchema("Person")
- .AddStringProperty("firstName", "first2")
- .AddStringProperty("lastName", "last2")
- .AddStringProperty("emailAddress", "email2@gmail.com")
- .AddStringProperty("phoneNumber", "000-000-002")
- .Build();
- DocumentProto person3 =
- DocumentBuilder()
- .SetKey("namespace", "person3")
- .SetSchema("Person")
- .AddStringProperty("firstName", "first3")
- .AddStringProperty("lastName", "last3")
- .AddStringProperty("emailAddress", "email3@gmail.com")
- .AddStringProperty("phoneNumber", "000-000-003")
- .Build();
- DocumentProto email1 = DocumentBuilder()
- .SetKey("namespace", "email1")
- .SetSchema("Email")
- .AddStringProperty("body", "test body")
- .AddStringProperty("subject", "test subject")
- .AddStringProperty("date", "2022-08-01")
- .AddStringProperty("time", "1:00 PM")
- .AddDocumentProperty("sender", person1)
- .AddDocumentProperty("receiver", person2)
- .AddDocumentProperty("cc", person3)
- .Build();
- DocumentProto email2 = DocumentBuilder()
- .SetKey("namespace", "email2")
- .SetSchema("Email")
- .AddStringProperty("body", "test body")
- .AddStringProperty("subject", "test subject")
- .AddStringProperty("date", "2022-08-02")
- .AddStringProperty("time", "2:00 PM")
- .AddDocumentProperty("sender", person2)
- .AddDocumentProperty("receiver", person1)
- .AddDocumentProperty("cc", person3)
- .Build();
- DocumentProto email3 = DocumentBuilder()
- .SetKey("namespace", "email3")
- .SetSchema("Email")
- .AddStringProperty("body", "test body")
- .AddStringProperty("subject", "test subject")
- .AddStringProperty("date", "2022-08-03")
- .AddStringProperty("time", "3:00 PM")
- .AddDocumentProperty("sender", person3)
- .AddDocumentProperty("receiver", person1)
- .AddDocumentProperty("cc", person2)
- .Build();
- DocumentProto email4 = DocumentBuilder()
- .SetKey("namespace", "email4")
- .SetSchema("Email")
- .AddStringProperty("body", "test body")
- .AddStringProperty("subject", "test subject")
- .AddStringProperty("date", "2022-08-04")
- .AddStringProperty("time", "4:00 PM")
- .AddDocumentProperty("sender", person3)
- .AddDocumentProperty("receiver", person2)
- .AddDocumentProperty("cc", person1)
- .Build();
- DocumentProto email_collection =
- DocumentBuilder()
- .SetKey("namespace", "email_collection")
- .SetSchema("EmailCollection")
- .AddDocumentProperty("email1", email1)
- .AddDocumentProperty("email2", email2)
- .AddDocumentProperty("email3", email3)
- .AddDocumentProperty("email4", email4)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(email_collection).status(), ProtoIsOk());
-
- const std::vector<std::string> query_terms = {
- "first1", "last2", "email3@gmail.com", "000-000-001",
- "body", "subject", "2022-08-02", "3\\:00"};
- SearchResultProto expected_document;
- expected_document.mutable_status()->set_code(StatusProto::OK);
- *expected_document.mutable_results()->Add()->mutable_document() =
- email_collection;
- for (const std::string& query_term : query_terms) {
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query(query_term);
- SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results,
- EqualsSearchResultIgnoreStatsAndScores(expected_document));
- }
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("foo");
- SearchResultProto expected_no_documents;
- expected_no_documents.mutable_status()->set_code(StatusProto::OK);
- SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results,
- EqualsSearchResultIgnoreStatsAndScores(expected_no_documents));
-}
-
-TEST_F(IcingSearchEngineTest, JoinByQualifiedId) {
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Person")
- .AddProperty(PropertyConfigBuilder()
- .SetName("firstName")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("lastName")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("emailAddress")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("personQualifiedId")
- .SetDataTypeJoinableString(
- JOINABLE_VALUE_TYPE_QUALIFIED_ID)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
-
- DocumentProto person1 =
- DocumentBuilder()
- .SetKey("pkg$db/namespace", "person1")
- .SetSchema("Person")
- .AddStringProperty("firstName", "first1")
- .AddStringProperty("lastName", "last1")
- .AddStringProperty("emailAddress", "email1@gmail.com")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .SetScore(1)
- .Build();
- DocumentProto person2 =
- DocumentBuilder()
- .SetKey("pkg$db/namespace", "person2")
- .SetSchema("Person")
- .AddStringProperty("firstName", "first2")
- .AddStringProperty("lastName", "last2")
- .AddStringProperty("emailAddress", "email2@gmail.com")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .SetScore(2)
- .Build();
- DocumentProto person3 =
- DocumentBuilder()
- .SetKey(R"(pkg$db/name#space\\)", "person3")
- .SetSchema("Person")
- .AddStringProperty("firstName", "first3")
- .AddStringProperty("lastName", "last3")
- .AddStringProperty("emailAddress", "email3@gmail.com")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .SetScore(3)
- .Build();
-
- DocumentProto email1 =
- DocumentBuilder()
- .SetKey("namespace", "email1")
- .SetSchema("Email")
- .AddStringProperty("subject", "test subject 1")
- .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .SetScore(3)
- .Build();
- DocumentProto email2 =
- DocumentBuilder()
- .SetKey("namespace", "email2")
- .SetSchema("Email")
- .AddStringProperty("subject", "test subject 2")
- .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .SetScore(2)
- .Build();
- DocumentProto email3 =
- DocumentBuilder()
- .SetKey("namespace", "email3")
- .SetSchema("Email")
- .AddStringProperty("subject", "test subject 3")
- .AddStringProperty("personQualifiedId",
- R"(pkg$db/name\#space\\\\#person3)") // escaped
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .SetScore(1)
- .Build();
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(person3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(email3).status(), ProtoIsOk());
-
- // Parent SearchSpec
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("firstName:first");
-
- // JoinSpec
- JoinSpecProto* join_spec = search_spec.mutable_join_spec();
- join_spec->set_max_joined_child_count(100);
- join_spec->set_parent_property_expression(
- std::string(JoinProcessor::kQualifiedIdExpr));
- join_spec->set_child_property_expression("personQualifiedId");
- join_spec->set_aggregation_scoring_strategy(
- JoinSpecProto::AggregationScoringStrategy::MAX);
- JoinSpecProto::NestedSpecProto* nested_spec =
- join_spec->mutable_nested_spec();
- SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
- nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
- nested_search_spec->set_query("subject:test");
- *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
- *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
-
- // Parent ScoringSpec
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
-
- // Parent ResultSpec
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(1);
-
- // Since we:
- // - Use MAX for aggregation scoring strategy.
- // - (Default) use DOCUMENT_SCORE to score child documents.
- // - (Default) use DESC as the ranking order.
- //
- // person1 + email1 should have the highest aggregated score (3) and be
- // returned first. person2 + email2 (aggregated score = 2) should be the
- // second, and person3 + email3 (aggregated score = 1) should be the last.
- SearchResultProto expected_result1;
- expected_result1.mutable_status()->set_code(StatusProto::OK);
- SearchResultProto::ResultProto* result_proto1 =
- expected_result1.mutable_results()->Add();
- *result_proto1->mutable_document() = person1;
- *result_proto1->mutable_joined_results()->Add()->mutable_document() = email1;
-
- SearchResultProto expected_result2;
- expected_result2.mutable_status()->set_code(StatusProto::OK);
- SearchResultProto::ResultProto* result_proto2 =
- expected_result2.mutable_results()->Add();
- *result_proto2->mutable_document() = person2;
- *result_proto2->mutable_joined_results()->Add()->mutable_document() = email2;
-
- SearchResultProto expected_result3;
- expected_result3.mutable_status()->set_code(StatusProto::OK);
- SearchResultProto::ResultProto* result_proto3 =
- expected_result3.mutable_results()->Add();
- *result_proto3->mutable_document() = person3;
- *result_proto3->mutable_joined_results()->Add()->mutable_document() = email3;
-
- SearchResultProto result1 =
- icing.Search(search_spec, scoring_spec, result_spec);
- uint64_t next_page_token = result1.next_page_token();
- EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
- expected_result1.set_next_page_token(next_page_token);
- EXPECT_THAT(result1,
- EqualsSearchResultIgnoreStatsAndScores(expected_result1));
-
- SearchResultProto result2 = icing.GetNextPage(next_page_token);
- next_page_token = result2.next_page_token();
- EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
- expected_result2.set_next_page_token(next_page_token);
- EXPECT_THAT(result2,
- EqualsSearchResultIgnoreStatsAndScores(expected_result2));
-
- SearchResultProto result3 = icing.GetNextPage(next_page_token);
- next_page_token = result3.next_page_token();
- EXPECT_THAT(next_page_token, Eq(kInvalidNextPageToken));
- EXPECT_THAT(result3,
- EqualsSearchResultIgnoreStatsAndScores(expected_result3));
-}
-
-TEST_F(IcingSearchEngineTest, NumericFilterAdvancedQuerySucceeds) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Create the schema and document store
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- .SetType("transaction")
- .AddProperty(PropertyConfigBuilder()
- .SetName("price")
- .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("cost")
- .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- DocumentProto document_one = DocumentBuilder()
- .SetKey("namespace", "1")
- .SetSchema("transaction")
- .SetCreationTimestampMs(1)
- .AddInt64Property("price", 10)
- .Build();
- ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
-
- DocumentProto document_two = DocumentBuilder()
- .SetKey("namespace", "2")
- .SetSchema("transaction")
- .SetCreationTimestampMs(1)
- .AddInt64Property("price", 25)
- .Build();
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- DocumentProto document_three = DocumentBuilder()
- .SetKey("namespace", "3")
- .SetSchema("transaction")
- .SetCreationTimestampMs(1)
- .AddInt64Property("cost", 2)
- .Build();
- ASSERT_THAT(icing.Put(document_three).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_query("price < 20");
- search_spec.set_search_type(
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
- search_spec.add_enabled_features(std::string(kNumericSearchFeature));
-
- SearchResultProto results =
- icing.Search(search_spec, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
- ASSERT_THAT(results.results(), SizeIs(1));
- EXPECT_THAT(results.results(0).document(), EqualsProto(document_one));
-
- search_spec.set_query("price == 25");
- results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
- ASSERT_THAT(results.results(), SizeIs(1));
- EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
-
- search_spec.set_query("cost > 2");
- results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(results.results(), IsEmpty());
-
- search_spec.set_query("cost >= 2");
- results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
- ASSERT_THAT(results.results(), SizeIs(1));
- EXPECT_THAT(results.results(0).document(), EqualsProto(document_three));
-
- search_spec.set_query("price <= 25");
- results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
- ASSERT_THAT(results.results(), SizeIs(2));
- EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
- EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
-}
-
-TEST_F(IcingSearchEngineTest, NumericFilterOldQueryFails) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Create the schema and document store
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- .SetType("transaction")
- .AddProperty(PropertyConfigBuilder()
- .SetName("price")
- .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("cost")
- .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- DocumentProto document_one = DocumentBuilder()
- .SetKey("namespace", "1")
- .SetSchema("transaction")
- .SetCreationTimestampMs(1)
- .AddInt64Property("price", 10)
- .Build();
- ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
-
- DocumentProto document_two = DocumentBuilder()
- .SetKey("namespace", "2")
- .SetSchema("transaction")
- .SetCreationTimestampMs(1)
- .AddInt64Property("price", 25)
- .Build();
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- DocumentProto document_three = DocumentBuilder()
- .SetKey("namespace", "3")
- .SetSchema("transaction")
- .SetCreationTimestampMs(1)
- .AddInt64Property("cost", 2)
- .Build();
- ASSERT_THAT(icing.Put(document_three).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_query("price < 20");
- search_spec.set_search_type(SearchSpecProto::SearchType::ICING_RAW_QUERY);
- search_spec.add_enabled_features(std::string(kNumericSearchFeature));
-
- SearchResultProto results =
- icing.Search(search_spec, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(results.status(), ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/index/section-indexing-handler.h b/icing/index/data-indexing-handler.h
index ff461cb..16a1796 100644
--- a/icing/index/section-indexing-handler.h
+++ b/icing/index/data-indexing-handler.h
@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#ifndef ICING_INDEX_SECTION_INDEXING_HANDLER_H_
-#define ICING_INDEX_SECTION_INDEXING_HANDLER_H_
+#ifndef ICING_INDEX_DATA_INDEXING_HANDLER_H_
+#define ICING_INDEX_DATA_INDEXING_HANDLER_H_
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/proto/logging.pb.h"
@@ -24,37 +24,46 @@
namespace icing {
namespace lib {
-// Parent class for indexing different types of sections in TokenizedDocument.
-class SectionIndexingHandler {
+// Parent class for indexing different types of data in TokenizedDocument.
+class DataIndexingHandler {
public:
- explicit SectionIndexingHandler(const Clock* clock) : clock_(*clock) {}
+ explicit DataIndexingHandler(const Clock* clock) : clock_(*clock) {}
- virtual ~SectionIndexingHandler() = default;
+ virtual ~DataIndexingHandler() = default;
- // Handles the indexing process: add data (hits) into the specific type index
- // (e.g. string index, integer index) for all contents in the corresponding
- // type of sections in tokenized_document.
+ // Handles the indexing process: add data into the specific type index (e.g.
+ // term index, integer index, qualified id type joinable index) for all
+ // contents in the corresponding type of data in tokenized_document.
// For example, IntegerSectionIndexingHandler::Handle should add data into
// integer index for all contents in tokenized_document.integer_sections.
//
- // tokenized_document: document object with different types of tokenized
- // sections.
+ // Also it should handle last added DocumentId properly (based on
+ // recovery_mode_) to avoid adding previously indexed documents.
+ //
+ // tokenized_document: document object with different types of tokenized data.
// document_id: id of the document.
+ // recovery_mode: decides how to handle document_id <=
+ // last_added_document_id. If in recovery_mode, then
+ // Handle() will simply return OK immediately. Otherwise,
+ // returns INVALID_ARGUMENT_ERROR.
// put_document_stats: object for collecting stats during indexing. It can be
// nullptr.
//
/// Returns:
- // - OK on success
+ // - OK on success.
+ // - INVALID_ARGUMENT_ERROR if document_id is invalid OR document_id is less
+ // than or equal to the document_id of a previously indexed document in
+ // non recovery mode.
// - Any other errors. It depends on each implementation.
virtual libtextclassifier3::Status Handle(
const TokenizedDocument& tokenized_document, DocumentId document_id,
- PutDocumentStatsProto* put_document_stats) = 0;
+ bool recovery_mode, PutDocumentStatsProto* put_document_stats) = 0;
protected:
- const Clock& clock_;
+ const Clock& clock_; // Does not own.
};
} // namespace lib
} // namespace icing
-#endif // ICING_INDEX_SECTION_INDEXING_HANDLER_H_
+#endif // ICING_INDEX_DATA_INDEXING_HANDLER_H_
diff --git a/icing/index/index-processor.cc b/icing/index/index-processor.cc
index 9f21c9d..9a773e8 100644
--- a/icing/index/index-processor.cc
+++ b/icing/index/index-processor.cc
@@ -14,52 +14,31 @@
#include "icing/index/index-processor.h"
-#include <cstdint>
#include <memory>
-#include <string>
-#include <string_view>
-#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
-#include "icing/index/index.h"
-#include "icing/index/integer-section-indexing-handler.h"
-#include "icing/index/numeric/numeric-index.h"
-#include "icing/index/string-section-indexing-handler.h"
+#include "icing/index/data-indexing-handler.h"
#include "icing/proto/logging.pb.h"
#include "icing/store/document-id.h"
-#include "icing/transform/normalizer.h"
#include "icing/util/status-macros.h"
#include "icing/util/tokenized-document.h"
namespace icing {
namespace lib {
-libtextclassifier3::StatusOr<std::unique_ptr<IndexProcessor>>
-IndexProcessor::Create(const Normalizer* normalizer, Index* index,
- NumericIndex<int64_t>* integer_index,
- const Clock* clock) {
- ICING_RETURN_ERROR_IF_NULL(normalizer);
- ICING_RETURN_ERROR_IF_NULL(index);
- ICING_RETURN_ERROR_IF_NULL(integer_index);
- ICING_RETURN_ERROR_IF_NULL(clock);
-
- std::vector<std::unique_ptr<SectionIndexingHandler>> handlers;
- handlers.push_back(
- std::make_unique<StringSectionIndexingHandler>(clock, normalizer, index));
- handlers.push_back(
- std::make_unique<IntegerSectionIndexingHandler>(clock, integer_index));
-
- return std::unique_ptr<IndexProcessor>(
- new IndexProcessor(std::move(handlers), clock));
-}
-
libtextclassifier3::Status IndexProcessor::IndexDocument(
const TokenizedDocument& tokenized_document, DocumentId document_id,
PutDocumentStatsProto* put_document_stats) {
- // TODO(b/259744228): set overall index latency.
- for (auto& section_indexing_handler : section_indexing_handlers_) {
- ICING_RETURN_IF_ERROR(section_indexing_handler->Handle(
- tokenized_document, document_id, put_document_stats));
+ std::unique_ptr<Timer> index_timer = clock_.GetNewTimer();
+
+ for (auto& data_indexing_handler : data_indexing_handlers_) {
+ ICING_RETURN_IF_ERROR(data_indexing_handler->Handle(
+ tokenized_document, document_id, recovery_mode_, put_document_stats));
+ }
+
+ if (put_document_stats != nullptr) {
+ put_document_stats->set_index_latency_ms(
+ index_timer->GetElapsedMilliseconds());
}
return libtextclassifier3::Status::OK;
diff --git a/icing/index/index-processor.h b/icing/index/index-processor.h
index 45954c4..9b96f00 100644
--- a/icing/index/index-processor.h
+++ b/icing/index/index-processor.h
@@ -20,12 +20,9 @@
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
-#include "icing/index/index.h"
-#include "icing/index/numeric/numeric-index.h"
-#include "icing/index/section-indexing-handler.h"
+#include "icing/index/data-indexing-handler.h"
#include "icing/proto/logging.pb.h"
#include "icing/store/document-id.h"
-#include "icing/transform/normalizer.h"
#include "icing/util/tokenized-document.h"
namespace icing {
@@ -33,16 +30,12 @@ namespace lib {
class IndexProcessor {
public:
- // Factory function to create an IndexProcessor which does not take ownership
- // of any input components, and all pointers must refer to valid objects that
- // outlive the created IndexProcessor instance.
- //
- // Returns:
- // An IndexProcessor on success
- // FAILED_PRECONDITION if any of the pointers is null.
- static libtextclassifier3::StatusOr<std::unique_ptr<IndexProcessor>> Create(
- const Normalizer* normalizer, Index* index,
- NumericIndex<int64_t>* integer_index_, const Clock* clock);
+ explicit IndexProcessor(std::vector<std::unique_ptr<DataIndexingHandler>>&&
+ data_indexing_handlers,
+ const Clock* clock, bool recovery_mode = false)
+ : data_indexing_handlers_(std::move(data_indexing_handlers)),
+ clock_(*clock),
+ recovery_mode_(recovery_mode) {}
// Add tokenized document to the index, associated with document_id. If the
// number of tokens in the document exceeds max_tokens_per_document, then only
@@ -57,21 +50,15 @@ class IndexProcessor {
//
// Returns:
// - OK on success.
- // - Any SectionIndexingHandler errors.
+ // - Any DataIndexingHandler errors.
libtextclassifier3::Status IndexDocument(
const TokenizedDocument& tokenized_document, DocumentId document_id,
PutDocumentStatsProto* put_document_stats = nullptr);
private:
- explicit IndexProcessor(std::vector<std::unique_ptr<SectionIndexingHandler>>&&
- section_indexing_handlers,
- const Clock* clock)
- : section_indexing_handlers_(std::move(section_indexing_handlers)),
- clock_(*clock) {}
-
- std::vector<std::unique_ptr<SectionIndexingHandler>>
- section_indexing_handlers_;
- const Clock& clock_;
+ std::vector<std::unique_ptr<DataIndexingHandler>> data_indexing_handlers_;
+ const Clock& clock_; // Does not own.
+ bool recovery_mode_;
};
} // namespace lib
diff --git a/icing/index/index-processor_benchmark.cc b/icing/index/index-processor_benchmark.cc
index 6123f47..8766f0b 100644
--- a/icing/index/index-processor_benchmark.cc
+++ b/icing/index/index-processor_benchmark.cc
@@ -12,14 +12,22 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "testing/base/public/benchmark.h"
#include "gmock/gmock.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
+#include "icing/index/data-indexing-handler.h"
#include "icing/index/index-processor.h"
#include "icing/index/index.h"
-#include "icing/index/numeric/dummy-numeric-index.h"
+#include "icing/index/integer-section-indexing-handler.h"
+#include "icing/index/numeric/integer-index.h"
#include "icing/index/numeric/numeric-index.h"
+#include "icing/index/string-section-indexing-handler.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/schema-util.h"
@@ -70,6 +78,8 @@ namespace lib {
namespace {
+using ::testing::IsTrue;
+
// Creates a fake type config with 10 properties (p0 - p9)
void CreateFakeTypeConfig(SchemaTypeConfigProto* type_config) {
type_config->set_schema_type("Fake_Type");
@@ -79,7 +89,7 @@ void CreateFakeTypeConfig(SchemaTypeConfigProto* type_config) {
property->set_property_name(
IcingStringUtil::StringPrintf("p%d", i)); // p0 - p9
property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
property->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
property->mutable_string_indexing_config()->set_tokenizer_type(
@@ -140,7 +150,9 @@ DocumentProto CreateDocumentWithHiragana(int content_length) {
std::unique_ptr<Index> CreateIndex(const IcingFilesystem& icing_filesystem,
const Filesystem& filesystem,
const std::string& index_dir) {
- Index::Options options(index_dir, /*index_merge_size=*/1024 * 1024 * 10);
+ Index::Options options(index_dir, /*index_merge_size=*/1024 * 1024 * 10,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
return Index::Create(options, &filesystem, &icing_filesystem).ValueOrDie();
}
@@ -151,14 +163,20 @@ std::unique_ptr<Normalizer> CreateNormalizer() {
.ValueOrDie();
}
-std::unique_ptr<SchemaStore> CreateSchemaStore(const Clock* clock) {
- Filesystem filesystem;
+std::unique_ptr<SchemaStore> CreateSchemaStore(const Filesystem& filesystem,
+ const Clock* clock,
+ const std::string& base_dir) {
+ std::string schema_store_dir = base_dir + "/schema_store_test";
+ filesystem.CreateDirectoryRecursively(schema_store_dir.c_str());
+
std::unique_ptr<SchemaStore> schema_store =
- SchemaStore::Create(&filesystem, GetTestTempDir(), clock).ValueOrDie();
+ SchemaStore::Create(&filesystem, schema_store_dir, clock).ValueOrDie();
SchemaProto schema;
CreateFakeTypeConfig(schema.add_types());
- auto set_schema_status = schema_store->SetSchema(schema);
+ auto set_schema_status = schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false);
if (!set_schema_status.ok()) {
ICING_LOG(ERROR) << set_schema_status.status().error_message();
@@ -167,8 +185,26 @@ std::unique_ptr<SchemaStore> CreateSchemaStore(const Clock* clock) {
return schema_store;
}
-void CleanUp(const Filesystem& filesystem, const std::string& index_dir) {
- filesystem.DeleteDirectoryRecursively(index_dir.c_str());
+libtextclassifier3::StatusOr<std::vector<std::unique_ptr<DataIndexingHandler>>>
+CreateDataIndexingHandlers(const Clock* clock, const Normalizer* normalizer,
+ Index* index, NumericIndex<int64_t>* integer_index) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<StringSectionIndexingHandler>
+ string_section_indexing_handler,
+ StringSectionIndexingHandler::Create(clock, normalizer, index));
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<IntegerSectionIndexingHandler>
+ integer_section_indexing_handler,
+ IntegerSectionIndexingHandler::Create(clock, integer_index));
+
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
+ handlers.push_back(std::move(string_section_indexing_handler));
+ handlers.push_back(std::move(integer_section_indexing_handler));
+ return handlers;
+}
+
+void CleanUp(const Filesystem& filesystem, const std::string& base_dir) {
+ filesystem.DeleteDirectoryRecursively(base_dir.c_str());
}
void BM_IndexDocumentWithOneProperty(benchmark::State& state) {
@@ -180,24 +216,36 @@ void BM_IndexDocumentWithOneProperty(benchmark::State& state) {
IcingFilesystem icing_filesystem;
Filesystem filesystem;
- std::string index_dir = GetTestTempDir() + "/index_test/";
+ std::string base_dir = GetTestTempDir() + "/index_processor_benchmark";
+ std::string index_dir = base_dir + "/index_test/";
+ std::string integer_index_dir = base_dir + "/integer_index_test/";
- CleanUp(filesystem, index_dir);
+ CleanUp(filesystem, base_dir);
+ ASSERT_THAT(filesystem.CreateDirectoryRecursively(base_dir.c_str()),
+ IsTrue());
std::unique_ptr<Index> index =
CreateIndex(icing_filesystem, filesystem, index_dir);
- std::unique_ptr<NumericIndex<int64_t>> integer_index =
- std::make_unique<DummyNumericIndex<int64_t>>();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ IntegerIndex::Create(filesystem, integer_index_dir,
+ IntegerIndex::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv=*/true));
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
Clock clock;
- std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore(&clock);
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, &clock, base_dir);
+
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<IndexProcessor> index_processor,
- IndexProcessor::Create(normalizer.get(), index.get(), integer_index.get(),
- &clock));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers,
+ CreateDataIndexingHandlers(&clock, normalizer.get(), index.get(),
+ integer_index.get()));
+ auto index_processor =
+ std::make_unique<IndexProcessor>(std::move(handlers), &clock);
+
DocumentProto input_document = CreateDocumentWithOneProperty(state.range(0));
TokenizedDocument tokenized_document(std::move(
TokenizedDocument::Create(schema_store.get(), language_segmenter.get(),
@@ -210,7 +258,14 @@ void BM_IndexDocumentWithOneProperty(benchmark::State& state) {
index_processor->IndexDocument(tokenized_document, document_id++));
}
- CleanUp(filesystem, index_dir);
+ index_processor.reset();
+ schema_store.reset();
+ normalizer.reset();
+ language_segmenter.reset();
+ integer_index.reset();
+ index.reset();
+
+ CleanUp(filesystem, base_dir);
}
BENCHMARK(BM_IndexDocumentWithOneProperty)
->Arg(1000)
@@ -237,24 +292,35 @@ void BM_IndexDocumentWithTenProperties(benchmark::State& state) {
IcingFilesystem icing_filesystem;
Filesystem filesystem;
- std::string index_dir = GetTestTempDir() + "/index_test/";
+ std::string base_dir = GetTestTempDir() + "/index_processor_benchmark";
+ std::string index_dir = base_dir + "/index_test/";
+ std::string integer_index_dir = base_dir + "/integer_index_test/";
- CleanUp(filesystem, index_dir);
+ CleanUp(filesystem, base_dir);
+ ASSERT_THAT(filesystem.CreateDirectoryRecursively(base_dir.c_str()),
+ IsTrue());
std::unique_ptr<Index> index =
CreateIndex(icing_filesystem, filesystem, index_dir);
- std::unique_ptr<NumericIndex<int64_t>> integer_index =
- std::make_unique<DummyNumericIndex<int64_t>>();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ IntegerIndex::Create(filesystem, integer_index_dir,
+ IntegerIndex::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv=*/true));
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
Clock clock;
- std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore(&clock);
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, &clock, base_dir);
+
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<IndexProcessor> index_processor,
- IndexProcessor::Create(normalizer.get(), index.get(), integer_index.get(),
- &clock));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers,
+ CreateDataIndexingHandlers(&clock, normalizer.get(), index.get(),
+ integer_index.get()));
+ auto index_processor =
+ std::make_unique<IndexProcessor>(std::move(handlers), &clock);
DocumentProto input_document =
CreateDocumentWithTenProperties(state.range(0));
@@ -269,7 +335,14 @@ void BM_IndexDocumentWithTenProperties(benchmark::State& state) {
index_processor->IndexDocument(tokenized_document, document_id++));
}
- CleanUp(filesystem, index_dir);
+ index_processor.reset();
+ schema_store.reset();
+ normalizer.reset();
+ language_segmenter.reset();
+ integer_index.reset();
+ index.reset();
+
+ CleanUp(filesystem, base_dir);
}
BENCHMARK(BM_IndexDocumentWithTenProperties)
->Arg(1000)
@@ -296,24 +369,35 @@ void BM_IndexDocumentWithDiacriticLetters(benchmark::State& state) {
IcingFilesystem icing_filesystem;
Filesystem filesystem;
- std::string index_dir = GetTestTempDir() + "/index_test/";
+ std::string base_dir = GetTestTempDir() + "/index_processor_benchmark";
+ std::string index_dir = base_dir + "/index_test/";
+ std::string integer_index_dir = base_dir + "/integer_index_test/";
- CleanUp(filesystem, index_dir);
+ CleanUp(filesystem, base_dir);
+ ASSERT_THAT(filesystem.CreateDirectoryRecursively(base_dir.c_str()),
+ IsTrue());
std::unique_ptr<Index> index =
CreateIndex(icing_filesystem, filesystem, index_dir);
- std::unique_ptr<NumericIndex<int64_t>> integer_index =
- std::make_unique<DummyNumericIndex<int64_t>>();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ IntegerIndex::Create(filesystem, integer_index_dir,
+ IntegerIndex::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv=*/true));
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
Clock clock;
- std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore(&clock);
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, &clock, base_dir);
+
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<IndexProcessor> index_processor,
- IndexProcessor::Create(normalizer.get(), index.get(), integer_index.get(),
- &clock));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers,
+ CreateDataIndexingHandlers(&clock, normalizer.get(), index.get(),
+ integer_index.get()));
+ auto index_processor =
+ std::make_unique<IndexProcessor>(std::move(handlers), &clock);
DocumentProto input_document =
CreateDocumentWithDiacriticLetters(state.range(0));
@@ -328,7 +412,14 @@ void BM_IndexDocumentWithDiacriticLetters(benchmark::State& state) {
index_processor->IndexDocument(tokenized_document, document_id++));
}
- CleanUp(filesystem, index_dir);
+ index_processor.reset();
+ schema_store.reset();
+ normalizer.reset();
+ language_segmenter.reset();
+ integer_index.reset();
+ index.reset();
+
+ CleanUp(filesystem, base_dir);
}
BENCHMARK(BM_IndexDocumentWithDiacriticLetters)
->Arg(1000)
@@ -355,24 +446,35 @@ void BM_IndexDocumentWithHiragana(benchmark::State& state) {
IcingFilesystem icing_filesystem;
Filesystem filesystem;
- std::string index_dir = GetTestTempDir() + "/index_test/";
+ std::string base_dir = GetTestTempDir() + "/index_processor_benchmark";
+ std::string index_dir = base_dir + "/index_test/";
+ std::string integer_index_dir = base_dir + "/integer_index_test/";
- CleanUp(filesystem, index_dir);
+ CleanUp(filesystem, base_dir);
+ ASSERT_THAT(filesystem.CreateDirectoryRecursively(base_dir.c_str()),
+ IsTrue());
std::unique_ptr<Index> index =
CreateIndex(icing_filesystem, filesystem, index_dir);
- std::unique_ptr<NumericIndex<int64_t>> integer_index =
- std::make_unique<DummyNumericIndex<int64_t>>();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ IntegerIndex::Create(filesystem, integer_index_dir,
+ IntegerIndex::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv=*/true));
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
Clock clock;
- std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore(&clock);
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, &clock, base_dir);
+
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<IndexProcessor> index_processor,
- IndexProcessor::Create(normalizer.get(), index.get(), integer_index.get(),
- &clock));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers,
+ CreateDataIndexingHandlers(&clock, normalizer.get(), index.get(),
+ integer_index.get()));
+ auto index_processor =
+ std::make_unique<IndexProcessor>(std::move(handlers), &clock);
DocumentProto input_document = CreateDocumentWithHiragana(state.range(0));
TokenizedDocument tokenized_document(std::move(
@@ -386,7 +488,14 @@ void BM_IndexDocumentWithHiragana(benchmark::State& state) {
index_processor->IndexDocument(tokenized_document, document_id++));
}
- CleanUp(filesystem, index_dir);
+ index_processor.reset();
+ schema_store.reset();
+ normalizer.reset();
+ language_segmenter.reset();
+ integer_index.reset();
+ index.reset();
+
+ CleanUp(filesystem, base_dir);
}
BENCHMARK(BM_IndexDocumentWithHiragana)
->Arg(1000)
diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc
index b83d33c..ba4ece3 100644
--- a/icing/index/index-processor_test.cc
+++ b/icing/index/index-processor_test.cc
@@ -30,13 +30,18 @@
#include "icing/absl_ports/str_join.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
+#include "icing/index/data-indexing-handler.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/index.h"
+#include "icing/index/integer-section-indexing-handler.h"
#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
-#include "icing/index/numeric/dummy-numeric-index.h"
+#include "icing/index/numeric/integer-index.h"
#include "icing/index/numeric/numeric-index.h"
+#include "icing/index/string-section-indexing-handler.h"
#include "icing/index/term-property-id.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/join/qualified-id-join-indexing-handler.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/legacy/index/icing-mock-filesystem.h"
#include "icing/portable/platform.h"
@@ -48,6 +53,7 @@
#include "icing/schema/schema-util.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/icu-data-file-helper.h"
@@ -132,6 +138,7 @@ using DataType = PropertyConfigProto::DataType;
using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::IsEmpty;
+using ::testing::IsTrue;
using ::testing::SizeIs;
using ::testing::Test;
@@ -150,12 +157,34 @@ class IndexProcessorTest : public Test {
GetTestFilePath("icing/icu.dat")));
}
- index_dir_ = GetTestTempDir() + "/index_test";
- Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
+ base_dir_ = GetTestTempDir() + "/index_processor_test";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ index_dir_ = base_dir_ + "/index";
+ integer_index_dir_ = base_dir_ + "/integer_index";
+ qualified_id_join_index_dir_ = base_dir_ + "/qualified_id_join_index";
+ schema_store_dir_ = base_dir_ + "/schema_store";
+ doc_store_dir_ = base_dir_ + "/doc_store";
+
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
ICING_ASSERT_OK_AND_ASSIGN(
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
- integer_index_ = std::make_unique<DummyNumericIndex<int64_t>>();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ integer_index_,
+ IntegerIndex::Create(
+ filesystem_, integer_index_dir_,
+ IntegerIndex::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv=*/false));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ qualified_id_join_index_,
+ QualifiedIdJoinIndex::Create(filesystem_, qualified_id_join_index_dir_,
+ /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false));
language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -167,12 +196,11 @@ class IndexProcessorTest : public Test {
normalizer_factory::Create(
/*max_term_byte_size=*/std::numeric_limits<int32_t>::max()));
- std::string schema_store_dir = GetTestTempDir() + "/schema_store";
ASSERT_TRUE(
- filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str()));
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()));
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
- SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
SchemaProto schema =
SchemaBuilder()
.AddType(
@@ -247,17 +275,59 @@ class IndexProcessorTest : public Test {
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, doc_store_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ doc_store_ = std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(
- index_processor_,
- IndexProcessor::Create(normalizer_.get(), index_.get(),
- integer_index_.get(), &fake_clock_));
+ std::unique_ptr<StringSectionIndexingHandler>
+ string_section_indexing_handler,
+ StringSectionIndexingHandler::Create(&fake_clock_, normalizer_.get(),
+ index_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerSectionIndexingHandler>
+ integer_section_indexing_handler,
+ IntegerSectionIndexingHandler::Create(
+ &fake_clock_, integer_index_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler>
+ qualified_id_join_indexing_handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ qualified_id_join_index_.get()));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
+ handlers.push_back(std::move(string_section_indexing_handler));
+ handlers.push_back(std::move(integer_section_indexing_handler));
+ handlers.push_back(std::move(qualified_id_join_indexing_handler));
+
+ index_processor_ =
+ std::make_unique<IndexProcessor>(std::move(handlers), &fake_clock_);
+
mock_icing_filesystem_ = std::make_unique<IcingMockFilesystem>();
}
void TearDown() override {
- filesystem_.DeleteDirectoryRecursively(index_dir_.c_str());
+ index_processor_.reset();
+ doc_store_.reset();
+ schema_store_.reset();
+ normalizer_.reset();
+ lang_segmenter_.reset();
+ qualified_id_join_index_.reset();
+ integer_index_.reset();
+ index_.reset();
+
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
}
std::unique_ptr<IcingMockFilesystem> mock_icing_filesystem_;
@@ -265,13 +335,21 @@ class IndexProcessorTest : public Test {
Filesystem filesystem_;
IcingFilesystem icing_filesystem_;
FakeClock fake_clock_;
+ std::string base_dir_;
std::string index_dir_;
+ std::string integer_index_dir_;
+ std::string qualified_id_join_index_dir_;
+ std::string schema_store_dir_;
+ std::string doc_store_dir_;
- std::unique_ptr<LanguageSegmenter> lang_segmenter_;
- std::unique_ptr<Normalizer> normalizer_;
std::unique_ptr<Index> index_;
std::unique_ptr<NumericIndex<int64_t>> integer_index_;
+ std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index_;
+ std::unique_ptr<LanguageSegmenter> lang_segmenter_;
+ std::unique_ptr<Normalizer> normalizer_;
std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> doc_store_;
+
std::unique_ptr<IndexProcessor> index_processor_;
};
@@ -297,16 +375,6 @@ std::vector<DocHitInfoTermFrequencyPair> GetHitsWithTermFrequency(
return infos;
}
-TEST_F(IndexProcessorTest, CreationWithNullPointerShouldFail) {
- EXPECT_THAT(IndexProcessor::Create(/*normalizer=*/nullptr, index_.get(),
- integer_index_.get(), &fake_clock_),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
-
- EXPECT_THAT(IndexProcessor::Create(normalizer_.get(), /*index=*/nullptr,
- integer_index_.get(), &fake_clock_),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
-}
-
TEST_F(IndexProcessorTest, NoTermMatchTypeContent) {
DocumentProto document =
DocumentBuilder()
@@ -356,9 +424,11 @@ TEST_F(IndexProcessorTest, OneDoc) {
IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("hello", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("hello", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
std::vector<DocHitInfoTermFrequencyPair> hits =
GetHitsWithTermFrequency(std::move(itr));
std::unordered_map<SectionId, Hit::TermFrequency> expectedMap{
@@ -367,8 +437,9 @@ TEST_F(IndexProcessorTest, OneDoc) {
kDocumentId0, expectedMap)));
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("hello", 1U << kPrefixedSectionId,
- TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator(
+ "hello", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ 1U << kPrefixedSectionId, TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
}
@@ -409,9 +480,11 @@ TEST_F(IndexProcessorTest, MultipleDocs) {
IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("world", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("world", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
std::vector<DocHitInfoTermFrequencyPair> hits =
GetHitsWithTermFrequency(std::move(itr));
std::unordered_map<SectionId, Hit::TermFrequency> expectedMap1{
@@ -424,17 +497,19 @@ TEST_F(IndexProcessorTest, MultipleDocs) {
EqualsDocHitInfoWithTermFrequency(kDocumentId0, expectedMap2)));
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("world", 1U << kPrefixedSectionId,
- TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator(
+ "world", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ 1U << kPrefixedSectionId, TermMatchType::EXACT_ONLY));
hits = GetHitsWithTermFrequency(std::move(itr));
std::unordered_map<SectionId, Hit::TermFrequency> expectedMap{
{kPrefixedSectionId, 2}};
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
kDocumentId1, expectedMap)));
- ICING_ASSERT_OK_AND_ASSIGN(itr,
- index_->GetIterator("coffee", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("coffee", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
hits = GetHitsWithTermFrequency(std::move(itr));
expectedMap = {{kExactSectionId, Hit::kMaxTermFrequency}};
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
@@ -464,9 +539,11 @@ TEST_F(IndexProcessorTest, DocWithNestedProperty) {
IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("rocky", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("rocky", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kNestedSectionId})));
@@ -489,9 +566,11 @@ TEST_F(IndexProcessorTest, DocWithRepeatedProperty) {
IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("italian", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("italian", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kRepeatedSectionId})));
@@ -555,10 +634,15 @@ TEST_F(IndexProcessorTest, TooLongTokens) {
normalizer_factory::Create(
/*max_term_byte_size=*/4));
- ICING_ASSERT_OK_AND_ASSIGN(
- index_processor_,
- IndexProcessor::Create(normalizer.get(), index_.get(),
- integer_index_.get(), &fake_clock_));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<StringSectionIndexingHandler>
+ string_section_indexing_handler,
+ StringSectionIndexingHandler::Create(
+ &fake_clock_, normalizer.get(), index_.get()));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
+ handlers.push_back(std::move(string_section_indexing_handler));
+
+ index_processor_ =
+ std::make_unique<IndexProcessor>(std::move(handlers), &fake_clock_);
DocumentProto document =
DocumentBuilder()
@@ -576,23 +660,27 @@ TEST_F(IndexProcessorTest, TooLongTokens) {
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
// "good" should have been indexed normally.
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("good", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("good", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kPrefixedSectionId})));
// "night" should not have been.
- ICING_ASSERT_OK_AND_ASSIGN(itr,
- index_->GetIterator("night", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("night", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
// "night" should have been truncated to "nigh".
- ICING_ASSERT_OK_AND_ASSIGN(itr,
- index_->GetIterator("nigh", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("nigh", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kPrefixedSectionId})));
@@ -630,7 +718,9 @@ TEST_F(IndexProcessorTest, NonPrefixedContentPrefixQuery) {
// Only document_id 1 should surface in a prefix query for "Rock"
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("rock", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("rock", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId1, std::vector<SectionId>{kPrefixedSectionId})));
@@ -665,9 +755,11 @@ TEST_F(IndexProcessorTest, TokenNormalization) {
IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("case", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("case", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(
GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(kDocumentId1,
@@ -682,6 +774,7 @@ TEST_F(IndexProcessorTest, OutOfOrderDocumentIds) {
.SetKey("icing", "fake_type/1")
.SetSchema(std::string(kFakeType))
.AddStringProperty(std::string(kExactProperty), "ALL UPPER CASE")
+ .AddInt64Property(std::string(kIndexableIntegerProperty), 123)
.Build();
ICING_ASSERT_OK_AND_ASSIGN(
TokenizedDocument tokenized_document,
@@ -691,13 +784,19 @@ TEST_F(IndexProcessorTest, OutOfOrderDocumentIds) {
IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
- // Indexing a document with document_id < last_added_document_id should cause
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t index_element_size,
+ index_->GetElementsSize());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 integer_index_crc,
+ integer_index_->UpdateChecksums());
+
+ // Indexing a document with document_id <= last_added_document_id should cause
// a failure.
document =
DocumentBuilder()
.SetKey("icing", "fake_type/2")
.SetSchema(std::string(kFakeType))
.AddStringProperty(std::string(kExactProperty), "all lower case")
+ .AddInt64Property(std::string(kIndexableIntegerProperty), 456)
.Build();
ICING_ASSERT_OK_AND_ASSIGN(
tokenized_document,
@@ -705,12 +804,98 @@ TEST_F(IndexProcessorTest, OutOfOrderDocumentIds) {
document));
EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ // Verify that both index_ and integer_index_ are unchanged.
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+ EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(index_element_size));
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(kDocumentId1));
+ EXPECT_THAT(integer_index_->UpdateChecksums(),
+ IsOkAndHolds(integer_index_crc));
// As should indexing a document document_id == last_added_document_id.
- EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId1),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ // Verify that both index_ and integer_index_ are unchanged.
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+ EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(index_element_size));
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(kDocumentId1));
+ EXPECT_THAT(integer_index_->UpdateChecksums(),
+ IsOkAndHolds(integer_index_crc));
+}
+
+TEST_F(IndexProcessorTest, OutOfOrderDocumentIdsInRecoveryMode) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<StringSectionIndexingHandler>
+ string_section_indexing_handler,
+ StringSectionIndexingHandler::Create(&fake_clock_, normalizer_.get(),
+ index_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerSectionIndexingHandler>
+ integer_section_indexing_handler,
+ IntegerSectionIndexingHandler::Create(
+ &fake_clock_, integer_index_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<QualifiedIdJoinIndexingHandler>
+ qualified_id_join_indexing_handler,
+ QualifiedIdJoinIndexingHandler::Create(
+ &fake_clock_, qualified_id_join_index_.get()));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
+ handlers.push_back(std::move(string_section_indexing_handler));
+ handlers.push_back(std::move(integer_section_indexing_handler));
+ handlers.push_back(std::move(qualified_id_join_indexing_handler));
+
+ IndexProcessor index_processor(std::move(handlers), &fake_clock_,
+ /*recovery_mode=*/true);
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kExactProperty), "ALL UPPER CASE")
+ .AddInt64Property(std::string(kIndexableIntegerProperty), 123)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(index_processor.IndexDocument(tokenized_document, kDocumentId1),
+ IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t index_element_size,
+ index_->GetElementsSize());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 integer_index_crc,
+ integer_index_->UpdateChecksums());
+
+ // Indexing a document with document_id <= last_added_document_id in recovery
+ // mode should not get any error, but IndexProcessor should still ignore it
+ // and index data should remain unchanged.
+ document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/2")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kExactProperty), "all lower case")
+ .AddInt64Property(std::string(kIndexableIntegerProperty), 456)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(index_processor.IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
+ // Verify that both index_ and integer_index_ are unchanged.
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+ EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(index_element_size));
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(kDocumentId1));
+ EXPECT_THAT(integer_index_->UpdateChecksums(),
+ IsOkAndHolds(integer_index_crc));
+ // As should indexing a document document_id == last_added_document_id.
+ EXPECT_THAT(index_processor.IndexDocument(tokenized_document, kDocumentId1),
+ IsOk());
+ // Verify that both index_ and integer_index_ are unchanged.
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+ EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(index_element_size));
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(kDocumentId1));
+ EXPECT_THAT(integer_index_->UpdateChecksums(),
+ IsOkAndHolds(integer_index_crc));
}
TEST_F(IndexProcessorTest, NonAsciiIndexing) {
@@ -735,9 +920,11 @@ TEST_F(IndexProcessorTest, NonAsciiIndexing) {
IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("你好", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("你好", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kExactSectionId})));
@@ -785,14 +972,23 @@ TEST_F(IndexProcessorTest, IndexingDocAutomaticMerge) {
TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
document));
Index::Options options(index_dir_,
- /*index_merge_size=*/document.ByteSizeLong() * 100);
+ /*index_merge_size=*/document.ByteSizeLong() * 100,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/64);
ICING_ASSERT_OK_AND_ASSIGN(
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
ICING_ASSERT_OK_AND_ASSIGN(
- index_processor_,
- IndexProcessor::Create(normalizer_.get(), index_.get(),
- integer_index_.get(), &fake_clock_));
+ std::unique_ptr<StringSectionIndexingHandler>
+ string_section_indexing_handler,
+ StringSectionIndexingHandler::Create(&fake_clock_, normalizer_.get(),
+ index_.get()));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
+ handlers.push_back(std::move(string_section_indexing_handler));
+
+ index_processor_ =
+ std::make_unique<IndexProcessor>(std::move(handlers), &fake_clock_);
+
DocumentId doc_id = 0;
// Have determined experimentally that indexing 3373 documents with this text
// will cause the LiteIndex to fill up. Further indexing will fail unless the
@@ -841,15 +1037,23 @@ TEST_F(IndexProcessorTest, IndexingDocMergeFailureResets) {
// 2. Recreate the index with the mock filesystem and a merge size that will
// only allow one document to be added before requiring a merge.
Index::Options options(index_dir_,
- /*index_merge_size=*/document.ByteSizeLong());
+ /*index_merge_size=*/document.ByteSizeLong(),
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/16);
ICING_ASSERT_OK_AND_ASSIGN(
index_,
Index::Create(options, &filesystem_, mock_icing_filesystem_.get()));
ICING_ASSERT_OK_AND_ASSIGN(
- index_processor_,
- IndexProcessor::Create(normalizer_.get(), index_.get(),
- integer_index_.get(), &fake_clock_));
+ std::unique_ptr<StringSectionIndexingHandler>
+ string_section_indexing_handler,
+ StringSectionIndexingHandler::Create(&fake_clock_, normalizer_.get(),
+ index_.get()));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
+ handlers.push_back(std::move(string_section_indexing_handler));
+
+ index_processor_ =
+ std::make_unique<IndexProcessor>(std::move(handlers), &fake_clock_);
// 3. Index one document. This should fit in the LiteIndex without requiring a
// merge.
@@ -891,7 +1095,8 @@ TEST_F(IndexProcessorTest, ExactVerbatimProperty) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("Hello, world!", kSectionIdMaskAll,
+ index_->GetIterator("Hello, world!", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
TermMatchType::EXACT_ONLY));
std::vector<DocHitInfoTermFrequencyPair> hits =
GetHitsWithTermFrequency(std::move(itr));
@@ -922,9 +1127,11 @@ TEST_F(IndexProcessorTest, PrefixVerbatimProperty) {
// We expect to match the document we indexed as "Hello, w" is a prefix
// of "Hello, world!"
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("Hello, w", kSectionIdMaskAll,
- TermMatchType::PREFIX));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("Hello, w", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
std::vector<DocHitInfoTermFrequencyPair> hits =
GetHitsWithTermFrequency(std::move(itr));
std::unordered_map<SectionId, Hit::TermFrequency> expectedMap{
@@ -954,7 +1161,9 @@ TEST_F(IndexProcessorTest, VerbatimPropertyDoesntMatchSubToken) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("world", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("world", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
std::vector<DocHitInfo> hits = GetHits(std::move(itr));
// We should not have hits for term "world" as the index processor should
@@ -984,9 +1193,11 @@ TEST_F(IndexProcessorTest, Rfc822PropertyExact) {
std::unordered_map<SectionId, Hit::TermFrequency> expected_map{
{kRfc822SectionId, 2}};
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("alexsav", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("alexsav", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
std::vector<DocHitInfoTermFrequencyPair> hits =
GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
@@ -995,15 +1206,17 @@ TEST_F(IndexProcessorTest, Rfc822PropertyExact) {
expected_map = {{kRfc822SectionId, 1}};
ICING_ASSERT_OK_AND_ASSIGN(
- itr,
- index_->GetIterator("com", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator("com", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
hits = GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
kDocumentId0, expected_map)));
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("alexsav@google.com", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator("alexsav@google.com", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
hits = GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
kDocumentId0, expected_map)));
@@ -1029,14 +1242,23 @@ TEST_F(IndexProcessorTest, Rfc822PropertyExactShouldNotReturnPrefix) {
std::unordered_map<SectionId, Hit::TermFrequency> expected_map{
{kRfc822SectionId, 2}};
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("alexsa", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("alexsa", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
std::vector<DocHitInfo> hits = GetHits(std::move(itr));
EXPECT_THAT(hits, IsEmpty());
}
// Some prefixes of generated RFC822 tokens.
+#ifdef ENABLE_RFC822_PROPERTY_PREFIX_TEST
+// ENABLE_RFC822_PROPERTY_PREFIX_TEST won't be defined, so this test will not be
+// compiled.
+// TODO(b/250648165): Remove #ifdef to enable this test after fixing the
+// indeterministic behavior of prefix query term frequency in
+// lite index.
+//
TEST_F(IndexProcessorTest, Rfc822PropertyPrefix) {
DocumentProto document = DocumentBuilder()
.SetKey("icing", "fake_type/1")
@@ -1057,26 +1279,33 @@ TEST_F(IndexProcessorTest, Rfc822PropertyPrefix) {
std::unordered_map<SectionId, Hit::TermFrequency> expected_map{
{kRfc822SectionId, 1}};
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("alexsav@", kSectionIdMaskAll,
- TermMatchType::PREFIX));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("alexsav@", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
std::vector<DocHitInfoTermFrequencyPair> hits =
GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
kDocumentId0, expected_map)));
- ICING_ASSERT_OK_AND_ASSIGN(itr, index_->GetIterator("goog", kSectionIdMaskAll,
- TermMatchType::PREFIX));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("goog", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
hits = GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
kDocumentId0, expected_map)));
- ICING_ASSERT_OK_AND_ASSIGN(itr, index_->GetIterator("ale", kSectionIdMaskAll,
- TermMatchType::PREFIX));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("ale", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
hits = GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
kDocumentId0, expected_map)));
}
+#endif // ENABLE_RFC822_PROPERTY_PREFIX_TEST
TEST_F(IndexProcessorTest, Rfc822PropertyNoMatch) {
DocumentProto document = DocumentBuilder()
@@ -1099,7 +1328,9 @@ TEST_F(IndexProcessorTest, Rfc822PropertyNoMatch) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("abc.xyz", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("abc.xyz", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
std::vector<DocHitInfo> hits = GetHits(std::move(itr));
EXPECT_THAT(hits, IsEmpty());
@@ -1124,9 +1355,11 @@ TEST_F(IndexProcessorTest, ExactUrlProperty) {
IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("google", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("google", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
std::vector<DocHitInfoTermFrequencyPair> hits =
GetHitsWithTermFrequency(std::move(itr));
std::unordered_map<SectionId, Hit::TermFrequency> expected_map{
@@ -1134,25 +1367,28 @@ TEST_F(IndexProcessorTest, ExactUrlProperty) {
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
kDocumentId0, expected_map)));
- ICING_ASSERT_OK_AND_ASSIGN(itr,
- index_->GetIterator("http", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("http", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
hits = GetHitsWithTermFrequency(std::move(itr));
expected_map = {{kUrlExactSectionId, 1}};
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
kDocumentId0, expected_map)));
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("www.google.com", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator("www.google.com", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
hits = GetHitsWithTermFrequency(std::move(itr));
expected_map = {{kUrlExactSectionId, 1}};
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
kDocumentId0, expected_map)));
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("http://www.google.com", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator("http://www.google.com", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
hits = GetHitsWithTermFrequency(std::move(itr));
expected_map = {{kUrlExactSectionId, 1}};
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
@@ -1179,20 +1415,24 @@ TEST_F(IndexProcessorTest, ExactUrlPropertyDoesNotMatchPrefix) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("co", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("co", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
std::vector<DocHitInfoTermFrequencyPair> hits =
GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, IsEmpty());
- ICING_ASSERT_OK_AND_ASSIGN(itr,
- index_->GetIterator("mail.go", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("mail.go", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
hits = GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, IsEmpty());
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("mail.google.com", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator("mail.google.com", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
hits = GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, IsEmpty());
}
@@ -1218,7 +1458,9 @@ TEST_F(IndexProcessorTest, PrefixUrlProperty) {
// "goo" is a prefix of "google" and "google.com"
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("goo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("goo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
std::vector<DocHitInfoTermFrequencyPair> hits =
GetHitsWithTermFrequency(std::move(itr));
std::unordered_map<SectionId, Hit::TermFrequency> expected_map{
@@ -1227,8 +1469,10 @@ TEST_F(IndexProcessorTest, PrefixUrlProperty) {
kDocumentId0, expected_map)));
// "http" is a prefix of "http" and "http://www.google.com"
- ICING_ASSERT_OK_AND_ASSIGN(itr, index_->GetIterator("http", kSectionIdMaskAll,
- TermMatchType::PREFIX));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("http", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
hits = GetHitsWithTermFrequency(std::move(itr));
expected_map = {{kUrlPrefixedSectionId, 1}};
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
@@ -1236,8 +1480,9 @@ TEST_F(IndexProcessorTest, PrefixUrlProperty) {
// "www.go" is a prefix of "www.google.com"
ICING_ASSERT_OK_AND_ASSIGN(
- itr,
- index_->GetIterator("www.go", kSectionIdMaskAll, TermMatchType::PREFIX));
+ itr, index_->GetIterator("www.go", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
hits = GetHitsWithTermFrequency(std::move(itr));
expected_map = {{kUrlPrefixedSectionId, 1}};
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
@@ -1265,26 +1510,32 @@ TEST_F(IndexProcessorTest, PrefixUrlPropertyNoMatch) {
// no token starts with "gle", so we should have no hits
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("gle", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("gle", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
std::vector<DocHitInfoTermFrequencyPair> hits =
GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, IsEmpty());
ICING_ASSERT_OK_AND_ASSIGN(
- itr,
- index_->GetIterator("w.goo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ itr, index_->GetIterator("w.goo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
hits = GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, IsEmpty());
// tokens have separators removed, so no hits here
- ICING_ASSERT_OK_AND_ASSIGN(itr, index_->GetIterator(".com", kSectionIdMaskAll,
- TermMatchType::PREFIX));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator(".com", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
hits = GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, IsEmpty());
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("calendar/render", kSectionIdMaskAll,
- TermMatchType::PREFIX));
+ itr, index_->GetIterator("calendar/render", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
hits = GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, IsEmpty());
}
@@ -1311,7 +1562,8 @@ TEST_F(IndexProcessorTest, IndexableIntegerProperty) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
integer_index_->GetIterator(kIndexableIntegerProperty, /*key_lower=*/1,
- /*key_upper=*/5));
+ /*key_upper=*/5, *doc_store_, *schema_store_,
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(
GetHits(std::move(itr)),
@@ -1340,7 +1592,8 @@ TEST_F(IndexProcessorTest, IndexableIntegerPropertyNoMatch) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
integer_index_->GetIterator(kIndexableIntegerProperty, /*key_lower=*/-1,
- /*key_upper=*/0));
+ /*key_upper=*/0, *doc_store_, *schema_store_,
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
}
diff --git a/icing/index/index.cc b/icing/index/index.cc
index a35c80d..31dcc7e 100644
--- a/icing/index/index.cc
+++ b/icing/index/index.cc
@@ -14,31 +14,38 @@
#include "icing/index/index.h"
+#include <algorithm>
+#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
#include "icing/index/hit/hit.h"
#include "icing/index/iterator/doc-hit-info-iterator-or.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/lite/doc-hit-info-iterator-term-lite.h"
#include "icing/index/lite/lite-index.h"
#include "icing/index/main/doc-hit-info-iterator-term-main.h"
+#include "icing/index/main/main-index.h"
#include "icing/index/term-id-codec.h"
-#include "icing/index/term-property-id.h"
+#include "icing/index/term-metadata.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/legacy/index/icing-dynamic-trie.h"
#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/proto/scoring.pb.h"
#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/schema/section.h"
#include "icing/scoring/ranker.h"
#include "icing/store/document-id.h"
+#include "icing/store/suggestion-result-checker.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
@@ -59,7 +66,9 @@ libtextclassifier3::StatusOr<LiteIndex::Options> CreateLiteIndexOptions(
options.index_merge_size));
}
return LiteIndex::Options(options.base_dir + "/idx/lite.",
- options.index_merge_size);
+ options.index_merge_size,
+ options.lite_index_sort_at_indexing,
+ options.lite_index_sort_size);
}
std::string MakeMainIndexFilepath(const std::string& base_dir) {
@@ -151,9 +160,17 @@ libtextclassifier3::StatusOr<std::unique_ptr<Index>> Index::Create(
IcingDynamicTrie::max_value_index(GetMainLexiconOptions()),
IcingDynamicTrie::max_value_index(
lite_index_options.lexicon_options)));
+
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<LiteIndex> lite_index,
LiteIndex::Create(lite_index_options, icing_filesystem));
+ // Sort the lite index if we've enabled sorting the HitBuffer at indexing
+ // time, and there's an unsorted tail exceeding the threshold.
+ if (options.lite_index_sort_at_indexing &&
+ lite_index->HasUnsortedHitsExceedingSortThreshold()) {
+ lite_index->SortHits();
+ }
+
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<MainIndex> main_index,
MainIndex::Create(MakeMainIndexFilepath(options.base_dir), filesystem,
@@ -163,6 +180,12 @@ libtextclassifier3::StatusOr<std::unique_ptr<Index>> Index::Create(
std::move(main_index), filesystem));
}
+/* static */ libtextclassifier3::StatusOr<int> Index::ReadFlashIndexMagic(
+ const Filesystem* filesystem, const std::string& base_dir) {
+ return MainIndex::ReadFlashIndexMagic(filesystem,
+ MakeMainIndexFilepath(base_dir));
+}
+
libtextclassifier3::Status Index::TruncateTo(DocumentId document_id) {
if (lite_index_->last_added_document_id() != kInvalidDocumentId &&
lite_index_->last_added_document_id() > document_id) {
@@ -182,7 +205,8 @@ libtextclassifier3::Status Index::TruncateTo(DocumentId document_id) {
}
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
-Index::GetIterator(const std::string& term, SectionIdMask section_id_mask,
+Index::GetIterator(const std::string& term, int term_start_index,
+ int unnormalized_term_length, SectionIdMask section_id_mask,
TermMatchType::Code term_match_type,
bool need_hit_term_frequency) {
std::unique_ptr<DocHitInfoIterator> lite_itr;
@@ -190,17 +214,19 @@ Index::GetIterator(const std::string& term, SectionIdMask section_id_mask,
switch (term_match_type) {
case TermMatchType::EXACT_ONLY:
lite_itr = std::make_unique<DocHitInfoIteratorTermLiteExact>(
- term_id_codec_.get(), lite_index_.get(), term, section_id_mask,
- need_hit_term_frequency);
+ term_id_codec_.get(), lite_index_.get(), term, term_start_index,
+ unnormalized_term_length, section_id_mask, need_hit_term_frequency);
main_itr = std::make_unique<DocHitInfoIteratorTermMainExact>(
- main_index_.get(), term, section_id_mask, need_hit_term_frequency);
+ main_index_.get(), term, term_start_index, unnormalized_term_length,
+ section_id_mask, need_hit_term_frequency);
break;
case TermMatchType::PREFIX:
lite_itr = std::make_unique<DocHitInfoIteratorTermLitePrefix>(
- term_id_codec_.get(), lite_index_.get(), term, section_id_mask,
- need_hit_term_frequency);
+ term_id_codec_.get(), lite_index_.get(), term, term_start_index,
+ unnormalized_term_length, section_id_mask, need_hit_term_frequency);
main_itr = std::make_unique<DocHitInfoIteratorTermMainPrefix>(
- main_index_.get(), term, section_id_mask, need_hit_term_frequency);
+ main_index_.get(), term, term_start_index, unnormalized_term_length,
+ section_id_mask, need_hit_term_frequency);
break;
default:
return absl_ports::InvalidArgumentError(
diff --git a/icing/index/index.h b/icing/index/index.h
index 878ac59..32ea97b 100644
--- a/icing/index/index.h
+++ b/icing/index/index.h
@@ -18,8 +18,9 @@
#include <cstdint>
#include <memory>
#include <string>
-#include <unordered_set>
+#include <unordered_map>
#include <utility>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
@@ -27,6 +28,7 @@
#include "icing/index/hit/hit.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/lite/lite-index.h"
+#include "icing/index/lite/term-id-hit-pair.h"
#include "icing/index/main/main-index-merger.h"
#include "icing/index/main/main-index.h"
#include "icing/index/term-id-codec.h"
@@ -40,7 +42,7 @@
#include "icing/store/document-id.h"
#include "icing/store/namespace-id.h"
#include "icing/store/suggestion-result-checker.h"
-#include "icing/util/crc32.h"
+#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
@@ -68,11 +70,18 @@ namespace lib {
class Index {
public:
struct Options {
- explicit Options(const std::string& base_dir, uint32_t index_merge_size)
- : base_dir(base_dir), index_merge_size(index_merge_size) {}
+ explicit Options(const std::string& base_dir, uint32_t index_merge_size,
+ bool lite_index_sort_at_indexing,
+ uint32_t lite_index_sort_size)
+ : base_dir(base_dir),
+ index_merge_size(index_merge_size),
+ lite_index_sort_at_indexing(lite_index_sort_at_indexing),
+ lite_index_sort_size(lite_index_sort_size) {}
std::string base_dir;
int32_t index_merge_size;
+ bool lite_index_sort_at_indexing;
+ int32_t lite_index_sort_size;
};
// Creates an instance of Index in the directory pointed by file_dir.
@@ -86,6 +95,16 @@ class Index {
const Options& options, const Filesystem* filesystem,
const IcingFilesystem* icing_filesystem);
+ // Reads magic from existing flash (main) index file header. We need this
+ // during Icing initialization phase to determine the version.
+ //
+ // Returns
+ // Valid magic on success
+ // NOT_FOUND if the lite index doesn't exist
+ // INTERNAL on I/O error
+ static libtextclassifier3::StatusOr<int> ReadFlashIndexMagic(
+ const Filesystem* filesystem, const std::string& base_dir);
+
// Clears all files created by the index. Returns OK if all files were
// cleared.
libtextclassifier3::Status Reset() {
@@ -177,15 +196,18 @@ class Index {
IndexStorageInfoProto GetStorageInfo() const;
// Create an iterator to iterate through all doc hit infos in the index that
- // match the term. section_id_mask can be set to ignore hits from sections not
- // listed in the mask. Eg. section_id_mask = 1U << 3; would only return hits
- // that occur in section 3.
+ // match the term. term_start_index is the start index of the given term in
+ // the search query. unnormalized_term_length is the length of the given
+ // unnormalized term in the search query not listed in the mask.
+ // Eg. section_id_mask = 1U << 3; would only return hits that occur in
+ // section 3.
//
// Returns:
// unique ptr to a valid DocHitInfoIterator that matches the term
// INVALID_ARGUMENT if given an invalid term_match_type
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> GetIterator(
- const std::string& term, SectionIdMask section_id_mask,
+ const std::string& term, int term_start_index,
+ int unnormalized_term_length, SectionIdMask section_id_mask,
TermMatchType::Code term_match_type, bool need_hit_term_frequency = true);
// Finds terms with the given prefix in the given namespaces. If
@@ -262,9 +284,23 @@ class Index {
ICING_RETURN_IF_ERROR(main_index_->AddHits(
*term_id_codec_, std::move(outputs.backfill_map),
std::move(term_id_hit_pairs), lite_index_->last_added_document_id()));
+ ICING_RETURN_IF_ERROR(main_index_->PersistToDisk());
return lite_index_->Reset();
}
+ // Whether the LiteIndex HitBuffer requires sorting. This is only true if
+ // Icing has enabled sorting during indexing time, and the HitBuffer's
+ // unsorted tail has exceeded the lite_index_sort_size.
+ bool LiteIndexNeedSort() const {
+ return options_.lite_index_sort_at_indexing &&
+ lite_index_->HasUnsortedHitsExceedingSortThreshold();
+ }
+
+ // Sorts the LiteIndex HitBuffer.
+ void SortLiteIndex() {
+ lite_index_->SortHits();
+ }
+
// Reduces internal file sizes by reclaiming space of deleted documents.
// new_last_added_document_id will be used to update the last added document
// id in the lite index.
diff --git a/icing/index/index_test.cc b/icing/index/index_test.cc
index 4349cc9..b823535 100644
--- a/icing/index/index_test.cc
+++ b/icing/index/index_test.cc
@@ -14,6 +14,8 @@
#include "icing/index/index.h"
+#include <unistd.h>
+
#include <algorithm>
#include <cstdint>
#include <limits>
@@ -56,6 +58,7 @@ using ::testing::Eq;
using ::testing::Ge;
using ::testing::Gt;
using ::testing::IsEmpty;
+using ::testing::IsFalse;
using ::testing::IsTrue;
using ::testing::Ne;
using ::testing::NiceMock;
@@ -73,12 +76,15 @@ class IndexTest : public Test {
protected:
void SetUp() override {
index_dir_ = GetTestTempDir() + "/index_test/";
- Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
ICING_ASSERT_OK_AND_ASSIGN(
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
}
void TearDown() override {
+ index_.reset();
icing_filesystem_.DeleteDirectoryRecursively(index_dir_.c_str());
}
@@ -92,10 +98,12 @@ class IndexTest : public Test {
}
libtextclassifier3::StatusOr<std::vector<DocHitInfo>> GetHits(
- std::string term, TermMatchType::Code match_type) {
+ std::string term, int term_start_index, int unnormalized_term_length,
+ TermMatchType::Code match_type) {
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator(term, kSectionIdMaskAll, match_type));
+ index_->GetIterator(term, term_start_index, unnormalized_term_length,
+ kSectionIdMaskAll, match_type));
return GetHits(std::move(itr));
}
@@ -141,7 +149,9 @@ MATCHER_P2(EqualsTermMetadata, content, hit_count, "") {
}
TEST_F(IndexTest, CreationWithNullPointerShouldFail) {
- Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
EXPECT_THAT(
Index::Create(options, &filesystem_, /*icing_filesystem=*/nullptr),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
@@ -153,13 +163,16 @@ TEST_F(IndexTest, CreationWithNullPointerShouldFail) {
TEST_F(IndexTest, EmptyIndex) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
ICING_ASSERT_OK_AND_ASSIGN(
- itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
@@ -170,17 +183,50 @@ TEST_F(IndexTest, EmptyIndexAfterMerge) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
ICING_ASSERT_OK_AND_ASSIGN(
- itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
+TEST_F(IndexTest, CreationWithLiteIndexSortAtIndexingEnabledShouldSort) {
+ // Make the index with lite_index_sort_at_indexing=false and a very small sort
+ // threshold.
+ Index::Options options(index_dir_, /*index_merge_size=*/1024,
+ /*lite_index_sort_at_indexing=*/false,
+ /*lite_index_sort_size=*/16);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_, Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("bar"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("baz"), IsOk());
+ ASSERT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ // Persist and recreate the index with lite_index_sort_at_indexing=true
+ ASSERT_THAT(index_->PersistToDisk(), IsOk());
+ options = Index::Options(index_dir_, /*index_merge_size=*/1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/16);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_, Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ // Check that the index is sorted after recreating with
+ // lite_index_sort_at_indexing, with the unsorted HitBuffer exceeding the sort
+ // threshold.
+ EXPECT_THAT(index_->LiteIndexNeedSort(), IsFalse());
+}
+
TEST_F(IndexTest, AdvancePastEnd) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
@@ -189,15 +235,18 @@ TEST_F(IndexTest, AdvancePastEnd) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("bar", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("bar", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
EXPECT_THAT(itr->doc_hit_info(),
EqualsDocHitInfo(kInvalidDocumentId, std::vector<SectionId>()));
ICING_ASSERT_OK_AND_ASSIGN(
- itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(), IsOk());
EXPECT_THAT(itr->Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
@@ -215,15 +264,18 @@ TEST_F(IndexTest, AdvancePastEndAfterMerge) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("bar", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("bar", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
EXPECT_THAT(itr->doc_hit_info(),
EqualsDocHitInfo(kInvalidDocumentId, std::vector<SectionId>()));
ICING_ASSERT_OK_AND_ASSIGN(
- itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(), IsOk());
EXPECT_THAT(itr->Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
@@ -239,7 +291,9 @@ TEST_F(IndexTest, SingleHitSingleTermIndex) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -255,7 +309,9 @@ TEST_F(IndexTest, SingleHitSingleTermIndexAfterMerge) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -270,26 +326,32 @@ TEST_F(IndexTest, SingleHitSingleTermIndexAfterOptimize) {
ICING_ASSERT_OK(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2},
/*new_last_added_document_id=*/2));
- EXPECT_THAT(GetHits("foo", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
- kDocumentId2, std::vector<SectionId>{kSectionId2}))));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ kDocumentId2, std::vector<SectionId>{kSectionId2}))));
EXPECT_EQ(index_->last_added_document_id(), kDocumentId2);
// Mapping to a different docid will translate the hit
ICING_ASSERT_OK(index_->Optimize(
/*document_id_old_to_new=*/{0, kInvalidDocumentId, kDocumentId1},
/*new_last_added_document_id=*/1));
- EXPECT_THAT(GetHits("foo", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
- kDocumentId1, std::vector<SectionId>{kSectionId2}))));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ kDocumentId1, std::vector<SectionId>{kSectionId2}))));
EXPECT_EQ(index_->last_added_document_id(), kDocumentId1);
// Mapping to kInvalidDocumentId will remove the hit.
ICING_ASSERT_OK(
index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId},
/*new_last_added_document_id=*/0));
- EXPECT_THAT(GetHits("foo", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(IsEmpty()));
EXPECT_EQ(index_->last_added_document_id(), kDocumentId0);
}
@@ -304,26 +366,32 @@ TEST_F(IndexTest, SingleHitSingleTermIndexAfterMergeAndOptimize) {
ICING_ASSERT_OK(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2},
/*new_last_added_document_id=*/2));
- EXPECT_THAT(GetHits("foo", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
- kDocumentId2, std::vector<SectionId>{kSectionId2}))));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ kDocumentId2, std::vector<SectionId>{kSectionId2}))));
EXPECT_EQ(index_->last_added_document_id(), kDocumentId2);
// Mapping to a different docid will translate the hit
ICING_ASSERT_OK(index_->Optimize(
/*document_id_old_to_new=*/{0, kInvalidDocumentId, kDocumentId1},
/*new_last_added_document_id=*/1));
- EXPECT_THAT(GetHits("foo", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
- kDocumentId1, std::vector<SectionId>{kSectionId2}))));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ kDocumentId1, std::vector<SectionId>{kSectionId2}))));
EXPECT_EQ(index_->last_added_document_id(), kDocumentId1);
// Mapping to kInvalidDocumentId will remove the hit.
ICING_ASSERT_OK(
index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId},
/*new_last_added_document_id=*/0));
- EXPECT_THAT(GetHits("foo", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(IsEmpty()));
EXPECT_EQ(index_->last_added_document_id(), 0);
}
@@ -336,7 +404,9 @@ TEST_F(IndexTest, SingleHitMultiTermIndex) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -353,7 +423,9 @@ TEST_F(IndexTest, SingleHitMultiTermIndexAfterMerge) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -379,14 +451,17 @@ TEST_F(IndexTest, MultiHitMultiTermIndexAfterOptimize) {
ICING_ASSERT_OK(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2},
/*new_last_added_document_id=*/2));
EXPECT_THAT(
- GetHits("foo", TermMatchType::EXACT_ONLY),
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
IsOkAndHolds(ElementsAre(
EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
EqualsDocHitInfo(kDocumentId0,
std::vector<SectionId>{kSectionId2}))));
- EXPECT_THAT(GetHits("bar", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
- kDocumentId1, std::vector<SectionId>{kSectionId2}))));
+ EXPECT_THAT(
+ GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ kDocumentId1, std::vector<SectionId>{kSectionId2}))));
EXPECT_EQ(index_->last_added_document_id(), kDocumentId2);
// Delete document id 1, and document id 2 is translated to 1.
@@ -394,23 +469,30 @@ TEST_F(IndexTest, MultiHitMultiTermIndexAfterOptimize) {
index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId, 1},
/*new_last_added_document_id=*/1));
EXPECT_THAT(
- GetHits("foo", TermMatchType::EXACT_ONLY),
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
IsOkAndHolds(ElementsAre(
EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
EqualsDocHitInfo(kDocumentId0,
std::vector<SectionId>{kSectionId2}))));
- EXPECT_THAT(GetHits("bar", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(IsEmpty()));
EXPECT_EQ(index_->last_added_document_id(), kDocumentId1);
// Delete all the rest documents.
ICING_ASSERT_OK(index_->Optimize(
/*document_id_old_to_new=*/{kInvalidDocumentId, kInvalidDocumentId},
/*new_last_added_document_id=*/kInvalidDocumentId));
- EXPECT_THAT(GetHits("foo", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(IsEmpty()));
- EXPECT_THAT(GetHits("bar", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(IsEmpty()));
EXPECT_EQ(index_->last_added_document_id(), kInvalidDocumentId);
}
@@ -436,14 +518,17 @@ TEST_F(IndexTest, MultiHitMultiTermIndexAfterMergeAndOptimize) {
ICING_ASSERT_OK(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2},
/*new_last_added_document_id=*/2));
EXPECT_THAT(
- GetHits("foo", TermMatchType::EXACT_ONLY),
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
IsOkAndHolds(ElementsAre(
EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
EqualsDocHitInfo(kDocumentId0,
std::vector<SectionId>{kSectionId2}))));
- EXPECT_THAT(GetHits("bar", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
- kDocumentId1, std::vector<SectionId>{kSectionId2}))));
+ EXPECT_THAT(
+ GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ kDocumentId1, std::vector<SectionId>{kSectionId2}))));
EXPECT_EQ(index_->last_added_document_id(), kDocumentId2);
// Delete document id 1, and document id 2 is translated to 1.
@@ -451,23 +536,30 @@ TEST_F(IndexTest, MultiHitMultiTermIndexAfterMergeAndOptimize) {
index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId, 1},
/*new_last_added_document_id=*/1));
EXPECT_THAT(
- GetHits("foo", TermMatchType::EXACT_ONLY),
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
IsOkAndHolds(ElementsAre(
EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
EqualsDocHitInfo(kDocumentId0,
std::vector<SectionId>{kSectionId2}))));
- EXPECT_THAT(GetHits("bar", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(IsEmpty()));
EXPECT_EQ(index_->last_added_document_id(), kDocumentId1);
// Delete all the rest documents.
ICING_ASSERT_OK(index_->Optimize(
/*document_id_old_to_new=*/{kInvalidDocumentId, kInvalidDocumentId},
/*new_last_added_document_id=*/kInvalidDocumentId));
- EXPECT_THAT(GetHits("foo", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(IsEmpty()));
- EXPECT_THAT(GetHits("bar", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(IsEmpty()));
EXPECT_EQ(index_->last_added_document_id(), kInvalidDocumentId);
}
@@ -480,7 +572,9 @@ TEST_F(IndexTest, NoHitMultiTermIndex) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("baz", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("baz", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
@@ -496,7 +590,9 @@ TEST_F(IndexTest, NoHitMultiTermIndexAfterMerge) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("baz", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("baz", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
@@ -519,7 +615,9 @@ TEST_F(IndexTest, MultiHitMultiTermIndex) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(
GetHits(std::move(itr)),
ElementsAre(
@@ -547,7 +645,9 @@ TEST_F(IndexTest, MultiHitMultiTermIndexAfterMerge) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(
GetHits(std::move(itr)),
ElementsAre(
@@ -569,7 +669,9 @@ TEST_F(IndexTest, MultiHitSectionRestrict) {
SectionIdMask desired_section = 1U << kSectionId2;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", desired_section, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, desired_section,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -591,7 +693,9 @@ TEST_F(IndexTest, MultiHitSectionRestrictAfterMerge) {
SectionIdMask desired_section = 1U << kSectionId2;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", desired_section, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, desired_section,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -612,7 +716,9 @@ TEST_F(IndexTest, SingleHitDedupeIndex) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -626,7 +732,9 @@ TEST_F(IndexTest, PrefixHit) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -642,7 +750,9 @@ TEST_F(IndexTest, PrefixHitAfterMerge) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -661,7 +771,9 @@ TEST_F(IndexTest, MultiPrefixHit) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(
GetHits(std::move(itr)),
ElementsAre(
@@ -684,7 +796,9 @@ TEST_F(IndexTest, MultiPrefixHitAfterMerge) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(
GetHits(std::move(itr)),
ElementsAre(
@@ -705,7 +819,9 @@ TEST_F(IndexTest, NoExactHitInPrefixQuery) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId1, std::vector<SectionId>{kSectionId3})));
@@ -726,7 +842,9 @@ TEST_F(IndexTest, NoExactHitInPrefixQueryAfterMerge) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId1, std::vector<SectionId>{kSectionId3})));
@@ -741,7 +859,9 @@ TEST_F(IndexTest, PrefixHitDedupe) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -758,7 +878,9 @@ TEST_F(IndexTest, PrefixHitDedupeAfterMerge) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -768,21 +890,27 @@ TEST_F(IndexTest, PrefixToString) {
SectionIdMask id_mask = (1U << kSectionId2) | (1U << kSectionId3);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", id_mask, TermMatchType::PREFIX));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, id_mask,
+ TermMatchType::PREFIX));
EXPECT_THAT(itr->ToString(), Eq("(0000000000000000000000000000000000000000000"
"000000000000000001100:foo* OR "
"00000000000000000000000000000000000000000000"
"00000000000000001100:foo*)"));
- ICING_ASSERT_OK_AND_ASSIGN(itr, index_->GetIterator("foo", kSectionIdMaskAll,
- TermMatchType::PREFIX));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
EXPECT_THAT(itr->ToString(), Eq("(1111111111111111111111111111111111111111111"
"111111111111111111111:foo* OR "
"11111111111111111111111111111111111111111111"
"11111111111111111111:foo*)"));
- ICING_ASSERT_OK_AND_ASSIGN(itr, index_->GetIterator("foo", kSectionIdMaskNone,
- TermMatchType::PREFIX));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskNone, TermMatchType::PREFIX));
EXPECT_THAT(itr->ToString(), Eq("(0000000000000000000000000000000000000000000"
"000000000000000000000:foo* OR "
"00000000000000000000000000000000000000000000"
@@ -793,23 +921,27 @@ TEST_F(IndexTest, ExactToString) {
SectionIdMask id_mask = (1U << kSectionId2) | (1U << kSectionId3);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", id_mask, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, id_mask,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->ToString(), Eq("(0000000000000000000000000000000000000000000"
"000000000000000001100:foo OR "
"00000000000000000000000000000000000000000000"
"00000000000000001100:foo)"));
ICING_ASSERT_OK_AND_ASSIGN(
- itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->ToString(), Eq("(1111111111111111111111111111111111111111111"
"111111111111111111111:foo OR "
"11111111111111111111111111111111111111111111"
"11111111111111111111:foo)"));
- ICING_ASSERT_OK_AND_ASSIGN(itr,
- index_->GetIterator("foo", kSectionIdMaskNone,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskNone, TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->ToString(), Eq("(0000000000000000000000000000000000000000000"
"000000000000000000000:foo OR "
"00000000000000000000000000000000000000000000"
@@ -825,14 +957,17 @@ TEST_F(IndexTest, NonAsciiTerms) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("こんに", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("こんに", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
- ICING_ASSERT_OK_AND_ASSIGN(itr,
- index_->GetIterator("あなた", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("あなた", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -849,14 +984,17 @@ TEST_F(IndexTest, NonAsciiTermsAfterMerge) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("こんに", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("こんに", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
- ICING_ASSERT_OK_AND_ASSIGN(itr,
- index_->GetIterator("あなた", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("あなた", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -864,7 +1002,9 @@ TEST_F(IndexTest, NonAsciiTermsAfterMerge) {
TEST_F(IndexTest, FullIndex) {
// Make a smaller index so that it's easier to fill up.
- Index::Options options(index_dir_, /*index_merge_size=*/1024);
+ Index::Options options(index_dir_, /*index_merge_size=*/1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/64);
ICING_ASSERT_OK_AND_ASSIGN(
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
@@ -914,7 +1054,8 @@ TEST_F(IndexTest, FullIndex) {
for (int i = 0; i < query_terms.size(); i += 25) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator(query_terms.at(i).c_str(), kSectionIdMaskAll,
+ index_->GetIterator(query_terms.at(i).c_str(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
TermMatchType::PREFIX));
// Each query term should contain at least one hit - there may have been
// other hits for this term that were added.
@@ -922,7 +1063,8 @@ TEST_F(IndexTest, FullIndex) {
}
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> last_itr,
- index_->GetIterator(prefix.c_str(), kSectionIdMaskAll,
+ index_->GetIterator(prefix.c_str(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
TermMatchType::PREFIX));
EXPECT_THAT(last_itr->Advance(), IsOk());
EXPECT_THAT(last_itr->doc_hit_info().document_id(), Eq(document_id - 1));
@@ -930,7 +1072,9 @@ TEST_F(IndexTest, FullIndex) {
TEST_F(IndexTest, FullIndexMerge) {
// Make a smaller index so that it's easier to fill up.
- Index::Options options(index_dir_, /*index_merge_size=*/1024);
+ Index::Options options(index_dir_, /*index_merge_size=*/1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/64);
ICING_ASSERT_OK_AND_ASSIGN(
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
@@ -980,7 +1124,8 @@ TEST_F(IndexTest, FullIndexMerge) {
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> last_itr,
- index_->GetIterator(prefix.c_str(), kSectionIdMaskAll,
+ index_->GetIterator(prefix.c_str(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
TermMatchType::PREFIX));
EXPECT_THAT(last_itr->Advance(), IsOk());
EXPECT_THAT(last_itr->doc_hit_info().document_id(), Eq(document_id - 1));
@@ -997,14 +1142,16 @@ TEST_F(IndexTest, FullIndexMerge) {
EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator(prefix + "bar", kSectionIdMaskAll,
+ index_->GetIterator(prefix + "bar", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
TermMatchType::EXACT_ONLY));
// We know that "bar" should have at least one hit because we just added it!
EXPECT_THAT(itr->Advance(), IsOk());
EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(document_id + 1));
ICING_ASSERT_OK_AND_ASSIGN(
- last_itr, index_->GetIterator(prefix.c_str(), kSectionIdMaskAll,
- TermMatchType::PREFIX));
+ last_itr, index_->GetIterator(prefix.c_str(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
EXPECT_THAT(last_itr->Advance(), IsOk());
EXPECT_THAT(last_itr->doc_hit_info().document_id(), Eq(document_id + 1));
}
@@ -1018,11 +1165,15 @@ TEST_F(IndexTest, OptimizeShouldWorkForEmptyIndex) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("", kSectionIdMaskAll, /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("", kSectionIdMaskAll, TermMatchType::PREFIX));
+ itr, index_->GetIterator("", kSectionIdMaskAll, /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
}
@@ -1063,14 +1214,18 @@ TEST_F(IndexTest, IndexShouldWorkAtSectionLimit) {
std::reverse(exp_prefix_hits.begin(), exp_prefix_hits.end());
// Check prefix search.
- ICING_ASSERT_OK_AND_ASSIGN(std::vector<DocHitInfo> hits,
- GetHits(prefix, TermMatchType::PREFIX));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<DocHitInfo> hits,
+ GetHits(prefix, /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::PREFIX));
EXPECT_THAT(hits, ContainerEq(exp_prefix_hits));
// Check exact search.
for (int i = 0; i < 4096; ++i) {
ICING_ASSERT_OK_AND_ASSIGN(
- hits, GetHits(query_terms[i], TermMatchType::EXACT_ONLY));
+ hits,
+ GetHits(query_terms[i], /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, TermMatchType::EXACT_ONLY));
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfo(
i, std::vector<SectionId>{(SectionId)(i % 64)})));
}
@@ -1123,8 +1278,10 @@ TEST_F(IndexTest, IndexShouldWorkAtDocumentLimit) {
std::reverse(exp_prefix_hits.begin(), exp_prefix_hits.end());
// Check prefix search.
- ICING_ASSERT_OK_AND_ASSIGN(std::vector<DocHitInfo> hits,
- GetHits(prefix, TermMatchType::PREFIX));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<DocHitInfo> hits,
+ GetHits(prefix, /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::PREFIX));
EXPECT_THAT(hits, ContainerEq(exp_prefix_hits));
// Check exact search.
@@ -1132,7 +1289,9 @@ TEST_F(IndexTest, IndexShouldWorkAtDocumentLimit) {
if (i % 64 == 2) {
// Only section 2 is an exact section
ICING_ASSERT_OK_AND_ASSIGN(
- hits, GetHits(std::to_string(i), TermMatchType::EXACT_ONLY));
+ hits,
+ GetHits(std::to_string(i), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, TermMatchType::EXACT_ONLY));
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfo(
i, std::vector<SectionId>{(SectionId)(2)})));
}
@@ -1198,13 +1357,17 @@ TEST_F(IndexTest, IndexOptimize) {
index_->Optimize(document_id_old_to_new, new_last_added_document_id));
EXPECT_EQ(index_->last_added_document_id(), new_last_added_document_id);
// Check prefix search.
- ICING_ASSERT_OK_AND_ASSIGN(std::vector<DocHitInfo> hits,
- GetHits(prefix, TermMatchType::PREFIX));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<DocHitInfo> hits,
+ GetHits(prefix, /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::PREFIX));
EXPECT_THAT(hits, ContainerEq(exp_prefix_hits));
// Check exact search.
for (int i = 0; i < 2048; ++i) {
ICING_ASSERT_OK_AND_ASSIGN(
- hits, GetHits(query_terms[i], TermMatchType::EXACT_ONLY));
+ hits,
+ GetHits(query_terms[i], /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, TermMatchType::EXACT_ONLY));
if (document_id_old_to_new[i] == kInvalidDocumentId) {
EXPECT_THAT(hits, IsEmpty());
} else {
@@ -1218,12 +1381,16 @@ TEST_F(IndexTest, IndexOptimize) {
ICING_ASSERT_OK(index_->Merge());
EXPECT_EQ(index_->last_added_document_id(), new_last_added_document_id);
// Check prefix search.
- ICING_ASSERT_OK_AND_ASSIGN(hits, GetHits(prefix, TermMatchType::PREFIX));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ hits, GetHits(prefix, /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, TermMatchType::PREFIX));
EXPECT_THAT(hits, ContainerEq(exp_prefix_hits));
// Check exact search.
for (int i = 0; i < 2048; ++i) {
ICING_ASSERT_OK_AND_ASSIGN(
- hits, GetHits(query_terms[i], TermMatchType::EXACT_ONLY));
+ hits,
+ GetHits(query_terms[i], /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, TermMatchType::EXACT_ONLY));
if (document_id_old_to_new[i] == kInvalidDocumentId) {
EXPECT_THAT(hits, IsEmpty());
} else {
@@ -1240,7 +1407,9 @@ TEST_F(IndexTest, IndexCreateIOFailure) {
NiceMock<IcingMockFilesystem> mock_icing_filesystem;
ON_CALL(mock_icing_filesystem, CreateDirectoryRecursively)
.WillByDefault(Return(false));
- Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
EXPECT_THAT(Index::Create(options, &filesystem_, &mock_icing_filesystem),
StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
@@ -1271,7 +1440,9 @@ TEST_F(IndexTest, IndexCreateCorruptionFailure) {
IsTrue());
// Recreate the index.
- Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
EXPECT_THAT(Index::Create(options, &filesystem_, &icing_filesystem_),
StatusIs(libtextclassifier3::StatusCode::DATA_LOSS));
}
@@ -1289,14 +1460,18 @@ TEST_F(IndexTest, IndexPersistence) {
index_.reset();
// Recreate the index.
- Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
ICING_ASSERT_OK_AND_ASSIGN(
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
// Check that the hits are present.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -1316,14 +1491,18 @@ TEST_F(IndexTest, IndexPersistenceAfterMerge) {
index_.reset();
// Recreate the index.
- Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
ICING_ASSERT_OK_AND_ASSIGN(
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
// Check that the hits are present.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -1331,7 +1510,8 @@ TEST_F(IndexTest, IndexPersistenceAfterMerge) {
TEST_F(IndexTest, InvalidHitBufferSize) {
Index::Options options(
- index_dir_, /*index_merge_size=*/std::numeric_limits<uint32_t>::max());
+ index_dir_, /*index_merge_size=*/std::numeric_limits<uint32_t>::max(),
+ /*lite_index_sort_at_indexing=*/true, /*lite_index_sort_size=*/1024 * 8);
EXPECT_THAT(Index::Create(options, &filesystem_, &icing_filesystem_),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
@@ -1919,7 +2099,9 @@ TEST_F(IndexTest, ExactResultsFromLiteAndMain) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(
GetHits(std::move(itr)),
ElementsAre(
@@ -1950,7 +2132,9 @@ TEST_F(IndexTest, PrefixResultsFromLiteAndMain) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(
GetHits(std::move(itr)),
ElementsAre(
@@ -2065,7 +2249,9 @@ TEST_F(IndexTest, BackfillingMultipleTermsSucceeds) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(
GetHits(std::move(itr)),
ElementsAre(
@@ -2119,7 +2305,9 @@ TEST_F(IndexTest, BackfillingNewTermsSucceeds) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(
GetHits(std::move(itr)),
ElementsAre(
@@ -2132,7 +2320,9 @@ TEST_F(IndexTest, TruncateToInvalidDocumentIdHasNoEffect) {
EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(0));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
// Add one document to the lite index
@@ -2143,7 +2333,9 @@ TEST_F(IndexTest, TruncateToInvalidDocumentIdHasNoEffect) {
// Clipping to invalid should have no effect.
ICING_EXPECT_OK(index_->TruncateTo(kInvalidDocumentId));
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ itr, index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -2152,7 +2344,9 @@ TEST_F(IndexTest, TruncateToInvalidDocumentIdHasNoEffect) {
ICING_ASSERT_OK(index_->Merge());
ICING_EXPECT_OK(index_->TruncateTo(kInvalidDocumentId));
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ itr, index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -2166,7 +2360,9 @@ TEST_F(IndexTest, TruncateToInvalidDocumentIdHasNoEffect) {
// hits.
ICING_EXPECT_OK(index_->TruncateTo(kInvalidDocumentId));
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ itr, index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
EXPECT_THAT(
GetHits(std::move(itr)),
ElementsAre(
@@ -2179,7 +2375,9 @@ TEST_F(IndexTest, TruncateToLastAddedDocumentIdHasNoEffect) {
EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(0));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
// Add one document to the lite index
@@ -2191,7 +2389,9 @@ TEST_F(IndexTest, TruncateToLastAddedDocumentIdHasNoEffect) {
ICING_EXPECT_OK(index_->TruncateTo(index_->last_added_document_id()));
// Clipping to invalid should have no effect.
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ itr, index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -2200,7 +2400,9 @@ TEST_F(IndexTest, TruncateToLastAddedDocumentIdHasNoEffect) {
ICING_ASSERT_OK(index_->Merge());
ICING_EXPECT_OK(index_->TruncateTo(index_->last_added_document_id()));
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ itr, index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -2215,7 +2417,9 @@ TEST_F(IndexTest, TruncateToLastAddedDocumentIdHasNoEffect) {
// hits.
ICING_EXPECT_OK(index_->TruncateTo(index_->last_added_document_id()));
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ itr, index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
EXPECT_THAT(
GetHits(std::move(itr)),
ElementsAre(
@@ -2245,7 +2449,9 @@ TEST_F(IndexTest, TruncateToThrowsOutLiteIndex) {
// Clipping to document 0 should toss out the lite index, but keep the main.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -2278,7 +2484,9 @@ TEST_F(IndexTest, TruncateToThrowsOutBothIndices) {
// Clipping to document 0 should toss out both indices.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
}
diff --git a/icing/index/integer-section-indexing-handler.cc b/icing/index/integer-section-indexing-handler.cc
index a49b9f3..63b09df 100644
--- a/icing/index/integer-section-indexing-handler.cc
+++ b/icing/index/integer-section-indexing-handler.cc
@@ -14,22 +14,59 @@
#include "icing/index/integer-section-indexing-handler.h"
+#include <cstdint>
+#include <memory>
+#include <utility>
+
#include "icing/text_classifier/lib3/utils/base/status.h"
-#include "icing/schema/section-manager.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/proto/logging.pb.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
+#include "icing/util/clock.h"
#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
#include "icing/util/tokenized-document.h"
namespace icing {
namespace lib {
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<IntegerSectionIndexingHandler>>
+IntegerSectionIndexingHandler::Create(const Clock* clock,
+ NumericIndex<int64_t>* integer_index) {
+ ICING_RETURN_ERROR_IF_NULL(clock);
+ ICING_RETURN_ERROR_IF_NULL(integer_index);
+
+ return std::unique_ptr<IntegerSectionIndexingHandler>(
+ new IntegerSectionIndexingHandler(clock, integer_index));
+}
+
libtextclassifier3::Status IntegerSectionIndexingHandler::Handle(
const TokenizedDocument& tokenized_document, DocumentId document_id,
- PutDocumentStatsProto* put_document_stats) {
- // TODO(b/259744228):
- // 1. Resolve last_added_document_id for index rebuilding before rollout
- // 2. Set integer indexing latency and other stats
+ bool recovery_mode, PutDocumentStatsProto* put_document_stats) {
+ std::unique_ptr<Timer> index_timer = clock_.GetNewTimer();
+
+ if (!IsDocumentIdValid(document_id)) {
+ return absl_ports::InvalidArgumentError(
+ IcingStringUtil::StringPrintf("Invalid DocumentId %d", document_id));
+ }
+
+ if (integer_index_.last_added_document_id() != kInvalidDocumentId &&
+ document_id <= integer_index_.last_added_document_id()) {
+ if (recovery_mode) {
+ // Skip the document if document_id <= last_added_document_id in recovery
+ // mode without returning an error.
+ return libtextclassifier3::Status::OK;
+ }
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "DocumentId %d must be greater than last added document_id %d",
+ document_id, integer_index_.last_added_document_id()));
+ }
+ integer_index_.set_last_added_document_id(document_id);
libtextclassifier3::Status status;
// We have to add integer sections into integer index in reverse order because
@@ -55,7 +92,7 @@ libtextclassifier3::Status IntegerSectionIndexingHandler::Handle(
}
// Add all the seen keys to the integer index.
- status = editor->IndexAllBufferedKeys();
+ status = std::move(*editor).IndexAllBufferedKeys();
if (!status.ok()) {
ICING_LOG(WARNING) << "Failed to add keys into integer index due to: "
<< status.error_message();
@@ -63,6 +100,11 @@ libtextclassifier3::Status IntegerSectionIndexingHandler::Handle(
}
}
+ if (put_document_stats != nullptr) {
+ put_document_stats->set_integer_index_latency_ms(
+ index_timer->GetElapsedMilliseconds());
+ }
+
return status;
}
diff --git a/icing/index/integer-section-indexing-handler.h b/icing/index/integer-section-indexing-handler.h
index dd0e46c..0a501aa 100644
--- a/icing/index/integer-section-indexing-handler.h
+++ b/icing/index/integer-section-indexing-handler.h
@@ -15,9 +15,13 @@
#ifndef ICING_INDEX_INTEGER_SECTION_INDEXING_HANDLER_H_
#define ICING_INDEX_INTEGER_SECTION_INDEXING_HANDLER_H_
+#include <cstdint>
+#include <memory>
+
#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/index/data-indexing-handler.h"
#include "icing/index/numeric/numeric-index.h"
-#include "icing/index/section-indexing-handler.h"
#include "icing/store/document-id.h"
#include "icing/util/clock.h"
#include "icing/util/tokenized-document.h"
@@ -25,28 +29,40 @@
namespace icing {
namespace lib {
-class IntegerSectionIndexingHandler : public SectionIndexingHandler {
+class IntegerSectionIndexingHandler : public DataIndexingHandler {
public:
- explicit IntegerSectionIndexingHandler(const Clock* clock,
- NumericIndex<int64_t>* integer_index)
- : SectionIndexingHandler(clock), integer_index_(*integer_index) {}
+ // Creates an IntegerSectionIndexingHandler instance which does not take
+ // ownership of any input components. All pointers must refer to valid objects
+ // that outlive the created IntegerSectionIndexingHandler instance.
+ //
+ // Returns:
+ // - An IntegerSectionIndexingHandler instance on success
+ // - FAILED_PRECONDITION_ERROR if any of the input pointer is null
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<IntegerSectionIndexingHandler>>
+ Create(const Clock* clock, NumericIndex<int64_t>* integer_index);
~IntegerSectionIndexingHandler() override = default;
- // TODO(b/259744228): update this documentation after resolving
- // last_added_document_id problem.
// Handles the integer indexing process: add hits into the integer index for
// all contents in tokenized_document.integer_sections.
//
- /// Returns:
- // - OK on success
+ // Returns:
+ // - OK on success.
+ // - INVALID_ARGUMENT_ERROR if document_id is invalid OR document_id is less
+ // than or equal to the document_id of a previously indexed document in
+ // non recovery mode.
// - Any NumericIndex<int64_t>::Editor errors.
libtextclassifier3::Status Handle(
const TokenizedDocument& tokenized_document, DocumentId document_id,
- PutDocumentStatsProto* put_document_stats) override;
+ bool recovery_mode, PutDocumentStatsProto* put_document_stats) override;
private:
- NumericIndex<int64_t>& integer_index_;
+ explicit IntegerSectionIndexingHandler(const Clock* clock,
+ NumericIndex<int64_t>* integer_index)
+ : DataIndexingHandler(clock), integer_index_(*integer_index) {}
+
+ NumericIndex<int64_t>& integer_index_; // Does not own.
};
} // namespace lib
diff --git a/icing/index/integer-section-indexing-handler_test.cc b/icing/index/integer-section-indexing-handler_test.cc
new file mode 100644
index 0000000..91cc06f
--- /dev/null
+++ b/icing/index/integer-section-indexing-handler_test.cc
@@ -0,0 +1,601 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/integer-section-indexing-handler.h"
+
+#include <limits>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/integer-index.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/util/tokenized-document.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::IsTrue;
+
+// Indexable properties (section) and section id. Section id is determined by
+// the lexicographical order of indexable property paths.
+// Schema type with indexable properties: FakeType
+// Section id = 0: "body"
+// Section id = 1: "timestamp"
+// Section id = 2: "title"
+static constexpr std::string_view kFakeType = "FakeType";
+static constexpr std::string_view kPropertyBody = "body";
+static constexpr std::string_view kPropertyTimestamp = "timestamp";
+static constexpr std::string_view kPropertyTitle = "title";
+
+static constexpr SectionId kSectionIdTimestamp = 1;
+
+// Schema type with nested indexable properties: NestedType
+// Section id = 0: "name"
+// Section id = 1: "nested.body"
+// Section id = 2: "nested.timestamp"
+// Section id = 3: "nested.title"
+// Section id = 4: "price"
+static constexpr std::string_view kNestedType = "NestedType";
+static constexpr std::string_view kPropertyName = "name";
+static constexpr std::string_view kPropertyNestedDoc = "nested";
+static constexpr std::string_view kPropertyPrice = "price";
+
+static constexpr SectionId kSectionIdNestedTimestamp = 2;
+static constexpr SectionId kSectionIdPrice = 4;
+
+class IntegerSectionIndexingHandlerTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ base_dir_ = GetTestTempDir() + "/icing_test";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ integer_index_working_path_ = base_dir_ + "/integer_index";
+ schema_store_dir_ = base_dir_ + "/schema_store";
+ document_store_dir_ = base_dir_ + "/document_store";
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ integer_index_,
+ IntegerIndex::Create(filesystem_, integer_index_working_path_,
+ /*num_data_threshold_for_bucket_split=*/65536,
+ /*pre_mapping_fbv=*/false));
+
+ language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ lang_segmenter_,
+ language_segmenter_factory::Create(std::move(segmenter_options)));
+
+ ASSERT_THAT(
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()),
+ IsTrue());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kFakeType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyTitle)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyBody)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyTimestamp)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kNestedType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyNestedDoc)
+ .SetDataTypeDocument(
+ kFakeType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyPrice)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ ASSERT_TRUE(
+ filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult doc_store_create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ document_store_ = std::move(doc_store_create_result.document_store);
+ }
+
+ void TearDown() override {
+ document_store_.reset();
+ schema_store_.reset();
+ lang_segmenter_.reset();
+ integer_index_.reset();
+
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ FakeClock fake_clock_;
+ std::string base_dir_;
+ std::string integer_index_working_path_;
+ std::string schema_store_dir_;
+ std::string document_store_dir_;
+
+ std::unique_ptr<NumericIndex<int64_t>> integer_index_;
+ std::unique_ptr<LanguageSegmenter> lang_segmenter_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> document_store_;
+};
+
+std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> iterator) {
+ std::vector<DocHitInfo> infos;
+ while (iterator->Advance().ok()) {
+ infos.push_back(iterator->doc_hit_info());
+ }
+ return infos;
+}
+
+TEST_F(IntegerSectionIndexingHandlerTest, CreationWithNullPointerShouldFail) {
+ EXPECT_THAT(IntegerSectionIndexingHandler::Create(/*clock=*/nullptr,
+ integer_index_.get()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+
+ EXPECT_THAT(IntegerSectionIndexingHandler::Create(&fake_clock_,
+ /*integer_index=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+TEST_F(IntegerSectionIndexingHandlerTest, HandleIntegerSection) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "title")
+ .AddStringProperty(std::string(kPropertyBody), "body")
+ .AddInt64Property(std::string(kPropertyTimestamp), 123)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(tokenized_document.document()));
+
+ ASSERT_THAT(integer_index_->last_added_document_id(), Eq(kInvalidDocumentId));
+ // Handle document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerSectionIndexingHandler> handler,
+ IntegerSectionIndexingHandler::Create(&fake_clock_,
+ integer_index_.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, document_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(document_id));
+
+ // Query "timestamp".
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ integer_index_->GetIterator(
+ kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_,
+ *schema_store_, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ document_id, std::vector<SectionId>{kSectionIdTimestamp})));
+}
+
+TEST_F(IntegerSectionIndexingHandlerTest, HandleNestedIntegerSection) {
+ DocumentProto nested_document =
+ DocumentBuilder()
+ .SetKey("icing", "nested_type/1")
+ .SetSchema(std::string(kNestedType))
+ .AddDocumentProperty(
+ std::string(kPropertyNestedDoc),
+ DocumentBuilder()
+ .SetKey("icing", "nested_fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle),
+ "nested title")
+ .AddStringProperty(std::string(kPropertyBody), "nested body")
+ .AddInt64Property(std::string(kPropertyTimestamp), 123)
+ .Build())
+ .AddInt64Property(std::string(kPropertyPrice), 456)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(nested_document)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(tokenized_document.document()));
+
+ ASSERT_THAT(integer_index_->last_added_document_id(), Eq(kInvalidDocumentId));
+ // Handle nested_document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerSectionIndexingHandler> handler,
+ IntegerSectionIndexingHandler::Create(&fake_clock_,
+ integer_index_.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, document_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(document_id));
+
+ // Query "nested.timestamp".
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ integer_index_->GetIterator(
+ "nested.timestamp", /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_,
+ *schema_store_, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ document_id, std::vector<SectionId>{kSectionIdNestedTimestamp})));
+
+ // Query "price".
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr,
+ integer_index_->GetIterator(
+ kPropertyPrice, /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_,
+ *schema_store_, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ document_id, std::vector<SectionId>{kSectionIdPrice})));
+
+ // Query "timestamp". Should get empty result.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr,
+ integer_index_->GetIterator(
+ kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_,
+ *schema_store_, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+}
+
+TEST_F(IntegerSectionIndexingHandlerTest, HandleShouldSkipEmptyIntegerSection) {
+ // Create a FakeType document without "timestamp".
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "title")
+ .AddStringProperty(std::string(kPropertyBody), "body")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(tokenized_document.document()));
+
+ ASSERT_THAT(integer_index_->last_added_document_id(), Eq(kInvalidDocumentId));
+ // Handle document. Index data should remain unchanged since there is no
+ // indexable integer, but last_added_document_id should be updated.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerSectionIndexingHandler> handler,
+ IntegerSectionIndexingHandler::Create(&fake_clock_,
+ integer_index_.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, document_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(document_id));
+
+ // Query "timestamp". Should get empty result.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ integer_index_->GetIterator(
+ kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_,
+ *schema_store_, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+}
+
+TEST_F(IntegerSectionIndexingHandlerTest,
+ HandleInvalidDocumentIdShouldReturnInvalidArgumentError) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "title")
+ .AddStringProperty(std::string(kPropertyBody), "body")
+ .AddInt64Property(std::string(kPropertyTimestamp), 123)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document)));
+ ICING_ASSERT_OK(document_store_->Put(tokenized_document.document()));
+
+ static constexpr DocumentId kCurrentDocumentId = 3;
+ integer_index_->set_last_added_document_id(kCurrentDocumentId);
+ ASSERT_THAT(integer_index_->last_added_document_id(), Eq(kCurrentDocumentId));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerSectionIndexingHandler> handler,
+ IntegerSectionIndexingHandler::Create(&fake_clock_,
+ integer_index_.get()));
+
+ // Handling document with kInvalidDocumentId should cause a failure, and both
+ // index data and last_added_document_id should remain unchanged.
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kInvalidDocumentId,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(kCurrentDocumentId));
+
+ // Query "timestamp". Should get empty result.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ integer_index_->GetIterator(
+ kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_,
+ *schema_store_, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+
+ // Recovery mode should get the same result.
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kInvalidDocumentId,
+ /*recovery_mode=*/true, /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(kCurrentDocumentId));
+
+ // Query "timestamp". Should get empty result.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr,
+ integer_index_->GetIterator(
+ kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_,
+ *schema_store_, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+}
+
+TEST_F(IntegerSectionIndexingHandlerTest,
+ HandleOutOfOrderDocumentIdShouldReturnInvalidArgumentError) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "title")
+ .AddStringProperty(std::string(kPropertyBody), "body")
+ .AddInt64Property(std::string(kPropertyTimestamp), 123)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(tokenized_document.document()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerSectionIndexingHandler> handler,
+ IntegerSectionIndexingHandler::Create(&fake_clock_,
+ integer_index_.get()));
+
+ // Handling document with document_id == last_added_document_id should cause a
+ // failure, and both index data and last_added_document_id should remain
+ // unchanged.
+ integer_index_->set_last_added_document_id(document_id);
+ ASSERT_THAT(integer_index_->last_added_document_id(), Eq(document_id));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, document_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(document_id));
+
+ // Query "timestamp". Should get empty result.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ integer_index_->GetIterator(
+ kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_,
+ *schema_store_, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+
+ // Handling document with document_id < last_added_document_id should cause a
+ // failure, and both index data and last_added_document_id should remain
+ // unchanged.
+ integer_index_->set_last_added_document_id(document_id + 1);
+ ASSERT_THAT(integer_index_->last_added_document_id(), Eq(document_id + 1));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, document_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(document_id + 1));
+
+ // Query "timestamp". Should get empty result.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr,
+ integer_index_->GetIterator(
+ kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_,
+ *schema_store_, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+}
+
+TEST_F(IntegerSectionIndexingHandlerTest,
+ HandleRecoveryModeShouldIgnoreDocsLELastAddedDocId) {
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "title one")
+ .AddStringProperty(std::string(kPropertyBody), "body one")
+ .AddInt64Property(std::string(kPropertyTimestamp), 123)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/2")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "title two")
+ .AddStringProperty(std::string(kPropertyBody), "body two")
+ .AddInt64Property(std::string(kPropertyTimestamp), 456)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document1,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document2,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(tokenized_document1.document()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(tokenized_document2.document()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerSectionIndexingHandler> handler,
+ IntegerSectionIndexingHandler::Create(&fake_clock_,
+ integer_index_.get()));
+
+ // Handle document with document_id > last_added_document_id in recovery mode.
+ // The handler should index this document and update last_added_document_id.
+ EXPECT_THAT(
+ handler->Handle(tokenized_document1, document_id1, /*recovery_mode=*/true,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(document_id1));
+
+ // Query "timestamp".
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ integer_index_->GetIterator(
+ kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_,
+ *schema_store_, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ document_id1, std::vector<SectionId>{kSectionIdTimestamp})));
+
+ // Handle document with document_id == last_added_document_id in recovery
+ // mode. We should not get any error, but the handler should ignore the
+ // document, so both index data and last_added_document_id should remain
+ // unchanged.
+ integer_index_->set_last_added_document_id(document_id2);
+ ASSERT_THAT(integer_index_->last_added_document_id(), Eq(document_id2));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document2, document_id2, /*recovery_mode=*/true,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(document_id2));
+
+ // Query "timestamp". Should not get hits for document2.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr,
+ integer_index_->GetIterator(
+ kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_,
+ *schema_store_, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ document_id1, std::vector<SectionId>{kSectionIdTimestamp})));
+
+ // Handle document with document_id < last_added_document_id in recovery mode.
+ // We should not get any error, but the handler should ignore the document, so
+ // both index data and last_added_document_id should remain unchanged.
+ integer_index_->set_last_added_document_id(document_id2 + 1);
+ ASSERT_THAT(integer_index_->last_added_document_id(), Eq(document_id2 + 1));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document2, document_id2, /*recovery_mode=*/true,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(document_id2 + 1));
+
+ // Query "timestamp". Should not get hits for document2.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr,
+ integer_index_->GetIterator(
+ kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_,
+ *schema_store_, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ document_id1, std::vector<SectionId>{kSectionIdTimestamp})));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-all-document-id.cc b/icing/index/iterator/doc-hit-info-iterator-all-document-id.cc
index e75ed87..67c7d25 100644
--- a/icing/index/iterator/doc-hit-info-iterator-all-document-id.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-all-document-id.cc
@@ -40,5 +40,12 @@ libtextclassifier3::Status DocHitInfoIteratorAllDocumentId::Advance() {
return libtextclassifier3::Status::OK;
}
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorAllDocumentId::TrimRightMostNode() && {
+ // The all document id node should be trimmed.
+ TrimmedNode node = {nullptr, /*term=*/"", /*term_start_index_=*/0,
+ /*unnormalized_term_length_=*/0};
+ return node;
+}
} // namespace lib
} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-all-document-id.h b/icing/index/iterator/doc-hit-info-iterator-all-document-id.h
index 0fa74f5..bb16eaf 100644
--- a/icing/index/iterator/doc-hit-info-iterator-all-document-id.h
+++ b/icing/index/iterator/doc-hit-info-iterator-all-document-id.h
@@ -35,6 +35,8 @@ class DocHitInfoIteratorAllDocumentId : public DocHitInfoIterator {
libtextclassifier3::Status Advance() override;
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
int32_t GetNumBlocksInspected() const override { return 0; }
int32_t GetNumLeafAdvanceCalls() const override {
diff --git a/icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc b/icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc
index 7366b97..ea2dda6 100644
--- a/icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc
@@ -32,6 +32,7 @@ namespace {
using ::testing::ElementsAreArray;
using ::testing::Eq;
+using ::testing::IsNull;
using ::testing::Not;
TEST(DocHitInfoIteratorAllDocumentIdTest, Initialize) {
@@ -108,6 +109,16 @@ TEST(DocHitInfoIteratorAllDocumentIdTest, Advance) {
}
}
+TEST(DocHitInfoIteratorAllDocumentIdTest, TrimAllDocumentIdIterator) {
+ DocHitInfoIteratorAllDocumentId all_it(100);
+ ICING_ASSERT_OK_AND_ASSIGN(DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(all_it).TrimRightMostNode());
+ // The whole iterator is trimmed
+ EXPECT_THAT(trimmed_node.term_, testing::IsEmpty());
+ EXPECT_THAT(trimmed_node.term_start_index_, 0);
+ EXPECT_THAT(trimmed_node.iterator_, IsNull());
+}
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-and.cc b/icing/index/iterator/doc-hit-info-iterator-and.cc
index 3b7ede9..185a35e 100644
--- a/icing/index/iterator/doc-hit-info-iterator-and.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-and.cc
@@ -111,6 +111,19 @@ libtextclassifier3::Status DocHitInfoIteratorAnd::Advance() {
return libtextclassifier3::Status::OK;
}
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorAnd::TrimRightMostNode() && {
+ ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_long,
+ std::move(*long_).TrimRightMostNode());
+ if (trimmed_long.iterator_ == nullptr) {
+ trimmed_long.iterator_ = std::move(short_);
+ } else {
+ trimmed_long.iterator_ = std::make_unique<DocHitInfoIteratorAnd>(
+ std::move(short_), std::move(trimmed_long.iterator_));
+ }
+ return trimmed_long;
+}
+
int32_t DocHitInfoIteratorAnd::GetNumBlocksInspected() const {
return short_->GetNumBlocksInspected() + long_->GetNumBlocksInspected();
}
@@ -195,6 +208,27 @@ libtextclassifier3::Status DocHitInfoIteratorAndNary::Advance() {
return libtextclassifier3::Status::OK;
}
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorAndNary::TrimRightMostNode() && {
+ ICING_ASSIGN_OR_RETURN(
+ TrimmedNode trimmed_right,
+ std::move(*iterators_.rbegin()->get()).TrimRightMostNode());
+ if (trimmed_right.iterator_ == nullptr) {
+ if (iterators_.size() > 2) {
+ iterators_.pop_back();
+ trimmed_right.iterator_ =
+ std::make_unique<DocHitInfoIteratorAndNary>(std::move(iterators_));
+ } else if (iterators_.size() == 2) {
+ trimmed_right.iterator_ = std::move(iterators_.at(0));
+ }
+ } else {
+ iterators_.at(iterators_.size() - 1) = std::move(trimmed_right.iterator_);
+ trimmed_right.iterator_ =
+ std::make_unique<DocHitInfoIteratorAndNary>(std::move(iterators_));
+ }
+ return trimmed_right;
+}
+
int32_t DocHitInfoIteratorAndNary::GetNumBlocksInspected() const {
int32_t blockCount = 0;
for (const std::unique_ptr<DocHitInfoIterator>& iter : iterators_) {
diff --git a/icing/index/iterator/doc-hit-info-iterator-and.h b/icing/index/iterator/doc-hit-info-iterator-and.h
index 8ceff44..0f40f94 100644
--- a/icing/index/iterator/doc-hit-info-iterator-and.h
+++ b/icing/index/iterator/doc-hit-info-iterator-and.h
@@ -40,6 +40,8 @@ class DocHitInfoIteratorAnd : public DocHitInfoIterator {
std::unique_ptr<DocHitInfoIterator> long_it);
libtextclassifier3::Status Advance() override;
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
int32_t GetNumBlocksInspected() const override;
int32_t GetNumLeafAdvanceCalls() const override;
@@ -74,6 +76,8 @@ class DocHitInfoIteratorAndNary : public DocHitInfoIterator {
libtextclassifier3::Status Advance() override;
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
int32_t GetNumBlocksInspected() const override;
int32_t GetNumLeafAdvanceCalls() const override;
diff --git a/icing/index/iterator/doc-hit-info-iterator-and_test.cc b/icing/index/iterator/doc-hit-info-iterator-and_test.cc
index 9b9f44b..51828cb 100644
--- a/icing/index/iterator/doc-hit-info-iterator-and_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-and_test.cc
@@ -171,6 +171,123 @@ TEST(DocHitInfoIteratorAndTest, AdvanceNestedIterators) {
EXPECT_THAT(GetDocumentIds(outer_iter.get()), ElementsAre(10, 6, 2));
}
+TEST(DocHitInfoIteratorAndTest, TrimAndIterator) {
+ std::vector<DocHitInfo> left_vector = {DocHitInfo(3), DocHitInfo(2)};
+ std::vector<DocHitInfo> right_vector = {DocHitInfo(1), DocHitInfo(0)};
+
+ std::unique_ptr<DocHitInfoIterator> left_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(left_vector);
+ std::unique_ptr<DocHitInfoIterator> right_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(right_vector, "term", 10);
+
+ std::unique_ptr<DocHitInfoIterator> iter =
+ std::make_unique<DocHitInfoIteratorAnd>(std::move(left_iter),
+ std::move(right_iter));
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(*iter).TrimRightMostNode());
+ EXPECT_THAT(trimmed_node.term_, Eq("term"));
+ EXPECT_THAT(trimmed_node.term_start_index_, Eq(10));
+ EXPECT_THAT(GetDocumentIds(trimmed_node.iterator_.get()), ElementsAre(3, 2));
+}
+
+TEST(DocHitInfoIteratorAndTest, TrimAndIterator_TwoLayer) {
+ // Build an interator tree like:
+ //
+ // AND
+ // / \
+ // first AND
+ // | / \
+ // {0, 1} second third
+ // | |
+ // {1} {0}
+ std::vector<DocHitInfo> first_vector = {DocHitInfo(1), DocHitInfo(0)};
+ std::vector<DocHitInfo> second_vector = {DocHitInfo(1)};
+ std::vector<DocHitInfo> third_vector = {DocHitInfo(0)};
+
+ std::unique_ptr<DocHitInfoIterator> first_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(first_vector);
+ std::unique_ptr<DocHitInfoIterator> second_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(second_vector);
+ std::unique_ptr<DocHitInfoIterator> third_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(third_vector, "term", 10);
+
+ std::unique_ptr<DocHitInfoIterator> nested_iter =
+ std::make_unique<DocHitInfoIteratorAnd>(std::move(second_iter),
+ std::move(third_iter));
+ std::unique_ptr<DocHitInfoIterator> iter =
+ std::make_unique<DocHitInfoIteratorAnd>(std::move(first_iter),
+ std::move(nested_iter));
+
+ // The third_iter is trimmed.
+ // AND
+ // / \
+ // first second
+ // | |
+ // {0, 1} {1}
+ ICING_ASSERT_OK_AND_ASSIGN(DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(*iter).TrimRightMostNode());
+ EXPECT_THAT(GetDocumentIds(trimmed_node.iterator_.get()), ElementsAre(1));
+ EXPECT_THAT(trimmed_node.term_, Eq("term"));
+ EXPECT_THAT(trimmed_node.term_start_index_, Eq(10));
+}
+
+TEST(DocHitInfoIteratorAndNaryTest, TrimAndNaryIterator) {
+ std::vector<DocHitInfo> first_vector = {DocHitInfo(2), DocHitInfo(1),
+ DocHitInfo(0)};
+ std::vector<DocHitInfo> second_vector = {DocHitInfo(2), DocHitInfo(1)};
+ std::vector<DocHitInfo> third_vector = {DocHitInfo(2)};
+
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(first_vector));
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(second_vector));
+ iterators.push_back(
+ std::make_unique<DocHitInfoIteratorDummy>(third_vector, "term", 10));
+
+ std::unique_ptr<DocHitInfoIterator> iter =
+ std::make_unique<DocHitInfoIteratorAndNary>(std::move(iterators));
+
+ // The third iterator is trimmed
+ ICING_ASSERT_OK_AND_ASSIGN(DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(*iter).TrimRightMostNode());
+ EXPECT_THAT(trimmed_node.term_, Eq("term"));
+ EXPECT_THAT(trimmed_node.term_start_index_, Eq(10));
+ EXPECT_THAT(GetDocumentIds(trimmed_node.iterator_.get()), ElementsAre(2, 1));
+}
+
+TEST(DocHitInfoIteratorAndNaryTest, TrimAndNaryIterator_TwoLayer) {
+ std::vector<DocHitInfo> first_vector = {DocHitInfo(3), DocHitInfo(2),
+ DocHitInfo(1), DocHitInfo(0)};
+ std::vector<DocHitInfo> second_vector = {DocHitInfo(2), DocHitInfo(1),
+ DocHitInfo(0)};
+ std::vector<DocHitInfo> third_vector = {DocHitInfo(1), DocHitInfo(0)};
+ std::vector<DocHitInfo> forth_vector = {DocHitInfo(0)};
+
+ // Build nested iterator
+ std::unique_ptr<DocHitInfoIterator> third_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(third_vector);
+ std::unique_ptr<DocHitInfoIterator> forth_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(forth_vector, "term", 10);
+ std::unique_ptr<DocHitInfoIterator> nested_iter =
+ std::make_unique<DocHitInfoIteratorAnd>(std::move(third_iter),
+ std::move(forth_iter));
+
+ // Build outer iterator
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(first_vector));
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(second_vector));
+ iterators.push_back(std::move(nested_iter));
+ std::unique_ptr<DocHitInfoIterator> iter =
+ std::make_unique<DocHitInfoIteratorAndNary>(std::move(iterators));
+
+ // The forth iterator is trimmed.
+ ICING_ASSERT_OK_AND_ASSIGN(DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(*iter).TrimRightMostNode());
+ EXPECT_THAT(trimmed_node.term_, Eq("term"));
+ EXPECT_THAT(trimmed_node.term_start_index_, Eq(10));
+ EXPECT_THAT(GetDocumentIds(trimmed_node.iterator_.get()), ElementsAre(1, 0));
+}
+
TEST(DocHitInfoIteratorAndTest, SectionIdMask) {
// Arbitrary section ids for the documents in the DocHitInfoIterators.
// Created to test correct section_id_mask behavior.
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.cc b/icing/index/iterator/doc-hit-info-iterator-filter.cc
index 2e8ba23..a82e556 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-filter.cc
@@ -38,11 +38,12 @@ namespace lib {
DocHitInfoIteratorFilter::DocHitInfoIteratorFilter(
std::unique_ptr<DocHitInfoIterator> delegate,
const DocumentStore* document_store, const SchemaStore* schema_store,
- const Options& options)
+ const Options& options, int64_t current_time_ms)
: delegate_(std::move(delegate)),
document_store_(*document_store),
schema_store_(*schema_store),
- options_(options) {
+ options_(options),
+ current_time_ms_(current_time_ms) {
// Precompute all the NamespaceIds
for (std::string_view name_space : options_.namespaces) {
auto namespace_id_or = document_store_.GetNamespaceId(name_space);
@@ -55,11 +56,16 @@ DocHitInfoIteratorFilter::DocHitInfoIteratorFilter(
// Precompute all the SchemaTypeIds
for (std::string_view schema_type : options_.schema_types) {
- auto schema_type_id_or = schema_store_.GetSchemaTypeId(schema_type);
+ libtextclassifier3::StatusOr<const std::unordered_set<SchemaTypeId>*>
+ schema_type_ids_or =
+ schema_store_.GetSchemaTypeIdsWithChildren(schema_type);
// If we can't find the SchemaTypeId, just throw it away
- if (schema_type_id_or.ok()) {
- target_schema_type_ids_.emplace(schema_type_id_or.ValueOrDie());
+ if (schema_type_ids_or.ok()) {
+ const std::unordered_set<SchemaTypeId>* schema_type_ids =
+ schema_type_ids_or.ValueOrDie();
+ target_schema_type_ids_.insert(schema_type_ids->begin(),
+ schema_type_ids->end());
}
}
}
@@ -69,7 +75,7 @@ libtextclassifier3::Status DocHitInfoIteratorFilter::Advance() {
// Try to get the DocumentFilterData
auto document_filter_data_optional =
document_store_.GetAliveDocumentFilterData(
- delegate_->doc_hit_info().document_id());
+ delegate_->doc_hit_info().document_id(), current_time_ms_);
if (!document_filter_data_optional) {
// Didn't find the DocumentFilterData in the filter cache. This could be
// because the Document doesn't exist or the DocumentId isn't valid or the
@@ -105,6 +111,18 @@ libtextclassifier3::Status DocHitInfoIteratorFilter::Advance() {
return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
}
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorFilter::TrimRightMostNode() && {
+ ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_delegate,
+ std::move(*delegate_).TrimRightMostNode());
+ if (trimmed_delegate.iterator_ != nullptr) {
+ trimmed_delegate.iterator_ = std::make_unique<DocHitInfoIteratorFilter>(
+ std::move(trimmed_delegate.iterator_), &document_store_, &schema_store_,
+ options_, current_time_ms_);
+ }
+ return trimmed_delegate;
+}
+
int32_t DocHitInfoIteratorFilter::GetNumBlocksInspected() const {
return delegate_->GetNumBlocksInspected();
}
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.h b/icing/index/iterator/doc-hit-info-iterator-filter.h
index 5051607..be5e1e8 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter.h
+++ b/icing/index/iterator/doc-hit-info-iterator-filter.h
@@ -56,10 +56,12 @@ class DocHitInfoIteratorFilter : public DocHitInfoIterator {
explicit DocHitInfoIteratorFilter(
std::unique_ptr<DocHitInfoIterator> delegate,
const DocumentStore* document_store, const SchemaStore* schema_store,
- const Options& options);
+ const Options& options, int64_t current_time_ms);
libtextclassifier3::Status Advance() override;
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
int32_t GetNumBlocksInspected() const override;
int32_t GetNumLeafAdvanceCalls() const override;
@@ -80,6 +82,7 @@ class DocHitInfoIteratorFilter : public DocHitInfoIterator {
const Options options_;
std::unordered_set<NamespaceId> target_namespace_ids_;
std::unordered_set<SchemaTypeId> target_schema_type_ids_;
+ int64_t current_time_ms_;
};
} // namespace lib
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
index 967e518..d93fd02 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
@@ -17,6 +17,7 @@
#include <limits>
#include <memory>
#include <string>
+#include <string_view>
#include <utility>
#include <vector>
@@ -25,6 +26,7 @@
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator-and.h"
#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/proto/document.pb.h"
@@ -47,6 +49,18 @@ using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::IsEmpty;
+libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, const SchemaStore* schema_store) {
+ return DocumentStore::Create(
+ filesystem, base_dir, clock, schema_store,
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr);
+}
+
class DocHitInfoIteratorDeletedFilterTest : public ::testing::Test {
protected:
DocHitInfoIteratorDeletedFilterTest()
@@ -68,12 +82,14 @@ class DocHitInfoIteratorDeletedFilterTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
document_store_ = std::move(create_result.document_store);
}
@@ -102,9 +118,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, EmptyOriginalIterator) {
std::unique_ptr<DocHitInfoIterator> original_iterator_empty =
std::make_unique<DocHitInfoIteratorDummy>();
- DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator_empty),
- document_store_.get(),
- schema_store_.get(), options_);
+ DocHitInfoIteratorFilter filtered_iterator(
+ std::move(original_iterator_empty), document_store_.get(),
+ schema_store_.get(), options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
@@ -117,8 +133,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, DeletedDocumentsAreFiltered) {
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
document_store_->Put(test_document3_));
// Deletes test document 2
- ICING_ASSERT_OK(document_store_->Delete(test_document2_.namespace_(),
- test_document2_.uri()));
+ ICING_ASSERT_OK(document_store_->Delete(
+ test_document2_.namespace_(), test_document2_.uri(),
+ fake_clock_.GetSystemTimeMilliseconds()));
std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1),
DocHitInfo(document_id2),
@@ -126,9 +143,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, DeletedDocumentsAreFiltered) {
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
- document_store_.get(),
- schema_store_.get(), options_);
+ DocHitInfoIteratorFilter filtered_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator),
ElementsAre(document_id1, document_id3));
@@ -152,9 +169,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, NonExistingDocumentsAreFiltered) {
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
- document_store_.get(),
- schema_store_.get(), options_);
+ DocHitInfoIteratorFilter filtered_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator),
ElementsAre(document_id1, document_id2, document_id3));
@@ -165,9 +182,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, NegativeDocumentIdIsIgnored) {
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
- document_store_.get(),
- schema_store_.get(), options_);
+ DocHitInfoIteratorFilter filtered_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(filtered_iterator.Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
@@ -179,9 +196,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, InvalidDocumentIdIsIgnored) {
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
- document_store_.get(),
- schema_store_.get(), options_);
+ DocHitInfoIteratorFilter filtered_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(filtered_iterator.Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
@@ -196,9 +213,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, GreaterThanMaxDocumentIdIsIgnored) {
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
- document_store_.get(),
- schema_store_.get(), options_);
+ DocHitInfoIteratorFilter filtered_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(filtered_iterator.Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
@@ -235,12 +252,14 @@ class DocHitInfoIteratorNamespaceFilterTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
document_store_ = std::move(create_result.document_store);
}
@@ -272,9 +291,9 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest, EmptyOriginalIterator) {
std::make_unique<DocHitInfoIteratorDummy>();
options_.namespaces = std::vector<std::string_view>{};
- DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator_empty),
- document_store_.get(),
- schema_store_.get(), options_);
+ DocHitInfoIteratorFilter filtered_iterator(
+ std::move(original_iterator_empty), document_store_.get(),
+ schema_store_.get(), options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
@@ -290,9 +309,9 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest,
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
options_.namespaces = std::vector<std::string_view>{"nonexistent_namespace"};
- DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
- document_store_.get(),
- schema_store_.get(), options_);
+ DocHitInfoIteratorFilter filtered_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
@@ -307,9 +326,9 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest, NoNamespacesReturnsAll) {
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
options_.namespaces = std::vector<std::string_view>{};
- DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
- document_store_.get(),
- schema_store_.get(), options_);
+ DocHitInfoIteratorFilter filtered_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
}
@@ -331,9 +350,9 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest,
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
options_.namespaces = std::vector<std::string_view>{namespace1_};
- DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
- document_store_.get(),
- schema_store_.get(), options_);
+ DocHitInfoIteratorFilter filtered_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator),
ElementsAre(document_id1, document_id2));
@@ -357,9 +376,9 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest, FilterForMultipleNamespacesOk) {
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
options_.namespaces = std::vector<std::string_view>{namespace1_, namespace3_};
- DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
- document_store_.get(),
- schema_store_.get(), options_);
+ DocHitInfoIteratorFilter filtered_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator),
ElementsAre(document_id1, document_id2, document_id4));
@@ -367,35 +386,57 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest, FilterForMultipleNamespacesOk) {
class DocHitInfoIteratorSchemaTypeFilterTest : public ::testing::Test {
protected:
+ static constexpr std::string_view kSchema1 = "email";
+ static constexpr std::string_view kSchema2 = "message";
+ static constexpr std::string_view kSchema3 = "person";
+ static constexpr std::string_view kSchema4 = "artist";
+ static constexpr std::string_view kSchema5 = "emailMessage";
+
DocHitInfoIteratorSchemaTypeFilterTest()
: test_dir_(GetTestTempDir() + "/icing") {}
void SetUp() override {
filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
- document1_schema1_ =
- DocumentBuilder().SetKey("namespace", "1").SetSchema(schema1_).Build();
- document2_schema2_ =
- DocumentBuilder().SetKey("namespace", "2").SetSchema(schema2_).Build();
- document3_schema3_ =
- DocumentBuilder().SetKey("namespace", "3").SetSchema(schema3_).Build();
- document4_schema1_ =
- DocumentBuilder().SetKey("namespace", "4").SetSchema(schema1_).Build();
+ document1_schema1_ = DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema(std::string(kSchema1))
+ .Build();
+ document2_schema2_ = DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema(std::string(kSchema2))
+ .Build();
+ document3_schema3_ = DocumentBuilder()
+ .SetKey("namespace", "3")
+ .SetSchema(std::string(kSchema3))
+ .Build();
+ document4_schema1_ = DocumentBuilder()
+ .SetKey("namespace", "4")
+ .SetSchema(std::string(kSchema1))
+ .Build();
SchemaProto schema =
SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType(schema1_))
- .AddType(SchemaTypeConfigBuilder().SetType(schema2_))
- .AddType(SchemaTypeConfigBuilder().SetType(schema3_))
+ .AddType(SchemaTypeConfigBuilder().SetType(kSchema1))
+ .AddType(SchemaTypeConfigBuilder().SetType(kSchema2))
+ .AddType(SchemaTypeConfigBuilder().SetType(kSchema3))
+ .AddType(SchemaTypeConfigBuilder().SetType(kSchema4).AddParentType(
+ kSchema3))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(std::string(kSchema5))
+ .AddParentType(kSchema1)
+ .AddParentType(kSchema2))
.Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
document_store_ = std::move(create_result.document_store);
}
@@ -412,9 +453,6 @@ class DocHitInfoIteratorSchemaTypeFilterTest : public ::testing::Test {
FakeClock fake_clock_;
const Filesystem filesystem_;
const std::string test_dir_;
- const std::string schema1_ = "email";
- const std::string schema2_ = "message";
- const std::string schema3_ = "person";
DocumentProto document1_schema1_;
DocumentProto document2_schema2_;
DocumentProto document3_schema3_;
@@ -427,9 +465,9 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, EmptyOriginalIterator) {
std::make_unique<DocHitInfoIteratorDummy>();
options_.schema_types = std::vector<std::string_view>{};
- DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator_empty),
- document_store_.get(),
- schema_store_.get(), options_);
+ DocHitInfoIteratorFilter filtered_iterator(
+ std::move(original_iterator_empty), document_store_.get(),
+ schema_store_.get(), options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
@@ -446,9 +484,9 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest,
options_.schema_types =
std::vector<std::string_view>{"nonexistent_schema_type"};
- DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
- document_store_.get(),
- schema_store_.get(), options_);
+ DocHitInfoIteratorFilter filtered_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
@@ -463,9 +501,9 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, NoSchemaTypesReturnsAll) {
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
options_.schema_types = std::vector<std::string_view>{};
- DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
- document_store_.get(),
- schema_store_.get(), options_);
+ DocHitInfoIteratorFilter filtered_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
}
@@ -483,10 +521,10 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest,
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- options_.schema_types = std::vector<std::string_view>{schema1_};
- DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
- document_store_.get(),
- schema_store_.get(), options_);
+ options_.schema_types = std::vector<std::string_view>{kSchema1};
+ DocHitInfoIteratorFilter filtered_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
}
@@ -506,15 +544,119 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, FilterForMultipleSchemaTypesOk) {
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- options_.schema_types = std::vector<std::string_view>{schema2_, schema3_};
- DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
- document_store_.get(),
- schema_store_.get(), options_);
+ options_.schema_types = std::vector<std::string_view>{kSchema2, kSchema3};
+ DocHitInfoIteratorFilter filtered_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator),
ElementsAre(document_id2, document_id3));
}
+TEST_F(DocHitInfoIteratorSchemaTypeFilterTest,
+ FilterForSchemaTypePolymorphismOk) {
+ // Add some irrelevant documents.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1_schema1_));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2_schema2_));
+
+ // Create a person document and an artist document, where the artist should be
+ // able to be interpreted as a person by polymorphism.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId person_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("person")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId artist_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "artist")
+ .SetSchema("artist")
+ .Build()));
+
+ std::vector<DocHitInfo> doc_hit_infos = {
+ DocHitInfo(document_id1), DocHitInfo(document_id2),
+ DocHitInfo(person_document_id), DocHitInfo(artist_document_id)};
+
+ // Filters for the "person" type should also include the "artist" type.
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+ options_.schema_types = {"person"};
+ DocHitInfoIteratorFilter filtered_iterator_1(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
+ EXPECT_THAT(GetDocumentIds(&filtered_iterator_1),
+ ElementsAre(person_document_id, artist_document_id));
+
+ // Filters for the "artist" type should not include the "person" type.
+ original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+ options_.schema_types = {"artist"};
+ DocHitInfoIteratorFilter filtered_iterator_2(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
+ EXPECT_THAT(GetDocumentIds(&filtered_iterator_2),
+ ElementsAre(artist_document_id));
+}
+
+TEST_F(DocHitInfoIteratorSchemaTypeFilterTest,
+ FilterForSchemaTypeMultipleParentPolymorphismOk) {
+ // Create an email and a message document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "email")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId message_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "message")
+ .SetSchema("message")
+ .Build()));
+
+ // Create a emailMessage document, which the should be able to be interpreted
+ // as both an email and a message by polymorphism.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_message_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "emailMessage")
+ .SetSchema("emailMessage")
+ .Build()));
+
+ std::vector<DocHitInfo> doc_hit_infos = {
+ DocHitInfo(email_document_id), DocHitInfo(message_document_id),
+ DocHitInfo(email_message_document_id)};
+
+ // Filters for the "email" type should also include the "emailMessage" type.
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+ options_.schema_types = std::vector<std::string_view>{"email"};
+ DocHitInfoIteratorFilter filtered_iterator_1(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
+ EXPECT_THAT(GetDocumentIds(&filtered_iterator_1),
+ ElementsAre(email_document_id, email_message_document_id));
+
+ // Filters for the "message" type should also include the "emailMessage" type.
+ original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+ options_.schema_types = std::vector<std::string_view>{"message"};
+ DocHitInfoIteratorFilter filtered_iterator_2(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
+ EXPECT_THAT(GetDocumentIds(&filtered_iterator_2),
+ ElementsAre(message_document_id, email_message_document_id));
+
+ // Filters for a irrelevant type should return nothing.
+ original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+ options_.schema_types = std::vector<std::string_view>{"person"};
+ DocHitInfoIteratorFilter filtered_iterator_3(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
+ EXPECT_THAT(GetDocumentIds(&filtered_iterator_3), IsEmpty());
+}
+
class DocHitInfoIteratorExpirationFilterTest : public ::testing::Test {
protected:
DocHitInfoIteratorExpirationFilterTest()
@@ -530,12 +672,14 @@ class DocHitInfoIteratorExpirationFilterTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
document_store_ = std::move(create_result.document_store);
}
@@ -562,8 +706,8 @@ TEST_F(DocHitInfoIteratorExpirationFilterTest, TtlZeroIsntFilteredOut) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -581,9 +725,9 @@ TEST_F(DocHitInfoIteratorExpirationFilterTest, TtlZeroIsntFilteredOut) {
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
- document_store.get(),
- schema_store_.get(), options_);
+ DocHitInfoIteratorFilter filtered_iterator(
+ std::move(original_iterator), document_store.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
}
@@ -594,8 +738,8 @@ TEST_F(DocHitInfoIteratorExpirationFilterTest, BeforeTtlNotFilteredOut) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -613,9 +757,9 @@ TEST_F(DocHitInfoIteratorExpirationFilterTest, BeforeTtlNotFilteredOut) {
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
- document_store.get(),
- schema_store_.get(), options_);
+ DocHitInfoIteratorFilter filtered_iterator(
+ std::move(original_iterator), document_store.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
}
@@ -626,8 +770,8 @@ TEST_F(DocHitInfoIteratorExpirationFilterTest, EqualTtlFilteredOut) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -645,9 +789,9 @@ TEST_F(DocHitInfoIteratorExpirationFilterTest, EqualTtlFilteredOut) {
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
- document_store.get(),
- schema_store_.get(), options_);
+ DocHitInfoIteratorFilter filtered_iterator(
+ std::move(original_iterator), document_store.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
@@ -659,8 +803,8 @@ TEST_F(DocHitInfoIteratorExpirationFilterTest, PastTtlFilteredOut) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -678,9 +822,9 @@ TEST_F(DocHitInfoIteratorExpirationFilterTest, PastTtlFilteredOut) {
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
- document_store.get(),
- schema_store_.get(), options_);
+ DocHitInfoIteratorFilter filtered_iterator(
+ std::move(original_iterator), document_store.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
@@ -730,12 +874,14 @@ class DocHitInfoIteratorFilterTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
document_store_ = std::move(create_result.document_store);
}
@@ -769,8 +915,8 @@ TEST_F(DocHitInfoIteratorFilterTest, CombineAllFiltersOk) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -793,7 +939,8 @@ TEST_F(DocHitInfoIteratorFilterTest, CombineAllFiltersOk) {
// Deletes document2, causing it to be filtered out
ICING_ASSERT_OK(
document_store->Delete(document2_namespace1_schema1_.namespace_(),
- document2_namespace1_schema1_.uri()));
+ document2_namespace1_schema1_.uri(),
+ fake_clock_.GetSystemTimeMilliseconds()));
std::vector<DocHitInfo> doc_hit_infos = {
DocHitInfo(document_id1), DocHitInfo(document_id2),
@@ -811,9 +958,9 @@ TEST_F(DocHitInfoIteratorFilterTest, CombineAllFiltersOk) {
// Filters out document4 by schema type
options.schema_types = std::vector<std::string_view>{schema1_};
- DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
- document_store.get(),
- schema_store_.get(), options);
+ DocHitInfoIteratorFilter filtered_iterator(
+ std::move(original_iterator), document_store.get(), schema_store_.get(),
+ options, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
}
@@ -844,9 +991,9 @@ TEST_F(DocHitInfoIteratorFilterTest, SectionIdMasksArePopulatedCorrectly) {
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
DocHitInfoIteratorFilter::Options options;
- DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
- document_store_.get(),
- schema_store_.get(), options);
+ DocHitInfoIteratorFilter filtered_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocHitInfos(&filtered_iterator),
ElementsAre(EqualsDocHitInfo(document_id1, section_ids1),
@@ -859,9 +1006,9 @@ TEST_F(DocHitInfoIteratorFilterTest, GetNumBlocksInspected) {
original_iterator->SetNumBlocksInspected(5);
DocHitInfoIteratorFilter::Options options;
- DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
- document_store_.get(),
- schema_store_.get(), options);
+ DocHitInfoIteratorFilter filtered_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(filtered_iterator.GetNumBlocksInspected(), Eq(5));
}
@@ -871,13 +1018,62 @@ TEST_F(DocHitInfoIteratorFilterTest, GetNumLeafAdvanceCalls) {
original_iterator->SetNumLeafAdvanceCalls(6);
DocHitInfoIteratorFilter::Options options;
- DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
- document_store_.get(),
- schema_store_.get(), options);
+ DocHitInfoIteratorFilter filtered_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(filtered_iterator.GetNumLeafAdvanceCalls(), Eq(6));
}
+TEST_F(DocHitInfoIteratorFilterTest, TrimFilterIterator) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(document1_namespace1_schema1_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(document2_namespace1_schema1_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id3,
+ document_store_->Put(document3_namespace2_schema1_));
+
+ // Build an interator tree like:
+ // Filter
+ // |
+ // AND
+ // / \
+ // {1, 3} {2}
+ std::vector<DocHitInfo> left_vector = {DocHitInfo(document_id1),
+ DocHitInfo(document_id3)};
+ std::vector<DocHitInfo> right_vector = {DocHitInfo(document_id2)};
+
+ std::unique_ptr<DocHitInfoIterator> left_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(left_vector);
+ std::unique_ptr<DocHitInfoIterator> right_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(right_vector, "term", 10);
+
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorAnd>(std::move(left_iter),
+ std::move(right_iter));
+
+ DocHitInfoIteratorFilter::Options options;
+ // Filters out document3 by namespace
+ options.namespaces = std::vector<std::string_view>{namespace1_};
+ DocHitInfoIteratorFilter filtered_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options, fake_clock_.GetSystemTimeMilliseconds());
+
+ // The trimmed tree.
+ // Filter
+ // |
+ // {1, 3}
+ ICING_ASSERT_OK_AND_ASSIGN(DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(filtered_iterator).TrimRightMostNode());
+ EXPECT_THAT(trimmed_node.term_, Eq("term"));
+ EXPECT_THAT(trimmed_node.term_start_index_, Eq(10));
+ EXPECT_THAT(GetDocumentIds(trimmed_node.iterator_.get()),
+ ElementsAre(document_id1));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/index/iterator/doc-hit-info-iterator-none.h b/icing/index/iterator/doc-hit-info-iterator-none.h
new file mode 100644
index 0000000..f938d32
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-none.h
@@ -0,0 +1,52 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_NONE_H_
+#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_NONE_H_
+
+#include <cstdint>
+#include <string>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+
+namespace icing {
+namespace lib {
+
+// Iterator that will return no results.
+class DocHitInfoIteratorNone : public DocHitInfoIterator {
+ public:
+ libtextclassifier3::Status Advance() override {
+ return absl_ports::ResourceExhaustedError(
+ "DocHitInfoIterator NONE has no hits.");
+ }
+
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override {
+ TrimmedNode node = {nullptr, /*term=*/"", /*term_start_index_=*/0,
+ /*unnormalized_term_length_=*/0};
+ return node;
+ }
+
+ int32_t GetNumBlocksInspected() const override { return 0; }
+
+ int32_t GetNumLeafAdvanceCalls() const override { return 0; }
+
+ std::string ToString() const override { return "(NONE)"; }
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_NONE_H_
diff --git a/icing/index/iterator/doc-hit-info-iterator-not.cc b/icing/index/iterator/doc-hit-info-iterator-not.cc
index 8fb3659..38b1ded 100644
--- a/icing/index/iterator/doc-hit-info-iterator-not.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-not.cc
@@ -60,6 +60,13 @@ libtextclassifier3::Status DocHitInfoIteratorNot::Advance() {
return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
}
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorNot::TrimRightMostNode() && {
+ // Don't generate suggestion if the last operator is NOT.
+ return absl_ports::InvalidArgumentError(
+ "Cannot generate suggestion if the last term is NOT operator.");
+}
+
int32_t DocHitInfoIteratorNot::GetNumBlocksInspected() const {
return to_be_excluded_->GetNumBlocksInspected() +
all_document_id_iterator_.GetNumBlocksInspected();
diff --git a/icing/index/iterator/doc-hit-info-iterator-not.h b/icing/index/iterator/doc-hit-info-iterator-not.h
index 58e909d..8cc3bf3 100644
--- a/icing/index/iterator/doc-hit-info-iterator-not.h
+++ b/icing/index/iterator/doc-hit-info-iterator-not.h
@@ -50,6 +50,11 @@ class DocHitInfoIteratorNot : public DocHitInfoIterator {
libtextclassifier3::Status Advance() override;
+ // The NOT operator is not suppose to be trimmed.
+ // We shouldn't generate suggestion for the last term if the last term belongs
+ // to NOT operator.
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
int32_t GetNumBlocksInspected() const override;
int32_t GetNumLeafAdvanceCalls() const override;
diff --git a/icing/index/iterator/doc-hit-info-iterator-not_test.cc b/icing/index/iterator/doc-hit-info-iterator-not_test.cc
index 5d0e4ac..5a8ce2c 100644
--- a/icing/index/iterator/doc-hit-info-iterator-not_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-not_test.cc
@@ -155,6 +155,17 @@ TEST(DocHitInfoIteratorNotTest, SectionIdsAlwaysNone) {
DocHitInfo(0, kSectionIdMaskNone)));
}
+TEST(DocHitInfoIteratorNotTest, TrimNotIterator) {
+ std::vector<DocHitInfo> exclude_doc_hit_infos = {DocHitInfo(0)};
+ std::unique_ptr<DocHitInfoIterator> to_be_excluded_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(exclude_doc_hit_infos);
+
+ DocHitInfoIteratorNot not_iterator(std::move(to_be_excluded_iterator),
+ /*document_id_limit=*/5);
+ EXPECT_THAT(std::move(not_iterator).TrimRightMostNode(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/index/iterator/doc-hit-info-iterator-or.cc b/icing/index/iterator/doc-hit-info-iterator-or.cc
index 655cafc..8f7b84f 100644
--- a/icing/index/iterator/doc-hit-info-iterator-or.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-or.cc
@@ -21,6 +21,7 @@
#include "icing/absl_ports/str_cat.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/store/document-id.h"
+#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
@@ -57,6 +58,26 @@ DocHitInfoIteratorOr::DocHitInfoIteratorOr(
std::unique_ptr<DocHitInfoIterator> right_it)
: left_(std::move(left_it)), right_(std::move(right_it)) {}
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorOr::TrimRightMostNode() && {
+ // Trim the whole OR iterator. Only keep the prefix of the right iterator.
+ //
+ // The OR operator has higher priority, it is not possible that we have an
+ // unfinished prefix in the nested iterator right-most child we need to search
+ // suggestion for.
+ //
+ // eg: `foo OR (bar baz)` is not valid for search suggestion since there is no
+ // unfinished last term to be filled.
+ //
+ // If we need to trim a OR iterator for search suggestion, the right child
+ // must be the last term. We don't need left side information to
+ // generate suggestion for the right side.
+ ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_right,
+ std::move(*right_).TrimRightMostNode());
+ trimmed_right.iterator_ = nullptr;
+ return trimmed_right;
+}
+
libtextclassifier3::Status DocHitInfoIteratorOr::Advance() {
// Cache the document_id of the left iterator for comparison to the right.
DocumentId orig_left_document_id = left_document_id_;
@@ -140,6 +161,26 @@ DocHitInfoIteratorOrNary::DocHitInfoIteratorOrNary(
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators)
: iterators_(std::move(iterators)) {}
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorOrNary::TrimRightMostNode() && {
+ // Trim the whole OR iterator.
+ //
+ // The OR operator has higher priority, it is not possible that we have an
+ // unfinished prefix in the nested iterator right-most child we need to search
+ // suggestion for.
+ //
+ // eg: `foo OR (bar baz)` is not valid for search suggestion since there is no
+ // unfinished last term to be filled.
+ //
+ // If we need to trim a OR iterator for search suggestion, the right-most
+ // child must be the last term. We don't need left side information to
+ // generate suggestion for the right side.
+ ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_right,
+ std::move(*iterators_.back()).TrimRightMostNode());
+ trimmed_right.iterator_ = nullptr;
+ return trimmed_right;
+}
+
libtextclassifier3::Status DocHitInfoIteratorOrNary::Advance() {
current_iterators_.clear();
if (iterators_.size() < 2) {
diff --git a/icing/index/iterator/doc-hit-info-iterator-or.h b/icing/index/iterator/doc-hit-info-iterator-or.h
index 2dae68d..1e9847d 100644
--- a/icing/index/iterator/doc-hit-info-iterator-or.h
+++ b/icing/index/iterator/doc-hit-info-iterator-or.h
@@ -34,6 +34,8 @@ class DocHitInfoIteratorOr : public DocHitInfoIterator {
explicit DocHitInfoIteratorOr(std::unique_ptr<DocHitInfoIterator> left_it,
std::unique_ptr<DocHitInfoIterator> right_it);
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
libtextclassifier3::Status Advance() override;
int32_t GetNumBlocksInspected() const override;
@@ -77,6 +79,8 @@ class DocHitInfoIteratorOrNary : public DocHitInfoIterator {
explicit DocHitInfoIteratorOrNary(
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators);
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
libtextclassifier3::Status Advance() override;
int32_t GetNumBlocksInspected() const override;
diff --git a/icing/index/iterator/doc-hit-info-iterator-or_test.cc b/icing/index/iterator/doc-hit-info-iterator-or_test.cc
index f487801..1950c01 100644
--- a/icing/index/iterator/doc-hit-info-iterator-or_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-or_test.cc
@@ -312,6 +312,47 @@ TEST(DocHitInfoIteratorOrTest, PopulateMatchedTermsStats) {
}
}
+TEST(DocHitInfoIteratorOrTest, TrimOrIterator) {
+ std::vector<DocHitInfo> first_vector = {DocHitInfo(0)};
+ std::vector<DocHitInfo> second_vector = {DocHitInfo(1)};
+
+ std::unique_ptr<DocHitInfoIterator> first_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(first_vector);
+ std::unique_ptr<DocHitInfoIterator> second_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(second_vector, "term", 10);
+
+ DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter));
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(or_iter).TrimRightMostNode());
+ // The whole iterator is trimmed
+ ASSERT_TRUE(trimmed_node.iterator_ == nullptr);
+ ASSERT_THAT(trimmed_node.term_, Eq("term"));
+ ASSERT_THAT(trimmed_node.term_start_index_, Eq(10));
+}
+
+TEST(DocHitInfoIteratorOrNaryTest, TrimOrNaryIterator) {
+ std::vector<DocHitInfo> first_vector = {DocHitInfo(0)};
+ std::vector<DocHitInfo> second_vector = {DocHitInfo(1)};
+ std::vector<DocHitInfo> third_vector = {DocHitInfo(2)};
+ std::vector<DocHitInfo> forth_vector = {DocHitInfo(3)};
+
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(first_vector));
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(second_vector));
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(third_vector));
+ iterators.push_back(
+ std::make_unique<DocHitInfoIteratorDummy>(forth_vector, "term", 10));
+ DocHitInfoIteratorOrNary or_iter(std::move(iterators));
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(or_iter).TrimRightMostNode());
+ // The whole iterator is trimmed
+ ASSERT_TRUE(trimmed_node.iterator_ == nullptr);
+ ASSERT_THAT(trimmed_node.term_, Eq("term"));
+ ASSERT_THAT(trimmed_node.term_start_index_, Eq(10));
+}
+
TEST(DocHitInfoIteratorOrNaryTest, Initialize) {
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>());
diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc
new file mode 100644
index 0000000..05778b0
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc
@@ -0,0 +1,115 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/doc-hit-info-iterator-property-in-schema.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
+DocHitInfoIteratorPropertyInSchema::DocHitInfoIteratorPropertyInSchema(
+ std::unique_ptr<DocHitInfoIterator> delegate,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ std::set<std::string> target_sections, int64_t current_time_ms)
+ : delegate_(std::move(delegate)),
+ document_store_(*document_store),
+ schema_store_(*schema_store),
+ target_properties_(std::move(target_sections)),
+ current_time_ms_(current_time_ms) {}
+
+libtextclassifier3::Status DocHitInfoIteratorPropertyInSchema::Advance() {
+ doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+ hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
+
+ // Maps from SchemaTypeId to a bool indicating whether or not the type has
+ // the requested property.
+ std::unordered_map<SchemaTypeId, bool> property_defined_types;
+ while (delegate_->Advance().ok()) {
+ DocumentId document_id = delegate_->doc_hit_info().document_id();
+ auto data_optional = document_store_.GetAliveDocumentFilterData(
+ document_id, current_time_ms_);
+ if (!data_optional) {
+ // Ran into some error retrieving information on this hit, skip
+ continue;
+ }
+
+ // Guaranteed that the DocumentFilterData exists at this point
+ SchemaTypeId schema_type_id = data_optional.value().schema_type_id();
+ bool valid_match = false;
+ auto itr = property_defined_types.find(schema_type_id);
+ if (itr != property_defined_types.end()) {
+ valid_match = itr->second;
+ } else {
+ for (const auto& property : target_properties_) {
+ if (schema_store_.IsPropertyDefinedInSchema(schema_type_id, property)) {
+ valid_match = true;
+ break;
+ }
+ }
+ property_defined_types[schema_type_id] = valid_match;
+ }
+
+ if (valid_match) {
+ doc_hit_info_ = delegate_->doc_hit_info();
+ hit_intersect_section_ids_mask_ =
+ delegate_->hit_intersect_section_ids_mask();
+ doc_hit_info_.set_hit_section_ids_mask(hit_intersect_section_ids_mask_);
+ return libtextclassifier3::Status::OK;
+ }
+
+ // The document's schema does not define any properties listed in
+ // target_properties_. Continue.
+ }
+
+ // Didn't find anything on the delegate iterator.
+ return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
+}
+
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorPropertyInSchema::TrimRightMostNode() && {
+ // Don't generate suggestion if the last operator is this custom function.
+ return absl_ports::InvalidArgumentError(
+ "Cannot generate suggestion if the last term is hasPropertyDefined().");
+}
+
+int32_t DocHitInfoIteratorPropertyInSchema::GetNumBlocksInspected() const {
+ return delegate_->GetNumBlocksInspected();
+}
+
+int32_t DocHitInfoIteratorPropertyInSchema::GetNumLeafAdvanceCalls() const {
+ return delegate_->GetNumLeafAdvanceCalls();
+}
+
+std::string DocHitInfoIteratorPropertyInSchema::ToString() const {
+ return absl_ports::StrCat("(", absl_ports::StrJoin(target_properties_, ","),
+ "): ", delegate_->ToString());
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-schema.h b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.h
new file mode 100644
index 0000000..730c497
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.h
@@ -0,0 +1,77 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_PROPERTY_IN_SCHEMA_H_
+#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_PROPERTY_IN_SCHEMA_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
+// An iterator that helps filter for DocHitInfos whose schemas define the
+// properties named in target_properties_.
+class DocHitInfoIteratorPropertyInSchema : public DocHitInfoIterator {
+ public:
+ // Does not take any ownership, and all pointers must refer to valid objects
+ // that outlive the one constructed. The delegate should be at minimum be
+ // a DocHitInfoIteratorAllDocumentId, but other optimizations are possible,
+ // cf. go/icing-property-in-schema-existence.
+ explicit DocHitInfoIteratorPropertyInSchema(
+ std::unique_ptr<DocHitInfoIterator> delegate,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ std::set<std::string> target_sections, int64_t current_time_ms);
+
+ libtextclassifier3::Status Advance() override;
+
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
+ int32_t GetNumBlocksInspected() const override;
+
+ int32_t GetNumLeafAdvanceCalls() const override;
+
+ std::string ToString() const override;
+
+ void PopulateMatchedTermsStats(
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
+ if (doc_hit_info_.document_id() == kInvalidDocumentId) {
+ // Current hit isn't valid, return.
+ return;
+ }
+ delegate_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
+ }
+
+ private:
+ std::unique_ptr<DocHitInfoIterator> delegate_;
+ const DocumentStore& document_store_;
+ const SchemaStore& schema_store_;
+
+ std::set<std::string> target_properties_;
+ int64_t current_time_ms_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_PROPERTY_IN_SCHEMA_H_
diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc b/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc
new file mode 100644
index 0000000..47f5cc5
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc
@@ -0,0 +1,270 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/doc-hit-info-iterator-property-in-schema.h"
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator-all-document-id.h"
+#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+
+class DocHitInfoIteratorPropertyInSchemaTest : public ::testing::Test {
+ protected:
+ DocHitInfoIteratorPropertyInSchemaTest()
+ : test_dir_(GetTestTempDir() + "/icing") {}
+
+ void SetUp() override {
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ document1_ = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("email")
+ .Build();
+ document2_ =
+ DocumentBuilder().SetKey("namespace", "uri2").SetSchema("note").Build();
+
+ indexed_section_0 = "indexedSection0";
+ unindexed_section_1 = "unindexedSection1";
+ not_defined_section_2 = "notDefinedSection2";
+
+ schema_ =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ // Add an indexed property so we generate section
+ // metadata on it
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(indexed_section_0)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(unindexed_section_1)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("note").AddProperty(
+ PropertyConfigBuilder()
+ .SetName(unindexed_section_1)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, test_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ document_store_ = std::move(create_result.document_store);
+ }
+
+ void TearDown() override {
+ document_store_.reset();
+ schema_store_.reset();
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> document_store_;
+ const Filesystem filesystem_;
+ const std::string test_dir_;
+ std::string indexed_section_0;
+ std::string unindexed_section_1;
+ std::string not_defined_section_2;
+ SchemaProto schema_;
+ DocumentProto document1_;
+ DocumentProto document2_;
+ FakeClock fake_clock_;
+};
+
+TEST_F(DocHitInfoIteratorPropertyInSchemaTest,
+ AdvanceToDocumentWithIndexedProperty) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document1_));
+
+ auto original_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>(
+ document_store_->num_documents());
+
+ DocHitInfoIteratorPropertyInSchema property_defined_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ /*target_target_sections=*/{indexed_section_0},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ EXPECT_THAT(GetDocumentIds(&property_defined_iterator),
+ ElementsAre(document_id));
+
+ EXPECT_FALSE(property_defined_iterator.Advance().ok());
+}
+
+TEST_F(DocHitInfoIteratorPropertyInSchemaTest,
+ AdvanceToDocumentWithUnindexedProperty) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document1_));
+
+ auto original_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>(
+ document_store_->num_documents());
+
+ DocHitInfoIteratorPropertyInSchema property_defined_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ /*target_target_sections=*/{unindexed_section_1},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ EXPECT_THAT(GetDocumentIds(&property_defined_iterator),
+ ElementsAre(document_id));
+
+ EXPECT_FALSE(property_defined_iterator.Advance().ok());
+}
+
+TEST_F(DocHitInfoIteratorPropertyInSchemaTest, NoMatchWithUndefinedProperty) {
+ ICING_EXPECT_OK(document_store_->Put(document1_));
+
+ auto original_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>(
+ document_store_->num_documents());
+
+ DocHitInfoIteratorPropertyInSchema property_defined_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ /*target_target_sections=*/{not_defined_section_2},
+ fake_clock_.GetSystemTimeMilliseconds());
+ EXPECT_FALSE(property_defined_iterator.Advance().ok());
+}
+
+TEST_F(DocHitInfoIteratorPropertyInSchemaTest,
+ CorrectlySetsSectionIdMasksAndPopulatesTermMatchInfo) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document1_));
+
+ // Arbitrary section ids for the documents in the DocHitInfoIterators.
+ // Created to test correct section_id_mask behavior.
+ SectionIdMask original_section_id_mask = 0b00000101; // hits in sections 0, 2
+
+ DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(document_id);
+ doc_hit_info1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+ doc_hit_info1.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/2);
+
+ // Create a hit that was found in the indexed section
+ std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos = {doc_hit_info1};
+
+ auto original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "hi");
+ original_iterator->set_hit_intersect_section_ids_mask(
+ original_section_id_mask);
+
+ DocHitInfoIteratorPropertyInSchema property_defined_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ /*target_target_sections=*/{indexed_section_0},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ std::vector<TermMatchInfo> matched_terms_stats;
+ property_defined_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
+
+ ICING_EXPECT_OK(property_defined_iterator.Advance());
+ EXPECT_THAT(property_defined_iterator.doc_hit_info().document_id(),
+ Eq(document_id));
+
+ // The expected mask is the same as the original mask, since the iterator
+ // should treat it as a pass-through.
+ SectionIdMask expected_section_id_mask = original_section_id_mask;
+ EXPECT_EQ(property_defined_iterator.hit_intersect_section_ids_mask(),
+ expected_section_id_mask);
+
+ property_defined_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{0, 1}, {2, 2}};
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "hi", expected_section_ids_tf_map)));
+
+ EXPECT_FALSE(property_defined_iterator.Advance().ok());
+}
+
+TEST_F(DocHitInfoIteratorPropertyInSchemaTest,
+ TrimRightMostNodeResultsInError) {
+ auto original_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>(
+ document_store_->num_documents());
+
+ DocHitInfoIteratorPropertyInSchema property_defined_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ /*target_target_sections=*/{indexed_section_0},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ EXPECT_THAT(std::move(property_defined_iterator).TrimRightMostNode(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(DocHitInfoIteratorPropertyInSchemaTest,
+ FindPropertyDefinedByMultipleTypes) {
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1_));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2_));
+ auto original_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>(
+ document_store_->num_documents());
+
+ DocHitInfoIteratorPropertyInSchema property_defined_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ /*target_target_sections=*/{unindexed_section_1},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ EXPECT_THAT(GetDocumentIds(&property_defined_iterator),
+ ElementsAre(document_id2, document_id1));
+
+ EXPECT_FALSE(property_defined_iterator.Advance().ok());
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
index 0871436..b850a9b 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
@@ -38,21 +38,115 @@ namespace lib {
DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict(
std::unique_ptr<DocHitInfoIterator> delegate,
const DocumentStore* document_store, const SchemaStore* schema_store,
- std::string target_section)
+ std::set<std::string> target_sections, int64_t current_time_ms)
: delegate_(std::move(delegate)),
document_store_(*document_store),
schema_store_(*schema_store),
- target_section_(std::move(target_section)) {}
+ current_time_ms_(current_time_ms) {
+ type_property_filters_[std::string(SchemaStore::kSchemaTypeWildcard)] =
+ std::move(target_sections);
+}
+
+DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict(
+ std::unique_ptr<DocHitInfoIterator> delegate,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ const SearchSpecProto& search_spec,
+ int64_t current_time_ms)
+ : delegate_(std::move(delegate)),
+ document_store_(*document_store),
+ schema_store_(*schema_store),
+ current_time_ms_(current_time_ms) {
+ // TODO(b/294274922): Add support for polymorphism in type property filters.
+ for (const TypePropertyMask& type_property_mask :
+ search_spec.type_property_filters()) {
+ type_property_filters_[type_property_mask.schema_type()] =
+ std::set<std::string>(type_property_mask.paths().begin(),
+ type_property_mask.paths().end());
+ }
+}
+
+DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict(
+ std::unique_ptr<DocHitInfoIterator> delegate,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ std::unordered_map<std::string, std::set<std::string>>
+ type_property_filters,
+ std::unordered_map<std::string, SectionIdMask> type_property_masks,
+ int64_t current_time_ms)
+ : delegate_(std::move(delegate)),
+ document_store_(*document_store),
+ schema_store_(*schema_store),
+ current_time_ms_(current_time_ms),
+ type_property_filters_(std::move(type_property_filters)),
+ type_property_masks_(std::move(type_property_masks)) {}
+
+SectionIdMask DocHitInfoIteratorSectionRestrict::GenerateSectionMask(
+ const std::string& schema_type,
+ const std::set<std::string>& target_sections) const {
+ SectionIdMask section_mask = kSectionIdMaskNone;
+ auto section_metadata_list_or =
+ schema_store_.GetSectionMetadata(schema_type);
+ if (!section_metadata_list_or.ok()) {
+ // The current schema doesn't have section metadata.
+ return kSectionIdMaskNone;
+ }
+ const std::vector<SectionMetadata>* section_metadata_list =
+ section_metadata_list_or.ValueOrDie();
+ for (const SectionMetadata& section_metadata : *section_metadata_list) {
+ if (target_sections.find(section_metadata.path) !=
+ target_sections.end()) {
+ section_mask |= UINT64_C(1) << section_metadata.id;
+ }
+ }
+ return section_mask;
+}
+
+SectionIdMask DocHitInfoIteratorSectionRestrict::
+ ComputeAndCacheSchemaTypeAllowedSectionsMask(
+ const std::string& schema_type) {
+ if (const auto type_property_mask_itr =
+ type_property_masks_.find(schema_type);
+ type_property_mask_itr != type_property_masks_.end()) {
+ return type_property_mask_itr->second;
+ }
+
+ // Section id mask of schema_type is never calculated before, so
+ // calculate it here and put it into type_property_masks_.
+ // - If type property filters of schema_type or wildcard (*) are
+ // specified, then create a mask according to the filters.
+ // - Otherwise, create a mask to match all properties.
+ SectionIdMask new_section_id_mask = kSectionIdMaskAll;
+ if (const auto itr = type_property_filters_.find(schema_type);
+ itr != type_property_filters_.end()) {
+ // Property filters defined for given schema type
+ new_section_id_mask = GenerateSectionMask(
+ schema_type, itr->second);
+ } else if (const auto wildcard_itr = type_property_filters_.find(
+ std::string(SchemaStore::kSchemaTypeWildcard));
+ wildcard_itr != type_property_filters_.end()) {
+ // Property filters defined for wildcard entry
+ new_section_id_mask = GenerateSectionMask(
+ schema_type, wildcard_itr->second);
+ } else {
+ // Do not cache the section mask if no property filters apply to this schema
+ // type to avoid taking up unnecessary space.
+ return kSectionIdMaskAll;
+ }
+
+ type_property_masks_[schema_type] = new_section_id_mask;
+ return new_section_id_mask;
+}
libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() {
+ doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+ hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
while (delegate_->Advance().ok()) {
DocumentId document_id = delegate_->doc_hit_info().document_id();
SectionIdMask section_id_mask =
delegate_->doc_hit_info().hit_section_ids_mask();
- auto data_optional =
- document_store_.GetAliveDocumentFilterData(document_id);
+ auto data_optional = document_store_.GetAliveDocumentFilterData(
+ document_id, current_time_ms_);
if (!data_optional) {
// Ran into some error retrieving information on this hit, skip
continue;
@@ -60,41 +154,80 @@ libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() {
// Guaranteed that the DocumentFilterData exists at this point
SchemaTypeId schema_type_id = data_optional.value().schema_type_id();
-
- // A hit can be in multiple sections at once, need to check that at least
- // one of the confirmed section ids match the name of the target section
- while (section_id_mask != 0) {
- // There was a hit in this section id
- SectionId section_id = __builtin_ctzll(section_id_mask);
-
- auto section_metadata_or =
- schema_store_.GetSectionMetadata(schema_type_id, section_id);
-
- if (section_metadata_or.ok()) {
- const SectionMetadata* section_metadata =
- section_metadata_or.ValueOrDie();
-
- if (section_metadata->path == target_section_) {
- // The hit was in the target section name, return OK/found
- doc_hit_info_ = delegate_->doc_hit_info();
- hit_intersect_section_ids_mask_ = UINT64_C(1) << section_id;
- return libtextclassifier3::Status::OK;
- }
- }
-
- // Mark this section as checked
- section_id_mask &= ~(UINT64_C(1) << section_id);
+ auto schema_type_or = schema_store_.GetSchemaType(schema_type_id);
+ if (!schema_type_or.ok()) {
+ // Ran into error retrieving schema type, skip
+ continue;
+ }
+ const std::string* schema_type = std::move(schema_type_or).ValueOrDie();
+ SectionIdMask allowed_sections_mask =
+ ComputeAndCacheSchemaTypeAllowedSectionsMask(*schema_type);
+
+ // A hit can be in multiple sections at once, need to check which of the
+ // section ids match the sections allowed by type_property_masks_. This can
+ // be done by doing a bitwise and of the section_id_mask in the doc hit and
+ // the allowed_sections_mask.
+ hit_intersect_section_ids_mask_ = section_id_mask & allowed_sections_mask;
+
+ // Return this document if:
+ // - the sectionIdMask is not empty after applying property filters, or
+ // - no property filters apply for its schema type (allowed_sections_mask
+ // == kSectionIdMaskAll). This is needed to ensure that in case of empty
+ // query (which uses doc-hit-info-iterator-all-document-id), where
+ // section_id_mask (and hence hit_intersect_section_ids_mask_) is
+ // kSectionIdMaskNone, doc hits with no property restrictions don't get
+ // filtered out. Doc hits for schema types for whom property filters are
+ // specified will still get filtered out.
+ if (allowed_sections_mask == kSectionIdMaskAll
+ || hit_intersect_section_ids_mask_ != kSectionIdMaskNone) {
+ doc_hit_info_ = delegate_->doc_hit_info();
+ doc_hit_info_.set_hit_section_ids_mask(hit_intersect_section_ids_mask_);
+ return libtextclassifier3::Status::OK;
}
-
// Didn't find a matching section name for this hit. Continue.
}
// Didn't find anything on the delegate iterator.
- doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
}
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorSectionRestrict::TrimRightMostNode() && {
+ ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_delegate,
+ std::move(*delegate_).TrimRightMostNode());
+ // TrimRightMostNode is only used by suggestion processor to process query
+ // expression, so an entry for wildcard should always be present in
+ // type_property_filters_ when code flow reaches here. If the InternalError
+ // below is returned, that means TrimRightMostNode hasn't been called in the
+ // right context.
+ const auto it = type_property_filters_.find("*");
+ if (it == type_property_filters_.end()) {
+ return absl_ports::InternalError(
+ "A wildcard entry should always be present in type property filters "
+ "whenever TrimRightMostNode() is called for "
+ "DocHitInfoIteratorSectionRestrict");
+ }
+ std::set<std::string>& target_sections = it->second;
+ if (target_sections.empty()) {
+ return absl_ports::InternalError(
+ "Target sections should not be empty whenever TrimRightMostNode() is "
+ "called for DocHitInfoIteratorSectionRestrict");
+ }
+ if (trimmed_delegate.iterator_ == nullptr) {
+ // TODO(b/228240987): Update TrimmedNode and downstream code to handle
+ // multiple section restricts.
+ trimmed_delegate.target_section_ = std::move(*target_sections.begin());
+ return trimmed_delegate;
+ }
+ trimmed_delegate.iterator_ =
+ std::unique_ptr<DocHitInfoIteratorSectionRestrict>(
+ new DocHitInfoIteratorSectionRestrict(
+ std::move(trimmed_delegate.iterator_), &document_store_,
+ &schema_store_, std::move(type_property_filters_),
+ std::move(type_property_masks_), current_time_ms_));
+ return std::move(trimmed_delegate);
+}
+
int32_t DocHitInfoIteratorSectionRestrict::GetNumBlocksInspected() const {
return delegate_->GetNumBlocksInspected();
}
@@ -104,7 +237,14 @@ int32_t DocHitInfoIteratorSectionRestrict::GetNumLeafAdvanceCalls() const {
}
std::string DocHitInfoIteratorSectionRestrict::ToString() const {
- return absl_ports::StrCat(target_section_, ": ", delegate_->ToString());
+ std::string output = "";
+ for (auto it = type_property_filters_.cbegin();
+ it != type_property_filters_.cend(); it++) {
+ std::string paths = absl_ports::StrJoin(it->second, ",");
+ output += (it->first) + ":" + (paths) + "; ";
+ }
+ std::string result = "{" + output.substr(0, output.size() - 2) + "}: ";
+ return absl_ports::StrCat(result, delegate_->ToString());
}
} // namespace lib
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
index 2639e67..5d44ed7 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
@@ -19,10 +19,13 @@
#include <memory>
#include <string>
#include <string_view>
+#include <unordered_map>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-filter-data.h"
#include "icing/store/document-store.h"
namespace icing {
@@ -42,10 +45,18 @@ class DocHitInfoIteratorSectionRestrict : public DocHitInfoIterator {
explicit DocHitInfoIteratorSectionRestrict(
std::unique_ptr<DocHitInfoIterator> delegate,
const DocumentStore* document_store, const SchemaStore* schema_store,
- std::string target_section);
+ std::set<std::string> target_sections, int64_t current_time_ms);
+
+ explicit DocHitInfoIteratorSectionRestrict(
+ std::unique_ptr<DocHitInfoIterator> delegate,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ const SearchSpecProto& search_spec,
+ int64_t current_time_ms);
libtextclassifier3::Status Advance() override;
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
int32_t GetNumBlocksInspected() const override;
int32_t GetNumLeafAdvanceCalls() const override;
@@ -70,12 +81,51 @@ class DocHitInfoIteratorSectionRestrict : public DocHitInfoIterator {
}
private:
+ explicit DocHitInfoIteratorSectionRestrict(
+ std::unique_ptr<DocHitInfoIterator> delegate,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ std::unordered_map<std::string, std::set<std::string>>
+ type_property_filters,
+ std::unordered_map<std::string, SectionIdMask> type_property_masks,
+ int64_t current_time_ms);
+ // Calculates the section mask of allowed sections(determined by the property
+ // filters map) for the given schema type and caches the same for any future
+ // calls.
+ //
+ // Returns:
+ // - If type_property_filters_ has an entry for the given schema type or
+ // wildcard(*), return a bitwise or of section IDs in the schema type that
+ // that are also present in the relevant filter list.
+ // - Otherwise, return kSectionIdMaskAll.
+ SectionIdMask ComputeAndCacheSchemaTypeAllowedSectionsMask(
+ const std::string& schema_type);
+ // Generates a section mask for the given schema type and the target sections.
+ //
+ // Returns:
+ // - A bitwise or of section IDs in the schema_type that that are also
+ // present in the target_sections list.
+ // - If none of the sections in the schema_type are present in the
+ // target_sections list, return kSectionIdMaskNone.
+ // This is done by doing a bitwise or of the target section ids for the given
+ // schema type.
+ SectionIdMask GenerateSectionMask(const std::string& schema_type,
+ const std::set<std::string>&
+ target_sections) const;
+
std::unique_ptr<DocHitInfoIterator> delegate_;
const DocumentStore& document_store_;
const SchemaStore& schema_store_;
+ int64_t current_time_ms_;
- // Ensure that this does not outlive the underlying string value.
- std::string target_section_;
+ // Map of property filters per schema type. Supports wildcard(*) for schema
+ // type that will apply to all schema types that are not specifically
+ // specified in the mapping otherwise.
+ std::unordered_map<std::string, std::set<std::string>>
+ type_property_filters_;
+ // Mapping of schema type to the section mask of allowed sections for that
+ // schema type. This section mask is lazily calculated based on the specified
+ // property filters and cached for any future use.
+ std::unordered_map<std::string, SectionIdMask> type_property_masks_;
};
} // namespace lib
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
index 6d41e90..1500571 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
@@ -24,6 +24,7 @@
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator-and.h"
#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/proto/document.pb.h"
@@ -47,6 +48,9 @@ using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::IsEmpty;
+constexpr SectionId kIndexedSectionId0 = 0;
+constexpr SectionId kIndexedSectionId1 = 1;
+
class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test {
protected:
DocHitInfoIteratorSectionRestrictTest()
@@ -54,34 +58,57 @@ class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test {
void SetUp() override {
filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
- document_ =
- DocumentBuilder().SetKey("namespace", "uri").SetSchema("email").Build();
-
- schema_ = SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- .SetType("email")
- // Add an indexed property so we generate section
- // metadata on it
- .AddProperty(
- PropertyConfigBuilder()
- .SetName(indexed_property_)
- .SetDataTypeString(TERM_MATCH_EXACT,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
-
- // First and only indexed property, so it gets the first id of 0
- indexed_section_id_ = 0;
+ document1_ = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("email")
+ .Build();
+ document2_ = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("email")
+ .Build();
+ document3_ = DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetSchema("email")
+ .Build();
+
+ indexed_section_0 = "indexedSection0";
+ indexed_section_1 = "indexedSection1";
+ schema_ =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ // Add an indexed property so we generate section
+ // metadata on it
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(indexed_section_0)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(indexed_section_1)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- ICING_ASSERT_OK(schema_store_->SetSchema(schema_));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::Create(
+ &filesystem_, test_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
}
@@ -95,10 +122,12 @@ class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test {
std::unique_ptr<DocumentStore> document_store_;
const Filesystem filesystem_;
const std::string test_dir_;
+ std::string indexed_section_0;
+ std::string indexed_section_1;
SchemaProto schema_;
- DocumentProto document_;
- const std::string indexed_property_ = "subject";
- int indexed_section_id_;
+ DocumentProto document1_;
+ DocumentProto document2_;
+ DocumentProto document3_;
FakeClock fake_clock_;
};
@@ -106,7 +135,7 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
PopulateMatchedTermsStats_IncludesHitWithMatchingSection) {
// Populate the DocumentStore's FilterCache with this document's data
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- document_store_->Put(document_));
+ document_store_->Put(document1_));
// Arbitrary section ids for the documents in the DocHitInfoIterators.
// Created to test correct section_id_mask behavior.
@@ -128,7 +157,8 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
// get a result.
DocHitInfoIteratorSectionRestrict section_restrict_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- /*target_section=*/indexed_property_);
+ /*target_sections=*/{indexed_section_0},
+ fake_clock_.GetSystemTimeMilliseconds());
std::vector<TermMatchInfo> matched_terms_stats;
section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
@@ -156,7 +186,8 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, EmptyOriginalIterator) {
DocHitInfoIteratorSectionRestrict filtered_iterator(
std::move(original_iterator_empty), document_store_.get(),
- schema_store_.get(), /*target_section=*/"");
+ schema_store_.get(), /*target_sections=*/std::set<std::string>(),
+ fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
std::vector<TermMatchInfo> matched_terms_stats;
@@ -167,9 +198,9 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, EmptyOriginalIterator) {
TEST_F(DocHitInfoIteratorSectionRestrictTest, IncludesHitWithMatchingSection) {
// Populate the DocumentStore's FilterCache with this document's data
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- document_store_->Put(document_));
+ document_store_->Put(document1_));
- SectionIdMask section_id_mask = 1U << indexed_section_id_;
+ SectionIdMask section_id_mask = 1U << kIndexedSectionId0;
// Create a hit that was found in the indexed section
std::vector<DocHitInfo> doc_hit_infos = {
@@ -181,12 +212,103 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, IncludesHitWithMatchingSection) {
// Filtering for the indexed section name should get a result
DocHitInfoIteratorSectionRestrict section_restrict_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- indexed_property_);
+ /*target_sections=*/{indexed_section_0},
+ fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&section_restrict_iterator),
ElementsAre(document_id));
}
+TEST_F(DocHitInfoIteratorSectionRestrictTest,
+ IncludesHitWithMultipleMatchingSectionsWithMultipleSectionRestricts) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document1_));
+
+ SectionIdMask section_id_mask = 1U << kIndexedSectionId0;
+ section_id_mask |= 1U << kIndexedSectionId1;
+
+ // Create a hit that was found in the indexed section
+ std::vector<DocHitInfo> doc_hit_infos = {
+ DocHitInfo(document_id, section_id_mask)};
+
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+ // Filter for both target_sections
+ DocHitInfoIteratorSectionRestrict section_restrict_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ /*target_sections=*/{indexed_section_0, indexed_section_1},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ ICING_ASSERT_OK(section_restrict_iterator.Advance());
+ std::vector<SectionId> expected_section_ids = {kIndexedSectionId0,
+ kIndexedSectionId1};
+ EXPECT_THAT(section_restrict_iterator.doc_hit_info(),
+ EqualsDocHitInfo(document_id, expected_section_ids));
+ EXPECT_THAT(section_restrict_iterator.hit_intersect_section_ids_mask(),
+ Eq(section_id_mask));
+}
+
+TEST_F(DocHitInfoIteratorSectionRestrictTest,
+ IncludesHitWithMultipleMatchingSectionsWithSingleSectionRestrict) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document1_));
+
+ SectionIdMask section_id_mask = 1U << kIndexedSectionId0;
+ section_id_mask |= 1U << kIndexedSectionId1;
+
+ // Create a hit that was found in the indexed section
+ std::vector<DocHitInfo> doc_hit_infos = {
+ DocHitInfo(document_id, section_id_mask)};
+
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+ // Filter for both target_sections
+ DocHitInfoIteratorSectionRestrict section_restrict_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ /*target_sections=*/{indexed_section_1},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ ICING_ASSERT_OK(section_restrict_iterator.Advance());
+ std::vector<SectionId> expected_section_ids = {kIndexedSectionId1};
+ EXPECT_THAT(section_restrict_iterator.doc_hit_info(),
+ EqualsDocHitInfo(document_id, expected_section_ids));
+ EXPECT_THAT(section_restrict_iterator.hit_intersect_section_ids_mask(),
+ Eq(1U << kIndexedSectionId1));
+}
+
+TEST_F(DocHitInfoIteratorSectionRestrictTest,
+ IncludesHitWithSingleMatchingSectionsWithMultiSectionRestrict) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document1_));
+
+ SectionIdMask section_id_mask = 1U << kIndexedSectionId1;
+
+ // Create a hit that was found in the indexed section
+ std::vector<DocHitInfo> doc_hit_infos = {
+ DocHitInfo(document_id, section_id_mask)};
+
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+ // Filter for both target_sections
+ DocHitInfoIteratorSectionRestrict section_restrict_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ /*target_sections=*/{indexed_section_0, indexed_section_1},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ ICING_ASSERT_OK(section_restrict_iterator.Advance());
+ std::vector<SectionId> expected_section_ids = {kIndexedSectionId1};
+ EXPECT_THAT(section_restrict_iterator.doc_hit_info(),
+ EqualsDocHitInfo(document_id, expected_section_ids));
+ EXPECT_THAT(section_restrict_iterator.hit_intersect_section_ids_mask(),
+ Eq(1U << kIndexedSectionId1));
+}
+
TEST_F(DocHitInfoIteratorSectionRestrictTest, NoMatchingDocumentFilterData) {
// Create a hit with a document id that doesn't exist in the DocumentStore yet
std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(/*document_id_in=*/0)};
@@ -197,7 +319,7 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, NoMatchingDocumentFilterData) {
// Filtering for the indexed section name should get a result
DocHitInfoIteratorSectionRestrict section_restrict_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- /*target_section=*/"");
+ /*target_sections=*/{""}, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
std::vector<TermMatchInfo> matched_terms_stats;
@@ -209,9 +331,9 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
DoesntIncludeHitWithWrongSectionName) {
// Populate the DocumentStore's FilterCache with this document's data
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- document_store_->Put(document_));
+ document_store_->Put(document1_));
- SectionIdMask section_id_mask = 1U << indexed_section_id_;
+ SectionIdMask section_id_mask = 1U << kIndexedSectionId0;
// Create a hit that was found in the indexed section
std::vector<DocHitInfo> doc_hit_infos = {
@@ -223,7 +345,8 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
// Filtering for the indexed section name should get a result
DocHitInfoIteratorSectionRestrict section_restrict_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- "some_section_name");
+ /*target_sections=*/{"some_section_name"},
+ fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
std::vector<TermMatchInfo> matched_terms_stats;
@@ -235,7 +358,7 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
DoesntIncludeHitWithNoSectionIds) {
// Populate the DocumentStore's FilterCache with this document's data
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- document_store_->Put(document_));
+ document_store_->Put(document1_));
// Create a hit that doesn't exist in any sections, so it shouldn't match any
// section filters
@@ -247,7 +370,8 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
DocHitInfoIteratorSectionRestrict section_restrict_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- indexed_property_);
+ /*target_sections=*/{indexed_section_0},
+ fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
std::vector<TermMatchInfo> matched_terms_stats;
@@ -259,7 +383,7 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
DoesntIncludeHitWithDifferentSectionId) {
// Populate the DocumentStore's FilterCache with this document's data
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- document_store_->Put(document_));
+ document_store_->Put(document1_));
// Anything that's not 0, which is the indexed property
SectionId not_matching_section_id = 2;
@@ -267,14 +391,15 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
// Create a hit that exists in a different section, so it shouldn't match any
// section filters
std::vector<DocHitInfo> doc_hit_infos = {
- DocHitInfo(document_id, kSectionIdMaskNone << not_matching_section_id)};
+ DocHitInfo(document_id, UINT64_C(1) << not_matching_section_id)};
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
DocHitInfoIteratorSectionRestrict section_restrict_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- indexed_property_);
+ /*target_sections=*/{indexed_section_0},
+ fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
std::vector<TermMatchInfo> matched_terms_stats;
@@ -288,7 +413,7 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, GetNumBlocksInspected) {
DocHitInfoIteratorSectionRestrict section_restrict_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- /*target_section=*/"");
+ /*target_sections=*/{""}, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(section_restrict_iterator.GetNumBlocksInspected(), Eq(5));
}
@@ -299,11 +424,102 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, GetNumLeafAdvanceCalls) {
DocHitInfoIteratorSectionRestrict section_restrict_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- /*target_section=*/"");
+ /*target_sections=*/{""}, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(section_restrict_iterator.GetNumLeafAdvanceCalls(), Eq(6));
}
+TEST_F(DocHitInfoIteratorSectionRestrictTest,
+ TrimSectionRestrictIterator_TwoLayer) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1_));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2_));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store_->Put(document3_));
+
+ // 0 is the indexed property
+ SectionId matching_section_id = 0;
+ // Anything that's not 0, which is the indexed property
+ SectionId not_matching_section_id = 2;
+
+ // Build an interator tree like:
+ // Restrict
+ // |
+ // AND
+ // / \
+ // [1, 1],[2, 2] [3, 2]
+ std::vector<DocHitInfo> left_infos = {
+ DocHitInfo(document_id1, 1U << matching_section_id),
+ DocHitInfo(document_id2, 1U << not_matching_section_id)};
+ std::vector<DocHitInfo> right_infos = {
+ DocHitInfo(document_id3, 1U << not_matching_section_id)};
+
+ std::unique_ptr<DocHitInfoIterator> left_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(left_infos);
+ std::unique_ptr<DocHitInfoIterator> right_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(right_infos, "term", 10);
+
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorAnd>(std::move(left_iterator),
+ std::move(right_iterator));
+
+ DocHitInfoIteratorSectionRestrict section_restrict_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ {indexed_section_0}, fake_clock_.GetSystemTimeMilliseconds());
+
+ // The trimmed tree.
+ // Restrict
+ // |
+ // [1, 1],[2, 2]
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocHitInfoIterator::TrimmedNode node,
+ std::move(section_restrict_iterator).TrimRightMostNode());
+
+ EXPECT_THAT(GetDocumentIds(node.iterator_.get()), ElementsAre(document_id1));
+ EXPECT_THAT(node.term_, Eq("term"));
+ EXPECT_THAT(node.term_start_index_, Eq(10));
+ EXPECT_THAT(node.target_section_, Eq(""));
+}
+
+TEST_F(DocHitInfoIteratorSectionRestrictTest, TrimSectionRestrictIterator) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1_));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2_));
+
+ // 0 is the indexed property
+ SectionId matching_section_id = 0;
+ // Anything that's not 0, which is the indexed property
+ SectionId not_matching_section_id = 2;
+
+ // Build an interator tree like:
+ // Restrict
+ // |
+ // [1, 1],[2, 2]
+ std::vector<DocHitInfo> doc_infos = {
+ DocHitInfo(document_id1, 1U << matching_section_id),
+ DocHitInfo(document_id2, 1U << not_matching_section_id)};
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_infos, "term", 10);
+
+ DocHitInfoIteratorSectionRestrict section_restrict_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ {indexed_section_0}, fake_clock_.GetSystemTimeMilliseconds());
+
+ // The trimmed tree has null iterator but has target section.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocHitInfoIterator::TrimmedNode node,
+ std::move(section_restrict_iterator).TrimRightMostNode());
+
+ EXPECT_THAT(node.iterator_, testing::IsNull());
+ EXPECT_THAT(node.term_, Eq("term"));
+ EXPECT_THAT(node.term_start_index_, Eq(10));
+ EXPECT_THAT(node.target_section_, Eq(indexed_section_0));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/index/iterator/doc-hit-info-iterator-test-util.h b/icing/index/iterator/doc-hit-info-iterator-test-util.h
index fe3a4b9..a77b91c 100644
--- a/icing/index/iterator/doc-hit-info-iterator-test-util.h
+++ b/icing/index/iterator/doc-hit-info-iterator-test-util.h
@@ -80,8 +80,12 @@ class DocHitInfoIteratorDummy : public DocHitInfoIterator {
: doc_hit_infos_(std::move(doc_hit_infos)), term_(std::move(term)) {}
explicit DocHitInfoIteratorDummy(const std::vector<DocHitInfo>& doc_hit_infos,
- std::string term = "")
- : term_(std::move(term)) {
+ std::string term = "",
+ int term_start_index = 0,
+ int unnormalized_term_length = 0)
+ : term_(std::move(term)),
+ term_start_index_(term_start_index),
+ unnormalized_term_length_(unnormalized_term_length) {
for (auto& doc_hit_info : doc_hit_infos) {
doc_hit_infos_.push_back(DocHitInfoTermFrequencyPair(doc_hit_info));
}
@@ -98,6 +102,12 @@ class DocHitInfoIteratorDummy : public DocHitInfoIterator {
"No more DocHitInfos in iterator");
}
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override {
+ DocHitInfoIterator::TrimmedNode node = {nullptr, term_, term_start_index_,
+ unnormalized_term_length_};
+ return node;
+ }
+
// Imitates behavior of DocHitInfoIteratorTermMain/DocHitInfoIteratorTermLite
void PopulateMatchedTermsStats(
std::vector<TermMatchInfo>* matched_terms_stats,
@@ -170,6 +180,8 @@ class DocHitInfoIteratorDummy : public DocHitInfoIterator {
int32_t num_leaf_advance_calls_ = 0;
std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos_;
std::string term_;
+ int term_start_index_;
+ int unnormalized_term_length_;
};
inline std::vector<DocumentId> GetDocumentIds(DocHitInfoIterator* iterator) {
diff --git a/icing/index/iterator/doc-hit-info-iterator.h b/icing/index/iterator/doc-hit-info-iterator.h
index b73b264..d8cd3ad 100644
--- a/icing/index/iterator/doc-hit-info-iterator.h
+++ b/icing/index/iterator/doc-hit-info-iterator.h
@@ -62,6 +62,44 @@ struct TermMatchInfo {
// }
class DocHitInfoIterator {
public:
+ struct TrimmedNode {
+ // the query results which we should only search for suggestion in these
+ // documents.
+ std::unique_ptr<DocHitInfoIterator> iterator_;
+ // term of the trimmed node which we need to generate suggested strings.
+ std::string term_;
+ // the string in the query which indicates the target section we should
+ // search for suggestions.
+ std::string target_section_;
+ // the start index of the current term in the given search query.
+ int term_start_index_;
+ // The length of the given unnormalized term in the search query
+ int unnormalized_term_length_;
+
+ TrimmedNode(std::unique_ptr<DocHitInfoIterator> iterator, std::string term,
+ int term_start_index, int unnormalized_term_length)
+ : iterator_(std::move(iterator)),
+ term_(term),
+ target_section_(""),
+ term_start_index_(term_start_index),
+ unnormalized_term_length_(unnormalized_term_length) {}
+ };
+
+ // Trim the rightmost iterator of the iterator tree.
+ // This is to support search suggestions for the last term which is the
+ // right-most node of the root iterator tree. Only support trim the right-most
+ // node on the AND, AND_NARY, OR, OR_NARY, OR_LEAF, Filter, and the
+ // property-in-schema-check iterator.
+ //
+ // After calling this method, this iterator is no longer usable. Please use
+ // the returned iterator.
+ // Returns:
+ // the new iterator without the right-most child, if was able to trim the
+ // right-most node.
+ // nullptr if the current iterator should be trimmed.
+ // INVALID_ARGUMENT if the right-most node is not suppose to be trimmed.
+ virtual libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && = 0;
+
virtual ~DocHitInfoIterator() = default;
// Returns:
diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.cc b/icing/index/lite/doc-hit-info-iterator-term-lite.cc
index 0a3317c..acf3b33 100644
--- a/icing/index/lite/doc-hit-info-iterator-term-lite.cc
+++ b/icing/index/lite/doc-hit-info-iterator-term-lite.cc
@@ -15,6 +15,7 @@
#include "icing/index/lite/doc-hit-info-iterator-term-lite.h"
#include <array>
+#include <cstddef>
#include <cstdint>
#include <numeric>
@@ -73,12 +74,20 @@ libtextclassifier3::Status DocHitInfoIteratorTermLite::Advance() {
return libtextclassifier3::Status::OK;
}
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorTermLite::TrimRightMostNode() && {
+ // Leaf iterator should trim itself.
+ DocHitInfoIterator::TrimmedNode node = {nullptr, term_, term_start_index_,
+ unnormalized_term_length_};
+ return node;
+}
+
libtextclassifier3::Status DocHitInfoIteratorTermLiteExact::RetrieveMoreHits() {
// Exact match only. All hits in lite lexicon are exact.
ICING_ASSIGN_OR_RETURN(uint32_t tvi, lite_index_->GetTermId(term_));
ICING_ASSIGN_OR_RETURN(uint32_t term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- lite_index_->AppendHits(
+ lite_index_->FetchHits(
term_id, section_restrict_mask_,
/*only_from_prefix_sections=*/false,
/*score_by=*/
@@ -105,7 +114,7 @@ DocHitInfoIteratorTermLitePrefix::RetrieveMoreHits() {
ICING_ASSIGN_OR_RETURN(
uint32_t term_id,
term_id_codec_->EncodeTvi(it.GetValueIndex(), TviType::LITE));
- lite_index_->AppendHits(
+ lite_index_->FetchHits(
term_id, section_restrict_mask_,
/*only_from_prefix_sections=*/!exact_match,
/*score_by=*/
diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.h b/icing/index/lite/doc-hit-info-iterator-term-lite.h
index bd8a6ee..873ea89 100644
--- a/icing/index/lite/doc-hit-info-iterator-term-lite.h
+++ b/icing/index/lite/doc-hit-info-iterator-term-lite.h
@@ -33,9 +33,13 @@ class DocHitInfoIteratorTermLite : public DocHitInfoIterator {
explicit DocHitInfoIteratorTermLite(const TermIdCodec* term_id_codec,
LiteIndex* lite_index,
const std::string& term,
+ int term_start_index,
+ int unnormalized_term_length,
SectionIdMask section_restrict_mask,
bool need_hit_term_frequency)
: term_(term),
+ term_start_index_(term_start_index),
+ unnormalized_term_length_(unnormalized_term_length),
lite_index_(lite_index),
cached_hits_idx_(-1),
term_id_codec_(term_id_codec),
@@ -45,6 +49,8 @@ class DocHitInfoIteratorTermLite : public DocHitInfoIterator {
libtextclassifier3::Status Advance() override;
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
int32_t GetNumBlocksInspected() const override { return 0; }
int32_t GetNumLeafAdvanceCalls() const override { return num_advance_calls_; }
@@ -91,6 +97,10 @@ class DocHitInfoIteratorTermLite : public DocHitInfoIterator {
virtual libtextclassifier3::Status RetrieveMoreHits() = 0;
const std::string term_;
+ // The start index of the given term in the search query
+ int term_start_index_;
+ // The length of the given unnormalized term in the search query
+ int unnormalized_term_length_;
LiteIndex* const lite_index_;
// Stores hits retrieved from the index. This may only be a subset of the hits
// that are present in the index. Current value pointed to by the Iterator is
@@ -111,9 +121,12 @@ class DocHitInfoIteratorTermLiteExact : public DocHitInfoIteratorTermLite {
explicit DocHitInfoIteratorTermLiteExact(const TermIdCodec* term_id_codec,
LiteIndex* lite_index,
const std::string& term,
+ int term_start_index,
+ int unnormalized_term_length,
SectionIdMask section_id_mask,
bool need_hit_term_frequency)
: DocHitInfoIteratorTermLite(term_id_codec, lite_index, term,
+ term_start_index, unnormalized_term_length,
section_id_mask, need_hit_term_frequency) {}
std::string ToString() const override;
@@ -127,9 +140,12 @@ class DocHitInfoIteratorTermLitePrefix : public DocHitInfoIteratorTermLite {
explicit DocHitInfoIteratorTermLitePrefix(const TermIdCodec* term_id_codec,
LiteIndex* lite_index,
const std::string& term,
+ int term_start_index,
+ int unnormalized_term_length,
SectionIdMask section_id_mask,
bool need_hit_term_frequency)
: DocHitInfoIteratorTermLite(term_id_codec, lite_index, term,
+ term_start_index, unnormalized_term_length,
section_id_mask, need_hit_term_frequency) {}
std::string ToString() const override;
diff --git a/icing/index/lite/lite-index-options.cc b/icing/index/lite/lite-index-options.cc
index 29075f8..8780d45 100644
--- a/icing/index/lite/lite-index-options.cc
+++ b/icing/index/lite/lite-index-options.cc
@@ -14,6 +14,8 @@
#include "icing/index/lite/lite-index-options.h"
+#include <cstdint>
+
#include "icing/index/lite/term-id-hit-pair.h"
namespace icing {
@@ -64,9 +66,13 @@ IcingDynamicTrie::Options CalculateTrieOptions(uint32_t hit_buffer_size) {
} // namespace
LiteIndexOptions::LiteIndexOptions(const std::string& filename_base,
- uint32_t hit_buffer_want_merge_bytes)
+ uint32_t hit_buffer_want_merge_bytes,
+ bool hit_buffer_sort_at_indexing,
+ uint32_t hit_buffer_sort_threshold_bytes)
: filename_base(filename_base),
- hit_buffer_want_merge_bytes(hit_buffer_want_merge_bytes) {
+ hit_buffer_want_merge_bytes(hit_buffer_want_merge_bytes),
+ hit_buffer_sort_at_indexing(hit_buffer_sort_at_indexing),
+ hit_buffer_sort_threshold_bytes(hit_buffer_sort_threshold_bytes) {
hit_buffer_size = CalculateHitBufferSize(hit_buffer_want_merge_bytes);
lexicon_options = CalculateTrieOptions(hit_buffer_size);
display_mappings_options = CalculateTrieOptions(hit_buffer_size);
diff --git a/icing/index/lite/lite-index-options.h b/icing/index/lite/lite-index-options.h
index ae58802..9f8452c 100644
--- a/icing/index/lite/lite-index-options.h
+++ b/icing/index/lite/lite-index-options.h
@@ -27,7 +27,9 @@ struct LiteIndexOptions {
// hit_buffer_want_merge_bytes and the logic in CalculateHitBufferSize and
// CalculateTrieOptions.
LiteIndexOptions(const std::string& filename_base,
- uint32_t hit_buffer_want_merge_bytes);
+ uint32_t hit_buffer_want_merge_bytes,
+ bool hit_buffer_sort_at_indexing,
+ uint32_t hit_buffer_sort_threshold_bytes);
IcingDynamicTrie::Options lexicon_options;
IcingDynamicTrie::Options display_mappings_options;
@@ -35,6 +37,8 @@ struct LiteIndexOptions {
std::string filename_base;
uint32_t hit_buffer_want_merge_bytes = 0;
uint32_t hit_buffer_size = 0;
+ bool hit_buffer_sort_at_indexing = false;
+ uint32_t hit_buffer_sort_threshold_bytes = 0;
};
} // namespace lib
diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc
index 1ea945c..ec7141a 100644
--- a/icing/index/lite/lite-index.cc
+++ b/icing/index/lite/lite-index.cc
@@ -30,11 +30,14 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/mutex.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/file/filesystem.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/hit/hit.h"
#include "icing/index/lite/lite-index-header.h"
+#include "icing/index/lite/term-id-hit-pair.h"
+#include "icing/index/term-id-codec.h"
#include "icing/index/term-property-id.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/legacy/core/icing-timer.h"
@@ -43,10 +46,13 @@
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/legacy/index/icing-mmapper.h"
#include "icing/proto/debug.pb.h"
+#include "icing/proto/scoring.pb.h"
#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
+#include "icing/store/namespace-id.h"
+#include "icing/store/suggestion-result-checker.h"
#include "icing/util/crc32.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
@@ -114,6 +120,7 @@ libtextclassifier3::Status LiteIndex::Initialize() {
uint64_t file_size;
IcingTimer timer;
+ absl_ports::unique_lock l(&mutex_);
if (!lexicon_.CreateIfNotExist(options_.lexicon_options) ||
!lexicon_.Init()) {
return absl_ports::InternalError("Failed to initialize lexicon trie");
@@ -158,7 +165,7 @@ libtextclassifier3::Status LiteIndex::Initialize() {
}
// Set up header.
- header_mmap_.Remap(hit_buffer_fd_.get(), 0, header_size());
+ header_mmap_.Remap(hit_buffer_fd_.get(), kHeaderFileOffset, header_size());
header_ = std::make_unique<LiteIndex_HeaderImpl>(
reinterpret_cast<LiteIndex_HeaderImpl::HeaderData*>(
header_mmap_.address()));
@@ -173,7 +180,7 @@ libtextclassifier3::Status LiteIndex::Initialize() {
UpdateChecksum();
} else {
- header_mmap_.Remap(hit_buffer_fd_.get(), 0, header_size());
+ header_mmap_.Remap(hit_buffer_fd_.get(), kHeaderFileOffset, header_size());
header_ = std::make_unique<LiteIndex_HeaderImpl>(
reinterpret_cast<LiteIndex_HeaderImpl::HeaderData*>(
header_mmap_.address()));
@@ -241,6 +248,7 @@ Crc32 LiteIndex::ComputeChecksum() {
libtextclassifier3::Status LiteIndex::Reset() {
IcingTimer timer;
+ absl_ports::unique_lock l(&mutex_);
// TODO(b/140436942): When these components have been changed to return errors
// they should be propagated from here.
lexicon_.Clear();
@@ -253,11 +261,13 @@ libtextclassifier3::Status LiteIndex::Reset() {
}
void LiteIndex::Warm() {
+ absl_ports::shared_lock l(&mutex_);
hit_buffer_.Warm();
lexicon_.Warm();
}
libtextclassifier3::Status LiteIndex::PersistToDisk() {
+ absl_ports::unique_lock l(&mutex_);
bool success = true;
if (!lexicon_.Sync()) {
ICING_VLOG(1) << "Failed to sync the lexicon.";
@@ -279,6 +289,7 @@ void LiteIndex::UpdateChecksum() {
libtextclassifier3::StatusOr<uint32_t> LiteIndex::InsertTerm(
const std::string& term, TermMatchType::Code term_match_type,
NamespaceId namespace_id) {
+ absl_ports::unique_lock l(&mutex_);
uint32_t tvi;
libtextclassifier3::Status status =
lexicon_.Insert(term.c_str(), "", &tvi, false);
@@ -287,13 +298,19 @@ libtextclassifier3::StatusOr<uint32_t> LiteIndex::InsertTerm(
<< status.error_message();
return status;
}
- ICING_RETURN_IF_ERROR(UpdateTermProperties(
+ ICING_RETURN_IF_ERROR(UpdateTermPropertiesImpl(
tvi, term_match_type == TermMatchType::PREFIX, namespace_id));
return tvi;
}
libtextclassifier3::Status LiteIndex::UpdateTermProperties(
uint32_t tvi, bool hasPrefixHits, NamespaceId namespace_id) {
+ absl_ports::unique_lock l(&mutex_);
+ return UpdateTermPropertiesImpl(tvi, hasPrefixHits, namespace_id);
+}
+
+libtextclassifier3::Status LiteIndex::UpdateTermPropertiesImpl(
+ uint32_t tvi, bool hasPrefixHits, NamespaceId namespace_id) {
if (hasPrefixHits &&
!lexicon_.SetProperty(tvi, GetHasHitsInPrefixSectionPropertyId())) {
return absl_ports::ResourceExhaustedError(
@@ -309,6 +326,7 @@ libtextclassifier3::Status LiteIndex::UpdateTermProperties(
}
libtextclassifier3::Status LiteIndex::AddHit(uint32_t term_id, const Hit& hit) {
+ absl_ports::unique_lock l(&mutex_);
if (is_full()) {
return absl_ports::ResourceExhaustedError("Hit buffer is full!");
}
@@ -329,6 +347,7 @@ libtextclassifier3::Status LiteIndex::AddHit(uint32_t term_id, const Hit& hit) {
libtextclassifier3::StatusOr<uint32_t> LiteIndex::GetTermId(
const std::string& term) const {
+ absl_ports::shared_lock l(&mutex_);
char dummy;
uint32_t tvi;
if (!lexicon_.Find(term.c_str(), &dummy, &tvi)) {
@@ -338,94 +357,195 @@ libtextclassifier3::StatusOr<uint32_t> LiteIndex::GetTermId(
return tvi;
}
-int LiteIndex::AppendHits(
+void LiteIndex::ScoreAndAppendFetchedHit(
+ const Hit& hit, SectionIdMask section_id_mask,
+ bool only_from_prefix_sections,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
+ const SuggestionResultChecker* suggestion_result_checker,
+ DocumentId& last_document_id, bool& is_last_document_desired,
+ int& total_score_out, std::vector<DocHitInfo>* hits_out,
+ std::vector<Hit::TermFrequencyArray>* term_frequency_out) const {
+ // Check sections.
+ if (((UINT64_C(1) << hit.section_id()) & section_id_mask) == 0) {
+ return;
+ }
+ // Check prefix section only.
+ if (only_from_prefix_sections && !hit.is_in_prefix_section()) {
+ return;
+ }
+ // Check whether this Hit is desired.
+ // TODO(b/230553264) Move common logic into helper function once we support
+ // score term by prefix_hit in lite_index.
+ DocumentId document_id = hit.document_id();
+ bool is_new_document = document_id != last_document_id;
+ if (is_new_document) {
+ last_document_id = document_id;
+ is_last_document_desired =
+ suggestion_result_checker == nullptr ||
+ suggestion_result_checker->BelongsToTargetResults(document_id,
+ hit.section_id());
+ }
+ if (!is_last_document_desired) {
+ // The document is removed or expired or not desired.
+ return;
+ }
+
+ // Score the hit by the strategy
+ switch (score_by) {
+ case SuggestionScoringSpecProto::SuggestionRankingStrategy::NONE:
+ total_score_out = 1;
+ break;
+ case SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT:
+ if (is_new_document) {
+ ++total_score_out;
+ }
+ break;
+ case SuggestionScoringSpecProto::SuggestionRankingStrategy::TERM_FREQUENCY:
+ if (hit.has_term_frequency()) {
+ total_score_out += hit.term_frequency();
+ } else {
+ ++total_score_out;
+ }
+ break;
+ }
+
+ // Append the Hit or update hit section to the output vector.
+ if (is_new_document && hits_out != nullptr) {
+ hits_out->push_back(DocHitInfo(document_id));
+ if (term_frequency_out != nullptr) {
+ term_frequency_out->push_back(Hit::TermFrequencyArray());
+ }
+ }
+ if (hits_out != nullptr) {
+ hits_out->back().UpdateSection(hit.section_id());
+ if (term_frequency_out != nullptr) {
+ term_frequency_out->back()[hit.section_id()] = hit.term_frequency();
+ }
+ }
+}
+
+int LiteIndex::FetchHits(
uint32_t term_id, SectionIdMask section_id_mask,
bool only_from_prefix_sections,
SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
const SuggestionResultChecker* suggestion_result_checker,
std::vector<DocHitInfo>* hits_out,
std::vector<Hit::TermFrequencyArray>* term_frequency_out) {
- int score = 0;
+ bool need_sort_at_querying = false;
+ {
+ absl_ports::shared_lock l(&mutex_);
+
+ // We sort here when:
+ // 1. We don't enable sorting at indexing time (i.e. we sort at querying
+ // time), and there is an unsorted tail portion. OR
+ // 2. The unsorted tail size exceeds the hit_buffer_sort_threshold,
+ // regardless of whether or not hit_buffer_sort_at_indexing is enabled.
+ // This is more of a sanity check. We should not really be encountering
+ // this case.
+ need_sort_at_querying = NeedSortAtQuerying();
+ }
+ if (need_sort_at_querying) {
+ absl_ports::unique_lock l(&mutex_);
+ IcingTimer timer;
+
+ // Transition from shared_lock to unique_lock is safe here because it
+ // doesn't hurt to sort again if sorting was done already by another thread
+ // after need_sort_at_querying is evaluated.
+ // We check need_sort_at_querying to improve query concurrency as threads
+ // can avoid acquiring the unique lock if no sorting is needed.
+ SortHitsImpl();
+
+ if (options_.hit_buffer_sort_at_indexing) {
+ // This is the second case for sort. Log as this should be a very rare
+ // occasion.
+ ICING_LOG(WARNING) << "Sorting HitBuffer at querying time when "
+ "hit_buffer_sort_at_indexing is enabled. Sort and "
+ "merge HitBuffer in "
+ << timer.Elapsed() * 1000 << " ms.";
+ }
+ }
+
+ // This downgrade from an unique_lock to a shared_lock is safe because we're
+ // searching for the term in the searchable (sorted) section of the HitBuffer
+ // only in Seek().
+ // Any operations that might execute in between the transition of downgrading
+ // the lock here are guaranteed not to alter the searchable section (or the
+ // LiteIndex) due to a global lock in IcingSearchEngine.
+ absl_ports::shared_lock l(&mutex_);
+
+ // Search in the HitBuffer array for Hits with the corresponding term_id.
+ // Hits are added in increasing order of doc ids, so hits that get appended
+ // later have larger docIds. This means that:
+ // 1. Hits in the unsorted tail will have larger docIds than hits in the
+ // sorted portion.
+ // 2. Hits at the end of the unsorted tail will have larger docIds than hits
+ // in the front of the tail.
+ // We want to retrieve hits in descending order of docIds. Therefore we should
+ // search by doing:
+ // 1. Linear search first in reverse iteration order over the unsorted tail
+ // portion.
+ // 2. Followed by binary search on the sorted portion.
+ const TermIdHitPair* array = hit_buffer_.array_cast<TermIdHitPair>();
+
DocumentId last_document_id = kInvalidDocumentId;
// Record whether the last document belongs to the given namespaces.
bool is_last_document_desired = false;
- for (uint32_t idx = Seek(term_id); idx < header_->cur_size(); idx++) {
- TermIdHitPair term_id_hit_pair(
- hit_buffer_.array_cast<TermIdHitPair>()[idx]);
- if (term_id_hit_pair.term_id() != term_id) break;
-
- const Hit& hit = term_id_hit_pair.hit();
- // Check sections.
- if (((UINT64_C(1) << hit.section_id()) & section_id_mask) == 0) {
- continue;
- }
- // Check prefix section only.
- if (only_from_prefix_sections && !hit.is_in_prefix_section()) {
- continue;
- }
- // TODO(b/230553264) Move common logic into helper function once we support
- // score term by prefix_hit in lite_index.
- // Check whether this Hit is desired.
- DocumentId document_id = hit.document_id();
- bool is_new_document = document_id != last_document_id;
- if (is_new_document) {
- last_document_id = document_id;
- is_last_document_desired =
- suggestion_result_checker == nullptr ||
- suggestion_result_checker->BelongsToTargetResults(document_id,
- hit.section_id());
- }
- if (!is_last_document_desired) {
- // The document is removed or expired or not desired.
- continue;
+ int total_score = 0;
+
+ // Linear search over unsorted tail in reverse iteration order.
+ // This should only be performed when hit_buffer_sort_at_indexing is enabled.
+ // When disabled, the entire HitBuffer should be sorted already and only
+ // binary search is needed.
+ if (options_.hit_buffer_sort_at_indexing) {
+ uint32_t unsorted_length = header_->cur_size() - header_->searchable_end();
+ for (uint32_t i = 1; i <= unsorted_length; ++i) {
+ TermIdHitPair term_id_hit_pair = array[header_->cur_size() - i];
+ if (term_id_hit_pair.term_id() == term_id) {
+ // We've found a matched hit.
+ const Hit& matched_hit = term_id_hit_pair.hit();
+ // Score the hit and add to total_score. Also add the hits and its term
+ // frequency info to hits_out and term_frequency_out if the two vectors
+ // are non-null.
+ ScoreAndAppendFetchedHit(matched_hit, section_id_mask,
+ only_from_prefix_sections, score_by,
+ suggestion_result_checker, last_document_id,
+ is_last_document_desired, total_score,
+ hits_out, term_frequency_out);
+ }
}
+ }
- // Score the hit by the strategy
- switch (score_by) {
- case SuggestionScoringSpecProto::SuggestionRankingStrategy::NONE:
- score = 1;
- break;
- case SuggestionScoringSpecProto::SuggestionRankingStrategy::
- DOCUMENT_COUNT:
- if (is_new_document) {
- ++score;
- }
- break;
- case SuggestionScoringSpecProto::SuggestionRankingStrategy::
- TERM_FREQUENCY:
- if (hit.has_term_frequency()) {
- score += hit.term_frequency();
- } else {
- ++score;
- }
- break;
+ // Do binary search over the sorted section and repeat the above steps.
+ TermIdHitPair target_term_id_hit_pair(
+ term_id, Hit(Hit::kMaxDocumentIdSortValue, Hit::kDefaultTermFrequency));
+ for (const TermIdHitPair* ptr = std::lower_bound(
+ array, array + header_->searchable_end(), target_term_id_hit_pair);
+ ptr < array + header_->searchable_end(); ++ptr) {
+ if (ptr->term_id() != term_id) {
+ // We've processed all matches. Stop iterating further.
+ break;
}
- // Append the Hit or update hit section to the output vector.
- if (is_new_document && hits_out != nullptr) {
- hits_out->push_back(DocHitInfo(document_id));
- if (term_frequency_out != nullptr) {
- term_frequency_out->push_back(Hit::TermFrequencyArray());
- }
- }
- if (hits_out != nullptr) {
- hits_out->back().UpdateSection(hit.section_id());
- if (term_frequency_out != nullptr) {
- term_frequency_out->back()[hit.section_id()] = hit.term_frequency();
- }
- }
+ const Hit& matched_hit = ptr->hit();
+ // Score the hit and add to total_score. Also add the hits and its term
+ // frequency info to hits_out and term_frequency_out if the two vectors are
+ // non-null.
+ ScoreAndAppendFetchedHit(
+ matched_hit, section_id_mask, only_from_prefix_sections, score_by,
+ suggestion_result_checker, last_document_id, is_last_document_desired,
+ total_score, hits_out, term_frequency_out);
}
- return score;
+ return total_score;
}
libtextclassifier3::StatusOr<int> LiteIndex::ScoreHits(
uint32_t term_id,
SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
const SuggestionResultChecker* suggestion_result_checker) {
- return AppendHits(term_id, kSectionIdMaskAll,
- /*only_from_prefix_sections=*/false, score_by,
- suggestion_result_checker,
- /*hits_out=*/nullptr);
+ return FetchHits(term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false, score_by,
+ suggestion_result_checker,
+ /*hits_out=*/nullptr);
}
bool LiteIndex::is_full() const {
@@ -434,6 +554,7 @@ bool LiteIndex::is_full() const {
}
std::string LiteIndex::GetDebugInfo(DebugInfoVerbosity::Code verbosity) {
+ absl_ports::unique_lock l(&mutex_);
std::string res;
std::string lexicon_info;
lexicon_.GetDebugInfo(verbosity, &lexicon_info);
@@ -468,6 +589,7 @@ libtextclassifier3::StatusOr<int64_t> LiteIndex::GetElementsSize() const {
IndexStorageInfoProto LiteIndex::GetStorageInfo(
IndexStorageInfoProto storage_info) const {
+ absl_ports::shared_lock l(&mutex_);
int64_t header_and_hit_buffer_file_size =
filesystem_->GetFileSize(hit_buffer_fd_.get());
storage_info.set_lite_index_hit_buffer_size(
@@ -481,7 +603,7 @@ IndexStorageInfoProto LiteIndex::GetStorageInfo(
return storage_info;
}
-void LiteIndex::SortHits() {
+void LiteIndex::SortHitsImpl() {
// Make searchable by sorting by hit buffer.
uint32_t sort_len = header_->cur_size() - header_->searchable_end();
if (sort_len <= 0) {
@@ -512,31 +634,17 @@ void LiteIndex::SortHits() {
UpdateChecksum();
}
-uint32_t LiteIndex::Seek(uint32_t term_id) {
- SortHits();
-
- // Binary search for our term_id. Make sure we get the first
- // element. Using kBeginSortValue ensures this for the hit value.
- TermIdHitPair term_id_hit_pair(
- term_id, Hit(Hit::kMaxDocumentIdSortValue, Hit::kDefaultTermFrequency));
-
- const TermIdHitPair::Value* array =
- hit_buffer_.array_cast<TermIdHitPair::Value>();
- const TermIdHitPair::Value* ptr = std::lower_bound(
- array, array + header_->cur_size(), term_id_hit_pair.value());
- return ptr - array;
-}
-
libtextclassifier3::Status LiteIndex::Optimize(
const std::vector<DocumentId>& document_id_old_to_new,
const TermIdCodec* term_id_codec, DocumentId new_last_added_document_id) {
+ absl_ports::unique_lock l(&mutex_);
header_->set_last_added_docid(new_last_added_document_id);
if (header_->cur_size() == 0) {
return libtextclassifier3::Status::OK;
}
// Sort the hits so that hits with the same term id will be grouped together,
// which helps later to determine which terms will be unused after compaction.
- SortHits();
+ SortHitsImpl();
uint32_t new_size = 0;
uint32_t curr_term_id = 0;
uint32_t curr_tvi = 0;
diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h
index e4fb686..288602a 100644
--- a/icing/index/lite/lite-index.h
+++ b/icing/index/lite/lite-index.h
@@ -20,6 +20,7 @@
#define ICING_INDEX_LITE_INDEX_H_
#include <cstdint>
+#include <iterator>
#include <limits>
#include <memory>
#include <string>
@@ -27,6 +28,8 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/mutex.h"
+#include "icing/absl_ports/thread_annotations.h"
#include "icing/file/filesystem.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/hit/hit.h"
@@ -46,17 +49,23 @@
#include "icing/store/document-id.h"
#include "icing/store/namespace-id.h"
#include "icing/store/suggestion-result-checker.h"
-#include "icing/util/bit-util.h"
#include "icing/util/crc32.h"
namespace icing {
namespace lib {
+// The LiteIndex is go/thread-compatible. Operations on the same data member
+// object interfere with each other, unless they are guaranteed not to mutate
+// the object (In the case of LiteIndex, this means all const methods,
+// FetchHits and ScoreHits).
class LiteIndex {
public:
// An entry in the hit buffer.
using Options = LiteIndexOptions;
+ // Offset for the LiteIndex_Header in the hit buffer mmap.
+ static constexpr uint32_t kHeaderFileOffset = 0;
+
// Updates checksum of subcomponents.
~LiteIndex();
@@ -72,25 +81,22 @@ class LiteIndex {
// Resets all internal members of the index. Returns OK if all operations were
// successful.
- libtextclassifier3::Status Reset();
+ libtextclassifier3::Status Reset() ICING_LOCKS_EXCLUDED(mutex_);
// Advises the OS to cache pages in the index, which will be accessed for a
// query soon.
- void Warm();
+ void Warm() ICING_LOCKS_EXCLUDED(mutex_);
// Syncs all modified files in the index to disk.
//
// Returns:
// OK on success
// INTERNAL on I/O error
- libtextclassifier3::Status PersistToDisk();
-
- // Calculate the checksum of all sub-components of the LiteIndex
- Crc32 ComputeChecksum();
+ libtextclassifier3::Status PersistToDisk() ICING_LOCKS_EXCLUDED(mutex_);
// Returns term_id if term found, NOT_FOUND otherwise.
libtextclassifier3::StatusOr<uint32_t> GetTermId(
- const std::string& term) const;
+ const std::string& term) const ICING_LOCKS_EXCLUDED(mutex_);
// Returns an iterator for all terms for which 'prefix' is a prefix.
class PrefixIterator {
@@ -109,7 +115,11 @@ class LiteIndex {
IcingDynamicTrie::Iterator delegate_;
};
- PrefixIterator FindTermPrefixes(const std::string& prefix) const {
+ // WARNING: Subsequent calls to AddHit/InsertTerm may invalidate any
+ // previously returned PrefixIterator.
+ PrefixIterator FindTermPrefixes(const std::string& prefix) const
+ ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::shared_lock l(&mutex_);
return PrefixIterator(IcingDynamicTrie::Iterator(lexicon_, prefix.c_str()));
}
@@ -120,7 +130,7 @@ class LiteIndex {
// RESOURCE_EXHAUSTED if lexicon is full or no disk space is available
libtextclassifier3::StatusOr<uint32_t> InsertTerm(
const std::string& term, TermMatchType::Code term_match_type,
- NamespaceId namespace_id);
+ NamespaceId namespace_id) ICING_LOCKS_EXCLUDED(mutex_);
// Updates term properties by setting hasPrefixHits and namespace id of the
// term.
@@ -130,7 +140,8 @@ class LiteIndex {
// RESOURCE_EXHAUSTED if no disk space is available
libtextclassifier3::Status UpdateTermProperties(uint32_t tvi,
bool hasPrefixHits,
- NamespaceId namespace_id);
+ NamespaceId namespace_id)
+ ICING_LOCKS_EXCLUDED(mutex_);
// Append hit to buffer. term_id must be encoded using the same term_id_codec
// supplied to the index constructor.
@@ -138,46 +149,64 @@ class LiteIndex {
// - OK if hit was successfully added
// - RESOURCE_EXHAUSTED if hit could not be added (either due to hit buffer
// or file system capacity reached).
- libtextclassifier3::Status AddHit(uint32_t term_id, const Hit& hit);
+ libtextclassifier3::Status AddHit(uint32_t term_id, const Hit& hit)
+ ICING_LOCKS_EXCLUDED(mutex_);
// Add all hits with term_id from the sections specified in section_id_mask,
// skipping hits in non-prefix sections if only_from_prefix_sections is true,
// to hits_out. If hits_out is nullptr, no hits will be added. The
- // corresponding hit term frequencies will also be added if term_frequency_out
- // is nullptr.
+ // corresponding hit term frequencies will also not be added if
+ // term_frequency_out is nullptr.
//
// Only those hits which belongs to the given namespaces will be counted and
- // appended. A nullptr namespace checker will disable this check.
+ // fetched. A nullptr namespace checker will disable this check.
//
// Returns the score of hits that would be added to hits_out according the
// given score_by.
- int AppendHits(
+ int FetchHits(
uint32_t term_id, SectionIdMask section_id_mask,
bool only_from_prefix_sections,
SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
const SuggestionResultChecker* suggestion_result_checker,
std::vector<DocHitInfo>* hits_out,
- std::vector<Hit::TermFrequencyArray>* term_frequency_out = nullptr);
+ std::vector<Hit::TermFrequencyArray>* term_frequency_out = nullptr)
+ ICING_LOCKS_EXCLUDED(mutex_);
// Returns the hit count of the term.
// Only those hits which belongs to the given namespaces will be counted.
libtextclassifier3::StatusOr<int> ScoreHits(
uint32_t term_id,
SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
- const SuggestionResultChecker* suggestion_result_checker);
+ const SuggestionResultChecker* suggestion_result_checker)
+ ICING_LOCKS_EXCLUDED(mutex_);
- // Check if buffer has reached its capacity.
- bool is_full() const;
+ bool empty() const ICING_LOCKS_EXCLUDED(mutex_) { return size() == 0; }
- bool empty() const { return size() == 0; }
+ uint32_t size() const ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::shared_lock l(&mutex_);
+ return size_impl();
+ }
- uint32_t size() const { return header_->cur_size(); }
+ bool WantsMerge() const ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::shared_lock l(&mutex_);
+ return is_full() || size_impl() >= (options_.hit_buffer_want_merge_bytes /
+ sizeof(TermIdHitPair::Value));
+ }
- bool WantsMerge() const {
- return size() >= (options_.hit_buffer_want_merge_bytes /
- sizeof(TermIdHitPair::Value));
+ // Whether or not the HitBuffer's unsorted tail size exceeds the sort
+ // threshold.
+ bool HasUnsortedHitsExceedingSortThreshold() const
+ ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::shared_lock l(&mutex_);
+ return HasUnsortedHitsExceedingSortThresholdImpl();
}
+ // Sort hits stored in the index.
+ void SortHits() ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::unique_lock l(&mutex_);
+ SortHitsImpl();
+ };
+
class const_iterator {
friend class LiteIndex;
@@ -224,11 +253,13 @@ class LiteIndex {
int end_position_;
};
- const_iterator begin() const {
+ const_iterator begin() const ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::shared_lock l(&mutex_);
// If the LiteIndex is empty, just return end().
- return empty() ? end()
- : const_iterator(hit_buffer_.array_cast<TermIdHitPair>(), 0,
- header_->cur_size());
+ return empty_impl()
+ ? end()
+ : const_iterator(hit_buffer_.array_cast<TermIdHitPair>(), 0,
+ header_->cur_size());
}
const_iterator end() const { return const_iterator(); }
@@ -240,19 +271,25 @@ class LiteIndex {
// We keep track of the last added document_id. This is always the largest
// document_id that has been added because hits can only be added in order of
// increasing document_id.
- DocumentId last_added_document_id() const {
+ DocumentId last_added_document_id() const ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::shared_lock l(&mutex_);
return header_->last_added_docid();
}
- void set_last_added_document_id(DocumentId document_id) const {
+ void set_last_added_document_id(DocumentId document_id)
+ ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::unique_lock l(&mutex_);
header_->set_last_added_docid(document_id);
}
+ // WARNING: Subsequent calls to AddHit/InsertTerm may invalidate the reference
+ // returned here.
const IcingDynamicTrie& lexicon() const { return lexicon_; }
// Returns debug information for the index in out.
// verbosity = BASIC, simplest debug information - size of lexicon, hit buffer
// verbosity = DETAILED, more detailed debug information from the lexicon.
- std::string GetDebugInfo(DebugInfoVerbosity::Code verbosity);
+ std::string GetDebugInfo(DebugInfoVerbosity::Code verbosity)
+ ICING_LOCKS_EXCLUDED(mutex_);
// Returns the byte size of all the elements held in the index. This excludes
// the size of any internal metadata of the index, e.g. the index's header.
@@ -260,15 +297,16 @@ class LiteIndex {
// Returns:
// Byte size on success
// INTERNAL_ERROR on IO error
- libtextclassifier3::StatusOr<int64_t> GetElementsSize() const;
+ libtextclassifier3::StatusOr<int64_t> GetElementsSize() const
+ ICING_LOCKS_EXCLUDED(mutex_);
// Takes the provided storage_info, populates the fields related to the lite
// index and returns that storage_info.
//
// If an IO error occurs while trying to calculate the value for a field, then
// that field will be set to -1.
- IndexStorageInfoProto GetStorageInfo(
- IndexStorageInfoProto storage_info) const;
+ IndexStorageInfoProto GetStorageInfo(IndexStorageInfoProto storage_info) const
+ ICING_LOCKS_EXCLUDED(mutex_);
// Reduces internal file sizes by reclaiming space of deleted documents.
//
@@ -281,7 +319,8 @@ class LiteIndex {
// invalid state and should be cleared.
libtextclassifier3::Status Optimize(
const std::vector<DocumentId>& document_id_old_to_new,
- const TermIdCodec* term_id_codec, DocumentId new_last_added_document_id);
+ const TermIdCodec* term_id_codec, DocumentId new_last_added_document_id)
+ ICING_LOCKS_EXCLUDED(mutex_);
private:
static IcingDynamicTrie::RuntimeOptions MakeTrieRuntimeOptions();
@@ -295,45 +334,108 @@ class LiteIndex {
// OK on success
// DATA_LOSS if the index was corrupted and cleared
// INTERNAL on I/O error
- libtextclassifier3::Status Initialize();
+ libtextclassifier3::Status Initialize() ICING_LOCKS_EXCLUDED(mutex_);
+
+ bool initialized() const ICING_SHARED_LOCKS_REQUIRED(mutex_) {
+ return header_ != nullptr;
+ }
- bool initialized() const { return header_ != nullptr; }
+ // Check if the hit buffer has reached its capacity.
+ bool is_full() const ICING_SHARED_LOCKS_REQUIRED(mutex_);
+
+ // Non-locking implementation for empty().
+ bool empty_impl() const ICING_SHARED_LOCKS_REQUIRED(mutex_) {
+ return size_impl() == 0;
+ }
+
+ // Non-locking implementation for size().
+ uint32_t size_impl() const ICING_SHARED_LOCKS_REQUIRED(mutex_) {
+ return header_->cur_size();
+ }
+
+ // Calculate the checksum of all sub-components of the LiteIndex
+ Crc32 ComputeChecksum() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Sets the computed checksum in the header
- void UpdateChecksum();
+ void UpdateChecksum() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Non-locking implementation for UpdateTermProperties.
+ libtextclassifier3::Status UpdateTermPropertiesImpl(uint32_t tvi,
+ bool hasPrefixHits,
+ NamespaceId namespace_id)
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // We need to sort during querying time when:
+ // 1. Sorting at indexing time is not enabled and there is an unsorted tail
+ // section in the HitBuffer.
+ // 2. The unsorted tail size exceeds the hit_buffer_sort_threshold, regardless
+ // of whether or not hit_buffer_sort_at_indexing is enabled. This is to
+ // prevent performing sequential search on a large unsorted tail section,
+ // which would result in bad query performance.
+ // This is more of a sanity check. We should not really be encountering
+ // this case.
+ bool NeedSortAtQuerying() const ICING_SHARED_LOCKS_REQUIRED(mutex_) {
+ return HasUnsortedHitsExceedingSortThresholdImpl() ||
+ (!options_.hit_buffer_sort_at_indexing &&
+ header_->cur_size() - header_->searchable_end() > 0);
+ }
- // Sort hits stored in the index.
- void SortHits();
+ // Non-locking implementation for HasUnsortedHitsExceedingSortThresholdImpl().
+ bool HasUnsortedHitsExceedingSortThresholdImpl() const
+ ICING_SHARED_LOCKS_REQUIRED(mutex_) {
+ return header_->cur_size() - header_->searchable_end() >=
+ (options_.hit_buffer_sort_threshold_bytes /
+ sizeof(TermIdHitPair::Value));
+ }
- // Returns the position of the first element with term_id, or the size of the
- // hit buffer if term_id is not present.
- uint32_t Seek(uint32_t term_id);
+ // Non-locking implementation for SortHits().
+ void SortHitsImpl() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Calculates and adds the score for a fetched hit to total_score_out, while
+ // updating last_document_id (which keeps track of the last added docId so
+ // far), and is_last_document_desired (which keeps track of whether that last
+ // added docId belongs to the query's desired namespace.)
+ //
+ // Also appends the hit to hits_out and term_frequency_out if the vectors are
+ // not null.
+ void ScoreAndAppendFetchedHit(
+ const Hit& hit, SectionIdMask section_id_mask,
+ bool only_from_prefix_sections,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
+ const SuggestionResultChecker* suggestion_result_checker,
+ DocumentId& last_document_id, bool& is_last_document_desired,
+ int& total_score_out, std::vector<DocHitInfo>* hits_out,
+ std::vector<Hit::TermFrequencyArray>* term_frequency_out) const
+ ICING_SHARED_LOCKS_REQUIRED(mutex_);
// File descriptor that points to where the header and hit buffer are written
// to.
- ScopedFd hit_buffer_fd_;
+ ScopedFd hit_buffer_fd_ ICING_GUARDED_BY(mutex_);
// Mmapped region past the header that stores the hits.
- IcingArrayStorage hit_buffer_;
+ IcingArrayStorage hit_buffer_ ICING_GUARDED_BY(mutex_);
// Crc checksum of the hits, excludes the header.
- uint32_t hit_buffer_crc_;
+ uint32_t hit_buffer_crc_ ICING_GUARDED_BY(mutex_);
// Trie that maps indexed terms to their term id
- IcingDynamicTrie lexicon_;
+ IcingDynamicTrie lexicon_ ICING_GUARDED_BY(mutex_);
// TODO(b/140437260): Port over to MemoryMappedFile
// Memory mapped region of the underlying file that reflects the header.
- IcingMMapper header_mmap_;
+ IcingMMapper header_mmap_ ICING_GUARDED_BY(mutex_);
// Wrapper around the mmapped header that contains stats on the lite index.
- std::unique_ptr<LiteIndex_Header> header_;
+ std::unique_ptr<LiteIndex_Header> header_ ICING_GUARDED_BY(mutex_);
// Options used to initialize the LiteIndex.
const Options options_;
// TODO(b/139087650) Move to icing::Filesystem
const IcingFilesystem* const filesystem_;
+
+ // Used to provide reader and writer locks
+ mutable absl_ports::shared_mutex mutex_;
};
} // namespace lib
diff --git a/icing/index/lite/lite-index_test.cc b/icing/index/lite/lite-index_test.cc
index 2c29640..9811fa2 100644
--- a/icing/index/lite/lite-index_test.cc
+++ b/icing/index/lite/lite-index_test.cc
@@ -14,14 +14,27 @@
#include "icing/index/lite/lite-index.h"
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <unordered_map>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/lite/doc-hit-info-iterator-term-lite.h"
+#include "icing/index/lite/lite-index-header.h"
#include "icing/index/term-id-codec.h"
+#include "icing/legacy/index/icing-dynamic-trie.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/term.pb.h"
#include "icing/schema/section.h"
-#include "icing/store/suggestion-result-checker.h"
+#include "icing/store/namespace-id.h"
#include "icing/testing/always-false-suggestion-result-checker-impl.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
@@ -34,6 +47,8 @@ namespace {
using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::IsEmpty;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
using ::testing::SizeIs;
class LiteIndexTest : public testing::Test {
@@ -41,60 +56,329 @@ class LiteIndexTest : public testing::Test {
void SetUp() override {
index_dir_ = GetTestTempDir() + "/test_dir";
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(index_dir_.c_str()));
-
- std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
- LiteIndex::Options options(lite_index_file_name,
- /*hit_buffer_want_merge_bytes=*/1024 * 1024);
- ICING_ASSERT_OK_AND_ASSIGN(lite_index_,
- LiteIndex::Create(options, &icing_filesystem_));
-
- ICING_ASSERT_OK_AND_ASSIGN(
- term_id_codec_,
- TermIdCodec::Create(
- IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
- IcingDynamicTrie::max_value_index(options.lexicon_options)));
}
void TearDown() override {
+ term_id_codec_.reset();
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(index_dir_.c_str()));
}
std::string index_dir_;
Filesystem filesystem_;
IcingFilesystem icing_filesystem_;
- std::unique_ptr<LiteIndex> lite_index_;
std::unique_ptr<TermIdCodec> term_id_codec_;
};
constexpr NamespaceId kNamespace0 = 0;
-TEST_F(LiteIndexTest, LiteIndexAppendHits) {
+TEST_F(LiteIndexTest,
+ LiteIndexFetchHits_sortAtQuerying_unsortedHitsBelowSortThreshold) {
+ // Set up LiteIndex and TermIdCodec
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+ // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/false,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(options, &icing_filesystem_));
ICING_ASSERT_OK_AND_ASSIGN(
- uint32_t tvi,
- lite_index_->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0));
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+
+ // Add some hits
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0));
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
- term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc_hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+ Hit foo_hit0(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
- Hit doc_hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ Hit foo_hit1(/*section_id=*/1, /*document_id=*/1, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
- ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit0));
- ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, foo_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, foo_hit1));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t bar_tvi,
+ lite_index->InsertTerm("bar", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t bar_term_id,
+ term_id_codec_->EncodeTvi(bar_tvi, TviType::LITE));
+ Hit bar_hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit bar_hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit1));
+
+ // Check that unsorted hits does not exceed the sort threshold.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse());
+
+ // Check that hits are unsorted. Persist the data and pread from
+ // LiteIndexHeader.
+ ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
+ LiteIndex_HeaderImpl::HeaderData header_data;
+ ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
+ &header_data, sizeof(header_data),
+ LiteIndex::kHeaderFileOffset));
+ EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(4));
+
+ // Query the LiteIndex
std::vector<DocHitInfo> hits1;
- lite_index_->AppendHits(
+ lite_index->FetchHits(
foo_term_id, kSectionIdMaskAll,
/*only_from_prefix_sections=*/false,
SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
/*namespace_checker=*/nullptr, &hits1);
EXPECT_THAT(hits1, SizeIs(1));
- EXPECT_THAT(hits1.back().document_id(), Eq(0));
+ EXPECT_THAT(hits1.back().document_id(), Eq(1));
// Check that the hits are coming from section 0 and section 1.
EXPECT_THAT(hits1.back().hit_section_ids_mask(), Eq(0b11));
std::vector<DocHitInfo> hits2;
AlwaysFalseSuggestionResultCheckerImpl always_false_suggestion_result_checker;
- lite_index_->AppendHits(
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &always_false_suggestion_result_checker, &hits2);
+ // Check that no hits are returned because they get skipped by the namespace
+ // checker.
+ EXPECT_THAT(hits2, IsEmpty());
+
+ // Check that hits are sorted after querying LiteIndex. Persist the data and
+ // pread from LiteIndexHeader.
+ ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
+ ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
+ &header_data, sizeof(header_data),
+ LiteIndex::kHeaderFileOffset));
+ EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(0));
+}
+
+TEST_F(LiteIndexTest,
+ LiteIndexFetchHits_sortAtIndexing_unsortedHitsBelowSortThreshold) {
+ // Set up LiteIndex and TermIdCodec
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+ // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
+ // However note that in these tests we're unable to sort hits after
+ // indexing, as sorting performed by the string-section-indexing-handler
+ // after indexing all hits in an entire document, rather than after each
+ // AddHits() operation.
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+
+ // Add some hits
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+ Hit foo_hit0(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit foo_hit1(/*section_id=*/1, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, foo_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, foo_hit1));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t bar_tvi,
+ lite_index->InsertTerm("bar", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t bar_term_id,
+ term_id_codec_->EncodeTvi(bar_tvi, TviType::LITE));
+ Hit bar_hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit bar_hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit1));
+
+ // Check that unsorted hits does not exceed the sort threshold.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse());
+
+ // Check that hits are unsorted. Persist the data and pread from
+ // LiteIndexHeader.
+ ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
+ LiteIndex_HeaderImpl::HeaderData header_data;
+ ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
+ &header_data, sizeof(header_data),
+ LiteIndex::kHeaderFileOffset));
+ EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(4));
+
+ // Query the LiteIndex
+ std::vector<DocHitInfo> hits1;
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits1);
+ EXPECT_THAT(hits1, SizeIs(1));
+ EXPECT_THAT(hits1.back().document_id(), Eq(1));
+ // Check that the hits are coming from section 0 and section 1.
+ EXPECT_THAT(hits1.back().hit_section_ids_mask(), Eq(0b11));
+
+ std::vector<DocHitInfo> hits2;
+ AlwaysFalseSuggestionResultCheckerImpl always_false_suggestion_result_checker;
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &always_false_suggestion_result_checker, &hits2);
+ // Check that no hits are returned because they get skipped by the namespace
+ // checker.
+ EXPECT_THAT(hits2, IsEmpty());
+
+ // Check that hits are still unsorted after querying LiteIndex because the
+ // HitBuffer unsorted size is still below the sort threshold, and we've
+ // enabled sort_at_indexing.
+ // Persist the data and performing a pread on LiteIndexHeader.
+ ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
+ ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
+ &header_data, sizeof(header_data),
+ LiteIndex::kHeaderFileOffset));
+ EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(4));
+}
+
+TEST_F(
+ LiteIndexTest,
+ LiteIndexFetchHits_sortAtQuerying_unsortedHitsExceedingSortAtIndexThreshold) {
+ // Set up LiteIndex and TermIdCodec
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+ // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
+ // However note that in these tests we're unable to sort hits after
+ // indexing, as sorting performed by the string-section-indexing-handler
+ // after indexing all hits in an entire document, rather than after each
+ // AddHits() operation.
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/false,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+
+ // Create 4 hits for docs 0-2, and 2 hits for doc 3 -- 14 in total
+ // Doc 0
+ Hit doc0_hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit1(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit2(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit3(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 1
+ Hit doc1_hit0(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit1(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit2(/*section_id=*/1, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit3(/*section_id=*/2, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 2
+ Hit doc2_hit0(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc2_hit1(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc2_hit2(/*section_id=*/1, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc2_hit3(/*section_id=*/2, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 3
+ Hit doc3_hit0(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc3_hit1(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+
+ // Create terms
+ // Foo
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index->InsertTerm("foo", TermMatchType::EXACT_ONLY, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+ // Bar
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t bar_tvi,
+ lite_index->InsertTerm("bar", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t bar_term_id,
+ term_id_codec_->EncodeTvi(bar_tvi, TviType::LITE));
+ // Baz
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t baz_tvi,
+ lite_index->InsertTerm("baz", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t baz_term_id,
+ term_id_codec_->EncodeTvi(baz_tvi, TviType::LITE));
+ // Qux
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t qux_tvi,
+ lite_index->InsertTerm("qux", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t qux_term_id,
+ term_id_codec_->EncodeTvi(qux_tvi, TviType::LITE));
+
+ // Add 14 hits and make sure that termIds are added in unsorted order.
+ // Documents should be inserted in order as new incoming hits should have
+ // larger document ids.
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc0_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc0_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(qux_term_id, doc0_hit3));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc1_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc1_hit3));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc2_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc2_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(qux_term_id, doc2_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc2_hit3));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc3_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc3_hit1));
+ // Verify that the HitBuffer has not been sorted.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsTrue());
+
+ // We now have the following in the hit buffer:
+ // <term>: {(docId, sectionId)...}
+ // foo: {(0, 0); (1, 0); (1, 1); (2, 0); (2, 2); (3, 0)}
+ // bar: {(0, 0); (1, 0); (1, 2)}
+ // baz: {(0, 1); (2, 0); (3, 0)}
+ // quz: {(0, 2); (2, 1)}
+
+ // Search over the HitBuffer.
+ std::vector<DocHitInfo> hits1;
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits1);
+ EXPECT_THAT(hits1, SizeIs(4));
+ // Check that hits are retrieved in descending order of docIds.
+ EXPECT_THAT(hits1[0].document_id(), Eq(3));
+ EXPECT_THAT(hits1[0].hit_section_ids_mask(), Eq(0b1));
+ EXPECT_THAT(hits1[1].document_id(), Eq(2));
+ EXPECT_THAT(hits1[1].hit_section_ids_mask(), Eq(0b101));
+ EXPECT_THAT(hits1[2].document_id(), Eq(1));
+ EXPECT_THAT(hits1[2].hit_section_ids_mask(), Eq(0b11));
+ EXPECT_THAT(hits1[3].document_id(), Eq(0));
+ EXPECT_THAT(hits1[3].hit_section_ids_mask(), Eq(0b1));
+
+ std::vector<DocHitInfo> hits2;
+ AlwaysFalseSuggestionResultCheckerImpl always_false_suggestion_result_checker;
+ lite_index->FetchHits(
foo_term_id, kSectionIdMaskAll,
/*only_from_prefix_sections=*/false,
SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
@@ -102,13 +386,308 @@ TEST_F(LiteIndexTest, LiteIndexAppendHits) {
// Check that no hits are returned because they get skipped by the namespace
// checker.
EXPECT_THAT(hits2, IsEmpty());
+
+ std::vector<DocHitInfo> hits3;
+ lite_index->FetchHits(
+ bar_term_id, 0b1,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits3);
+ EXPECT_THAT(hits3, SizeIs(2));
+ // Check fetching hits with SectionIdMask.
+ EXPECT_THAT(hits3[0].document_id(), Eq(1));
+ EXPECT_THAT(hits3[1].hit_section_ids_mask(), Eq(0b1));
+ EXPECT_THAT(hits3[1].document_id(), Eq(0));
+ EXPECT_THAT(hits3[1].hit_section_ids_mask(), Eq(0b1));
+
+ // Check that the HitBuffer is sorted after the query call.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse());
+}
+
+TEST_F(
+ LiteIndexTest,
+ LiteIndexFetchHits_sortAtIndexing_unsortedHitsExceedingSortAtIndexThreshold) {
+ // Set up LiteIndex and TermIdCodec
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+ // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+
+ // Create 4 hits for docs 0-2, and 2 hits for doc 3 -- 14 in total
+ // Doc 0
+ Hit doc0_hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit1(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit2(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit3(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 1
+ Hit doc1_hit0(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit1(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit2(/*section_id=*/1, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit3(/*section_id=*/2, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 2
+ Hit doc2_hit0(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc2_hit1(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc2_hit2(/*section_id=*/1, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc2_hit3(/*section_id=*/2, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 3
+ Hit doc3_hit0(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc3_hit1(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc3_hit2(/*section_id=*/1, /*document_id=*/3, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc3_hit3(/*section_id=*/2, /*document_id=*/3, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 4
+ Hit doc4_hit0(/*section_id=*/0, /*document_id=*/4, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc4_hit1(/*section_id=*/0, /*document_id=*/4, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc4_hit2(/*section_id=*/1, /*document_id=*/4, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc4_hit3(/*section_id=*/2, /*document_id=*/4, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+
+ // Create terms
+ // Foo
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index->InsertTerm("foo", TermMatchType::EXACT_ONLY, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+ // Bar
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t bar_tvi,
+ lite_index->InsertTerm("bar", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t bar_term_id,
+ term_id_codec_->EncodeTvi(bar_tvi, TviType::LITE));
+ // Baz
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t baz_tvi,
+ lite_index->InsertTerm("baz", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t baz_term_id,
+ term_id_codec_->EncodeTvi(baz_tvi, TviType::LITE));
+ // Qux
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t qux_tvi,
+ lite_index->InsertTerm("qux", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t qux_term_id,
+ term_id_codec_->EncodeTvi(qux_tvi, TviType::LITE));
+
+ // Add hits and make sure that termIds are added in unsorted order.
+ // Documents should be inserted in order as new incoming hits should have
+ // larger document ids.
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc0_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc0_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(qux_term_id, doc0_hit3));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc1_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc1_hit3));
+ // Adding 8 hits exceeds the sort threshold. However when sort_at_indexing is
+ // enabled, sorting is done in the string-section-indexing-handler rather than
+ // AddHit() itself, we need to invoke SortHits() manually.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsTrue());
+ lite_index->SortHits();
+ // Check that the HitBuffer is sorted.
+ ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
+ LiteIndex_HeaderImpl::HeaderData header_data;
+ ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
+ &header_data, sizeof(header_data),
+ LiteIndex::kHeaderFileOffset));
+ EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(0));
+
+ // Add 12 more hits so that sort threshold is exceeded again.
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc2_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc2_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(qux_term_id, doc2_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc2_hit3));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc3_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc3_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc3_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc3_hit3));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc4_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(qux_term_id, doc4_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc4_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc4_hit3));
+
+ // Adding these hits exceeds the sort threshold. However when sort_at_indexing
+ // is enabled, sorting is done in the string-section-indexing-handler rather
+ // than AddHit() itself.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsTrue());
+
+ // We now have the following in the hit buffer:
+ // <term>: {(docId, sectionId)...}
+ // foo: {(0, 0); (1, 0); (1, 1); (2, 0); (2, 2); (3, 0); (3, 1); (4, 1)}
+ // bar: {(0, 0); (1, 0); (1, 2); (3, 2); (4, 2)}
+ // baz: {(0, 1); (2, 0); (3, 0); (4, 0)}
+ // quz: {(0, 2); (2, 1); (4, 0)}
+
+ // Search over the HitBuffer.
+ std::vector<DocHitInfo> hits1;
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits1);
+ EXPECT_THAT(hits1, SizeIs(5));
+ // Check that hits are retrieved in descending order of docIds.
+ EXPECT_THAT(hits1[0].document_id(), Eq(4));
+ EXPECT_THAT(hits1[0].hit_section_ids_mask(), Eq(0b10));
+ EXPECT_THAT(hits1[1].document_id(), Eq(3));
+ EXPECT_THAT(hits1[1].hit_section_ids_mask(), Eq(0b11));
+ EXPECT_THAT(hits1[2].document_id(), Eq(2));
+ EXPECT_THAT(hits1[2].hit_section_ids_mask(), Eq(0b101));
+ EXPECT_THAT(hits1[3].document_id(), Eq(1));
+ EXPECT_THAT(hits1[3].hit_section_ids_mask(), Eq(0b11));
+ EXPECT_THAT(hits1[4].document_id(), Eq(0));
+ EXPECT_THAT(hits1[4].hit_section_ids_mask(), Eq(0b1));
+
+ std::vector<DocHitInfo> hits2;
+ AlwaysFalseSuggestionResultCheckerImpl always_false_suggestion_result_checker;
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &always_false_suggestion_result_checker, &hits2);
+ // Check that no hits are returned because they get skipped by the namespace
+ // checker.
+ EXPECT_THAT(hits2, IsEmpty());
+
+ std::vector<DocHitInfo> hits3;
+ lite_index->FetchHits(
+ bar_term_id, 0b1,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits3);
+ EXPECT_THAT(hits3, SizeIs(2));
+ // Check fetching hits with SectionIdMask.
+ EXPECT_THAT(hits3[0].document_id(), Eq(1));
+ EXPECT_THAT(hits3[1].hit_section_ids_mask(), Eq(0b1));
+ EXPECT_THAT(hits3[1].document_id(), Eq(0));
+ EXPECT_THAT(hits3[1].hit_section_ids_mask(), Eq(0b1));
+
+ // Check that the HitBuffer is sorted after the query call. FetchHits should
+ // sort before performing binary search if the HitBuffer unsorted size exceeds
+ // the sort threshold. Regardless of the sort_at_indexing config.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse());
+ ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
+ ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
+ &header_data, sizeof(header_data),
+ LiteIndex::kHeaderFileOffset));
+ EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(0));
}
TEST_F(LiteIndexTest, LiteIndexIterator) {
+ // Set up LiteIndex and TermIdCodec
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+ // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+
+ const std::string term = "foo";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index->InsertTerm(term, TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ Hit doc0_hit0(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/3,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit1(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/5,
+ /*is_in_prefix_section=*/false);
+ SectionIdMask doc0_section_id_mask = 0b11;
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map0 = {{0, 3}, {1, 5}};
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit1));
+
+ Hit doc1_hit1(/*section_id=*/1, /*document_id=*/1, /*term_frequency=*/7,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit2(/*section_id=*/2, /*document_id=*/1, /*term_frequency=*/11,
+ /*is_in_prefix_section=*/false);
+ SectionIdMask doc1_section_id_mask = 0b110;
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map1 = {{1, 7}, {2, 11}};
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit2));
+
+ std::unique_ptr<DocHitInfoIteratorTermLiteExact> iter =
+ std::make_unique<DocHitInfoIteratorTermLiteExact>(
+ term_id_codec_.get(), lite_index.get(), term, /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ /*need_hit_term_frequency=*/true);
+
+ ASSERT_THAT(iter->Advance(), IsOk());
+ EXPECT_THAT(iter->doc_hit_info().document_id(), Eq(1));
+ EXPECT_THAT(iter->doc_hit_info().hit_section_ids_mask(),
+ Eq(doc1_section_id_mask));
+
+ std::vector<TermMatchInfo> matched_terms_stats;
+ iter->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ term, expected_section_ids_tf_map1)));
+
+ ASSERT_THAT(iter->Advance(), IsOk());
+ EXPECT_THAT(iter->doc_hit_info().document_id(), Eq(0));
+ EXPECT_THAT(iter->doc_hit_info().hit_section_ids_mask(),
+ Eq(doc0_section_id_mask));
+ matched_terms_stats.clear();
+ iter->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ term, expected_section_ids_tf_map0)));
+}
+
+TEST_F(LiteIndexTest, LiteIndexIterator_sortAtIndexingDisabled) {
+ // Set up LiteIndex and TermIdCodec
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+ // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/false,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+
const std::string term = "foo";
ICING_ASSERT_OK_AND_ASSIGN(
uint32_t tvi,
- lite_index_->InsertTerm(term, TermMatchType::PREFIX, kNamespace0));
+ lite_index->InsertTerm(term, TermMatchType::PREFIX, kNamespace0));
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
Hit doc0_hit0(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/3,
@@ -118,8 +697,8 @@ TEST_F(LiteIndexTest, LiteIndexIterator) {
SectionIdMask doc0_section_id_mask = 0b11;
std::unordered_map<SectionId, Hit::TermFrequency>
expected_section_ids_tf_map0 = {{0, 3}, {1, 5}};
- ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc0_hit0));
- ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc0_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit1));
Hit doc1_hit1(/*section_id=*/1, /*document_id=*/1, /*term_frequency=*/7,
/*is_in_prefix_section=*/false);
@@ -128,12 +707,13 @@ TEST_F(LiteIndexTest, LiteIndexIterator) {
SectionIdMask doc1_section_id_mask = 0b110;
std::unordered_map<SectionId, Hit::TermFrequency>
expected_section_ids_tf_map1 = {{1, 7}, {2, 11}};
- ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc1_hit1));
- ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc1_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit2));
std::unique_ptr<DocHitInfoIteratorTermLiteExact> iter =
std::make_unique<DocHitInfoIteratorTermLiteExact>(
- term_id_codec_.get(), lite_index_.get(), term, kSectionIdMaskAll,
+ term_id_codec_.get(), lite_index.get(), term, /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
/*need_hit_term_frequency=*/true);
ASSERT_THAT(iter->Advance(), IsOk());
diff --git a/icing/index/lite/lite-index_thread-safety_test.cc b/icing/index/lite/lite-index_thread-safety_test.cc
new file mode 100644
index 0000000..53aa6cd
--- /dev/null
+++ b/icing/index/lite/lite-index_thread-safety_test.cc
@@ -0,0 +1,399 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <array>
+#include <string>
+#include <thread>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/index/lite/lite-index.h"
+#include "icing/index/term-id-codec.h"
+#include "icing/schema/section.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::Ge;
+using ::testing::Le;
+using ::testing::SizeIs;
+
+// These tests cover concurrent FetchHits operations, as well as interleaving
+// AddHit and FetchHits operations. Other usages of the LiteIndex other than
+// these scenarios are not guaranteed with to be thread-safe as the LiteIndex is
+// go/thread-compatible.
+class LiteIndexThreadSafetyTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ index_dir_ = GetTestTempDir() + "/test_dir";
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(index_dir_.c_str()));
+
+ std::string lite_index_file_name =
+ index_dir_ + "/test_file.lite-idx-thread-safety.index";
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(lite_index_,
+ LiteIndex::Create(options, &icing_filesystem_));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+ }
+
+ void TearDown() override {
+ term_id_codec_.reset();
+ lite_index_.reset();
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(index_dir_.c_str()));
+ }
+
+ std::string index_dir_;
+ Filesystem filesystem_;
+ IcingFilesystem icing_filesystem_;
+ std::unique_ptr<LiteIndex> lite_index_;
+ std::unique_ptr<TermIdCodec> term_id_codec_;
+};
+
+constexpr NamespaceId kNamespace0 = 0;
+constexpr DocumentId kDocumentId0 = 0;
+constexpr DocumentId kDocumentId1 = 1;
+constexpr SectionId kSectionId0 = 1;
+constexpr SectionId kSectionId1 = 0b11;
+
+static constexpr std::array<std::string_view, 100> kCommonWords = {
+ "the", "and", "for", "that", "this", "with",
+ "you", "not", "are", "from", "your", "all",
+ "have", "new", "more", "was", "will", "home",
+ "can", "about", "page", "has", "search", "free",
+ "but", "our", "one", "other", "information", "time",
+ "they", "site", "may", "what", "which", "their",
+ "news", "out", "use", "any", "there", "see",
+ "only", "his", "when", "contact", "here", "business",
+ "who", "web", "also", "now", "help", "get",
+ "view", "online", "first", "been", "would", "how",
+ "were", "services", "some", "these", "click", "its",
+ "like", "service", "than", "find", "price", "date",
+ "back", "top", "people", "had", "list", "name",
+ "just", "over", "state", "year", "day", "into",
+ "email", "two", "health", "world", "next", "used",
+ "work", "last", "most", "products", "music", "buy",
+ "data", "make", "them", "should"};
+
+TEST_F(LiteIndexThreadSafetyTest, SimultaneousFetchHits_singleTerm) {
+ // Add some hits
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index_->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+ Hit doc_hit0(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId0,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ Hit doc_hit1(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId1,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit0));
+ ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit1));
+
+ // Create kNumThreads threads to call lite_index_->FetchHits()
+ // simultaneously. Each thread should get a valid result of 2 hits for the
+ // term 'foo', and there should be no crash.
+ constexpr int kNumThreads = 50;
+ std::vector<std::vector<DocHitInfo>> hits(kNumThreads);
+ auto callable = [&](int thread_id) {
+ lite_index_->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits[thread_id]);
+ };
+ // Spawn threads for FetchHits().
+ std::vector<std::thread> thread_objs;
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs.emplace_back(callable, /*thread_id=*/i);
+ }
+
+ // Join threads and verify results
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs[i].join();
+ EXPECT_THAT(
+ hits[i],
+ ElementsAre(
+ EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId0}),
+ EqualsDocHitInfo(kDocumentId0,
+ std::vector<SectionId>{kSectionId0})));
+ }
+}
+
+TEST_F(LiteIndexThreadSafetyTest, SimultaneousFetchHits_multipleTerms) {
+ // Add two hits for each of the first 50 terms in kCommonWords.
+ for (int i = 0; i < 50; ++i) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm(std::string(kCommonWords[i]),
+ TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ Hit doc_hit0(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId0,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ Hit doc_hit1(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId1,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit0));
+ ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit1));
+ }
+
+ // Create kNumThreads threads to call lite_index_->FetchHits()
+ // simultaneously. Each thread should get a valid result of 2 hits for each
+ // term, and there should be no crash.
+ constexpr int kNumThreads = 50;
+ std::vector<std::vector<DocHitInfo>> hits(kNumThreads);
+ auto callable = [&](int thread_id) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm(std::string(kCommonWords[thread_id]),
+ TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ lite_index_->FetchHits(
+ term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits[thread_id]);
+ };
+
+ // Spawn threads for FetchHits().
+ std::vector<std::thread> thread_objs;
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs.emplace_back(callable, /*thread_id=*/i);
+ }
+
+ // Join threads and verify results
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs[i].join();
+ EXPECT_THAT(
+ hits[i],
+ ElementsAre(
+ EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId0}),
+ EqualsDocHitInfo(kDocumentId0,
+ std::vector<SectionId>{kSectionId0})));
+ }
+}
+
+TEST_F(LiteIndexThreadSafetyTest, SimultaneousAddHitAndFetchHits_singleTerm) {
+ // Add some hits
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index_->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+ Hit doc_hit0(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId0,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit0));
+
+ // Create kNumThreads threads. Every even-numbered thread calls FetchHits and
+ // every odd numbered thread calls AddHit.
+ // Each AddHit operation adds the term 'foo' to a new section of the same doc.
+ // Each query result should contain one hit, and there should be no crash.
+ constexpr int kNumThreads = 50;
+ std::vector<std::vector<DocHitInfo>> hits(kNumThreads);
+ auto callable = [&](int thread_id) {
+ if (thread_id % 2 == 0) {
+ // Even-numbered thread calls FetchHits.
+ lite_index_->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits[thread_id]);
+ } else {
+ // Odd-numbered thread calls AddHit.
+ Hit doc_hit(/*section_id=*/thread_id / 2, /*document_id=*/kDocumentId0,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit));
+ }
+ };
+
+ // Spawn threads.
+ std::vector<std::thread> thread_objs;
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs.emplace_back(callable, /*thread_id=*/i);
+ }
+
+ // Join threads and verify results.
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs[i].join();
+ // All AddHit operations add 'foo' to the same document, so there should
+ // only be one DocHitInfo per run.
+ if (i % 2 == 0) {
+ EXPECT_THAT(hits[i], SizeIs(1));
+ EXPECT_THAT(hits[i].back().document_id(), Eq(0));
+ }
+ }
+
+ // After all threads have executed, hits should come from sections 0-24.
+ std::vector<DocHitInfo> final_hits;
+ lite_index_->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &final_hits);
+ EXPECT_THAT(final_hits, SizeIs(1));
+ EXPECT_THAT(final_hits.back().document_id(), Eq(0));
+ // Section mask of sections 0-24.
+ EXPECT_THAT(final_hits.back().hit_section_ids_mask(), Eq((1 << 25) - 1));
+}
+
+TEST_F(LiteIndexThreadSafetyTest,
+ SimultaneousAddHitAndFetchHits_multipleTerms) {
+ // Add the initial hit 'foo'.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index_->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+ Hit doc_hit0(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId0,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit0));
+
+ // Create kNumThreads threads. Every even-numbered thread calls FetchHits and
+ // every odd numbered thread calls AddHit.
+ // Each AddHit operation adds a different term to a new doc.
+ // Queries always search for the term 'foo' added above so there will always
+ // be a hit. There should be no crash.
+ constexpr int kNumThreads = 50;
+ std::vector<std::vector<DocHitInfo>> hits(kNumThreads);
+ auto callable = [&](int thread_id) {
+ // Create new tvi and term_id for new term kCommonWords[thread_id].
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm(std::string(kCommonWords[thread_id]),
+ TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+
+ if (thread_id % 2 == 0) {
+ // Even-numbered thread calls FetchHits.
+ lite_index_->FetchHits(
+ foo_term_id, kSectionIdMaskAll, /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits[thread_id]);
+ } else {
+ // Odd-numbered thread calls AddHit.
+ // AddHit to section 0 of a new doc.
+ Hit doc_hit(/*section_id=*/kSectionId0, /*document_id=*/thread_id / 2,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit));
+ }
+ };
+
+ // Spawn threads.
+ std::vector<std::thread> thread_objs;
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs.emplace_back(callable, /*thread_id=*/i);
+ }
+
+ // Join threads and verify results. Queries always search for the term 'foo'
+ // so there will always be a hit
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs[i].join();
+ if (i % 2 == 0) {
+ EXPECT_THAT(hits[i],
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId0})));
+ }
+ }
+}
+
+TEST_F(LiteIndexThreadSafetyTest, ManyAddHitAndOneFetchHits_multipleTerms) {
+ // Add two hits for each of the first 20 terms in kCommonWords.
+ for (int i = 0; i < 20; ++i) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm(std::string(kCommonWords[i]),
+ TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ Hit doc_hit0(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId0,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ Hit doc_hit1(/*section_id=*/kSectionId1, /*document_id=*/kDocumentId0,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit0));
+ ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit1));
+ }
+
+ // Create kNumThreads threads. Call one FetchHits operation after every 5
+ // AddHit operations.
+ // Each AddHit operation adds a different term to a new doc.
+ // Queries always search for the term 'foo' added above so there will always
+ // be a hit. There should be no crash.
+ constexpr int kNumThreads = 100;
+ std::vector<std::vector<DocHitInfo>> hits(kNumThreads);
+ auto callable = [&](int thread_id) {
+ // Create new tvi and term_id for new term kCommonWords[thread_id].
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm(std::string(kCommonWords[thread_id / 5]),
+ TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+
+ if (thread_id % 5 == 0) {
+ // Call FetchHits on term kCommonWords[thread_id / 5]
+ lite_index_->FetchHits(
+ term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits[thread_id]);
+ } else {
+ // Odd-numbered thread calls AddHit.
+ // AddHit to section (thread_id % 5 + 1) of doc 0.
+ Hit doc_hit(/*section_id=*/thread_id % 5 + 1,
+ /*document_id=*/kDocumentId0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit));
+ }
+ };
+ // Spawn threads.
+ std::vector<std::thread> thread_objs;
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs.emplace_back(callable, /*thread_id=*/i);
+ }
+
+ // Join threads and verify FetchHits results.
+ // Every query should see a hit in doc 0 sections 0 and 1. Additional hits
+ // might also be found in sections 2-6 depending on thread execution order.
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs[i].join();
+ if (i % 5 == 0) {
+ EXPECT_THAT(hits[i], SizeIs(1));
+ EXPECT_THAT(hits[i].back().document_id(), Eq(0));
+ EXPECT_THAT(hits[i].back().hit_section_ids_mask(), Ge(0b11));
+ EXPECT_THAT(hits[i].back().hit_section_ids_mask(), Le(0b1111111));
+ }
+ }
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/lite/term-id-hit-pair.h b/icing/index/lite/term-id-hit-pair.h
index 61ec502..82bd010 100644
--- a/icing/index/lite/term-id-hit-pair.h
+++ b/icing/index/lite/term-id-hit-pair.h
@@ -73,6 +73,8 @@ class TermIdHitPair {
return value_ == rhs.value_;
}
+ bool operator<(const TermIdHitPair& rhs) const { return value_ < rhs.value_; }
+
private:
Value value_;
};
diff --git a/icing/index/main/doc-hit-info-iterator-term-main.cc b/icing/index/main/doc-hit-info-iterator-term-main.cc
index f06124a..5cf6a4c 100644
--- a/icing/index/main/doc-hit-info-iterator-term-main.cc
+++ b/icing/index/main/doc-hit-info-iterator-term-main.cc
@@ -14,16 +14,20 @@
#include "icing/index/main/doc-hit-info-iterator-term-main.h"
-#include <cstdint>
#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
-#include "icing/file/posting_list/posting-list-identifier.h"
#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/main/main-index.h"
#include "icing/index/main/posting-list-hit-accessor.h"
-#include "icing/legacy/core/icing-string-util.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/util/logging.h"
@@ -44,11 +48,36 @@ std::string SectionIdMaskToString(SectionIdMask section_id_mask) {
return mask;
}
+void MergeNewHitIntoCachedDocHitInfos(
+ const Hit& hit, bool need_hit_term_frequency,
+ std::vector<DocHitInfoIteratorTermMain::DocHitInfoAndTermFrequencyArray>&
+ cached_doc_hit_infos_out) {
+ if (cached_doc_hit_infos_out.empty() ||
+ hit.document_id() !=
+ cached_doc_hit_infos_out.back().doc_hit_info.document_id()) {
+ std::optional<Hit::TermFrequencyArray> tf_arr;
+ if (need_hit_term_frequency) {
+ tf_arr = std::make_optional<Hit::TermFrequencyArray>();
+ }
+
+ cached_doc_hit_infos_out.push_back(
+ DocHitInfoIteratorTermMain::DocHitInfoAndTermFrequencyArray(
+ DocHitInfo(hit.document_id()), std::move(tf_arr)));
+ }
+
+ cached_doc_hit_infos_out.back().doc_hit_info.UpdateSection(hit.section_id());
+ if (need_hit_term_frequency) {
+ (*cached_doc_hit_infos_out.back().term_frequency_array)[hit.section_id()] =
+ hit.term_frequency();
+ }
+}
+
} // namespace
libtextclassifier3::Status DocHitInfoIteratorTermMain::Advance() {
- if (posting_list_accessor_ == nullptr ||
- cached_doc_hit_infos_idx_ == (cached_doc_hit_infos_.size() - 2)) {
+ ++cached_doc_hit_infos_idx_;
+ while (posting_list_accessor_ == nullptr ||
+ (!all_pages_consumed_ && cached_doc_hit_info_count() == 1)) {
// If we haven't retrieved any hits before or we've already returned all but
// the last cached hit, then go get some more!
// We hold back the last cached hit because it could have more hits on the
@@ -65,8 +94,6 @@ libtextclassifier3::Status DocHitInfoIteratorTermMain::Advance() {
return absl_ports::ResourceExhaustedError(
"No more DocHitInfos in iterator");
}
- } else {
- ++cached_doc_hit_infos_idx_;
}
if (cached_doc_hit_infos_idx_ == -1 ||
cached_doc_hit_infos_idx_ >= cached_doc_hit_infos_.size()) {
@@ -77,22 +104,31 @@ libtextclassifier3::Status DocHitInfoIteratorTermMain::Advance() {
return absl_ports::ResourceExhaustedError(
"No more DocHitInfos in iterator");
}
- doc_hit_info_ = cached_doc_hit_infos_.at(cached_doc_hit_infos_idx_);
+ doc_hit_info_ =
+ cached_doc_hit_infos_.at(cached_doc_hit_infos_idx_).doc_hit_info;
hit_intersect_section_ids_mask_ = doc_hit_info_.hit_section_ids_mask();
return libtextclassifier3::Status::OK;
}
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorTermMain::TrimRightMostNode() && {
+ // Leaf iterator should trim itself.
+ DocHitInfoIterator::TrimmedNode node = {nullptr, term_, term_start_index_,
+ unnormalized_term_length_};
+ return node;
+}
+
libtextclassifier3::Status DocHitInfoIteratorTermMainExact::RetrieveMoreHits() {
- DocHitInfo last_doc_hit_info;
+ DocHitInfoAndTermFrequencyArray last_doc_hit_info;
if (!cached_doc_hit_infos_.empty()) {
- last_doc_hit_info = cached_doc_hit_infos_.back();
+ last_doc_hit_info = std::move(cached_doc_hit_infos_.back());
}
cached_doc_hit_infos_idx_ = 0;
cached_doc_hit_infos_.clear();
- if (last_doc_hit_info.document_id() != kInvalidDocumentId) {
+ if (last_doc_hit_info.doc_hit_info.document_id() != kInvalidDocumentId) {
// Carry over the last hit. It might need to be merged with the first hit of
// of the next posting list in the chain.
- cached_doc_hit_infos_.push_back(last_doc_hit_info);
+ cached_doc_hit_infos_.push_back(std::move(last_doc_hit_info));
}
if (posting_list_accessor_ == nullptr) {
ICING_ASSIGN_OR_RETURN(posting_list_accessor_,
@@ -101,9 +137,11 @@ libtextclassifier3::Status DocHitInfoIteratorTermMainExact::RetrieveMoreHits() {
ICING_ASSIGN_OR_RETURN(std::vector<Hit> hits,
posting_list_accessor_->GetNextHitsBatch());
+ if (hits.empty()) {
+ all_pages_consumed_ = true;
+ }
++num_blocks_inspected_;
- cached_doc_hit_infos_.reserve(hits.size() + 1);
- cached_hit_term_frequency_.reserve(hits.size() + 1);
+ cached_doc_hit_infos_.reserve(cached_doc_hit_infos_.size() + hits.size());
for (const Hit& hit : hits) {
// Check sections.
if (((UINT64_C(1) << hit.section_id()) & section_restrict_mask_) == 0) {
@@ -113,13 +151,9 @@ libtextclassifier3::Status DocHitInfoIteratorTermMainExact::RetrieveMoreHits() {
if (hit.is_prefix_hit()) {
continue;
}
- if (cached_doc_hit_infos_.empty() ||
- hit.document_id() != cached_doc_hit_infos_.back().document_id()) {
- cached_doc_hit_infos_.push_back(DocHitInfo(hit.document_id()));
- cached_hit_term_frequency_.push_back(Hit::TermFrequencyArray());
- }
- cached_doc_hit_infos_.back().UpdateSection(hit.section_id());
- cached_hit_term_frequency_.back()[hit.section_id()] = hit.term_frequency();
+
+ MergeNewHitIntoCachedDocHitInfos(hit, need_hit_term_frequency_,
+ cached_doc_hit_infos_);
}
return libtextclassifier3::Status::OK;
}
@@ -131,16 +165,16 @@ std::string DocHitInfoIteratorTermMainExact::ToString() const {
libtextclassifier3::Status
DocHitInfoIteratorTermMainPrefix::RetrieveMoreHits() {
- DocHitInfo last_doc_hit_info;
+ DocHitInfoAndTermFrequencyArray last_doc_hit_info;
if (!cached_doc_hit_infos_.empty()) {
- last_doc_hit_info = cached_doc_hit_infos_.back();
+ last_doc_hit_info = std::move(cached_doc_hit_infos_.back());
}
cached_doc_hit_infos_idx_ = 0;
cached_doc_hit_infos_.clear();
- if (last_doc_hit_info.document_id() != kInvalidDocumentId) {
+ if (last_doc_hit_info.doc_hit_info.document_id() != kInvalidDocumentId) {
// Carry over the last hit. It might need to be merged with the first hit of
// of the next posting list in the chain.
- cached_doc_hit_infos_.push_back(last_doc_hit_info);
+ cached_doc_hit_infos_.push_back(std::move(last_doc_hit_info));
}
++num_blocks_inspected_;
@@ -152,10 +186,10 @@ DocHitInfoIteratorTermMainPrefix::RetrieveMoreHits() {
}
ICING_ASSIGN_OR_RETURN(std::vector<Hit> hits,
posting_list_accessor_->GetNextHitsBatch());
- cached_doc_hit_infos_.reserve(hits.size());
- if (need_hit_term_frequency_) {
- cached_hit_term_frequency_.reserve(hits.size());
+ if (hits.empty()) {
+ all_pages_consumed_ = true;
}
+ cached_doc_hit_infos_.reserve(cached_doc_hit_infos_.size() + hits.size());
for (const Hit& hit : hits) {
// Check sections.
if (((UINT64_C(1) << hit.section_id()) & section_restrict_mask_) == 0) {
@@ -165,18 +199,9 @@ DocHitInfoIteratorTermMainPrefix::RetrieveMoreHits() {
if (!exact_ && !hit.is_in_prefix_section()) {
continue;
}
- if (cached_doc_hit_infos_.empty() ||
- hit.document_id() != cached_doc_hit_infos_.back().document_id()) {
- cached_doc_hit_infos_.push_back(DocHitInfo(hit.document_id()));
- if (need_hit_term_frequency_) {
- cached_hit_term_frequency_.push_back(Hit::TermFrequencyArray());
- }
- }
- cached_doc_hit_infos_.back().UpdateSection(hit.section_id());
- if (need_hit_term_frequency_) {
- cached_hit_term_frequency_.back()[hit.section_id()] =
- hit.term_frequency();
- }
+
+ MergeNewHitIntoCachedDocHitInfos(hit, need_hit_term_frequency_,
+ cached_doc_hit_infos_);
}
return libtextclassifier3::Status::OK;
}
diff --git a/icing/index/main/doc-hit-info-iterator-term-main.h b/icing/index/main/doc-hit-info-iterator-term-main.h
index 6a21dc3..1987e12 100644
--- a/icing/index/main/doc-hit-info-iterator-term-main.h
+++ b/icing/index/main/doc-hit-info-iterator-term-main.h
@@ -17,10 +17,14 @@
#include <cstdint>
#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/main/main-index.h"
#include "icing/index/main/posting-list-hit-accessor.h"
@@ -31,21 +35,41 @@ namespace lib {
class DocHitInfoIteratorTermMain : public DocHitInfoIterator {
public:
+ struct DocHitInfoAndTermFrequencyArray {
+ DocHitInfo doc_hit_info;
+ std::optional<Hit::TermFrequencyArray> term_frequency_array;
+
+ explicit DocHitInfoAndTermFrequencyArray() = default;
+
+ explicit DocHitInfoAndTermFrequencyArray(
+ DocHitInfo doc_hit_info_in,
+ std::optional<Hit::TermFrequencyArray> term_frequency_array_in)
+ : doc_hit_info(std::move(doc_hit_info_in)),
+ term_frequency_array(std::move(term_frequency_array_in)) {}
+ };
+
explicit DocHitInfoIteratorTermMain(MainIndex* main_index,
const std::string& term,
+ int term_start_index,
+ int unnormalized_term_length,
SectionIdMask section_restrict_mask,
bool need_hit_term_frequency)
: term_(term),
+ term_start_index_(term_start_index),
+ unnormalized_term_length_(unnormalized_term_length),
+ posting_list_accessor_(nullptr),
main_index_(main_index),
cached_doc_hit_infos_idx_(-1),
num_advance_calls_(0),
num_blocks_inspected_(0),
- next_posting_list_id_(PostingListIdentifier::kInvalid),
+ all_pages_consumed_(false),
section_restrict_mask_(section_restrict_mask),
need_hit_term_frequency_(need_hit_term_frequency) {}
libtextclassifier3::Status Advance() override;
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
int32_t GetNumBlocksInspected() const override {
return num_blocks_inspected_;
}
@@ -67,8 +91,9 @@ class DocHitInfoIteratorTermMain : public DocHitInfoIterator {
while (section_mask_copy) {
SectionId section_id = __builtin_ctzll(section_mask_copy);
if (need_hit_term_frequency_) {
- section_term_frequencies.at(section_id) = cached_hit_term_frequency_.at(
- cached_doc_hit_infos_idx_)[section_id];
+ section_term_frequencies.at(section_id) =
+ (*cached_doc_hit_infos_.at(cached_doc_hit_infos_idx_)
+ .term_frequency_array)[section_id];
}
section_mask_copy &= ~(UINT64_C(1) << section_id);
}
@@ -90,33 +115,53 @@ class DocHitInfoIteratorTermMain : public DocHitInfoIterator {
virtual libtextclassifier3::Status RetrieveMoreHits() = 0;
const std::string term_;
+
+ // The start index of the given term in the search query
+ int term_start_index_;
+ // The length of the given unnormalized term in the search query
+ int unnormalized_term_length_;
// The accessor of the posting list chain for the requested term.
std::unique_ptr<PostingListHitAccessor> posting_list_accessor_;
MainIndex* main_index_;
- // Stores hits retrieved from the index. This may only be a subset of the hits
- // that are present in the index. Current value pointed to by the Iterator is
- // tracked by cached_doc_hit_infos_idx_.
- std::vector<DocHitInfo> cached_doc_hit_infos_;
- std::vector<Hit::TermFrequencyArray> cached_hit_term_frequency_;
+ // Stores hits and optional term frequency arrays retrieved from the index.
+ // This may only be a subset of the hits that are present in the index.
+ // Current value pointed to by the Iterator is tracked by
+ // cached_doc_hit_infos_idx_.
+ std::vector<DocHitInfoAndTermFrequencyArray> cached_doc_hit_infos_;
int cached_doc_hit_infos_idx_;
+
int num_advance_calls_;
int num_blocks_inspected_;
- PostingListIdentifier next_posting_list_id_;
+ bool all_pages_consumed_;
// Mask indicating which sections hits should be considered for.
// Ex. 0000 0000 0000 0010 means that only hits from section 1 are desired.
const SectionIdMask section_restrict_mask_;
const bool need_hit_term_frequency_;
+
+ private:
+ // Remaining number of hits including the current hit.
+ // Returns -1 if cached_doc_hit_infos_idx_ is invalid.
+ int cached_doc_hit_info_count() const {
+ if (cached_doc_hit_infos_idx_ == -1 ||
+ cached_doc_hit_infos_idx_ >= cached_doc_hit_infos_.size()) {
+ return -1;
+ }
+ return cached_doc_hit_infos_.size() - cached_doc_hit_infos_idx_;
+ }
};
class DocHitInfoIteratorTermMainExact : public DocHitInfoIteratorTermMain {
public:
explicit DocHitInfoIteratorTermMainExact(MainIndex* main_index,
const std::string& term,
+ int term_start_index,
+ int unnormalized_term_length,
SectionIdMask section_restrict_mask,
bool need_hit_term_frequency)
- : DocHitInfoIteratorTermMain(main_index, term, section_restrict_mask,
- need_hit_term_frequency) {}
+ : DocHitInfoIteratorTermMain(
+ main_index, term, term_start_index, unnormalized_term_length,
+ section_restrict_mask, need_hit_term_frequency) {}
std::string ToString() const override;
@@ -128,10 +173,13 @@ class DocHitInfoIteratorTermMainPrefix : public DocHitInfoIteratorTermMain {
public:
explicit DocHitInfoIteratorTermMainPrefix(MainIndex* main_index,
const std::string& term,
+ int term_start_index,
+ int unnormalized_term_length,
SectionIdMask section_restrict_mask,
bool need_hit_term_frequency)
- : DocHitInfoIteratorTermMain(main_index, term, section_restrict_mask,
- need_hit_term_frequency) {}
+ : DocHitInfoIteratorTermMain(
+ main_index, term, term_start_index, unnormalized_term_length,
+ section_restrict_mask, need_hit_term_frequency) {}
std::string ToString() const override;
@@ -139,10 +187,6 @@ class DocHitInfoIteratorTermMainPrefix : public DocHitInfoIteratorTermMain {
libtextclassifier3::Status RetrieveMoreHits() override;
private:
- // After retrieving DocHitInfos from the index, a DocHitInfo for docid 1 and
- // "foo" and a DocHitInfo for docid 1 and "fool". These DocHitInfos should be
- // merged.
- void SortAndDedupeDocumentIds();
// Whether or not posting_list_accessor_ holds a posting list chain for
// 'term' or for a term for which 'term' is a prefix. This is necessary to
// determine whether to return hits that are not from a prefix section (hits
diff --git a/icing/index/main/main-index-merger_test.cc b/icing/index/main/main-index-merger_test.cc
index 8a2f691..37e14fc 100644
--- a/icing/index/main/main-index-merger_test.cc
+++ b/icing/index/main/main-index-merger_test.cc
@@ -45,7 +45,9 @@ class MainIndexMergerTest : public testing::Test {
std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
LiteIndex::Options options(lite_index_file_name,
- /*hit_buffer_want_merge_bytes=*/1024 * 1024);
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/1024 * 8);
ICING_ASSERT_OK_AND_ASSIGN(lite_index_,
LiteIndex::Create(options, &icing_filesystem_));
diff --git a/icing/index/main/main-index.cc b/icing/index/main/main-index.cc
index fd1630a..aae60c6 100644
--- a/icing/index/main/main-index.cc
+++ b/icing/index/main/main-index.cc
@@ -22,8 +22,9 @@
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/file/destructible-directory.h"
+#include "icing/file/posting_list/flash-index-storage.h"
#include "icing/file/posting_list/posting-list-common.h"
-#include "icing/index/main/posting-list-used-hit-serializer.h"
+#include "icing/index/main/posting-list-hit-serializer.h"
#include "icing/index/term-id-codec.h"
#include "icing/index/term-property-id.h"
#include "icing/legacy/core/icing-string-util.h"
@@ -90,6 +91,10 @@ FindTermResult FindShortestValidTermWithPrefixHits(
return result;
}
+std::string MakeFlashIndexFilename(const std::string& base_dir) {
+ return base_dir + "/main_index";
+}
+
} // namespace
MainIndex::MainIndex(const std::string& index_directory,
@@ -98,8 +103,8 @@ MainIndex::MainIndex(const std::string& index_directory,
: base_dir_(index_directory),
filesystem_(filesystem),
icing_filesystem_(icing_filesystem),
- posting_list_used_hit_serializer_(
- std::make_unique<PostingListUsedHitSerializer>()) {}
+ posting_list_hit_serializer_(
+ std::make_unique<PostingListHitSerializer>()) {}
libtextclassifier3::StatusOr<std::unique_ptr<MainIndex>> MainIndex::Create(
const std::string& index_directory, const Filesystem* filesystem,
@@ -112,16 +117,22 @@ libtextclassifier3::StatusOr<std::unique_ptr<MainIndex>> MainIndex::Create(
return main_index;
}
+/* static */ libtextclassifier3::StatusOr<int> MainIndex::ReadFlashIndexMagic(
+ const Filesystem* filesystem, const std::string& index_directory) {
+ return FlashIndexStorage::ReadHeaderMagic(
+ filesystem, MakeFlashIndexFilename(index_directory));
+}
+
// TODO(b/139087650) : Migrate off of IcingFilesystem.
libtextclassifier3::Status MainIndex::Init() {
if (!filesystem_->CreateDirectoryRecursively(base_dir_.c_str())) {
return absl_ports::InternalError("Unable to create main index directory.");
}
- std::string flash_index_file = base_dir_ + "/main_index";
+ std::string flash_index_file = MakeFlashIndexFilename(base_dir_);
ICING_ASSIGN_OR_RETURN(
FlashIndexStorage flash_index,
FlashIndexStorage::Create(flash_index_file, filesystem_,
- posting_list_used_hit_serializer_.get()));
+ posting_list_hit_serializer_.get()));
flash_index_storage_ =
std::make_unique<FlashIndexStorage>(std::move(flash_index));
@@ -168,7 +179,7 @@ MainIndex::GetAccessorForExactTerm(const std::string& term) {
"Term %s is not present in main lexicon.", term.c_str()));
}
return PostingListHitAccessor::CreateFromExisting(
- flash_index_storage_.get(), posting_list_used_hit_serializer_.get(),
+ flash_index_storage_.get(), posting_list_hit_serializer_.get(),
posting_list_id);
}
@@ -201,7 +212,7 @@ MainIndex::GetAccessorForPrefixTerm(const std::string& prefix) {
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<PostingListHitAccessor> pl_accessor,
PostingListHitAccessor::CreateFromExisting(
- flash_index_storage_.get(), posting_list_used_hit_serializer_.get(),
+ flash_index_storage_.get(), posting_list_hit_serializer_.get(),
posting_list_id));
return GetPrefixAccessorResult(std::move(pl_accessor), exact);
}
@@ -242,7 +253,7 @@ MainIndex::FindTermsByPrefix(
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<PostingListHitAccessor> pl_accessor,
PostingListHitAccessor::CreateFromExisting(
- flash_index_storage_.get(), posting_list_used_hit_serializer_.get(),
+ flash_index_storage_.get(), posting_list_hit_serializer_.get(),
posting_list_id));
ICING_ASSIGN_OR_RETURN(std::vector<Hit> hits,
pl_accessor->GetNextHitsBatch());
@@ -554,10 +565,10 @@ libtextclassifier3::Status MainIndex::AddHits(
memcpy(&backfill_posting_list_id,
main_lexicon_->GetValueAtIndex(other_tvi_main_tvi_pair.second),
sizeof(backfill_posting_list_id));
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<PostingListHitAccessor> hit_accum,
- PostingListHitAccessor::Create(
- flash_index_storage_.get(),
- posting_list_used_hit_serializer_.get()));
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListHitAccessor> hit_accum,
+ PostingListHitAccessor::Create(flash_index_storage_.get(),
+ posting_list_hit_serializer_.get()));
ICING_RETURN_IF_ERROR(
AddPrefixBackfillHits(backfill_posting_list_id, hit_accum.get()));
PostingListAccessor::FinalizeResult result =
@@ -592,16 +603,15 @@ libtextclassifier3::Status MainIndex::AddHitsForTerm(
"Valid posting list has an invalid block index!");
}
ICING_ASSIGN_OR_RETURN(
- pl_accessor,
- PostingListHitAccessor::CreateFromExisting(
- flash_index_storage_.get(), posting_list_used_hit_serializer_.get(),
- posting_list_id));
+ pl_accessor, PostingListHitAccessor::CreateFromExisting(
+ flash_index_storage_.get(),
+ posting_list_hit_serializer_.get(), posting_list_id));
} else {
// New posting list.
- ICING_ASSIGN_OR_RETURN(pl_accessor,
- PostingListHitAccessor::Create(
- flash_index_storage_.get(),
- posting_list_used_hit_serializer_.get()));
+ ICING_ASSIGN_OR_RETURN(
+ pl_accessor,
+ PostingListHitAccessor::Create(flash_index_storage_.get(),
+ posting_list_hit_serializer_.get()));
}
// 2. Backfill any hits if necessary.
@@ -631,7 +641,7 @@ libtextclassifier3::Status MainIndex::AddPrefixBackfillHits(
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<PostingListHitAccessor> backfill_accessor,
PostingListHitAccessor::CreateFromExisting(
- flash_index_storage_.get(), posting_list_used_hit_serializer_.get(),
+ flash_index_storage_.get(), posting_list_hit_serializer_.get(),
backfill_posting_list_id));
std::vector<Hit> backfill_hits;
ICING_ASSIGN_OR_RETURN(std::vector<Hit> tmp,
@@ -741,6 +751,13 @@ libtextclassifier3::StatusOr<DocumentId> MainIndex::TransferAndAddHits(
old_pl_accessor.GetNextHitsBatch());
while (!tmp.empty()) {
for (const Hit& hit : tmp) {
+ // A safety check to add robustness to the codebase, so to make sure that
+ // we never access invalid memory, in case that hit from the posting list
+ // is corrupted.
+ if (hit.document_id() < 0 ||
+ hit.document_id() >= document_id_old_to_new.size()) {
+ continue;
+ }
DocumentId new_document_id = document_id_old_to_new[hit.document_id()];
// Transfer the document id of the hit, if the document is not deleted
// or outdated.
@@ -768,11 +785,10 @@ libtextclassifier3::StatusOr<DocumentId> MainIndex::TransferAndAddHits(
return largest_document_id;
}
- ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<PostingListHitAccessor> hit_accum,
- PostingListHitAccessor::Create(
- new_index->flash_index_storage_.get(),
- new_index->posting_list_used_hit_serializer_.get()));
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<PostingListHitAccessor> hit_accum,
+ PostingListHitAccessor::Create(
+ new_index->flash_index_storage_.get(),
+ new_index->posting_list_hit_serializer_.get()));
for (auto itr = new_hits.rbegin(); itr != new_hits.rend(); ++itr) {
ICING_RETURN_IF_ERROR(hit_accum->PrependHit(*itr));
}
@@ -820,7 +836,7 @@ libtextclassifier3::Status MainIndex::TransferIndex(
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<PostingListHitAccessor> pl_accessor,
PostingListHitAccessor::CreateFromExisting(
- flash_index_storage_.get(), posting_list_used_hit_serializer_.get(),
+ flash_index_storage_.get(), posting_list_hit_serializer_.get(),
posting_list_id));
ICING_ASSIGN_OR_RETURN(
DocumentId curr_largest_document_id,
diff --git a/icing/index/main/main-index.h b/icing/index/main/main-index.h
index 70ae6f6..9e570d5 100644
--- a/icing/index/main/main-index.h
+++ b/icing/index/main/main-index.h
@@ -23,7 +23,7 @@
#include "icing/file/posting_list/flash-index-storage.h"
#include "icing/index/lite/term-id-hit-pair.h"
#include "icing/index/main/posting-list-hit-accessor.h"
-#include "icing/index/main/posting-list-used-hit-serializer.h"
+#include "icing/index/main/posting-list-hit-serializer.h"
#include "icing/index/term-id-codec.h"
#include "icing/index/term-metadata.h"
#include "icing/legacy/index/icing-dynamic-trie.h"
@@ -48,6 +48,16 @@ class MainIndex {
const std::string& index_directory, const Filesystem* filesystem,
const IcingFilesystem* icing_filesystem);
+ // Reads magic from existing flash index storage file header. We need this
+ // during Icing initialization phase to determine the version.
+ //
+ // RETURNS:
+ // - On success, a valid magic.
+ // - NOT_FOUND if the flash index doesn't exist.
+ // - INTERNAL on I/O error.
+ static libtextclassifier3::StatusOr<int> ReadFlashIndexMagic(
+ const Filesystem* filesystem, const std::string& index_directory);
+
// Get a PostingListHitAccessor that holds the posting list chain for 'term'.
//
// RETURNS:
@@ -161,7 +171,7 @@ class MainIndex {
if (main_lexicon_->Sync() && flash_index_storage_->PersistToDisk()) {
return libtextclassifier3::Status::OK;
}
- return absl_ports::InternalError("Unable to sync lite index components.");
+ return absl_ports::InternalError("Unable to sync main index components.");
}
DocumentId last_added_document_id() const {
@@ -329,8 +339,7 @@ class MainIndex {
std::string base_dir_;
const Filesystem* filesystem_;
const IcingFilesystem* icing_filesystem_;
- std::unique_ptr<PostingListUsedHitSerializer>
- posting_list_used_hit_serializer_;
+ std::unique_ptr<PostingListHitSerializer> posting_list_hit_serializer_;
std::unique_ptr<FlashIndexStorage> flash_index_storage_;
std::unique_ptr<IcingDynamicTrie> main_lexicon_;
};
diff --git a/icing/index/main/main-index_test.cc b/icing/index/main/main-index_test.cc
index 92601e7..fa96e6c 100644
--- a/icing/index/main/main-index_test.cc
+++ b/icing/index/main/main-index_test.cc
@@ -38,6 +38,7 @@ namespace lib {
namespace {
using ::testing::ElementsAre;
+using ::testing::Eq;
using ::testing::IsEmpty;
using ::testing::NiceMock;
using ::testing::Return;
@@ -52,18 +53,20 @@ std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> iterator) {
}
std::vector<DocHitInfo> GetExactHits(
- MainIndex* main_index, const std::string& term,
- SectionIdMask section_mask = kSectionIdMaskAll) {
+ MainIndex* main_index, int term_start_index, int unnormalized_term_length,
+ const std::string& term, SectionIdMask section_mask = kSectionIdMaskAll) {
auto iterator = std::make_unique<DocHitInfoIteratorTermMainExact>(
- main_index, term, section_mask, /*need_hit_term_frequency=*/true);
+ main_index, term, term_start_index, unnormalized_term_length,
+ section_mask, /*need_hit_term_frequency=*/true);
return GetHits(std::move(iterator));
}
std::vector<DocHitInfo> GetPrefixHits(
- MainIndex* main_index, const std::string& term,
- SectionIdMask section_mask = kSectionIdMaskAll) {
+ MainIndex* main_index, int term_start_index, int unnormalized_term_length,
+ const std::string& term, SectionIdMask section_mask = kSectionIdMaskAll) {
auto iterator = std::make_unique<DocHitInfoIteratorTermMainPrefix>(
- main_index, term, section_mask, /*need_hit_term_frequency=*/true);
+ main_index, term, term_start_index, unnormalized_term_length,
+ section_mask, /*need_hit_term_frequency=*/true);
return GetHits(std::move(iterator));
}
@@ -88,7 +91,9 @@ class MainIndexTest : public testing::Test {
std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
LiteIndex::Options options(lite_index_file_name,
- /*hit_buffer_want_merge_bytes=*/1024 * 1024);
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/1024 * 8);
ICING_ASSERT_OK_AND_ASSIGN(lite_index_,
LiteIndex::Create(options, &icing_filesystem_));
@@ -100,6 +105,8 @@ class MainIndexTest : public testing::Test {
}
void TearDown() override {
+ term_id_codec_.reset();
+ lite_index_.reset();
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(index_dir_.c_str()));
}
@@ -269,9 +276,12 @@ TEST_F(MainIndexTest, MergeIndexToEmpty) {
MainIndex::Create(main_index_file_name, &filesystem_,
&icing_filesystem_));
- std::vector<DocHitInfo> hits = GetExactHits(main_index.get(), "foot");
+ std::vector<DocHitInfo> hits =
+ GetExactHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "foot");
EXPECT_THAT(hits, IsEmpty());
- hits = GetPrefixHits(main_index.get(), "fo");
+ hits = GetPrefixHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "fo");
EXPECT_THAT(hits, IsEmpty());
// 3. Merge the index. The main index should contain "fool", "foot"
@@ -279,7 +289,8 @@ TEST_F(MainIndexTest, MergeIndexToEmpty) {
// should not be present because it is not a branch point.
ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
// Get hits from an exact posting list.
- hits = GetExactHits(main_index.get(), "foot");
+ hits = GetExactHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "foot");
// We should get hits for "foot" in doc1 and doc0
EXPECT_THAT(
hits,
@@ -290,7 +301,8 @@ TEST_F(MainIndexTest, MergeIndexToEmpty) {
std::vector<SectionId>{doc0_hit.section_id()})));
// Get hits from a branching point posting list. "fo" should redirect to "foo"
- hits = GetPrefixHits(main_index.get(), "fo");
+ hits = GetPrefixHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "fo");
// We should get hits for "foot" in doc1 and "fool" in doc1. We shouldn't get
// the hits for "foot" in doc0 and "fool" in doc0 and doc2 because they
// weren't hits in prefix sections.
@@ -352,7 +364,9 @@ TEST_F(MainIndexTest, MergeIndexToPreexisting) {
// - Doc4 {"four", "foul" is_in_prefix_section=true}
std::string lite_index_file_name2 = index_dir_ + "/test_file.lite-idx.index2";
LiteIndex::Options options(lite_index_file_name2,
- /*hit_buffer_want_merge_bytes=*/1024 * 1024);
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/1024 * 8);
ICING_ASSERT_OK_AND_ASSIGN(lite_index_,
LiteIndex::Create(options, &icing_filesystem_));
ICING_ASSERT_OK_AND_ASSIGN(
@@ -388,7 +402,9 @@ TEST_F(MainIndexTest, MergeIndexToPreexisting) {
// and "fall", a branch points for "fou" and backfill points for "fo".
ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
// Get hits from an exact posting list the existed before the merge.
- std::vector<DocHitInfo> hits = GetExactHits(main_index.get(), "foot");
+ std::vector<DocHitInfo> hits =
+ GetExactHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "foot");
// We should get hits for "foot" in doc3, doc1 and doc0
EXPECT_THAT(
@@ -401,7 +417,8 @@ TEST_F(MainIndexTest, MergeIndexToPreexisting) {
EqualsDocHitInfo(doc0_hit.document_id(),
std::vector<SectionId>{doc0_hit.section_id()})));
// Get hits from backfill posting list.
- hits = GetPrefixHits(main_index.get(), "fo");
+ hits = GetPrefixHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "fo");
// We should get hits for "four" and "foul" in doc4 and hits for "foot" and
// "fool" in doc1. We shouldn't get the hits for "foot" in doc0 and doc3,
// "fool" in doc0 and doc2 or the hits for "four" and "foul" in doc4 because
@@ -453,7 +470,9 @@ TEST_F(MainIndexTest, ExactRetrievedInPrefixSearch) {
// 3. Merge the lite lexicon. The main lexicon should contain "foot" and
// "foo".
ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
- std::vector<DocHitInfo> hits = GetPrefixHits(main_index.get(), "foo");
+ std::vector<DocHitInfo> hits =
+ GetPrefixHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "foo");
// We should get hits for "foo" in doc1 and doc0, but not in doc2 because it
// is not a prefix hit.
EXPECT_THAT(
@@ -502,7 +521,9 @@ TEST_F(MainIndexTest, PrefixNotRetrievedInExactSearch) {
// 3. Merge the lite lexicon. The main lexicon should contain "foot" and
// "foo".
ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
- std::vector<DocHitInfo> hits = GetExactHits(main_index.get(), "foo");
+ std::vector<DocHitInfo> hits =
+ GetExactHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "foo");
// We should get hits for "foo" in doc2 and doc1, but not in doc0 because it
// is not an exact hit.
@@ -515,30 +536,35 @@ TEST_F(MainIndexTest, PrefixNotRetrievedInExactSearch) {
std::vector<SectionId>{doc1_hit.section_id()})));
}
-TEST_F(MainIndexTest, SearchChainedPostingLists) {
+TEST_F(MainIndexTest,
+ SearchChainedPostingListsShouldMergeSectionsAndTermFrequency) {
// Index 2048 document with 3 hits in each document. When merged into the main
// index, this will 1) lead to a chained posting list and 2) split at least
// one document's hits across multiple posting lists.
+ const std::string term = "foot";
+
ICING_ASSERT_OK_AND_ASSIGN(
uint32_t tvi,
- lite_index_->InsertTerm("foot", TermMatchType::EXACT_ONLY, kNamespace0));
+ lite_index_->InsertTerm(term, TermMatchType::EXACT_ONLY, kNamespace0));
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
for (DocumentId document_id = 0; document_id < 2048; ++document_id) {
- Hit doc_hit0(/*section_id=*/0, /*document_id=*/document_id,
- Hit::kDefaultTermFrequency,
- /*is_in_prefix_section=*/false);
+ Hit::TermFrequency term_frequency = static_cast<Hit::TermFrequency>(
+ document_id % Hit::kMaxTermFrequency + 1);
+ Hit doc_hit0(
+ /*section_id=*/0, /*document_id=*/document_id, term_frequency,
+ /*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc_hit0));
- Hit doc_hit1(/*section_id=*/1, /*document_id=*/document_id,
- Hit::kDefaultTermFrequency,
- /*is_in_prefix_section=*/false);
+ Hit doc_hit1(
+ /*section_id=*/1, /*document_id=*/document_id, term_frequency,
+ /*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc_hit1));
- Hit doc_hit2(/*section_id=*/2, /*document_id=*/document_id,
- Hit::kDefaultTermFrequency,
- /*is_in_prefix_section=*/false);
+ Hit doc_hit2(
+ /*section_id=*/2, /*document_id=*/document_id, term_frequency,
+ /*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc_hit2));
}
@@ -552,13 +578,35 @@ TEST_F(MainIndexTest, SearchChainedPostingLists) {
// 3. Merge the lite index.
ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
// Get hits for all documents containing "foot" - which should be all of them.
- std::vector<DocHitInfo> hits = GetExactHits(main_index.get(), "foot");
- EXPECT_THAT(hits, SizeIs(2048));
- EXPECT_THAT(hits.front(),
- EqualsDocHitInfo(2047, std::vector<SectionId>{0, 1, 2}));
- EXPECT_THAT(hits.back(),
- EqualsDocHitInfo(0, std::vector<SectionId>{0, 1, 2}));
+ auto iterator = std::make_unique<DocHitInfoIteratorTermMainExact>(
+ main_index.get(), term, /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ /*need_hit_term_frequency=*/true);
+
+ DocumentId expected_document_id = 2047;
+ while (iterator->Advance().ok()) {
+ EXPECT_THAT(iterator->doc_hit_info(),
+ EqualsDocHitInfo(expected_document_id,
+ std::vector<SectionId>{0, 1, 2}));
+
+ std::vector<TermMatchInfo> matched_terms_stats;
+ iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+
+ Hit::TermFrequency expected_term_frequency =
+ static_cast<Hit::TermFrequency>(
+ expected_document_id % Hit::kMaxTermFrequency + 1);
+ ASSERT_THAT(matched_terms_stats, SizeIs(1));
+ EXPECT_THAT(matched_terms_stats[0].term, Eq(term));
+ EXPECT_THAT(matched_terms_stats[0].term_frequencies[0],
+ Eq(expected_term_frequency));
+ EXPECT_THAT(matched_terms_stats[0].term_frequencies[1],
+ Eq(expected_term_frequency));
+ EXPECT_THAT(matched_terms_stats[0].term_frequencies[2],
+ Eq(expected_term_frequency));
+ --expected_document_id;
+ }
+ EXPECT_THAT(expected_document_id, Eq(-1));
}
TEST_F(MainIndexTest, MergeIndexBackfilling) {
@@ -588,7 +636,9 @@ TEST_F(MainIndexTest, MergeIndexBackfilling) {
// - Doc1 {"foot" is_in_prefix_section=false}
std::string lite_index_file_name2 = index_dir_ + "/test_file.lite-idx.index2";
LiteIndex::Options options(lite_index_file_name2,
- /*hit_buffer_want_merge_bytes=*/1024 * 1024);
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/1024 * 8);
ICING_ASSERT_OK_AND_ASSIGN(lite_index_,
LiteIndex::Create(options, &icing_filesystem_));
ICING_ASSERT_OK_AND_ASSIGN(
@@ -605,17 +655,55 @@ TEST_F(MainIndexTest, MergeIndexBackfilling) {
// and a backfill point for "foo".
ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
// Get hits from an exact posting list the existed before the merge.
- std::vector<DocHitInfo> hits = GetExactHits(main_index.get(), "foo");
+ std::vector<DocHitInfo> hits =
+ GetExactHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "foo");
EXPECT_THAT(hits, IsEmpty());
// Get hits from backfill posting list.
- hits = GetPrefixHits(main_index.get(), "foo");
+ hits = GetPrefixHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "foo");
// We should get a hit for "fool" in doc0.
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfo(
doc0_hit.document_id(),
std::vector<SectionId>{doc0_hit.section_id()})));
}
+TEST_F(MainIndexTest, OneHitInTheFirstPageForTwoPagesMainIndex) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm("foo", TermMatchType::EXACT_ONLY, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ SectionId section_id = 0;
+ // Based on debugging logs, 2038 documents in the following setting will
+ // result in two pages in the posting list chain, and the first page only
+ // contains one hit.
+ uint32_t num_docs = 2038;
+ for (DocumentId document_id = 0; document_id < num_docs; ++document_id) {
+ Hit doc_hit(section_id, document_id, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit));
+ }
+
+ std::string main_index_file_name = index_dir_ + "/test_file.idx.index";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<MainIndex> main_index,
+ MainIndex::Create(main_index_file_name, &filesystem_,
+ &icing_filesystem_));
+
+ ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
+ std::vector<DocHitInfo> hits =
+ GetExactHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "foo");
+ ASSERT_THAT(hits, SizeIs(num_docs));
+ for (DocumentId document_id = num_docs - 1; document_id >= 0; --document_id) {
+ ASSERT_THAT(
+ hits[num_docs - 1 - document_id],
+ EqualsDocHitInfo(document_id, std::vector<SectionId>{section_id}));
+ }
+}
+
} // namespace
} // namespace lib
diff --git a/icing/index/main/posting-list-hit-accessor.cc b/icing/index/main/posting-list-hit-accessor.cc
index 30b2410..3d5476b 100644
--- a/icing/index/main/posting-list-hit-accessor.cc
+++ b/icing/index/main/posting-list-hit-accessor.cc
@@ -20,10 +20,9 @@
#include "icing/absl_ports/canonical_errors.h"
#include "icing/file/posting_list/flash-index-storage.h"
-#include "icing/file/posting_list/index-block.h"
#include "icing/file/posting_list/posting-list-identifier.h"
#include "icing/file/posting_list/posting-list-used.h"
-#include "icing/index/main/posting-list-used-hit-serializer.h"
+#include "icing/index/main/posting-list-hit-serializer.h"
#include "icing/util/status-macros.h"
namespace icing {
@@ -31,25 +30,20 @@ namespace lib {
libtextclassifier3::StatusOr<std::unique_ptr<PostingListHitAccessor>>
PostingListHitAccessor::Create(FlashIndexStorage *storage,
- PostingListUsedHitSerializer *serializer) {
- uint32_t max_posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
- storage->block_size(), serializer->GetDataTypeBytes());
- std::unique_ptr<uint8_t[]> posting_list_buffer_array =
- std::make_unique<uint8_t[]>(max_posting_list_bytes);
- ICING_ASSIGN_OR_RETURN(
- PostingListUsed posting_list_buffer,
- PostingListUsed::CreateFromUnitializedRegion(
- serializer, posting_list_buffer_array.get(), max_posting_list_bytes));
+ PostingListHitSerializer *serializer) {
+ uint32_t max_posting_list_bytes = storage->max_posting_list_bytes();
+ ICING_ASSIGN_OR_RETURN(PostingListUsed in_memory_posting_list,
+ PostingListUsed::CreateFromUnitializedRegion(
+ serializer, max_posting_list_bytes));
return std::unique_ptr<PostingListHitAccessor>(new PostingListHitAccessor(
- storage, serializer, std::move(posting_list_buffer_array),
- std::move(posting_list_buffer)));
+ storage, serializer, std::move(in_memory_posting_list)));
}
libtextclassifier3::StatusOr<std::unique_ptr<PostingListHitAccessor>>
PostingListHitAccessor::CreateFromExisting(
- FlashIndexStorage *storage, PostingListUsedHitSerializer *serializer,
+ FlashIndexStorage *storage, PostingListHitSerializer *serializer,
PostingListIdentifier existing_posting_list_id) {
- // Our posting_list_buffer_ will start as empty.
+ // Our in_memory_posting_list_ will start as empty.
ICING_ASSIGN_OR_RETURN(std::unique_ptr<PostingListHitAccessor> pl_accessor,
Create(storage, serializer));
ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
@@ -73,20 +67,23 @@ PostingListHitAccessor::GetNextHitsBatch() {
ICING_ASSIGN_OR_RETURN(
std::vector<Hit> batch,
serializer_->GetHits(&preexisting_posting_list_->posting_list));
- uint32_t next_block_index;
+ uint32_t next_block_index = kInvalidBlockIndex;
// Posting lists will only be chained when they are max-sized, in which case
- // block.next_block_index() will point to the next block for the next posting
- // list. Otherwise, block.next_block_index() can be kInvalidBlockIndex or be
- // used to point to the next free list block, which is not relevant here.
- if (preexisting_posting_list_->block.max_num_posting_lists() == 1) {
- next_block_index = preexisting_posting_list_->block.next_block_index();
- } else {
- next_block_index = kInvalidBlockIndex;
+ // next_block_index will point to the next block for the next posting list.
+ // Otherwise, next_block_index can be kInvalidBlockIndex or be used to point
+ // to the next free list block, which is not relevant here.
+ if (preexisting_posting_list_->posting_list.size_in_bytes() ==
+ storage_->max_posting_list_bytes()) {
+ next_block_index = preexisting_posting_list_->next_block_index;
}
+
if (next_block_index != kInvalidBlockIndex) {
+ // Since we only have to deal with next block for max-sized posting list
+ // block, max_num_posting_lists is 1 and posting_list_index_bits is
+ // BitsToStore(1).
PostingListIdentifier next_posting_list_id(
next_block_index, /*posting_list_index=*/0,
- preexisting_posting_list_->block.posting_list_index_bits());
+ /*posting_list_index_bits=*/BitsToStore(1));
ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
storage_->GetPostingList(next_posting_list_id));
preexisting_posting_list_ =
@@ -101,7 +98,7 @@ PostingListHitAccessor::GetNextHitsBatch() {
libtextclassifier3::Status PostingListHitAccessor::PrependHit(const Hit &hit) {
PostingListUsed &active_pl = (preexisting_posting_list_ != nullptr)
? preexisting_posting_list_->posting_list
- : posting_list_buffer_;
+ : in_memory_posting_list_;
libtextclassifier3::Status status = serializer_->PrependHit(&active_pl, hit);
if (!absl_ports::IsResourceExhausted(status)) {
return status;
@@ -110,16 +107,16 @@ libtextclassifier3::Status PostingListHitAccessor::PrependHit(const Hit &hit) {
// we need to either move those hits to a larger posting list or flush this
// posting list and create another max-sized posting list in the chain.
if (preexisting_posting_list_ != nullptr) {
- FlushPreexistingPostingList();
+ ICING_RETURN_IF_ERROR(FlushPreexistingPostingList());
} else {
ICING_RETURN_IF_ERROR(FlushInMemoryPostingList());
}
- // Re-add hit. Should always fit since we just cleared posting_list_buffer_.
- // It's fine to explicitly reference posting_list_buffer_ here because there's
- // no way of reaching this line while preexisting_posting_list_ is still in
- // use.
- return serializer_->PrependHit(&posting_list_buffer_, hit);
+ // Re-add hit. Should always fit since we just cleared
+ // in_memory_posting_list_. It's fine to explicitly reference
+ // in_memory_posting_list_ here because there's no way of reaching this line
+ // while preexisting_posting_list_ is still in use.
+ return serializer_->PrependHit(&in_memory_posting_list_, hit);
}
} // namespace lib
diff --git a/icing/index/main/posting-list-hit-accessor.h b/icing/index/main/posting-list-hit-accessor.h
index 953f2bd..7b72437 100644
--- a/icing/index/main/posting-list-hit-accessor.h
+++ b/icing/index/main/posting-list-hit-accessor.h
@@ -26,7 +26,7 @@
#include "icing/file/posting_list/posting-list-identifier.h"
#include "icing/file/posting_list/posting-list-used.h"
#include "icing/index/hit/hit.h"
-#include "icing/index/main/posting-list-used-hit-serializer.h"
+#include "icing/index/main/posting-list-hit-serializer.h"
namespace icing {
namespace lib {
@@ -43,7 +43,7 @@ class PostingListHitAccessor : public PostingListAccessor {
// - On success, a valid unique_ptr instance of PostingListHitAccessor
// - INVALID_ARGUMENT error if storage has an invalid block_size.
static libtextclassifier3::StatusOr<std::unique_ptr<PostingListHitAccessor>>
- Create(FlashIndexStorage* storage, PostingListUsedHitSerializer* serializer);
+ Create(FlashIndexStorage* storage, PostingListHitSerializer* serializer);
// Create a PostingListHitAccessor with an existing posting list identified by
// existing_posting_list_id.
@@ -57,10 +57,10 @@ class PostingListHitAccessor : public PostingListAccessor {
// - INVALID_ARGUMENT if storage has an invalid block_size.
static libtextclassifier3::StatusOr<std::unique_ptr<PostingListHitAccessor>>
CreateFromExisting(FlashIndexStorage* storage,
- PostingListUsedHitSerializer* serializer,
+ PostingListHitSerializer* serializer,
PostingListIdentifier existing_posting_list_id);
- PostingListUsedSerializer* GetSerializer() override { return serializer_; }
+ PostingListSerializer* GetSerializer() override { return serializer_; }
// Retrieve the next batch of hits for the posting list chain
//
@@ -86,15 +86,13 @@ class PostingListHitAccessor : public PostingListAccessor {
libtextclassifier3::Status PrependHit(const Hit& hit);
private:
- explicit PostingListHitAccessor(
- FlashIndexStorage* storage, PostingListUsedHitSerializer* serializer,
- std::unique_ptr<uint8_t[]> posting_list_buffer_array,
- PostingListUsed posting_list_buffer)
- : PostingListAccessor(storage, std::move(posting_list_buffer_array),
- std::move(posting_list_buffer)),
+ explicit PostingListHitAccessor(FlashIndexStorage* storage,
+ PostingListHitSerializer* serializer,
+ PostingListUsed in_memory_posting_list)
+ : PostingListAccessor(storage, std::move(in_memory_posting_list)),
serializer_(serializer) {}
- PostingListUsedHitSerializer* serializer_; // Does not own.
+ PostingListHitSerializer* serializer_; // Does not own.
};
} // namespace lib
diff --git a/icing/index/main/posting-list-hit-accessor_test.cc b/icing/index/main/posting-list-hit-accessor_test.cc
index fcdd580..1127814 100644
--- a/icing/index/main/posting-list-hit-accessor_test.cc
+++ b/icing/index/main/posting-list-hit-accessor_test.cc
@@ -24,7 +24,7 @@
#include "icing/file/posting_list/posting-list-identifier.h"
#include "icing/file/posting_list/posting-list-used.h"
#include "icing/index/hit/hit.h"
-#include "icing/index/main/posting-list-used-hit-serializer.h"
+#include "icing/index/main/posting-list-hit-serializer.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/hit-test-utils.h"
#include "icing/testing/tmp-directory.h"
@@ -49,7 +49,7 @@ class PostingListHitAccessorTest : public ::testing::Test {
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(test_dir_.c_str()));
- serializer_ = std::make_unique<PostingListUsedHitSerializer>();
+ serializer_ = std::make_unique<PostingListHitSerializer>();
ICING_ASSERT_OK_AND_ASSIGN(
FlashIndexStorage flash_index_storage,
@@ -67,7 +67,7 @@ class PostingListHitAccessorTest : public ::testing::Test {
Filesystem filesystem_;
std::string test_dir_;
std::string file_name_;
- std::unique_ptr<PostingListUsedHitSerializer> serializer_;
+ std::unique_ptr<PostingListHitSerializer> serializer_;
std::unique_ptr<FlashIndexStorage> flash_index_storage_;
};
@@ -93,7 +93,7 @@ TEST_F(PostingListHitAccessorTest, HitsAddAndRetrieveProperly) {
flash_index_storage_->GetPostingList(result.id));
EXPECT_THAT(serializer_->GetHits(&pl_holder.posting_list),
IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend())));
- EXPECT_THAT(pl_holder.block.next_block_index(), Eq(kInvalidBlockIndex));
+ EXPECT_THAT(pl_holder.next_block_index, Eq(kInvalidBlockIndex));
}
TEST_F(PostingListHitAccessorTest, PreexistingPLKeepOnSameBlock) {
@@ -223,7 +223,7 @@ TEST_F(PostingListHitAccessorTest, MultiBlockChainsBlocksProperly) {
ElementsAreArray(hits1.rbegin(), first_block_hits_start));
// Now retrieve all of the hits that were on the first block.
- uint32_t first_block_id = pl_holder.block.next_block_index();
+ uint32_t first_block_id = pl_holder.next_block_index;
EXPECT_THAT(first_block_id, Eq(1));
PostingListIdentifier pl_id(first_block_id, /*posting_list_index=*/0,
@@ -290,7 +290,7 @@ TEST_F(PostingListHitAccessorTest, PreexistingMultiBlockReusesBlocksProperly) {
ElementsAreArray(hits1.rbegin(), first_block_hits_start));
// Now retrieve all of the hits that were on the first block.
- uint32_t first_block_id = pl_holder.block.next_block_index();
+ uint32_t first_block_id = pl_holder.next_block_index;
EXPECT_THAT(first_block_id, Eq(1));
PostingListIdentifier pl_id(first_block_id, /*posting_list_index=*/0,
diff --git a/icing/index/main/posting-list-used-hit-serializer.cc b/icing/index/main/posting-list-hit-serializer.cc
index a163188..00c70e9 100644
--- a/icing/index/main/posting-list-used-hit-serializer.cc
+++ b/icing/index/main/posting-list-hit-serializer.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/index/main/posting-list-used-hit-serializer.h"
+#include "icing/index/main/posting-list-hit-serializer.h"
#include <cstdint>
#include <cstring>
@@ -37,7 +37,7 @@ uint32_t GetTermFrequencyByteSize(const Hit& hit) {
} // namespace
-uint32_t PostingListUsedHitSerializer::GetBytesUsed(
+uint32_t PostingListHitSerializer::GetBytesUsed(
const PostingListUsed* posting_list_used) const {
// The special hits will be included if they represent actual hits. If they
// represent the hit offset or the invalid hit sentinel, they are not
@@ -46,7 +46,7 @@ uint32_t PostingListUsedHitSerializer::GetBytesUsed(
GetStartByteOffset(posting_list_used);
}
-uint32_t PostingListUsedHitSerializer::GetMinPostingListSizeToFit(
+uint32_t PostingListHitSerializer::GetMinPostingListSizeToFit(
const PostingListUsed* posting_list_used) const {
if (IsFull(posting_list_used) || IsAlmostFull(posting_list_used)) {
// If in either the FULL state or ALMOST_FULL state, this posting list *is*
@@ -65,15 +65,14 @@ uint32_t PostingListUsedHitSerializer::GetMinPostingListSizeToFit(
return GetBytesUsed(posting_list_used) + sizeof(Hit);
}
-void PostingListUsedHitSerializer::Clear(
- PostingListUsed* posting_list_used) const {
+void PostingListHitSerializer::Clear(PostingListUsed* posting_list_used) const {
// Safe to ignore return value because posting_list_used->size_in_bytes() is
// a valid argument.
SetStartByteOffset(posting_list_used,
/*offset=*/posting_list_used->size_in_bytes());
}
-libtextclassifier3::Status PostingListUsedHitSerializer::MoveFrom(
+libtextclassifier3::Status PostingListHitSerializer::MoveFrom(
PostingListUsed* dst, PostingListUsed* src) const {
ICING_RETURN_ERROR_IF_NULL(dst);
ICING_RETURN_ERROR_IF_NULL(src);
@@ -128,7 +127,7 @@ libtextclassifier3::Status PostingListUsedHitSerializer::MoveFrom(
return libtextclassifier3::Status::OK;
}
-uint32_t PostingListUsedHitSerializer::GetPadEnd(
+uint32_t PostingListHitSerializer::GetPadEnd(
const PostingListUsed* posting_list_used, uint32_t offset) const {
Hit::Value pad;
uint32_t pad_end = offset;
@@ -144,9 +143,8 @@ uint32_t PostingListUsedHitSerializer::GetPadEnd(
return pad_end;
}
-bool PostingListUsedHitSerializer::PadToEnd(PostingListUsed* posting_list_used,
- uint32_t start,
- uint32_t end) const {
+bool PostingListHitSerializer::PadToEnd(PostingListUsed* posting_list_used,
+ uint32_t start, uint32_t end) const {
if (end > posting_list_used->size_in_bytes()) {
ICING_LOG(ERROR) << "Cannot pad a region that ends after size!";
return false;
@@ -156,7 +154,7 @@ bool PostingListUsedHitSerializer::PadToEnd(PostingListUsed* posting_list_used,
return true;
}
-libtextclassifier3::Status PostingListUsedHitSerializer::PrependHitToAlmostFull(
+libtextclassifier3::Status PostingListHitSerializer::PrependHitToAlmostFull(
PostingListUsed* posting_list_used, const Hit& hit) const {
// Get delta between first hit and the new hit. Try to fit delta
// in the padded area and put new hit at the special position 1.
@@ -199,7 +197,7 @@ libtextclassifier3::Status PostingListUsedHitSerializer::PrependHitToAlmostFull(
return libtextclassifier3::Status::OK;
}
-void PostingListUsedHitSerializer::PrependHitToEmpty(
+void PostingListHitSerializer::PrependHitToEmpty(
PostingListUsed* posting_list_used, const Hit& hit) const {
// First hit to be added. Just add verbatim, no compression.
if (posting_list_used->size_in_bytes() == kSpecialHitsSize) {
@@ -221,7 +219,7 @@ void PostingListUsedHitSerializer::PrependHitToEmpty(
}
}
-libtextclassifier3::Status PostingListUsedHitSerializer::PrependHitToNotFull(
+libtextclassifier3::Status PostingListHitSerializer::PrependHitToNotFull(
PostingListUsed* posting_list_used, const Hit& hit, uint32_t offset) const {
// First hit in compressed area. It is uncompressed. See if delta
// between the first hit and new hit will still fit in the
@@ -315,7 +313,7 @@ libtextclassifier3::Status PostingListUsedHitSerializer::PrependHitToNotFull(
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status PostingListUsedHitSerializer::PrependHit(
+libtextclassifier3::Status PostingListHitSerializer::PrependHit(
PostingListUsed* posting_list_used, const Hit& hit) const {
static_assert(sizeof(Hit::Value) <= sizeof(uint64_t),
"Hit::Value cannot be larger than 8 bytes because the delta "
@@ -343,14 +341,14 @@ libtextclassifier3::Status PostingListUsedHitSerializer::PrependHit(
}
libtextclassifier3::StatusOr<std::vector<Hit>>
-PostingListUsedHitSerializer::GetHits(
+PostingListHitSerializer::GetHits(
const PostingListUsed* posting_list_used) const {
std::vector<Hit> hits_out;
ICING_RETURN_IF_ERROR(GetHits(posting_list_used, &hits_out));
return hits_out;
}
-libtextclassifier3::Status PostingListUsedHitSerializer::GetHits(
+libtextclassifier3::Status PostingListHitSerializer::GetHits(
const PostingListUsed* posting_list_used,
std::vector<Hit>* hits_out) const {
return GetHitsInternal(posting_list_used,
@@ -358,7 +356,7 @@ libtextclassifier3::Status PostingListUsedHitSerializer::GetHits(
/*pop=*/false, hits_out);
}
-libtextclassifier3::Status PostingListUsedHitSerializer::PopFrontHits(
+libtextclassifier3::Status PostingListHitSerializer::PopFrontHits(
PostingListUsed* posting_list_used, uint32_t num_hits) const {
if (num_hits == 1 && IsFull(posting_list_used)) {
// The PL is in full status which means that we save 2 uncompressed hits in
@@ -429,7 +427,7 @@ libtextclassifier3::Status PostingListUsedHitSerializer::PopFrontHits(
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status PostingListUsedHitSerializer::GetHitsInternal(
+libtextclassifier3::Status PostingListHitSerializer::GetHitsInternal(
const PostingListUsed* posting_list_used, uint32_t limit, bool pop,
std::vector<Hit>* out) const {
// Put current uncompressed val here.
@@ -563,7 +561,7 @@ libtextclassifier3::Status PostingListUsedHitSerializer::GetHitsInternal(
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::StatusOr<Hit> PostingListUsedHitSerializer::GetSpecialHit(
+libtextclassifier3::StatusOr<Hit> PostingListHitSerializer::GetSpecialHit(
const PostingListUsed* posting_list_used, uint32_t index) const {
static_assert(sizeof(Hit::Value) >= sizeof(uint32_t), "HitTooSmall");
if (index >= kNumSpecialData || index < 0) {
@@ -576,8 +574,9 @@ libtextclassifier3::StatusOr<Hit> PostingListUsedHitSerializer::GetSpecialHit(
return val;
}
-bool PostingListUsedHitSerializer::SetSpecialHit(
- PostingListUsed* posting_list_used, uint32_t index, const Hit& val) const {
+bool PostingListHitSerializer::SetSpecialHit(PostingListUsed* posting_list_used,
+ uint32_t index,
+ const Hit& val) const {
if (index >= kNumSpecialData || index < 0) {
ICING_LOG(ERROR) << "Special hits only exist at indices 0 and 1";
return false;
@@ -587,7 +586,7 @@ bool PostingListUsedHitSerializer::SetSpecialHit(
return true;
}
-bool PostingListUsedHitSerializer::IsPostingListValid(
+bool PostingListHitSerializer::IsPostingListValid(
const PostingListUsed* posting_list_used) const {
if (IsAlmostFull(posting_list_used)) {
// Special Hit 1 should hold a Hit. Calling ValueOrDie is safe because we
@@ -617,7 +616,7 @@ bool PostingListUsedHitSerializer::IsPostingListValid(
return true;
}
-uint32_t PostingListUsedHitSerializer::GetStartByteOffset(
+uint32_t PostingListHitSerializer::GetStartByteOffset(
const PostingListUsed* posting_list_used) const {
if (IsFull(posting_list_used)) {
return 0;
@@ -630,7 +629,7 @@ uint32_t PostingListUsedHitSerializer::GetStartByteOffset(
}
}
-bool PostingListUsedHitSerializer::SetStartByteOffset(
+bool PostingListHitSerializer::SetStartByteOffset(
PostingListUsed* posting_list_used, uint32_t offset) const {
if (offset > posting_list_used->size_in_bytes()) {
ICING_LOG(ERROR) << "offset cannot be a value greater than size "
@@ -665,7 +664,7 @@ bool PostingListUsedHitSerializer::SetStartByteOffset(
}
libtextclassifier3::StatusOr<uint32_t>
-PostingListUsedHitSerializer::PrependHitUncompressed(
+PostingListHitSerializer::PrependHitUncompressed(
PostingListUsed* posting_list_used, const Hit& hit, uint32_t offset) const {
if (hit.has_term_frequency()) {
if (offset < kSpecialHitsSize + sizeof(Hit)) {
@@ -689,7 +688,7 @@ PostingListUsedHitSerializer::PrependHitUncompressed(
}
libtextclassifier3::Status
-PostingListUsedHitSerializer::ConsumeTermFrequencyIfPresent(
+PostingListHitSerializer::ConsumeTermFrequencyIfPresent(
const PostingListUsed* posting_list_used, Hit* hit,
uint32_t* offset) const {
if (!hit->has_term_frequency()) {
diff --git a/icing/index/main/posting-list-used-hit-serializer.h b/icing/index/main/posting-list-hit-serializer.h
index 1a3cbc2..975b05a 100644
--- a/icing/index/main/posting-list-used-hit-serializer.h
+++ b/icing/index/main/posting-list-hit-serializer.h
@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#ifndef ICING_INDEX_MAIN_POSTING_LIST_USED_HIT_SERIALIZER_H_
-#define ICING_INDEX_MAIN_POSTING_LIST_USED_HIT_SERIALIZER_H_
+#ifndef ICING_INDEX_MAIN_POSTING_LIST_HIT_SERIALIZER_H_
+#define ICING_INDEX_MAIN_POSTING_LIST_HIT_SERIALIZER_H_
#include <cstdint>
#include <vector>
@@ -28,8 +28,8 @@ namespace icing {
namespace lib {
// A serializer class to serialize hits to PostingListUsed. Layout described in
-// comments in posting-list-used-hit-serializer.cc.
-class PostingListUsedHitSerializer : public PostingListUsedSerializer {
+// comments in posting-list-hit-serializer.cc.
+class PostingListHitSerializer : public PostingListSerializer {
public:
static constexpr uint32_t kSpecialHitsSize = kNumSpecialData * sizeof(Hit);
@@ -312,7 +312,7 @@ class PostingListUsedHitSerializer : public PostingListUsedSerializer {
// Inlined functions. Implementation details below. Avert eyes!
template <class T, Hit (*GetHit)(const T&)>
-uint32_t PostingListUsedHitSerializer::PrependHitArray(
+uint32_t PostingListHitSerializer::PrependHitArray(
PostingListUsed* posting_list_used, const T* array, uint32_t num_hits,
bool keep_prepended) const {
if (!IsPostingListValid(posting_list_used)) {
@@ -339,4 +339,4 @@ uint32_t PostingListUsedHitSerializer::PrependHitArray(
} // namespace lib
} // namespace icing
-#endif // ICING_INDEX_MAIN_POSTING_LIST_USED_HIT_SERIALIZER_H_
+#endif // ICING_INDEX_MAIN_POSTING_LIST_HIT_SERIALIZER_H_
diff --git a/icing/index/main/posting-list-used-hit-serializer_test.cc b/icing/index/main/posting-list-hit-serializer_test.cc
index 9ecb7ec..ffd8166 100644
--- a/icing/index/main/posting-list-used-hit-serializer_test.cc
+++ b/icing/index/main/posting-list-hit-serializer_test.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/index/main/posting-list-used-hit-serializer.h"
+#include "icing/index/main/posting-list-hit-serializer.h"
#include <cstdint>
#include <deque>
@@ -47,17 +47,15 @@ struct HitElt {
Hit hit;
};
-TEST(PostingListUsedHitSerializerTest, PostingListUsedPrependHitNotFull) {
- PostingListUsedHitSerializer serializer;
+TEST(PostingListHitSerializerTest, PostingListUsedPrependHitNotFull) {
+ PostingListHitSerializer serializer;
static const int kNumHits = 2551;
static const size_t kHitsSize = kNumHits * sizeof(Hit);
- std::unique_ptr<char[]> hits_buf = std::make_unique<char[]>(kHitsSize);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf.get()), kHitsSize));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, kHitsSize));
// Make used.
Hit hit0(/*section_id=*/0, 0, /*term_frequency=*/56);
@@ -98,15 +96,13 @@ TEST(PostingListUsedHitSerializerTest, PostingListUsedPrependHitNotFull) {
IsOkAndHolds(ElementsAre(hit3, hit2, hit1, hit0)));
}
-TEST(PostingListUsedHitSerializerTest, PostingListUsedPrependHitAlmostFull) {
- PostingListUsedHitSerializer serializer;
+TEST(PostingListHitSerializerTest, PostingListUsedPrependHitAlmostFull) {
+ PostingListHitSerializer serializer;
int size = 2 * serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> hits_buf = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
// Fill up the compressed region.
// Transitions:
@@ -168,17 +164,13 @@ TEST(PostingListUsedHitSerializerTest, PostingListUsedPrependHitAlmostFull) {
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
-TEST(PostingListUsedHitSerializerTest, PostingListUsedMinSize) {
- PostingListUsedHitSerializer serializer;
-
- std::unique_ptr<char[]> hits_buf =
- std::make_unique<char[]>(serializer.GetMinPostingListSize());
+TEST(PostingListHitSerializerTest, PostingListUsedMinSize) {
+ PostingListHitSerializer serializer;
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf.get()),
- serializer.GetMinPostingListSize()));
+ &serializer, serializer.GetMinPostingListSize()));
// PL State: EMPTY
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(0));
EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(IsEmpty()));
@@ -216,19 +208,15 @@ TEST(PostingListUsedHitSerializerTest, PostingListUsedMinSize) {
IsOkAndHolds(ElementsAre(hit1, hit0)));
}
-TEST(PostingListUsedHitSerializerTest,
+TEST(PostingListHitSerializerTest,
PostingListPrependHitArrayMinSizePostingList) {
- PostingListUsedHitSerializer serializer;
-
- constexpr int kFinalSize = 1025;
- std::unique_ptr<char[]> hits_buf = std::make_unique<char[]>(kFinalSize);
+ PostingListHitSerializer serializer;
// Min Size = 10
int size = serializer.GetMinPostingListSize();
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<HitElt> hits_in;
hits_in.emplace_back(Hit(1, 0, Hit::kDefaultTermFrequency));
@@ -265,16 +253,14 @@ TEST(PostingListUsedHitSerializerTest,
IsOkAndHolds(ElementsAreArray(hits_pushed)));
}
-TEST(PostingListUsedHitSerializerTest, PostingListPrependHitArrayPostingList) {
- PostingListUsedHitSerializer serializer;
+TEST(PostingListHitSerializerTest, PostingListPrependHitArrayPostingList) {
+ PostingListHitSerializer serializer;
// Size = 30
int size = 3 * serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> hits_buf = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<HitElt> hits_in;
hits_in.emplace_back(Hit(1, 0, Hit::kDefaultTermFrequency));
@@ -433,8 +419,8 @@ TEST(PostingListUsedHitSerializerTest, PostingListPrependHitArrayPostingList) {
IsOkAndHolds(ElementsAreArray(hits_pushed)));
}
-TEST(PostingListUsedHitSerializerTest, PostingListPrependHitArrayTooManyHits) {
- PostingListUsedHitSerializer serializer;
+TEST(PostingListHitSerializerTest, PostingListPrependHitArrayTooManyHits) {
+ PostingListHitSerializer serializer;
static constexpr int kNumHits = 128;
static constexpr int kDeltaSize = 1;
@@ -442,8 +428,6 @@ TEST(PostingListUsedHitSerializerTest, PostingListPrependHitArrayTooManyHits) {
static constexpr size_t kHitsSize =
((kNumHits * (kDeltaSize + kTermFrequencySize)) / 5) * 5;
- std::unique_ptr<char[]> hits_buf = std::make_unique<char[]>(kHitsSize);
-
// Create an array with one too many hits
std::vector<Hit> hits_in_too_many =
CreateHits(kNumHits + 1, /*desired_byte_length=*/1);
@@ -454,8 +438,7 @@ TEST(PostingListUsedHitSerializerTest, PostingListPrependHitArrayTooManyHits) {
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf.get()),
- serializer.GetMinPostingListSize()));
+ &serializer, serializer.GetMinPostingListSize()));
// PrependHitArray should fail because hit_elts_in_too_many is far too large
// for the minimum size pl.
@@ -467,8 +450,7 @@ TEST(PostingListUsedHitSerializerTest, PostingListPrependHitArrayTooManyHits) {
ICING_ASSERT_OK_AND_ASSIGN(
pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf.get()), kHitsSize));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, kHitsSize));
// PrependHitArray should fail because hit_elts_in_too_many is one hit too
// large for this pl.
num_could_fit = serializer.PrependHitArray<HitElt, HitElt::get_hit>(
@@ -478,20 +460,19 @@ TEST(PostingListUsedHitSerializerTest, PostingListPrependHitArrayTooManyHits) {
ASSERT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(IsEmpty()));
}
-TEST(PostingListUsedHitSerializerTest,
+TEST(PostingListHitSerializerTest,
PostingListStatusJumpFromNotFullToFullAndBack) {
- PostingListUsedHitSerializer serializer;
+ PostingListHitSerializer serializer;
const uint32_t pl_size = 3 * sizeof(Hit);
- char hits_buf[pl_size];
- ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, hits_buf, pl_size));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
ICING_ASSERT_OK(serializer.PrependHit(&pl, Hit(Hit::kInvalidValue - 1, 0)));
uint32_t bytes_used = serializer.GetBytesUsed(&pl);
// Status not full.
ASSERT_THAT(bytes_used,
- Le(pl_size - PostingListUsedHitSerializer::kSpecialHitsSize));
+ Le(pl_size - PostingListHitSerializer::kSpecialHitsSize));
ICING_ASSERT_OK(serializer.PrependHit(&pl, Hit(Hit::kInvalidValue >> 2, 0)));
// Status should jump to full directly.
ASSERT_THAT(serializer.GetBytesUsed(&pl), Eq(pl_size));
@@ -500,13 +481,13 @@ TEST(PostingListUsedHitSerializerTest,
ASSERT_THAT(serializer.GetBytesUsed(&pl), Eq(bytes_used));
}
-TEST(PostingListUsedHitSerializerTest, DeltaOverflow) {
- PostingListUsedHitSerializer serializer;
+TEST(PostingListHitSerializerTest, DeltaOverflow) {
+ PostingListHitSerializer serializer;
- char hits_buf[1000];
- ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, hits_buf, 4 * sizeof(Hit)));
+ const uint32_t pl_size = 4 * sizeof(Hit);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
static const Hit::Value kOverflow[4] = {
Hit::kInvalidValue >> 2,
@@ -521,8 +502,8 @@ TEST(PostingListUsedHitSerializerTest, DeltaOverflow) {
}
// Cannot fit 4 overflow values.
- ICING_ASSERT_OK_AND_ASSIGN(pl, PostingListUsed::CreateFromUnitializedRegion(
- &serializer, hits_buf, 4 * sizeof(Hit)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ pl, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
ICING_EXPECT_OK(serializer.PrependHit(&pl, Hit(kOverflow[3])));
ICING_EXPECT_OK(serializer.PrependHit(&pl, Hit(kOverflow[2])));
@@ -532,26 +513,22 @@ TEST(PostingListUsedHitSerializerTest, DeltaOverflow) {
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
-TEST(PostingListUsedHitSerializerTest, MoveFrom) {
- PostingListUsedHitSerializer serializer;
+TEST(PostingListHitSerializerTest, MoveFrom) {
+ PostingListHitSerializer serializer;
int size = 3 * serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> hits_buf1 = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used1,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf1.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<Hit> hits1 =
CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
for (const Hit &hit : hits1) {
ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit));
}
- std::unique_ptr<char[]> hits_buf2 = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used2,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf2.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<Hit> hits2 =
CreateHits(/*num_hits=*/5, /*desired_byte_length=*/2);
for (const Hit &hit : hits2) {
@@ -564,16 +541,13 @@ TEST(PostingListUsedHitSerializerTest, MoveFrom) {
EXPECT_THAT(serializer.GetHits(&pl_used1), IsOkAndHolds(IsEmpty()));
}
-TEST(PostingListUsedHitSerializerTest,
- MoveFromNullArgumentReturnsInvalidArgument) {
- PostingListUsedHitSerializer serializer;
+TEST(PostingListHitSerializerTest, MoveFromNullArgumentReturnsInvalidArgument) {
+ PostingListHitSerializer serializer;
int size = 3 * serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> hits_buf1 = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used1,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf1.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<Hit> hits = CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
for (const Hit &hit : hits) {
ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit));
@@ -585,27 +559,23 @@ TEST(PostingListUsedHitSerializerTest,
IsOkAndHolds(ElementsAreArray(hits.rbegin(), hits.rend())));
}
-TEST(PostingListUsedHitSerializerTest,
+TEST(PostingListHitSerializerTest,
MoveFromInvalidPostingListReturnsInvalidArgument) {
- PostingListUsedHitSerializer serializer;
+ PostingListHitSerializer serializer;
int size = 3 * serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> hits_buf1 = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used1,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf1.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<Hit> hits1 =
CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
for (const Hit &hit : hits1) {
ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit));
}
- std::unique_ptr<char[]> hits_buf2 = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used2,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf2.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<Hit> hits2 =
CreateHits(/*num_hits=*/5, /*desired_byte_length=*/2);
for (const Hit &hit : hits2) {
@@ -614,7 +584,7 @@ TEST(PostingListUsedHitSerializerTest,
// Write invalid hits to the beginning of pl_used1 to make it invalid.
Hit invalid_hit;
- Hit *first_hit = reinterpret_cast<Hit *>(hits_buf1.get());
+ Hit *first_hit = reinterpret_cast<Hit *>(pl_used1.posting_list_buffer());
*first_hit = invalid_hit;
++first_hit;
*first_hit = invalid_hit;
@@ -624,27 +594,23 @@ TEST(PostingListUsedHitSerializerTest,
IsOkAndHolds(ElementsAreArray(hits2.rbegin(), hits2.rend())));
}
-TEST(PostingListUsedHitSerializerTest,
+TEST(PostingListHitSerializerTest,
MoveToInvalidPostingListReturnsFailedPrecondition) {
- PostingListUsedHitSerializer serializer;
+ PostingListHitSerializer serializer;
int size = 3 * serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> hits_buf1 = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used1,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf1.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<Hit> hits1 =
CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
for (const Hit &hit : hits1) {
ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit));
}
- std::unique_ptr<char[]> hits_buf2 = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used2,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf2.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<Hit> hits2 =
CreateHits(/*num_hits=*/5, /*desired_byte_length=*/2);
for (const Hit &hit : hits2) {
@@ -653,7 +619,7 @@ TEST(PostingListUsedHitSerializerTest,
// Write invalid hits to the beginning of pl_used2 to make it invalid.
Hit invalid_hit;
- Hit *first_hit = reinterpret_cast<Hit *>(hits_buf2.get());
+ Hit *first_hit = reinterpret_cast<Hit *>(pl_used2.posting_list_buffer());
*first_hit = invalid_hit;
++first_hit;
*first_hit = invalid_hit;
@@ -663,28 +629,23 @@ TEST(PostingListUsedHitSerializerTest,
IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend())));
}
-TEST(PostingListUsedHitSerializerTest, MoveToPostingListTooSmall) {
- PostingListUsedHitSerializer serializer;
+TEST(PostingListHitSerializerTest, MoveToPostingListTooSmall) {
+ PostingListHitSerializer serializer;
int size = 3 * serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> hits_buf1 = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used1,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf1.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<Hit> hits1 =
CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
for (const Hit &hit : hits1) {
ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit));
}
- std::unique_ptr<char[]> hits_buf2 =
- std::make_unique<char[]>(serializer.GetMinPostingListSize());
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used2,
PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf2.get()),
- serializer.GetMinPostingListSize()));
+ &serializer, serializer.GetMinPostingListSize()));
std::vector<Hit> hits2 =
CreateHits(/*num_hits=*/1, /*desired_byte_length=*/2);
for (const Hit &hit : hits2) {
@@ -699,15 +660,13 @@ TEST(PostingListUsedHitSerializerTest, MoveToPostingListTooSmall) {
IsOkAndHolds(ElementsAreArray(hits2.rbegin(), hits2.rend())));
}
-TEST(PostingListUsedHitSerializerTest, PopHitsWithScores) {
- PostingListUsedHitSerializer serializer;
+TEST(PostingListHitSerializerTest, PopHitsWithScores) {
+ PostingListHitSerializer serializer;
int size = 2 * serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> hits_buf1 = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf1.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
// This posting list is 20-bytes. Create four hits that will have deltas of
// two bytes each and all of whom will have a non-default score. This posting
diff --git a/icing/index/numeric/doc-hit-info-iterator-numeric.h b/icing/index/numeric/doc-hit-info-iterator-numeric.h
index 1bfd193..fc66a1d 100644
--- a/icing/index/numeric/doc-hit-info-iterator-numeric.h
+++ b/icing/index/numeric/doc-hit-info-iterator-numeric.h
@@ -20,6 +20,7 @@
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/numeric/numeric-index.h"
#include "icing/util/status-macros.h"
@@ -35,12 +36,23 @@ class DocHitInfoIteratorNumeric : public DocHitInfoIterator {
: numeric_index_iter_(std::move(numeric_index_iter)) {}
libtextclassifier3::Status Advance() override {
+ // If the query property path doesn't exist (i.e. the storage doesn't
+ // exist), then numeric_index_iter_ will be nullptr.
+ if (numeric_index_iter_ == nullptr) {
+ return absl_ports::ResourceExhaustedError("End of iterator");
+ }
+
ICING_RETURN_IF_ERROR(numeric_index_iter_->Advance());
doc_hit_info_ = numeric_index_iter_->GetDocHitInfo();
return libtextclassifier3::Status::OK;
}
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override {
+ return absl_ports::InvalidArgumentError(
+ "Cannot generate suggestion if the last term is numeric operator.");
+ }
+
int32_t GetNumBlocksInspected() const override { return 0; }
int32_t GetNumLeafAdvanceCalls() const override { return 0; }
diff --git a/icing/index/numeric/dummy-numeric-index.h b/icing/index/numeric/dummy-numeric-index.h
index a1d20f8..ce5fa45 100644
--- a/icing/index/numeric/dummy-numeric-index.h
+++ b/icing/index/numeric/dummy-numeric-index.h
@@ -29,6 +29,8 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/persistent-storage.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/hit/hit.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
@@ -36,43 +38,73 @@
#include "icing/index/numeric/numeric-index.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
+#include "icing/util/crc32.h"
+#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
+// DummyNumericIndex: dummy class to help with testing and unblock e2e
+// integration for numeric search. It stores all numeric index data (keys and
+// hits) in memory without actual persistent storages. All PersistentStorage
+// features do not work as expected, i.e. they don't persist any data into disk
+// and therefore data are volatile.
template <typename T>
class DummyNumericIndex : public NumericIndex<T> {
public:
+ static libtextclassifier3::StatusOr<std::unique_ptr<DummyNumericIndex<T>>>
+ Create(const Filesystem& filesystem, std::string working_path) {
+ auto dummy_numeric_index = std::unique_ptr<DummyNumericIndex<T>>(
+ new DummyNumericIndex<T>(filesystem, std::move(working_path)));
+ ICING_RETURN_IF_ERROR(dummy_numeric_index->InitializeNewStorage());
+ return dummy_numeric_index;
+ }
+
~DummyNumericIndex() override = default;
std::unique_ptr<typename NumericIndex<T>::Editor> Edit(
- std::string_view property_name, DocumentId document_id,
+ std::string_view property_path, DocumentId document_id,
SectionId section_id) override {
- return std::make_unique<Editor>(property_name, document_id, section_id,
+ return std::make_unique<Editor>(property_path, document_id, section_id,
storage_);
}
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> GetIterator(
- std::string_view property_name, T key_lower, T key_upper) const override;
+ std::string_view property_path, T key_lower, T key_upper,
+ const DocumentStore&, const SchemaStore&, int64_t) const override;
+
+ libtextclassifier3::Status Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ DocumentId new_last_added_document_id) override;
- libtextclassifier3::Status Reset() override {
+ libtextclassifier3::Status Clear() override {
storage_.clear();
+ last_added_document_id_ = kInvalidDocumentId;
return libtextclassifier3::Status::OK;
}
- libtextclassifier3::Status PersistToDisk() override {
- return libtextclassifier3::Status::OK;
+ DocumentId last_added_document_id() const override {
+ return last_added_document_id_;
}
+ void set_last_added_document_id(DocumentId document_id) override {
+ if (last_added_document_id_ == kInvalidDocumentId ||
+ document_id > last_added_document_id_) {
+ last_added_document_id_ = document_id;
+ }
+ }
+
+ int num_property_indices() const override { return storage_.size(); }
+
private:
class Editor : public NumericIndex<T>::Editor {
public:
explicit Editor(
- std::string_view property_name, DocumentId document_id,
+ std::string_view property_path, DocumentId document_id,
SectionId section_id,
std::unordered_map<std::string, std::map<T, std::vector<BasicHit>>>&
storage)
- : NumericIndex<T>::Editor(property_name, document_id, section_id),
+ : NumericIndex<T>::Editor(property_path, document_id, section_id),
storage_(storage) {}
~Editor() override = default;
@@ -82,12 +114,12 @@ class DummyNumericIndex : public NumericIndex<T> {
return libtextclassifier3::Status::OK;
}
- libtextclassifier3::Status IndexAllBufferedKeys() override;
+ libtextclassifier3::Status IndexAllBufferedKeys() && override;
private:
std::unordered_set<T> seen_keys_;
std::unordered_map<std::string, std::map<T, std::vector<BasicHit>>>&
- storage_;
+ storage_; // Does not own.
};
class Iterator : public NumericIndex<T>::Iterator {
@@ -147,20 +179,58 @@ class DummyNumericIndex : public NumericIndex<T> {
DocHitInfo doc_hit_info_;
};
+ explicit DummyNumericIndex(const Filesystem& filesystem,
+ std::string&& working_path)
+ : NumericIndex<T>(filesystem, std::move(working_path),
+ PersistentStorage::WorkingPathType::kDummy),
+ dummy_crcs_buffer_(
+ std::make_unique<uint8_t[]>(sizeof(PersistentStorage::Crcs))),
+ last_added_document_id_(kInvalidDocumentId) {
+ memset(dummy_crcs_buffer_.get(), 0, sizeof(PersistentStorage::Crcs));
+ }
+
+ libtextclassifier3::Status PersistStoragesToDisk(bool force) override {
+ return libtextclassifier3::Status::OK;
+ }
+
+ libtextclassifier3::Status PersistMetadataToDisk(bool force) override {
+ return libtextclassifier3::Status::OK;
+ }
+
+ libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(bool force) override {
+ return Crc32(0);
+ }
+
+ libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) override {
+ return Crc32(0);
+ }
+
+ PersistentStorage::Crcs& crcs() override {
+ return *reinterpret_cast<PersistentStorage::Crcs*>(
+ dummy_crcs_buffer_.get());
+ }
+ const PersistentStorage::Crcs& crcs() const override {
+ return *reinterpret_cast<const PersistentStorage::Crcs*>(
+ dummy_crcs_buffer_.get());
+ }
+
std::unordered_map<std::string, std::map<T, std::vector<BasicHit>>> storage_;
+ std::unique_ptr<uint8_t[]> dummy_crcs_buffer_;
+ DocumentId last_added_document_id_;
};
template <typename T>
libtextclassifier3::Status
-DummyNumericIndex<T>::Editor::IndexAllBufferedKeys() {
- auto property_map_iter = storage_.find(this->property_name_);
+DummyNumericIndex<T>::Editor::IndexAllBufferedKeys() && {
+ auto property_map_iter = storage_.find(this->property_path_);
if (property_map_iter == storage_.end()) {
const auto& [inserted_iter, insert_result] =
- storage_.insert({this->property_name_, {}});
+ storage_.insert({this->property_path_, {}});
if (!insert_result) {
return absl_ports::InternalError(
absl_ports::StrCat("Failed to create a new map for property \"",
- this->property_name_, "\""));
+ this->property_path_, "\""));
}
property_map_iter = inserted_iter;
}
@@ -184,7 +254,7 @@ DummyNumericIndex<T>::Editor::IndexAllBufferedKeys() {
template <typename T>
libtextclassifier3::Status DummyNumericIndex<T>::Iterator::Advance() {
if (pq_.empty()) {
- return absl_ports::OutOfRangeError("End of iterator");
+ return absl_ports::ResourceExhaustedError("End of iterator");
}
DocumentId document_id = pq_.top().GetCurrentBasicHit().document_id();
@@ -207,17 +277,18 @@ libtextclassifier3::Status DummyNumericIndex<T>::Iterator::Advance() {
template <typename T>
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
-DummyNumericIndex<T>::GetIterator(std::string_view property_name, T key_lower,
- T key_upper) const {
+DummyNumericIndex<T>::GetIterator(std::string_view property_path, T key_lower,
+ T key_upper, const DocumentStore&,
+ const SchemaStore&, int64_t) const {
if (key_lower > key_upper) {
return absl_ports::InvalidArgumentError(
"key_lower should not be greater than key_upper");
}
- auto property_map_iter = storage_.find(std::string(property_name));
+ auto property_map_iter = storage_.find(std::string(property_path));
if (property_map_iter == storage_.end()) {
- return absl_ports::NotFoundError(
- absl_ports::StrCat("Property \"", property_name, "\" not found"));
+ // Return an empty iterator.
+ return std::make_unique<DocHitInfoIteratorNumeric<T>>(nullptr);
}
std::vector<typename Iterator::BucketInfo> bucket_info_vec;
@@ -233,6 +304,38 @@ DummyNumericIndex<T>::GetIterator(std::string_view property_name, T key_lower,
std::move(bucket_info_vec)));
}
+template <typename T>
+libtextclassifier3::Status DummyNumericIndex<T>::Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ DocumentId new_last_added_document_id) {
+ std::unordered_map<std::string, std::map<T, std::vector<BasicHit>>>
+ new_storage;
+
+ for (const auto& [property_path, old_property_map] : storage_) {
+ std::map<T, std::vector<BasicHit>> new_property_map;
+ for (const auto& [key, hits] : old_property_map) {
+ for (const BasicHit& hit : hits) {
+ DocumentId old_doc_id = hit.document_id();
+ if (old_doc_id >= document_id_old_to_new.size() ||
+ document_id_old_to_new[old_doc_id] == kInvalidDocumentId) {
+ continue;
+ }
+
+ new_property_map[key].push_back(
+ BasicHit(hit.section_id(), document_id_old_to_new[old_doc_id]));
+ }
+ }
+
+ if (!new_property_map.empty()) {
+ new_storage[property_path] = std::move(new_property_map);
+ }
+ }
+
+ storage_ = std::move(new_storage);
+ last_added_document_id_ = new_last_added_document_id;
+ return libtextclassifier3::Status::OK;
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/index/numeric/integer-index-bucket-util.cc b/icing/index/numeric/integer-index-bucket-util.cc
new file mode 100644
index 0000000..a05baab
--- /dev/null
+++ b/icing/index/numeric/integer-index-bucket-util.cc
@@ -0,0 +1,205 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/numeric/integer-index-bucket-util.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <utility>
+#include <vector>
+
+#include "icing/index/numeric/integer-index-data.h"
+
+namespace icing {
+namespace lib {
+
+namespace integer_index_bucket_util {
+
+namespace {
+
+// Helper function to determine if data slice [start, end) forms a "full
+// single-range bucket".
+//
+// Full single-range bucket: keys of all data are identical and # of them exceed
+// num_data_threshold.
+//
+// REQUIRES: data slice [start, end) are sorted by key.
+inline bool WouldBeFullSingleRangeBucket(
+ const std::vector<IntegerIndexData>::iterator& start,
+ const std::vector<IntegerIndexData>::iterator& end,
+ int32_t num_data_threshold) {
+ return std::distance(start, end) > num_data_threshold &&
+ start->key() == (end - 1)->key();
+}
+
+// Helper function to determine if a bucket is full single-range.
+//
+// REQUIRES:
+// bucket.key_lower <= [bucket.start, bucket.end)->key() <= bucket.key_upper
+inline bool IsFullSingleRangeBucket(const DataRangeAndBucketInfo& bucket,
+ int32_t num_data_threshold) {
+ return bucket.key_lower == bucket.key_upper &&
+ WouldBeFullSingleRangeBucket(bucket.start, bucket.end,
+ num_data_threshold);
+}
+
+// Helper function to append new bucket(s) with corresponding data slice for
+// range [curr_key_lower, last_key] where last_key = (it_end - 1)->key().
+//
+// Also it handles an edge case:
+// If data slice [it_start, it_end) forms a "full single-range bucket" (see
+// WouldBeFullSingleRangeBucket for definition), then we have to put them into a
+// single range bucket [last_key, last_key] instead of [curr_key_lower,
+// last_key]. Also we have to deal with range [curr_key_lower, last_key - 1]:
+// - If the previous bucket exists and it is not a "full single-range bucket",
+// then merge [curr_key_lower, last_key - 1] into the previous bucket, i.e.
+// change the previous bucket's key_upper to (last_key - 1). Then we will end
+// up having:
+// - [prev_bucket.key_lower, last_key - 1]
+// - [last_key, last_key]
+// - Otherwise, we have to create [curr_key_lower, last_key - 1] with
+// empty data. Then we will end up having (Note: prev_bucket.key_upper ==
+// curr_key_lower - 1):
+// - [prev_bucket.key_lower, curr_key_lower - 1]
+// - [curr_key_lower, last_key - 1]
+// - [last_key, last_key]
+// This will avoid split bucket being called too frequently.
+// For example, original_key_lower = 0, original_key_upper = 50. If we have
+// (num_data_threshold + 1) data with key = 20 and another data with key = 40:
+// - Without this part, we will split them into [[0, 20], [21, 50]]. Then when
+// adding data with key = 10 next round, we will invoke split again and split
+// [0, 20] to [[0, 10], [11, 20]].
+// - With this part, we will split them into [[0, 19], [20, 20], [21, 50]],
+// which will avoid splitting in the next round for key = 20.
+//
+// REQUIRES: it_start < it_end
+void AppendNewBuckets(const std::vector<IntegerIndexData>::iterator& it_start,
+ const std::vector<IntegerIndexData>::iterator& it_end,
+ int64_t curr_key_lower, int32_t num_data_threshold,
+ std::vector<DataRangeAndBucketInfo>& results) {
+ int64_t last_key = (it_end - 1)->key();
+ if (curr_key_lower < last_key &&
+ WouldBeFullSingleRangeBucket(it_start, it_end, num_data_threshold)) {
+ if (!results.empty() &&
+ !IsFullSingleRangeBucket(results.back(), num_data_threshold)) {
+ // Previous bucket is not full single-range, so merge it to now hold the
+ // range [prev_bucket.key_lower, last_key - 1].
+ results.back().key_upper = last_key - 1;
+ } else {
+ // There is either no previous bucket or the previous bucket is full
+ // single-range. So add an empty bucket for the range [curr_key_lower,
+ // last_key - 1].
+ results.push_back(DataRangeAndBucketInfo(it_start, it_start,
+ curr_key_lower, last_key - 1));
+ }
+ curr_key_lower = last_key;
+ }
+ results.push_back(
+ DataRangeAndBucketInfo(it_start, it_end, curr_key_lower, last_key));
+}
+
+} // namespace
+
+std::vector<DataRangeAndBucketInfo> Split(std::vector<IntegerIndexData>& data,
+ int64_t original_key_lower,
+ int64_t original_key_upper,
+ int32_t num_data_threshold) {
+ // Early return if there is no need to split.
+ if (data.size() <= num_data_threshold) {
+ return {DataRangeAndBucketInfo(data.begin(), data.end(), original_key_lower,
+ original_key_upper)};
+ }
+
+ // Sort data by key.
+ std::sort(
+ data.begin(), data.end(),
+ [](const IntegerIndexData& lhs, const IntegerIndexData& rhs) -> bool {
+ return lhs.key() < rhs.key();
+ });
+
+ std::vector<DataRangeAndBucketInfo> results;
+ int64_t curr_key_lower = original_key_lower;
+ // Sliding window [it_start, it_end) to separate data into different buckets.
+ auto it_start = data.begin();
+ auto it_end = data.begin();
+ while (it_end != data.end()) {
+ // Attempt to extend it_end by 1, but we have to include all data with the
+ // same key since they cannot be separated into different buckets. Also use
+ // extend_it_end to avoid modifying it_end directly. For some edge cases,
+ // the extension in a single round is extremely large (i.e. a lot of data
+ // have the same key), and we want to separate them. For example:
+ // - key = 0: 5 data
+ // - key = 1: num_data_threshold - 1 data
+ // In the second round, # of data in the sliding window will exceed the
+ // threshold. We want to separate all data with key = 0 into a single bucket
+ // instead of putting key = 0 and key = 1 together. Therefore, using
+ // extend_it_end allow us to preserve it_end of the previous round and be
+ // able to deal with this case.
+ auto extend_it_end = it_end + 1;
+ while (extend_it_end != data.end() &&
+ it_end->key() == extend_it_end->key()) {
+ ++extend_it_end;
+ }
+
+ if (std::distance(it_start, extend_it_end) > num_data_threshold &&
+ it_start != it_end) {
+ // Split data between [it_start, it_end) into range [curr_key_lower,
+ // (it_end - 1)->key()].
+ AppendNewBuckets(it_start, it_end, curr_key_lower, num_data_threshold,
+ results);
+
+ // it_end at this moment won't be data.end(), so the last element of the
+ // new bucket can't have key == INT64_MAX. Therefore, it is safe to set
+ // curr_key_lower as ((it_end - 1)->key() + 1).
+ curr_key_lower = (it_end - 1)->key() + 1;
+ it_start = it_end;
+ }
+ it_end = extend_it_end;
+ }
+
+ // Handle the final range [curr_key_lower, original_key_upper].
+ if (curr_key_lower <= original_key_upper) {
+ if (it_start != it_end) {
+ AppendNewBuckets(it_start, it_end, curr_key_lower, num_data_threshold,
+ results);
+
+ // AppendNewBuckets only handles range [curr_key_lower, (it_end -
+ // 1)->key()], so we have to handle range [(it_end - 1)->key() + 1,
+ // original_key_upper] if needed.
+ int64_t last_key = (it_end - 1)->key();
+ if (last_key != std::numeric_limits<int64_t>::max() &&
+ last_key + 1 <= original_key_upper) {
+ if (!results.empty() &&
+ !IsFullSingleRangeBucket(results.back(), num_data_threshold)) {
+ results.back().key_upper = original_key_upper;
+ } else {
+ results.push_back(DataRangeAndBucketInfo(
+ it_start, it_start, last_key + 1, original_key_upper));
+ }
+ }
+ } else {
+ results.push_back(DataRangeAndBucketInfo(it_start, it_end, curr_key_lower,
+ original_key_upper));
+ }
+ }
+
+ return results;
+}
+
+} // namespace integer_index_bucket_util
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/numeric/integer-index-bucket-util.h b/icing/index/numeric/integer-index-bucket-util.h
new file mode 100644
index 0000000..d6fc245
--- /dev/null
+++ b/icing/index/numeric/integer-index-bucket-util.h
@@ -0,0 +1,81 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_NUMERIC_INTEGER_INDEX_BUCKET_UTIL_H_
+#define ICING_INDEX_NUMERIC_INTEGER_INDEX_BUCKET_UTIL_H_
+
+#include <cstdint>
+#include <utility>
+#include <vector>
+
+#include "icing/index/numeric/integer-index-data.h"
+
+namespace icing {
+namespace lib {
+
+namespace integer_index_bucket_util {
+
+// A wrapper struct that contains information of a bucket.
+// - The bucket contains data within the iterator [start, end).
+// - Bucket range is [key_lower, key_upper], and all data within [start, end)
+// should have keys in the bucket range.
+//
+// Note: the caller should make sure the lifecycle of data vector is longer than
+// instances of this wrapper struct.
+struct DataRangeAndBucketInfo {
+ std::vector<IntegerIndexData>::iterator start;
+ std::vector<IntegerIndexData>::iterator end;
+ int64_t key_lower;
+ int64_t key_upper;
+
+ explicit DataRangeAndBucketInfo(
+ std::vector<IntegerIndexData>::iterator start_in,
+ std::vector<IntegerIndexData>::iterator end_in, int64_t key_lower_in,
+ int64_t key_upper_in)
+ : start(std::move(start_in)),
+ end(std::move(end_in)),
+ key_lower(key_lower_in),
+ key_upper(key_upper_in) {}
+};
+
+// Helper function to split data (that are originally in a bucket with range
+// [original_key_lower, original_key_upper]) into different buckets according to
+// num_data_threshold.
+// - The input vector `data` will be sorted by key in ascending order (unless
+// there's no need to split in which case data is returned unmodified)
+// - Data with the same key will be in the same bucket even if # of them exceed
+// num_data_threshold.
+// - Range of all buckets will be disjoint, and the range union will be
+// [original_key_lower, original_key_upper].
+// - Data slice (i.e. [start, end)) can be empty.
+//
+// REQUIRES:
+// - original_key_lower < original_key_upper
+// - num_data_threshold > 0
+// - Keys of all data are in range [original_key_lower, original_key_upper]
+//
+// Returns: a vector of DataRangeAndBucketInfo that contain all bucket info
+// after splitting. Also the returned vector should contain at least one
+// bucket, otherwise it is considered an error.
+std::vector<DataRangeAndBucketInfo> Split(std::vector<IntegerIndexData>& data,
+ int64_t original_key_lower,
+ int64_t original_key_upper,
+ int32_t num_data_threshold);
+
+} // namespace integer_index_bucket_util
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_NUMERIC_INTEGER_INDEX_BUCKET_UTIL_H_
diff --git a/icing/index/numeric/integer-index-bucket-util_test.cc b/icing/index/numeric/integer-index-bucket-util_test.cc
new file mode 100644
index 0000000..82c593e
--- /dev/null
+++ b/icing/index/numeric/integer-index-bucket-util_test.cc
@@ -0,0 +1,1112 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/numeric/integer-index-bucket-util.h"
+
+#include <limits>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/index/numeric/integer-index-data.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+namespace integer_index_bucket_util {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::Ne;
+using ::testing::SizeIs;
+
+static constexpr DocumentId kDefaultDocumentId = 123;
+static constexpr SectionId kDefaultSectionId = 31;
+
+TEST(IntegerIndexBucketUtilTest, Split_numDataNotDivisibleByThreshold) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)};
+ int64_t key_lower = -10;
+ int64_t key_upper = 10;
+ int32_t num_data_threshold = 3;
+ ASSERT_THAT(data.size() % num_data_threshold, Ne(0));
+
+ // Keys = [-10, -3, -2, 0, 1, 2, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, key_lower, key_upper, num_data_threshold);
+ ASSERT_THAT(results, SizeIs(3));
+ // Bucket 0: key lower = -10, key upper = -2, keys = [-10, -3, -2].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)));
+ // Bucket 1: key lower = -1, key upper = 2, keys = [0, 1, 2].
+ EXPECT_THAT(results[1].key_lower, Eq(-1));
+ EXPECT_THAT(results[1].key_upper, Eq(2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 2: key lower = 3, key upper = 10, keys = [10].
+ EXPECT_THAT(results[2].key_lower, Eq(3));
+ EXPECT_THAT(results[2].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest, Split_numDataDivisibleByThreshold) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)};
+ int64_t key_lower = -10;
+ int64_t key_upper = 10;
+ int32_t num_data_threshold = 3;
+ ASSERT_THAT(data.size() % num_data_threshold, Eq(0));
+
+ // Keys = [-10, -3, -2, 0, 2, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, key_lower, key_upper, num_data_threshold);
+ ASSERT_THAT(results, SizeIs(2));
+ // Bucket 0: key lower = -10, key upper = -2, keys = [-10, -3, -2].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)));
+ // Bucket 1: key lower = -1, key upper = 2, keys = [0, 2, 10].
+ EXPECT_THAT(results[1].key_lower, Eq(-1));
+ EXPECT_THAT(results[1].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest, Split_shouldIncludeOriginalKeyRange) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)};
+ int64_t key_lower = -1000;
+ int64_t key_upper = 1000;
+ int32_t num_data_threshold = 3;
+
+ // Keys = [-10, -3, -2, 0, 1, 2, 10].
+ // Split should include the original key_lower and key_upper even if there is
+ // no key at boundary.
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, key_lower, key_upper, num_data_threshold);
+ ASSERT_THAT(results, SizeIs(3));
+ // Bucket 0: key lower = -1000, key upper = -2, keys = [-10, -3, -2].
+ EXPECT_THAT(results[0].key_lower, Eq(-1000));
+ EXPECT_THAT(results[0].key_upper, Eq(-2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)));
+ // Bucket 1: key lower = -1, key upper = 2, keys = [0, 1, 2].
+ EXPECT_THAT(results[1].key_lower, Eq(-1));
+ EXPECT_THAT(results[1].key_upper, Eq(2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 2: key lower = 3, key upper = 1000, keys = [10].
+ EXPECT_THAT(results[2].key_lower, Eq(3));
+ EXPECT_THAT(results[2].key_upper, Eq(1000));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest, Split_singleBucketWithoutSplitting) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)};
+ int64_t key_lower = -1000;
+ int64_t key_upper = 1000;
+ int32_t num_data_threshold = 100;
+
+ // Keys = [-10, -3, -2, 0, 1, 2, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, key_lower, key_upper, num_data_threshold);
+ ASSERT_THAT(results, SizeIs(1));
+ // Bucket 0: key lower = -1000, key upper = 1000, keys = [-10, -3, -2, 0, 1,
+ // 2, 10]. Since # of data <= threshold, data vector won't be sorted and thus
+ // [start, end) will have data with the original order.
+ EXPECT_THAT(results[0].key_lower, Eq(-1000));
+ EXPECT_THAT(results[0].key_upper, Eq(1000));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)));
+}
+
+TEST(IntegerIndexBucketUtilTest, Split_emptyData) {
+ std::vector<IntegerIndexData> empty_data;
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(empty_data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ ASSERT_THAT(results, SizeIs(1));
+ // Bucket 0: key lower = -10, key upper = 10, keys = [].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(10));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ IsEmpty());
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_firstBucket_keyEqualsKeyLower) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [-10, -10, -10, -10, -10, 0, 3, 5, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = -10 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be separated from key = 0, 3, ....
+ ASSERT_THAT(results, SizeIs(3));
+ // Bucket 0: key lower = -10, key upper = -10, keys = [-10, -10, -10, -10,
+ // -10].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10)));
+ // Bucket 1: key lower = -9, key upper = 5, keys = [0, 3, 5].
+ EXPECT_THAT(results[1].key_lower, Eq(-9));
+ EXPECT_THAT(results[1].key_upper, Eq(5));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5)));
+ // Bucket 2: key lower = 6, key upper = 10, keys = [10].
+ EXPECT_THAT(results[2].key_lower, Eq(6));
+ EXPECT_THAT(results[2].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_firstBucket_keyGreaterThanKeyLower) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [-7, -7, -7, -7, -7, 0, 3, 5, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = -7 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be separated from key = 0, 3, ....
+ // - They should be in a single range bucket [-7, -7], and another bucket
+ // [-10, -8] with empty data should be created before it.
+ ASSERT_THAT(results, SizeIs(4));
+ // Bucket 0: key lower = -10, key upper = -8, keys = [].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-8));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ IsEmpty());
+ // Bucket 1: key lower = -7, key upper = -7, keys = [-7, -7, -7, -7, -7].
+ EXPECT_THAT(results[1].key_lower, Eq(-7));
+ EXPECT_THAT(results[1].key_upper, Eq(-7));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7)));
+ // Bucket 2: key lower = -6, key upper = 5, keys = [0, 3, 5].
+ EXPECT_THAT(results[2].key_lower, Eq(-6));
+ EXPECT_THAT(results[2].key_upper, Eq(5));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5)));
+ // Bucket 3: key lower = 6, key upper = 10, keys = [10].
+ EXPECT_THAT(results[3].key_lower, Eq(6));
+ EXPECT_THAT(results[3].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[3].start, results[3].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_midBucket_keyEqualsKeyLower) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [-10, -5, -4, -4, -4, -4, -4, 5, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = -4 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be separated from key = -10, -5, 5, 10.
+ ASSERT_THAT(results, SizeIs(3));
+ // Bucket 0: key lower = -10, key upper = -5, keys = [-10, -5].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-5));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -5)));
+ // Bucket 1: key lower = -4, key upper = -4, keys = [-4, -4, -4, -4, -4].
+ EXPECT_THAT(results[1].key_lower, Eq(-4));
+ EXPECT_THAT(results[1].key_upper, Eq(-4));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4)));
+ // Bucket 2: key lower = -3, key upper = 10, keys = [5, 10].
+ EXPECT_THAT(results[2].key_lower, Eq(-3));
+ EXPECT_THAT(results[2].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_midBucket_keyGreaterThanKeyLower) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [-10, -5, -1, -1, -1, -1, -1, 5, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = -1 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be separated from key = -10, -5, 5, 10.
+ // - They should be in a single range bucket [-1, -1], and range [-4, -2]
+ // should be merged into the previous bucket.
+ ASSERT_THAT(results, SizeIs(3));
+ // Bucket 0: key lower = -10, key upper = -2, keys = [-10, -5].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -5)));
+ // Bucket 1: key lower = -1, key upper = -1, keys = [-1, -1, -1, -1, -1].
+ EXPECT_THAT(results[1].key_lower, Eq(-1));
+ EXPECT_THAT(results[1].key_upper, Eq(-1));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1)));
+ // Bucket 2: key lower = 0, key upper = 10, keys = [5, 10].
+ EXPECT_THAT(results[2].key_lower, Eq(0));
+ EXPECT_THAT(results[2].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_lastBucket_keyEqualsKeyLower) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3)};
+
+ // Keys = [-10, -3, 0, 2, 3, 3, 3, 3, 3].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = 3 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be separated from key = -10, -3, 0, 2.
+ // - They should be in a single range bucket [3, 3], and another bucket
+ // [4, 10] with empty data should be created after it.
+ ASSERT_THAT(results, SizeIs(4));
+ // Bucket 0: key lower = -10, key upper = 0, keys = [-10, -3, 0].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(0));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0)));
+ // Bucket 1: key lower = 1, key upper = 2, keys = [2].
+ EXPECT_THAT(results[1].key_lower, Eq(1));
+ EXPECT_THAT(results[1].key_upper, Eq(2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 2: key lower = 3, key upper = 10, keys = [3, 3, 3, 3, 3].
+ EXPECT_THAT(results[2].key_lower, Eq(3));
+ EXPECT_THAT(results[2].key_upper, Eq(3));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3)));
+ // Bucket 3: key lower = 4, key upper = 10, keys = [].
+ EXPECT_THAT(results[3].key_lower, Eq(4));
+ EXPECT_THAT(results[3].key_upper, Eq(10));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[3].start, results[3].end),
+ IsEmpty());
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_lastBucket_keyWithinKeyLowerAndUpper) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6)};
+
+ // Keys = [-10, -3, 0, 2, 6, 6, 6, 6, 6].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = 6 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be separated from key = -10, -3, 0, 2.
+ // - They should be in a single range bucket [6, 6]. Range [3, 5] should be
+ // merged into the previous bucket. and another bucket [7, 10] with empty
+ // data should be created after it.
+ ASSERT_THAT(results, SizeIs(4));
+ // Bucket 0: key lower = -10, key upper = 0, keys = [-10, -3, 0].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(0));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0)));
+ // Bucket 1: key lower = 1, key upper = 5, keys = [2].
+ EXPECT_THAT(results[1].key_lower, Eq(1));
+ EXPECT_THAT(results[1].key_upper, Eq(5));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 2: key lower = 6, key upper = 6, keys = [6, 6, 6, 6, 6].
+ EXPECT_THAT(results[2].key_lower, Eq(6));
+ EXPECT_THAT(results[2].key_upper, Eq(6));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6)));
+ // Bucket 3: key lower = 7, key upper = 10, keys = [].
+ EXPECT_THAT(results[3].key_lower, Eq(7));
+ EXPECT_THAT(results[3].key_upper, Eq(10));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[3].start, results[3].end),
+ IsEmpty());
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_lastBucket_keyEqualsKeyUpper) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [-10, -3, 0, 2, 10, 10, 10, 10, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = 10 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be separated from key = -10, -3, 0, 2.
+ // - They should be in a single range bucket [10, 10], and range [3, 9] should
+ // be merged into the previous bucket.
+ ASSERT_THAT(results, SizeIs(3));
+ // Bucket 0: key lower = -10, key upper = 0, keys = [-10, -3, 0].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(0));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0)));
+ // Bucket 1: key lower = 1, key upper = 9, keys = [2].
+ EXPECT_THAT(results[1].key_lower, Eq(1));
+ EXPECT_THAT(results[1].key_upper, Eq(9));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 2: key lower = 10, key upper = 10, keys = [10, 10, 10, 10, 10].
+ EXPECT_THAT(results[2].key_lower, Eq(10));
+ EXPECT_THAT(results[2].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_shouldNotMergeIntoPreviousBucket) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [-10, -2, -2, -2, -2, -2, 5, 5, 5, 5, 5, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Data with key = -2 and 5 should be put into a single bucket respectively.
+ // - When dealing with key = 5, range [-1, 4] should not be merged into the
+ // previous bucket [-2, -2] because [-2, -2] also contains single key data
+ // exceeding the threshold. Instead, we should create bucket [-1, 4] with
+ // empty data.
+ ASSERT_THAT(results, SizeIs(5));
+ // Bucket 0: key lower = -10, key upper = -3, keys = [-10].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-3));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId,
+ kDefaultDocumentId, -10)));
+ // Bucket 1: key lower = -2, key upper = -2, keys = [-2, -2, -2, -2, -2].
+ EXPECT_THAT(results[1].key_lower, Eq(-2));
+ EXPECT_THAT(results[1].key_upper, Eq(-2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)));
+ // Bucket 2: key lower = -1, key upper = 4, keys = [].
+ EXPECT_THAT(results[2].key_lower, Eq(-1));
+ EXPECT_THAT(results[2].key_upper, Eq(4));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ IsEmpty());
+ // Bucket 3: key lower = 5, key upper = 5, keys = [5, 5, 5, 5, 5].
+ EXPECT_THAT(results[3].key_lower, Eq(5));
+ EXPECT_THAT(results[3].key_upper, Eq(5));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[3].start, results[3].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5)));
+ // Bucket 4: key lower = 6, key upper = 10, keys = [10].
+ EXPECT_THAT(results[4].key_lower, Eq(6));
+ EXPECT_THAT(results[4].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[4].start, results[4].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_shouldMergeIntoPreviousBucket) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -8),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [-10, -8, -3, -2, -2, -2, 5, 5, 5, 5, 5, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Data with key = 5 should be put into a single bucket.
+ // - When dealing with key = 5, range [-1, 4] should be merged into the
+ // previous bucket [-2, -2] because # of data in [-2, -2] doesn't exceed the
+ // threshold.
+ ASSERT_THAT(results, SizeIs(4));
+ // Bucket 0: key lower = -10, key upper = -3, keys = [-10, -8, -3].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-3));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -8),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3)));
+ // Bucket 1: key lower = -2, key upper = 4, keys = [-2, -2, -2].
+ EXPECT_THAT(results[1].key_lower, Eq(-2));
+ EXPECT_THAT(results[1].key_upper, Eq(4));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)));
+ // Bucket 2: key lower = 5, key upper = 5, keys = [5, 5, 5, 5, 5].
+ EXPECT_THAT(results[2].key_lower, Eq(5));
+ EXPECT_THAT(results[2].key_upper, Eq(5));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5)));
+ // Bucket 3: key lower = 6, key upper = 10, keys = [10].
+ EXPECT_THAT(results[3].key_lower, Eq(6));
+ EXPECT_THAT(results[3].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[3].start, results[3].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_singleBucket_keyEqualsKeyLower) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10)};
+
+ // Keys = [-10, -10, -10, -10, -10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = -10 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be in a single range bucket [-10, -10], and another bucket
+ // [-9, 10] with empty data should be created after it.
+ ASSERT_THAT(results, SizeIs(2));
+ // Bucket 0: key lower = -10, key upper = -10, keys = [-10, -10, -10, -10,
+ // -10].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10)));
+ // Bucket 1: key lower = -9, key upper = 10, keys = [].
+ EXPECT_THAT(results[1].key_lower, Eq(-9));
+ EXPECT_THAT(results[1].key_upper, Eq(10));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ IsEmpty());
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_singleBucket_keyWithinKeyLowerAndUpper) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0)};
+
+ // Keys = [0, 0, 0, 0, 0].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = 0 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be in a single range bucket [0, 0]. Another bucket [-10, -1]
+ // with empty data should be created before it, and another bucket [1, 10]
+ // with empty data should be created after it.
+ ASSERT_THAT(results, SizeIs(3));
+ // Bucket 0: key lower = -10, key upper = -1, keys = [].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-1));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ IsEmpty());
+ // Bucket 1: key lower = 0, key upper = 0, keys = [0, 0, 0, 0, 0].
+ EXPECT_THAT(results[1].key_lower, Eq(0));
+ EXPECT_THAT(results[1].key_upper, Eq(0));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0)));
+ // Bucket 2: key lower = 1, key upper = 10, keys = [].
+ EXPECT_THAT(results[2].key_lower, Eq(1));
+ EXPECT_THAT(results[2].key_upper, Eq(10));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ IsEmpty());
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_singleBucket_keyEqualsKeyUpper) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [10, 10, 10, 10, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = 10 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be in a single range bucket [10, 10], and another bucket
+ // [-10, 9] with empty data should be created before it.
+ ASSERT_THAT(results, SizeIs(2));
+ // Bucket 0: key lower = -10, key upper = 9, keys = [].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(9));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ IsEmpty());
+ // Bucket 1: key lower = -10, key upper = 10, keys = [10, 10, 10, 10, 10].
+ EXPECT_THAT(results[1].key_lower, Eq(10));
+ EXPECT_THAT(results[1].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_adjacentKeysTotalNumDataExceedThreshold) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [-10, -10, -1, -1, 2, 2, 10, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // Even though # of data with the same key is within the threshold, since
+ // total # of data of adjacent keys exceed the threshold, they should be
+ // separated into different buckets.
+ ASSERT_THAT(results, SizeIs(4));
+ // Bucket 0: key lower = -10, key upper = -10, keys = [-10, -10].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10)));
+ // Bucket 1: key lower = -9, key upper = -1, keys = [-1, -1].
+ EXPECT_THAT(results[1].key_lower, Eq(-9));
+ EXPECT_THAT(results[1].key_upper, Eq(-1));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1)));
+ // Bucket 2: key lower = 0, key upper = 2, keys = [2, 2].
+ EXPECT_THAT(results[2].key_lower, Eq(0));
+ EXPECT_THAT(results[2].key_upper, Eq(2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 3: key lower = 3, key upper = 10, keys = [10, 10].
+ EXPECT_THAT(results[3].key_lower, Eq(3));
+ EXPECT_THAT(results[3].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[3].start, results[3].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_keyLowerEqualsIntMin_smallestKeyGreaterThanKeyLower) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min() + 1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [INT64_MIN + 1, -10, -1, 2, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*original_key_upper=*/std::numeric_limits<int64_t>::max(),
+ /*num_data_threshold=*/3);
+ ASSERT_THAT(results, SizeIs(2));
+ // Bucket 0: key lower = INT64_MIN, key upper = -1, keys = [INT64_MIN + 1,
+ // -10, -1].
+ EXPECT_THAT(results[0].key_lower, Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(results[0].key_upper, Eq(-1));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min() + 1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1)));
+ // Bucket 1: key lower = 0, key upper = INT64_MAX, keys = [2, 10].
+ EXPECT_THAT(results[1].key_lower, Eq(0));
+ EXPECT_THAT(results[1].key_upper, Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_keyLowerEqualsIntMin_smallestKeyEqualsKeyLower) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [INT64_MIN, -10, -1, 2, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*original_key_upper=*/std::numeric_limits<int64_t>::max(),
+ /*num_data_threshold=*/3);
+ ASSERT_THAT(results, SizeIs(2));
+ // Bucket 0: key lower = INT64_MIN, key upper = -1, keys = [INT64_MIN, -10,
+ // -1].
+ EXPECT_THAT(results[0].key_lower, Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(results[0].key_upper, Eq(-1));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1)));
+ // Bucket 1: key lower = 0, key upper = INT64_MAX, keys = [2, 10].
+ EXPECT_THAT(results[1].key_lower, Eq(0));
+ EXPECT_THAT(results[1].key_upper, Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_keyLowerEqualsIntMin_keyIntMinExceedingThreshold) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [INT64_MIN, INT64_MIN, INT64_MIN, INT64_MIN, INT64_MIN, -10, -1, 2,
+ // 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*original_key_upper=*/std::numeric_limits<int64_t>::max(),
+ /*num_data_threshold=*/3);
+ ASSERT_THAT(results, SizeIs(3));
+ // Bucket 0: key lower = INT64_MIN, key upper = INT64_MIN, keys = [INT64_MIN,
+ // INT64_MIN, INT64_MIN, INT64_MIN, INT64_MIN].
+ EXPECT_THAT(results[0].key_lower, Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(results[0].key_upper, Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min())));
+ // Bucket 1: key lower = INT64_MIN + 1, key upper = 2, keys = [-10, -1, 2].
+ EXPECT_THAT(results[1].key_lower,
+ Eq(std::numeric_limits<int64_t>::min() + 1));
+ EXPECT_THAT(results[1].key_upper, Eq(2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 2: key lower = 3, key upper = INT64_MAX, keys = [10].
+ EXPECT_THAT(results[2].key_lower, Eq(3));
+ EXPECT_THAT(results[2].key_upper, Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_keyUpperEqualsIntMax_largestKeySmallerThanKeyUpper) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max() - 1),
+ };
+
+ // Keys = [-10, -1, 2, 10, INT64_MAX - 1].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*original_key_upper=*/std::numeric_limits<int64_t>::max(),
+ /*num_data_threshold=*/3);
+ ASSERT_THAT(results, SizeIs(2));
+ // Bucket 0: key lower = INT64_MIN, key upper = 2, keys = [-10, -1, 2].
+ EXPECT_THAT(results[0].key_lower, Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(results[0].key_upper, Eq(2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 1: key lower = 3, key upper = INT64_MAX, keys = [10, INT64_MAX - 1].
+ EXPECT_THAT(results[1].key_lower, Eq(3));
+ EXPECT_THAT(results[1].key_upper, Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max() - 1)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_keyUpperEqualsIntMax_largestKeyEqualsKeyUpper) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ };
+
+ // Keys = [-10, -1, 2, 10, INT64_MAX].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*original_key_upper=*/std::numeric_limits<int64_t>::max(),
+ /*num_data_threshold=*/3);
+ ASSERT_THAT(results, SizeIs(2));
+ // Bucket 0: key lower = INT64_MIN, key upper = 2, keys = [-10, -1, 2].
+ EXPECT_THAT(results[0].key_lower, Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(results[0].key_upper, Eq(2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 1: key lower = 3, key upper = INT64_MAX, keys = [10, INT64_MAX].
+ EXPECT_THAT(results[1].key_lower, Eq(3));
+ EXPECT_THAT(results[1].key_upper, Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max())));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_keyUpperEqualsIntMax_keyIntMaxExceedingThreshold) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max())};
+
+ // Keys = [-10, -1, 2, 10, INT64_MAX, INT64_MAX, INT64_MAX, INT64_MAX,
+ // INT64_MAX].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*original_key_upper=*/std::numeric_limits<int64_t>::max(),
+ /*num_data_threshold=*/3);
+ ASSERT_THAT(results, SizeIs(3));
+ // Bucket 0: key lower = INT64_MIN, key upper = 2, keys = [-10, -1, 2].
+ EXPECT_THAT(results[0].key_lower, Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(results[0].key_upper, Eq(2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 1: key lower = 3, key upper = INT_MAX - 1, keys = [10].
+ EXPECT_THAT(results[1].key_lower, Eq(3));
+ EXPECT_THAT(results[1].key_upper,
+ Eq(std::numeric_limits<int64_t>::max() - 1));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+ // Bucket 2: key lower = INT64_MAX, key upper = INT64_MAX, keys = [INT64_MAX,
+ // INT64_MAX, INT64_MAX, INT64_MAX, INT64_MAX].
+ EXPECT_THAT(results[2].key_lower, Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(results[2].key_upper, Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max())));
+}
+
+} // namespace
+
+} // namespace integer_index_bucket_util
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/numeric/integer-index-storage.cc b/icing/index/numeric/integer-index-storage.cc
new file mode 100644
index 0000000..f0212da
--- /dev/null
+++ b/icing/index/numeric/integer-index-storage.cc
@@ -0,0 +1,1147 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/numeric/integer-index-storage.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <functional>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <queue>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/file-backed-vector.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/doc-hit-info-iterator-numeric.h"
+#include "icing/index/numeric/integer-index-bucket-util.h"
+#include "icing/index/numeric/integer-index-data.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/index/numeric/posting-list-integer-index-accessor.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/util/crc32.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Helper function to flush data between [it_start, it_end) into posting list(s)
+// and return posting list id.
+// Note: it will sort data between [it_start, it_end) by basic hit value, so the
+// caller should be aware that the data order will be changed after calling this
+// function.
+libtextclassifier3::StatusOr<PostingListIdentifier> FlushDataIntoPostingLists(
+ FlashIndexStorage* flash_index_storage,
+ PostingListIntegerIndexSerializer* posting_list_serializer,
+ const std::vector<IntegerIndexData>::iterator& it_start,
+ const std::vector<IntegerIndexData>::iterator& it_end) {
+ if (it_start == it_end) {
+ return PostingListIdentifier::kInvalid;
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> new_pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage,
+ posting_list_serializer));
+
+ std::sort(it_start, it_end);
+ for (auto it = it_end - 1; it >= it_start; --it) {
+ ICING_RETURN_IF_ERROR(new_pl_accessor->PrependData(*it));
+ }
+
+ PostingListAccessor::FinalizeResult result =
+ std::move(*new_pl_accessor).Finalize();
+ if (!result.status.ok()) {
+ return result.status;
+ }
+ if (!result.id.is_valid()) {
+ return absl_ports::InternalError("Fail to flush data into posting list(s)");
+ }
+ return result.id;
+}
+
+// The following 4 methods are helper functions to get the correct file path of
+// metadata/sorted_buckets/unsorted_buckets/flash_index_storage, according to
+// the given working directory.
+std::string GetMetadataFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", IntegerIndexStorage::kFilePrefix,
+ ".m");
+}
+
+std::string GetSortedBucketsFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", IntegerIndexStorage::kFilePrefix,
+ ".s");
+}
+
+std::string GetUnsortedBucketsFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", IntegerIndexStorage::kFilePrefix,
+ ".u");
+}
+
+std::string GetFlashIndexStorageFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", IntegerIndexStorage::kFilePrefix,
+ ".f");
+}
+
+} // namespace
+
+// We add (BasicHits, key) into a bucket in DocumentId descending and SectionId
+// ascending order. When doing range query, we may access buckets and want to
+// return BasicHits to callers sorted by DocumentId. Therefore, this problem is
+// actually "merge K sorted lists".
+// To implement this algorithm via priority_queue, we create this wrapper class
+// to store PostingListIntegerIndexAccessor for iterating through the posting
+// list chain.
+// - Non-relevant (i.e. not in range [key_lower, key_upper]) will be skipped.
+// - Relevant BasicHits will be returned.
+class BucketPostingListIterator {
+ public:
+ class Comparator {
+ public:
+ // REQUIRES: 2 BucketPostingListIterator* instances (lhs, rhs) should be
+ // valid, i.e. the preceding AdvanceAndFilter() succeeded.
+ bool operator()(const BucketPostingListIterator* lhs,
+ const BucketPostingListIterator* rhs) const {
+ // std::priority_queue is a max heap and we should return BasicHits in
+ // DocumentId descending order.
+ // - BucketPostingListIterator::operator< should have the same order as
+ // DocumentId.
+ // - BasicHit encodes inverted document id and BasicHit::operator<
+ // compares the encoded raw value directly.
+ // - Therefore, BucketPostingListIterator::operator< should compare
+ // BasicHit reversely.
+ // - This will make priority_queue return buckets in DocumentId
+ // descending and SectionId ascending order.
+ // - Whatever direction we sort SectionId by (or pop by priority_queue)
+ // doesn't matter because all hits for the same DocumentId will be
+ // merged into a single DocHitInfo.
+ return rhs->GetCurrentBasicHit() < lhs->GetCurrentBasicHit();
+ }
+ };
+
+ explicit BucketPostingListIterator(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor)
+ : pl_accessor_(std::move(pl_accessor)),
+ should_retrieve_next_batch_(true) {}
+
+ // Advances to the next relevant data. The posting list of a bucket contains
+ // keys within range [bucket.key_lower, bucket.key_upper], but some of them
+ // may be out of [query_key_lower, query_key_upper], so when advancing we have
+ // to filter out those non-relevant keys.
+ //
+ // Returns:
+ // - OK on success
+ // - RESOURCE_EXHAUSTED_ERROR if reaching the end (i.e. no more relevant
+ // data)
+ // - Any other PostingListIntegerIndexAccessor errors
+ libtextclassifier3::Status AdvanceAndFilter(int64_t query_key_lower,
+ int64_t query_key_upper) {
+ // Move curr_ until reaching a relevant data (i.e. key in range
+ // [query_key_lower, query_key_upper])
+ do {
+ if (!should_retrieve_next_batch_) {
+ ++curr_;
+ should_retrieve_next_batch_ =
+ curr_ >= cached_batch_integer_index_data_.cend();
+ }
+ if (should_retrieve_next_batch_) {
+ ICING_RETURN_IF_ERROR(GetNextDataBatch());
+ should_retrieve_next_batch_ = false;
+ }
+ } while (curr_->key() < query_key_lower || curr_->key() > query_key_upper);
+
+ return libtextclassifier3::Status::OK;
+ }
+
+ const BasicHit& GetCurrentBasicHit() const { return curr_->basic_hit(); }
+
+ private:
+ // Gets next batch of data from the posting list chain, caches in
+ // cached_batch_integer_index_data_, and sets curr_ to the begin of the cache.
+ libtextclassifier3::Status GetNextDataBatch() {
+ auto cached_batch_integer_index_data_or = pl_accessor_->GetNextDataBatch();
+ if (!cached_batch_integer_index_data_or.ok()) {
+ ICING_LOG(WARNING)
+ << "Fail to get next batch data from posting list due to: "
+ << cached_batch_integer_index_data_or.status().error_message();
+ return std::move(cached_batch_integer_index_data_or).status();
+ }
+
+ cached_batch_integer_index_data_ =
+ std::move(cached_batch_integer_index_data_or).ValueOrDie();
+ curr_ = cached_batch_integer_index_data_.cbegin();
+
+ if (cached_batch_integer_index_data_.empty()) {
+ return absl_ports::ResourceExhaustedError("End of iterator");
+ }
+
+ return libtextclassifier3::Status::OK;
+ }
+
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor_;
+ std::vector<IntegerIndexData> cached_batch_integer_index_data_;
+ std::vector<IntegerIndexData>::const_iterator curr_;
+ bool should_retrieve_next_batch_;
+};
+
+// Wrapper class to iterate through IntegerIndexStorage to get relevant data.
+// It uses multiple BucketPostingListIterator instances from different candidate
+// buckets and merges all relevant BasicHits from these buckets by
+// std::priority_queue in DocumentId descending order. Also different SectionIds
+// of the same DocumentId will be merged into SectionIdMask and returned as a
+// single DocHitInfo.
+class IntegerIndexStorageIterator : public NumericIndex<int64_t>::Iterator {
+ public:
+ explicit IntegerIndexStorageIterator(
+ int64_t query_key_lower, int64_t query_key_upper,
+ std::vector<std::unique_ptr<BucketPostingListIterator>>&& bucket_pl_iters)
+ : NumericIndex<int64_t>::Iterator(query_key_lower, query_key_upper) {
+ std::vector<BucketPostingListIterator*> bucket_pl_iters_raw_ptrs;
+ for (std::unique_ptr<BucketPostingListIterator>& bucket_pl_itr :
+ bucket_pl_iters) {
+ // Before adding BucketPostingListIterator* into the priority queue, we
+ // have to advance the bucket iterator to the first valid data since the
+ // priority queue needs valid data to compare the order.
+ // Note: it is possible that the bucket iterator fails to advance for the
+ // first round, because data could be filtered out by [query_key_lower,
+ // query_key_upper]. In this case, just discard the iterator.
+ if (bucket_pl_itr->AdvanceAndFilter(query_key_lower, query_key_upper)
+ .ok()) {
+ bucket_pl_iters_raw_ptrs.push_back(bucket_pl_itr.get());
+ bucket_pl_iters_.push_back(std::move(bucket_pl_itr));
+ }
+ }
+
+ pq_ = std::priority_queue<BucketPostingListIterator*,
+ std::vector<BucketPostingListIterator*>,
+ BucketPostingListIterator::Comparator>(
+ comparator_, std::move(bucket_pl_iters_raw_ptrs));
+ }
+
+ ~IntegerIndexStorageIterator() override = default;
+
+ // Advances to the next DocHitInfo. Note: several BucketPostingListIterator
+ // instances may be advanced if they point to data with the same DocumentId.
+ //
+ // Returns:
+ // - OK on success
+ // - RESOURCE_EXHAUSTED_ERROR if reaching the end (i.e. no more relevant
+ // data)
+ // - Any BucketPostingListIterator errors
+ libtextclassifier3::Status Advance() override;
+
+ DocHitInfo GetDocHitInfo() const override { return doc_hit_info_; }
+
+ private:
+ BucketPostingListIterator::Comparator comparator_;
+
+ // We have to fetch and pop the top BucketPostingListIterator from
+ // std::priority_queue to perform "merge K sorted lists algorithm".
+ // - Since std::priority_queue::pop() doesn't return the top element, we have
+ // to call top() and pop() together.
+ // - std::move the top() element by const_cast is not an appropriate way
+ // because it introduces transient unstable state for std::priority_queue.
+ // - We don't want to copy BucketPostingListIterator, either.
+ // - Therefore, add bucket_pl_iters_ for the ownership of all
+ // BucketPostingListIterator instances and std::priority_queue uses the raw
+ // pointer. So when calling top(), we can simply copy the raw pointer via
+ // top() and avoid transient unstable state.
+ std::vector<std::unique_ptr<BucketPostingListIterator>> bucket_pl_iters_;
+ std::priority_queue<BucketPostingListIterator*,
+ std::vector<BucketPostingListIterator*>,
+ BucketPostingListIterator::Comparator>
+ pq_;
+
+ DocHitInfo doc_hit_info_;
+};
+
+libtextclassifier3::Status IntegerIndexStorageIterator::Advance() {
+ if (pq_.empty()) {
+ return absl_ports::ResourceExhaustedError("End of iterator");
+ }
+
+ DocumentId document_id = pq_.top()->GetCurrentBasicHit().document_id();
+ doc_hit_info_ = DocHitInfo(document_id);
+ // Merge sections with same document_id into a single DocHitInfo
+ while (!pq_.empty() &&
+ pq_.top()->GetCurrentBasicHit().document_id() == document_id) {
+ BucketPostingListIterator* bucket_itr = pq_.top();
+ pq_.pop();
+
+ libtextclassifier3::Status advance_status;
+ do {
+ doc_hit_info_.UpdateSection(
+ bucket_itr->GetCurrentBasicHit().section_id());
+ advance_status = bucket_itr->AdvanceAndFilter(key_lower_, key_upper_);
+ } while (advance_status.ok() &&
+ bucket_itr->GetCurrentBasicHit().document_id() == document_id);
+ if (advance_status.ok()) {
+ pq_.push(bucket_itr);
+ }
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+bool IntegerIndexStorage::Options::IsValid() const {
+ if (num_data_threshold_for_bucket_split <=
+ kMinNumDataThresholdForBucketSplit) {
+ return false;
+ }
+
+ if (!HasCustomInitBuckets()) {
+ return true;
+ }
+
+ // Verify if the range of buckets are disjoint and the range union is
+ // [INT64_MIN, INT64_MAX].
+ std::vector<Bucket> buckets;
+ buckets.reserve(custom_init_sorted_buckets.size() +
+ custom_init_unsorted_buckets.size());
+ buckets.insert(buckets.end(), custom_init_sorted_buckets.begin(),
+ custom_init_sorted_buckets.end());
+ buckets.insert(buckets.end(), custom_init_unsorted_buckets.begin(),
+ custom_init_unsorted_buckets.end());
+ if (buckets.empty()) {
+ return false;
+ }
+ std::sort(buckets.begin(), buckets.end());
+ int64_t prev_upper = std::numeric_limits<int64_t>::min();
+ for (int i = 0; i < buckets.size(); ++i) {
+ // key_lower should not be greater than key_upper and init bucket should
+ // have invalid posting list identifier.
+ if (buckets[i].key_lower() > buckets[i].key_upper() ||
+ buckets[i].posting_list_identifier().is_valid()) {
+ return false;
+ }
+
+ // Previous upper bound should not be INT64_MAX since it is not the last
+ // bucket.
+ if (prev_upper == std::numeric_limits<int64_t>::max()) {
+ return false;
+ }
+
+ int64_t expected_lower =
+ (i == 0 ? std::numeric_limits<int64_t>::min() : prev_upper + 1);
+ if (buckets[i].key_lower() != expected_lower) {
+ return false;
+ }
+
+ prev_upper = buckets[i].key_upper();
+ }
+
+ return prev_upper == std::numeric_limits<int64_t>::max();
+}
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+IntegerIndexStorage::Create(
+ const Filesystem& filesystem, std::string working_path, Options options,
+ PostingListIntegerIndexSerializer* posting_list_serializer) {
+ if (!options.IsValid()) {
+ return absl_ports::InvalidArgumentError(
+ "Invalid IntegerIndexStorage options");
+ }
+
+ if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str()) ||
+ !filesystem.FileExists(GetSortedBucketsFilePath(working_path).c_str()) ||
+ !filesystem.FileExists(
+ GetUnsortedBucketsFilePath(working_path).c_str()) ||
+ !filesystem.FileExists(
+ GetFlashIndexStorageFilePath(working_path).c_str())) {
+ // Discard working_path if any of them is missing, and reinitialize.
+ if (filesystem.DirectoryExists(working_path.c_str())) {
+ ICING_RETURN_IF_ERROR(Discard(filesystem, working_path));
+ }
+ return InitializeNewFiles(filesystem, std::move(working_path),
+ std::move(options), posting_list_serializer);
+ }
+ return InitializeExistingFiles(filesystem, std::move(working_path),
+ std::move(options), posting_list_serializer);
+}
+
+IntegerIndexStorage::~IntegerIndexStorage() {
+ if (!PersistToDisk().ok()) {
+ ICING_LOG(WARNING)
+ << "Failed to persist hash map to disk while destructing "
+ << working_path_;
+ }
+}
+
+class IntegerIndexStorageComparator {
+ public:
+ bool operator()(const IntegerIndexStorage::Bucket& lhs, int64_t rhs) const {
+ return lhs.key_upper() < rhs;
+ }
+} kComparator;
+
+libtextclassifier3::Status IntegerIndexStorage::AddKeys(
+ DocumentId document_id, SectionId section_id,
+ std::vector<int64_t>&& new_keys) {
+ if (new_keys.empty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ SetDirty();
+
+ std::sort(new_keys.begin(), new_keys.end());
+
+ // Dedupe
+ auto last = std::unique(new_keys.begin(), new_keys.end());
+ new_keys.erase(last, new_keys.end());
+
+ if (static_cast<int32_t>(new_keys.size()) >
+ std::numeric_limits<int32_t>::max() - info().num_data) {
+ return absl_ports::ResourceExhaustedError(
+ "# of keys in this integer index storage exceed the limit");
+ }
+
+ // When adding keys into a bucket, we potentially split it into 2 new buckets
+ // and one of them will be added into the unsorted bucket array.
+ // When handling keys belonging to buckets in the unsorted bucket array, we
+ // don't have to (and must not) handle these newly split buckets. Therefore,
+ // collect all newly split buckets in another vector and append them into the
+ // unsorted bucket array after adding all keys.
+ std::vector<Bucket> new_buckets;
+
+ // Binary search range of the sorted bucket array.
+ const Bucket* sorted_bucket_arr_begin = sorted_buckets_->array();
+ const Bucket* sorted_bucket_arr_end =
+ sorted_buckets_->array() + sorted_buckets_->num_elements();
+
+ // Step 1: handle keys belonging to buckets in the sorted bucket array. Skip
+ // keys belonging to the unsorted bucket array and deal with them in
+ // the next step.
+ // - Iterate through new_keys by it_start.
+ // - Binary search (std::lower_bound comparing key with bucket.key_upper()) to
+ // find the first bucket in the sorted bucket array with key_upper is not
+ // smaller than (>=) the key.
+ // - Skip (and advance it_start) all keys smaller than the target bucket's
+ // key_lower. It means these keys belong to buckets in the unsorted bucket
+ // array and we will deal with them later.
+ // - Find it_end such that all keys within range [it_start, it_end) belong to
+ // the target bucket.
+ // - Batch add keys within range [it_start, it_end) into the target bucket.
+ auto it_start = new_keys.cbegin();
+ while (it_start != new_keys.cend() &&
+ sorted_bucket_arr_begin < sorted_bucket_arr_end) {
+ // Use std::lower_bound to find the first bucket in the sorted bucket array
+ // with key_upper >= *it_start.
+ const Bucket* target_bucket = std::lower_bound(
+ sorted_bucket_arr_begin, sorted_bucket_arr_end, *it_start, kComparator);
+ if (target_bucket >= sorted_bucket_arr_end) {
+ // Keys in range [it_start, new_keys.cend()) are greater than all sorted
+ // buckets' key_upper, so we can end step 1. In fact, they belong to
+ // buckets in the unsorted bucket array and we will deal with them in
+ // step 2.
+ break;
+ }
+
+ // Sequential instead of binary search to advance it_start and it_end for
+ // several reasons:
+ // - Eventually we have to iterate through all keys within range [it_start,
+ // it_end) and add them into the posting list, so binary search doesn't
+ // improve the overall time complexity.
+ // - Binary search may jump to far-away indices, which potentially
+ // downgrades the cache performance.
+
+ // After binary search, we've ensured *it_start <=
+ // target_bucket->key_upper(), but it is still possible that *it_start (and
+ // the next several keys) is still smaller than target_bucket->key_lower(),
+ // so we have to skip them. In fact, they belong to buckets in the unsorted
+ // bucket array.
+ //
+ // For example:
+ // - sorted bucket array: [(INT_MIN, 0), (1, 5), (100, 300), (301, 550)]
+ // - unsorted bucket array: [(550, INT_MAX), (6, 99)]
+ // - new_keys: [10, 20, 40, 102, 150, 200, 500, 600]
+ // std::lower_bound (target = 10) will get target_bucket = (100, 300), but
+ // we have to skip 10, 20, 40 because they are smaller than 100 (the
+ // bucket's key_lower). We should move it_start pointing to key 102.
+ while (it_start != new_keys.cend() &&
+ *it_start < target_bucket->key_lower()) {
+ ++it_start;
+ }
+
+ // Locate it_end such that all keys within range [it_start, it_end) belong
+ // to target_bucket and all keys outside this range don't belong to
+ // target_bucket.
+ //
+ // For example (continue above), we should locate it_end to point to key
+ // 500.
+ auto it_end = it_start;
+ while (it_end != new_keys.cend() && *it_end <= target_bucket->key_upper()) {
+ ++it_end;
+ }
+
+ // Now, keys within range [it_start, it_end) belong to target_bucket, so
+ // construct IntegerIndexData and add them into the bucket's posting list.
+ if (it_start != it_end) {
+ ICING_ASSIGN_OR_RETURN(
+ FileBackedVector<Bucket>::MutableView mutable_bucket,
+ sorted_buckets_->GetMutable(target_bucket -
+ sorted_buckets_->array()));
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<Bucket> round_new_buckets,
+ AddKeysIntoBucketAndSplitIfNecessary(
+ document_id, section_id, it_start, it_end, mutable_bucket));
+ new_buckets.insert(new_buckets.end(), round_new_buckets.begin(),
+ round_new_buckets.end());
+ }
+
+ it_start = it_end;
+ sorted_bucket_arr_begin = target_bucket + 1;
+ }
+
+ // Step 2: handle keys belonging to buckets in the unsorted bucket array. They
+ // were skipped in step 1.
+ // For each bucket in the unsorted bucket array, find [it_start, it_end) such
+ // that all keys within this range belong to the bucket and add them.
+ // - Binary search (std::lower_bound comparing bucket.key_lower() with key) to
+ // find it_start.
+ // - Sequential advance (start from it_start) to find it_end. Same reason as
+ // above for choosing sequential advance instead of binary search.
+ // - Add keys within range [it_start, it_end) into the bucket.
+ for (int32_t i = 0; i < unsorted_buckets_->num_elements(); ++i) {
+ ICING_ASSIGN_OR_RETURN(FileBackedVector<Bucket>::MutableView mutable_bucket,
+ unsorted_buckets_->GetMutable(i));
+ auto it_start = std::lower_bound(new_keys.cbegin(), new_keys.cend(),
+ mutable_bucket.Get().key_lower());
+ if (it_start == new_keys.cend()) {
+ continue;
+ }
+
+ // Sequential advance instead of binary search to find the correct position
+ // of it_end for the same reasons mentioned above in step 1.
+ auto it_end = it_start;
+ while (it_end != new_keys.cend() &&
+ *it_end <= mutable_bucket.Get().key_upper()) {
+ ++it_end;
+ }
+
+ // Now, key within range [it_start, it_end) belong to the bucket, so
+ // construct IntegerIndexData and add them into the bucket's posting list.
+ if (it_start != it_end) {
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<Bucket> round_new_buckets,
+ AddKeysIntoBucketAndSplitIfNecessary(
+ document_id, section_id, it_start, it_end, mutable_bucket));
+ new_buckets.insert(new_buckets.end(), round_new_buckets.begin(),
+ round_new_buckets.end());
+ }
+ }
+
+ // Step 3: append new buckets into the unsorted bucket array.
+ if (!new_buckets.empty()) {
+ ICING_ASSIGN_OR_RETURN(
+ typename FileBackedVector<Bucket>::MutableArrayView mutable_new_arr,
+ unsorted_buckets_->Allocate(new_buckets.size()));
+ mutable_new_arr.SetArray(/*idx=*/0, new_buckets.data(), new_buckets.size());
+ }
+
+ // Step 4: sort and merge the unsorted bucket array into the sorted bucket
+ // array if the length of the unsorted bucket array exceeds the
+ // threshold.
+ if (unsorted_buckets_->num_elements() > kUnsortedBucketsLengthThreshold) {
+ ICING_RETURN_IF_ERROR(SortBuckets());
+ }
+
+ info().num_data += new_keys.size();
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
+IntegerIndexStorage::GetIterator(int64_t query_key_lower,
+ int64_t query_key_upper) const {
+ if (query_key_lower > query_key_upper) {
+ return absl_ports::InvalidArgumentError(
+ "key_lower should not be greater than key_upper");
+ }
+
+ std::vector<std::unique_ptr<BucketPostingListIterator>> bucket_pl_iters;
+
+ // Sorted bucket array
+ const Bucket* sorted_bucket_arr_begin = sorted_buckets_->array();
+ const Bucket* sorted_bucket_arr_end =
+ sorted_buckets_->array() + sorted_buckets_->num_elements();
+ for (const Bucket* bucket =
+ std::lower_bound(sorted_bucket_arr_begin, sorted_bucket_arr_end,
+ query_key_lower, kComparator);
+ bucket < sorted_bucket_arr_end && bucket->key_lower() <= query_key_upper;
+ ++bucket) {
+ if (!bucket->posting_list_identifier().is_valid()) {
+ continue;
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::CreateFromExisting(
+ flash_index_storage_.get(), posting_list_serializer_,
+ bucket->posting_list_identifier()));
+ bucket_pl_iters.push_back(
+ std::make_unique<BucketPostingListIterator>(std::move(pl_accessor)));
+ }
+
+ // Unsorted bucket array
+ for (int32_t i = 0; i < unsorted_buckets_->num_elements(); ++i) {
+ ICING_ASSIGN_OR_RETURN(const Bucket* bucket, unsorted_buckets_->Get(i));
+ if (query_key_upper < bucket->key_lower() ||
+ query_key_lower > bucket->key_upper() ||
+ !bucket->posting_list_identifier().is_valid()) {
+ // Skip bucket whose range doesn't overlap with [query_key_lower,
+ // query_key_upper] or posting_list_identifier is invalid.
+ continue;
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::CreateFromExisting(
+ flash_index_storage_.get(), posting_list_serializer_,
+ bucket->posting_list_identifier()));
+ bucket_pl_iters.push_back(
+ std::make_unique<BucketPostingListIterator>(std::move(pl_accessor)));
+ }
+
+ return std::make_unique<DocHitInfoIteratorNumeric<int64_t>>(
+ std::make_unique<IntegerIndexStorageIterator>(
+ query_key_lower, query_key_upper, std::move(bucket_pl_iters)));
+}
+
+libtextclassifier3::Status IntegerIndexStorage::TransferIndex(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ IntegerIndexStorage* new_storage) const {
+ // Discard all pre-existing buckets in new_storage since we will append newly
+ // merged buckets gradually into new_storage.
+ if (new_storage->sorted_buckets_->num_elements() > 0) {
+ ICING_RETURN_IF_ERROR(new_storage->sorted_buckets_->TruncateTo(0));
+ }
+ if (new_storage->unsorted_buckets_->num_elements() > 0) {
+ ICING_RETURN_IF_ERROR(new_storage->unsorted_buckets_->TruncateTo(0));
+ }
+
+ // "Reference sort" the original storage buckets.
+ std::vector<std::reference_wrapper<const Bucket>> temp_buckets;
+ temp_buckets.reserve(sorted_buckets_->num_elements() +
+ unsorted_buckets_->num_elements());
+ temp_buckets.insert(
+ temp_buckets.end(), sorted_buckets_->array(),
+ sorted_buckets_->array() + sorted_buckets_->num_elements());
+ temp_buckets.insert(
+ temp_buckets.end(), unsorted_buckets_->array(),
+ unsorted_buckets_->array() + unsorted_buckets_->num_elements());
+ std::sort(temp_buckets.begin(), temp_buckets.end(),
+ [](const std::reference_wrapper<const Bucket>& lhs,
+ const std::reference_wrapper<const Bucket>& rhs) -> bool {
+ return lhs.get() < rhs.get();
+ });
+
+ const int32_t num_data_threshold_for_bucket_merge =
+ kNumDataThresholdRatioForBucketMerge *
+ new_storage->options_.num_data_threshold_for_bucket_split;
+ int64_t curr_key_lower = std::numeric_limits<int64_t>::min();
+ int64_t curr_key_upper = std::numeric_limits<int64_t>::min();
+ std::vector<IntegerIndexData> accumulated_data;
+ for (const std::reference_wrapper<const Bucket>& bucket_ref : temp_buckets) {
+ // Read all data from the bucket.
+ std::vector<IntegerIndexData> new_data;
+ if (bucket_ref.get().posting_list_identifier().is_valid()) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> old_pl_accessor,
+ PostingListIntegerIndexAccessor::CreateFromExisting(
+ flash_index_storage_.get(), posting_list_serializer_,
+ bucket_ref.get().posting_list_identifier()));
+
+ ICING_ASSIGN_OR_RETURN(std::vector<IntegerIndexData> batch_old_data,
+ old_pl_accessor->GetNextDataBatch());
+ while (!batch_old_data.empty()) {
+ for (const IntegerIndexData& old_data : batch_old_data) {
+ DocumentId new_document_id =
+ old_data.basic_hit().document_id() < document_id_old_to_new.size()
+ ? document_id_old_to_new[old_data.basic_hit().document_id()]
+ : kInvalidDocumentId;
+ // Transfer the document id of the hit if the document is not deleted
+ // or outdated.
+ if (new_document_id != kInvalidDocumentId) {
+ new_data.push_back(
+ IntegerIndexData(old_data.basic_hit().section_id(),
+ new_document_id, old_data.key()));
+ }
+ }
+ ICING_ASSIGN_OR_RETURN(batch_old_data,
+ old_pl_accessor->GetNextDataBatch());
+ }
+ }
+
+ // Decide whether:
+ // - Flush accumulated_data and create a new bucket for them.
+ // - OR merge new_data into accumulated_data and go to the next round.
+ if (!accumulated_data.empty() && accumulated_data.size() + new_data.size() >
+ num_data_threshold_for_bucket_merge) {
+ // TODO(b/259743562): [Optimization 3] adjust upper bound to fit more data
+ // from new_data to accumulated_data.
+ ICING_RETURN_IF_ERROR(FlushDataIntoNewSortedBucket(
+ curr_key_lower, curr_key_upper, std::move(accumulated_data),
+ new_storage));
+
+ curr_key_lower = bucket_ref.get().key_lower();
+ accumulated_data = std::move(new_data);
+ } else {
+ // We can just append to accumulated data because
+ // FlushDataIntoNewSortedBucket will take care of sorting the contents.
+ std::move(new_data.begin(), new_data.end(),
+ std::back_inserter(accumulated_data));
+ }
+ curr_key_upper = bucket_ref.get().key_upper();
+ }
+
+ // Add the last round of bucket.
+ ICING_RETURN_IF_ERROR(
+ FlushDataIntoNewSortedBucket(curr_key_lower, curr_key_upper,
+ std::move(accumulated_data), new_storage));
+
+ return libtextclassifier3::Status::OK;
+}
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+IntegerIndexStorage::InitializeNewFiles(
+ const Filesystem& filesystem, std::string&& working_path, Options&& options,
+ PostingListIntegerIndexSerializer* posting_list_serializer) {
+ // IntegerIndexStorage uses working_path as working directory path.
+ // Create working directory.
+ if (!filesystem.CreateDirectory(working_path.c_str())) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to create directory: ", working_path));
+ }
+
+ // Initialize sorted_buckets
+ int32_t pre_mapping_mmap_size = sizeof(Bucket) * (1 << 10);
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem, GetSortedBucketsFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ FileBackedVector<Bucket>::kMaxFileSize,
+ options.pre_mapping_fbv ? pre_mapping_mmap_size : 0));
+
+ // Initialize unsorted_buckets
+ pre_mapping_mmap_size = sizeof(Bucket) * kUnsortedBucketsLengthThreshold;
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem, GetUnsortedBucketsFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ FileBackedVector<Bucket>::kMaxFileSize,
+ options.pre_mapping_fbv ? pre_mapping_mmap_size : 0));
+
+ // Initialize flash_index_storage
+ ICING_ASSIGN_OR_RETURN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_path),
+ &filesystem, posting_list_serializer));
+
+ if (options.HasCustomInitBuckets()) {
+ // Insert custom init buckets.
+ std::sort(options.custom_init_sorted_buckets.begin(),
+ options.custom_init_sorted_buckets.end());
+ ICING_ASSIGN_OR_RETURN(
+ typename FileBackedVector<Bucket>::MutableArrayView
+ mutable_new_sorted_bucket_arr,
+ sorted_buckets->Allocate(options.custom_init_sorted_buckets.size()));
+ mutable_new_sorted_bucket_arr.SetArray(
+ /*idx=*/0, options.custom_init_sorted_buckets.data(),
+ options.custom_init_sorted_buckets.size());
+
+ ICING_ASSIGN_OR_RETURN(typename FileBackedVector<Bucket>::MutableArrayView
+ mutable_new_unsorted_bucket_arr,
+ unsorted_buckets->Allocate(
+ options.custom_init_unsorted_buckets.size()));
+ mutable_new_unsorted_bucket_arr.SetArray(
+ /*idx=*/0, options.custom_init_unsorted_buckets.data(),
+ options.custom_init_unsorted_buckets.size());
+
+ // After inserting buckets, we can clear vectors since there is no need to
+ // cache them.
+ options.custom_init_sorted_buckets.clear();
+ options.custom_init_unsorted_buckets.clear();
+ } else {
+ // Insert one bucket with range [INT64_MIN, INT64_MAX].
+ ICING_RETURN_IF_ERROR(sorted_buckets->Append(Bucket(
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max())));
+ }
+ ICING_RETURN_IF_ERROR(sorted_buckets->PersistToDisk());
+
+ // Initialize metadata file. Create MemoryMappedFile with pre-mapping, and
+ // call GrowAndRemapIfNecessary to grow the underlying file.
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile metadata_mmapped_file,
+ MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/kMetadataFileSize,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/kMetadataFileSize));
+ ICING_RETURN_IF_ERROR(metadata_mmapped_file.GrowAndRemapIfNecessary(
+ /*file_offset=*/0, /*mmap_size=*/kMetadataFileSize));
+
+ // Create instance.
+ auto new_integer_index_storage =
+ std::unique_ptr<IntegerIndexStorage>(new IntegerIndexStorage(
+ filesystem, std::move(working_path), std::move(options),
+ posting_list_serializer,
+ std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
+ std::move(sorted_buckets), std::move(unsorted_buckets),
+ std::make_unique<FlashIndexStorage>(std::move(flash_index_storage))));
+ // Initialize info content by writing mapped memory directly.
+ Info& info_ref = new_integer_index_storage->info();
+ info_ref.magic = Info::kMagic;
+ info_ref.num_data = 0;
+ // Initialize new PersistentStorage. The initial checksums will be computed
+ // and set via InitializeNewStorage.
+ ICING_RETURN_IF_ERROR(new_integer_index_storage->InitializeNewStorage());
+
+ return new_integer_index_storage;
+}
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+IntegerIndexStorage::InitializeExistingFiles(
+ const Filesystem& filesystem, std::string&& working_path, Options&& options,
+ PostingListIntegerIndexSerializer* posting_list_serializer) {
+ // Mmap the content of the crcs and info.
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile metadata_mmapped_file,
+ MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/kMetadataFileSize,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/kMetadataFileSize));
+ if (metadata_mmapped_file.available_size() != kMetadataFileSize) {
+ return absl_ports::FailedPreconditionError("Incorrect metadata file size");
+ }
+
+ // Initialize sorted_buckets
+ int32_t pre_mapping_mmap_size = sizeof(Bucket) * (1 << 10);
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem, GetSortedBucketsFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ FileBackedVector<Bucket>::kMaxFileSize,
+ options.pre_mapping_fbv ? pre_mapping_mmap_size : 0));
+
+ // Initialize unsorted_buckets
+ pre_mapping_mmap_size = sizeof(Bucket) * kUnsortedBucketsLengthThreshold;
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem, GetUnsortedBucketsFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ FileBackedVector<Bucket>::kMaxFileSize,
+ options.pre_mapping_fbv ? pre_mapping_mmap_size : 0));
+
+ // Initialize flash_index_storage
+ ICING_ASSIGN_OR_RETURN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_path),
+ &filesystem, posting_list_serializer));
+
+ // Create instance.
+ auto integer_index_storage =
+ std::unique_ptr<IntegerIndexStorage>(new IntegerIndexStorage(
+ filesystem, std::move(working_path), std::move(options),
+ posting_list_serializer,
+ std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
+ std::move(sorted_buckets), std::move(unsorted_buckets),
+ std::make_unique<FlashIndexStorage>(std::move(flash_index_storage))));
+ // Initialize existing PersistentStorage. Checksums will be validated.
+ ICING_RETURN_IF_ERROR(integer_index_storage->InitializeExistingStorage());
+
+ // Validate other values of info and options.
+ // Magic should be consistent with the codebase.
+ if (integer_index_storage->info().magic != Info::kMagic) {
+ return absl_ports::FailedPreconditionError("Incorrect magic value");
+ }
+
+ return integer_index_storage;
+}
+
+/* static */ libtextclassifier3::Status
+IntegerIndexStorage::FlushDataIntoNewSortedBucket(
+ int64_t key_lower, int64_t key_upper, std::vector<IntegerIndexData>&& data,
+ IntegerIndexStorage* storage) {
+ storage->SetDirty();
+
+ if (data.empty()) {
+ return storage->sorted_buckets_->Append(Bucket(
+ key_lower, key_upper, PostingListIdentifier::kInvalid, /*num_data=*/0));
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ PostingListIdentifier pl_id,
+ FlushDataIntoPostingLists(storage->flash_index_storage_.get(),
+ storage->posting_list_serializer_, data.begin(),
+ data.end()));
+
+ storage->info().num_data += data.size();
+ return storage->sorted_buckets_->Append(
+ Bucket(key_lower, key_upper, pl_id, data.size()));
+}
+
+libtextclassifier3::Status IntegerIndexStorage::PersistStoragesToDisk(
+ bool force) {
+ if (!force && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ ICING_RETURN_IF_ERROR(sorted_buckets_->PersistToDisk());
+ ICING_RETURN_IF_ERROR(unsorted_buckets_->PersistToDisk());
+ if (!flash_index_storage_->PersistToDisk()) {
+ return absl_ports::InternalError(
+ "Fail to persist FlashIndexStorage to disk");
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IntegerIndexStorage::PersistMetadataToDisk(
+ bool force) {
+ // We can skip persisting metadata to disk only if both info and storage are
+ // clean.
+ if (!force && !is_info_dirty() && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ // Changes should have been applied to the underlying file when using
+ // MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, but call msync() as an
+ // extra safety step to ensure they are written out.
+ return metadata_mmapped_file_->PersistToDisk();
+}
+
+libtextclassifier3::StatusOr<Crc32> IntegerIndexStorage::ComputeInfoChecksum(
+ bool force) {
+ if (!force && !is_info_dirty()) {
+ return Crc32(crcs().component_crcs.info_crc);
+ }
+
+ return info().ComputeChecksum();
+}
+
+libtextclassifier3::StatusOr<Crc32>
+IntegerIndexStorage::ComputeStoragesChecksum(bool force) {
+ if (!force && !is_storage_dirty()) {
+ return Crc32(crcs().component_crcs.storages_crc);
+ }
+
+ // Compute crcs
+ ICING_ASSIGN_OR_RETURN(Crc32 sorted_buckets_crc,
+ sorted_buckets_->ComputeChecksum());
+ ICING_ASSIGN_OR_RETURN(Crc32 unsorted_buckets_crc,
+ unsorted_buckets_->ComputeChecksum());
+
+ // TODO(b/259744228): implement and include flash_index_storage checksum
+ return Crc32(sorted_buckets_crc.Get() ^ unsorted_buckets_crc.Get());
+}
+
+libtextclassifier3::StatusOr<std::vector<IntegerIndexStorage::Bucket>>
+IntegerIndexStorage::AddKeysIntoBucketAndSplitIfNecessary(
+ DocumentId document_id, SectionId section_id,
+ const std::vector<int64_t>::const_iterator& it_start,
+ const std::vector<int64_t>::const_iterator& it_end,
+ FileBackedVector<Bucket>::MutableView& mutable_bucket) {
+ int32_t num_data_in_bucket = mutable_bucket.Get().num_data();
+ int32_t num_new_data = std::distance(it_start, it_end);
+ if (mutable_bucket.Get().key_lower() < mutable_bucket.Get().key_upper() &&
+ num_new_data + num_data_in_bucket >
+ options_.num_data_threshold_for_bucket_split) {
+ // Split bucket.
+
+ // 1. Read all data and free all posting lists.
+ std::vector<IntegerIndexData> all_data;
+ if (mutable_bucket.Get().posting_list_identifier().is_valid()) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::CreateFromExisting(
+ flash_index_storage_.get(), posting_list_serializer_,
+ mutable_bucket.Get().posting_list_identifier()));
+ ICING_ASSIGN_OR_RETURN(all_data, pl_accessor->GetAllDataAndFree());
+ }
+
+ // 2. Append all new data.
+ all_data.reserve(all_data.size() + num_new_data);
+ for (auto it = it_start; it != it_end; ++it) {
+ all_data.push_back(IntegerIndexData(section_id, document_id, *it));
+ }
+
+ // 3. Run bucket splitting algorithm to decide new buckets and dispatch
+ // data.
+ // - # of data in a full bucket =
+ // options_.num_data_threshold_for_bucket_split.
+ // - Bucket splitting logic will be invoked if adding new data
+ // (num_new_data >= 1) into a full bucket.
+ // - In order to achieve good (amortized) time complexity, we want # of
+ // data in new buckets to be around half_of_threshold (i.e.
+ // options_.num_data_threshold_for_bucket_split / 2).
+ // - Using half_of_threshold as the cutoff threshold will cause splitting
+ // buckets with [half_of_threshold, half_of_threshold, num_new_data]
+ // data, which is not ideal because num_new_data is usually small.
+ // - Thus, we pick (half_of_threshold + kNumDataAfterSplitAdjustment) as
+ // the cutoff threshold to avoid over-splitting. It can tolerate
+ // num_new_data up to (2 * kNumDataAfterSplitAdjustment) and
+ // split only 2 buckets (instead of 3) with
+ // [half_of_threshold + kNumDataAfterSplitAdjustment,
+ // half_of_threshold + (kNumDataAfterSplitAdjustment - num_new_data)].
+ int32_t cutoff_threshold =
+ options_.num_data_threshold_for_bucket_split / 2 +
+ kNumDataAfterSplitAdjustment;
+ std::vector<integer_index_bucket_util::DataRangeAndBucketInfo>
+ new_bucket_infos = integer_index_bucket_util::Split(
+ all_data, mutable_bucket.Get().key_lower(),
+ mutable_bucket.Get().key_upper(), cutoff_threshold);
+ if (new_bucket_infos.empty()) {
+ ICING_LOG(WARNING)
+ << "No buckets after splitting. This should not happen.";
+ return absl_ports::InternalError("Split error");
+ }
+
+ // 4. Flush data and create new buckets.
+ std::vector<Bucket> new_buckets;
+ for (int i = 0; i < new_bucket_infos.size(); ++i) {
+ int32_t num_data_in_new_bucket =
+ std::distance(new_bucket_infos[i].start, new_bucket_infos[i].end);
+ ICING_ASSIGN_OR_RETURN(
+ PostingListIdentifier pl_id,
+ FlushDataIntoPostingLists(
+ flash_index_storage_.get(), posting_list_serializer_,
+ new_bucket_infos[i].start, new_bucket_infos[i].end));
+ if (i == 0) {
+ // Reuse mutable_bucket
+ mutable_bucket.Get().set_key_lower(new_bucket_infos[i].key_lower);
+ mutable_bucket.Get().set_key_upper(new_bucket_infos[i].key_upper);
+ mutable_bucket.Get().set_posting_list_identifier(pl_id);
+ mutable_bucket.Get().set_num_data(num_data_in_new_bucket);
+ } else {
+ new_buckets.push_back(Bucket(new_bucket_infos[i].key_lower,
+ new_bucket_infos[i].key_upper, pl_id,
+ num_data_in_new_bucket));
+ }
+ }
+
+ return new_buckets;
+ }
+
+ // Otherwise, we don't need to split bucket. Just simply add all new data into
+ // the bucket.
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor;
+ if (mutable_bucket.Get().posting_list_identifier().is_valid()) {
+ ICING_ASSIGN_OR_RETURN(
+ pl_accessor, PostingListIntegerIndexAccessor::CreateFromExisting(
+ flash_index_storage_.get(), posting_list_serializer_,
+ mutable_bucket.Get().posting_list_identifier()));
+ } else {
+ ICING_ASSIGN_OR_RETURN(
+ pl_accessor, PostingListIntegerIndexAccessor::Create(
+ flash_index_storage_.get(), posting_list_serializer_));
+ }
+
+ for (auto it = it_start; it != it_end; ++it) {
+ ICING_RETURN_IF_ERROR(pl_accessor->PrependData(
+ IntegerIndexData(section_id, document_id, *it)));
+ }
+
+ PostingListAccessor::FinalizeResult result =
+ std::move(*pl_accessor).Finalize();
+ if (!result.status.ok()) {
+ return result.status;
+ }
+ if (!result.id.is_valid()) {
+ return absl_ports::InternalError("Fail to flush data into posting list(s)");
+ }
+
+ mutable_bucket.Get().set_posting_list_identifier(result.id);
+ // We've already verified num_new_data won't exceed the limit of the entire
+ // storage, so it is safe to add to the counter of the bucket.
+ mutable_bucket.Get().set_num_data(num_data_in_bucket + num_new_data);
+
+ return std::vector<Bucket>();
+}
+
+libtextclassifier3::Status IntegerIndexStorage::SortBuckets() {
+ if (unsorted_buckets_->num_elements() == 0) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ int32_t sorted_len = sorted_buckets_->num_elements();
+ int32_t unsorted_len = unsorted_buckets_->num_elements();
+ if (sorted_len > FileBackedVector<Bucket>::kMaxNumElements - unsorted_len) {
+ return absl_ports::OutOfRangeError(
+ "Sorted buckets length exceeds the limit after merging");
+ }
+
+ ICING_RETURN_IF_ERROR(sorted_buckets_->Allocate(unsorted_len));
+
+ // Sort unsorted_buckets_.
+ ICING_RETURN_IF_ERROR(
+ unsorted_buckets_->Sort(/*begin_idx=*/0, /*end_idx=*/unsorted_len));
+
+ // Merge unsorted_buckets_ into sorted_buckets_ and clear unsorted_buckets_.
+ // Note that we could have used std::sort + std::inplace_merge, but it is more
+ // complicated to deal with FileBackedVector SetDirty logic, so implement our
+ // own merging with FileBackedVector methods.
+ //
+ // Merge buckets from back. This could save some iterations and avoid setting
+ // dirty for unchanged elements of the original sorted segments.
+ // For example, we can avoid setting dirty for elements [1, 2, 3, 5] for the
+ // following sorted/unsorted data:
+ // - sorted: [1, 2, 3, 5, 8, 13, _, _, _, _)]
+ // - unsorted: [6, 10, 14, 15]
+ int32_t sorted_write_idx = sorted_len + unsorted_len - 1;
+ int32_t sorted_curr_idx = sorted_len - 1;
+ int32_t unsorted_curr_idx = unsorted_len - 1;
+ while (unsorted_curr_idx >= 0) {
+ if (sorted_curr_idx >= 0 && unsorted_buckets_->array()[unsorted_curr_idx] <
+ sorted_buckets_->array()[sorted_curr_idx]) {
+ ICING_RETURN_IF_ERROR(sorted_buckets_->Set(
+ sorted_write_idx, sorted_buckets_->array()[sorted_curr_idx]));
+ --sorted_curr_idx;
+
+ } else {
+ ICING_RETURN_IF_ERROR(sorted_buckets_->Set(
+ sorted_write_idx, unsorted_buckets_->array()[unsorted_curr_idx]));
+ --unsorted_curr_idx;
+ }
+ --sorted_write_idx;
+ }
+
+ ICING_RETURN_IF_ERROR(unsorted_buckets_->TruncateTo(0));
+
+ return libtextclassifier3::Status::OK;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/numeric/integer-index-storage.h b/icing/index/numeric/integer-index-storage.h
index 2048e76..0c1afbb 100644
--- a/icing/index/numeric/integer-index-storage.h
+++ b/icing/index/numeric/integer-index-storage.h
@@ -19,13 +19,21 @@
#include <memory>
#include <string>
#include <string_view>
+#include <vector>
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/file/file-backed-vector.h"
#include "icing/file/filesystem.h"
#include "icing/file/memory-mapped-file.h"
+#include "icing/file/persistent-storage.h"
#include "icing/file/posting_list/flash-index-storage.h"
#include "icing/file/posting_list/posting-list-identifier.h"
-#include "icing/index/numeric/posting-list-used-integer-index-data-serializer.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/integer-index-data.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
#include "icing/util/crc32.h"
namespace icing {
@@ -64,49 +72,13 @@ namespace lib {
// choose sorted/unsorted bucket array.
// - Then we do binary search on the sorted bucket array and sequential search
// on the unsorted bucket array.
-class IntegerIndexStorage {
+class IntegerIndexStorage : public PersistentStorage {
public:
- // Crcs and Info will be written into the metadata file.
- // File layout: <Crcs><Info>
- // Crcs
- struct Crcs {
- static constexpr int32_t kFileOffset = 0;
-
- struct ComponentCrcs {
- uint32_t info_crc;
- uint32_t sorted_buckets_crc;
- uint32_t unsorted_buckets_crc;
- uint32_t flash_index_storage_crc;
-
- bool operator==(const ComponentCrcs& other) const {
- return info_crc == other.info_crc &&
- sorted_buckets_crc == other.sorted_buckets_crc &&
- unsorted_buckets_crc == other.unsorted_buckets_crc &&
- flash_index_storage_crc == other.flash_index_storage_crc;
- }
-
- Crc32 ComputeChecksum() const {
- return Crc32(std::string_view(reinterpret_cast<const char*>(this),
- sizeof(ComponentCrcs)));
- }
- } __attribute__((packed));
-
- bool operator==(const Crcs& other) const {
- return all_crc == other.all_crc && component_crcs == other.component_crcs;
- }
-
- uint32_t all_crc;
- ComponentCrcs component_crcs;
- } __attribute__((packed));
- static_assert(sizeof(Crcs) == 20, "");
-
- // Info
struct Info {
- static constexpr int32_t kFileOffset = static_cast<int32_t>(sizeof(Crcs));
- static constexpr int32_t kMagic = 0xc4bf0ccc;
+ static constexpr int32_t kMagic = 0x6470e547;
int32_t magic;
- int32_t num_keys;
+ int32_t num_data;
Crc32 ComputeChecksum() const {
return Crc32(
@@ -126,10 +98,17 @@ class IntegerIndexStorage {
static constexpr int32_t kMaxNumBuckets = 1 << 23;
explicit Bucket(int64_t key_lower, int64_t key_upper,
- PostingListIdentifier posting_list_identifier)
+ PostingListIdentifier posting_list_identifier =
+ PostingListIdentifier::kInvalid,
+ int32_t num_data = 0)
: key_lower_(key_lower),
key_upper_(key_upper),
- posting_list_identifier_(posting_list_identifier) {}
+ posting_list_identifier_(posting_list_identifier),
+ num_data_(num_data) {}
+
+ bool operator<(const Bucket& other) const {
+ return key_lower_ < other.key_lower_;
+ }
// For FileBackedVector
bool operator==(const Bucket& other) const {
@@ -137,6 +116,14 @@ class IntegerIndexStorage {
posting_list_identifier_ == other.posting_list_identifier_;
}
+ int64_t key_lower() const { return key_lower_; }
+
+ int64_t key_upper() const { return key_upper_; }
+
+ void set_key_lower(int64_t key_lower) { key_lower_ = key_lower; }
+
+ void set_key_upper(int64_t key_upper) { key_upper_ = key_upper; }
+
PostingListIdentifier posting_list_identifier() const {
return posting_list_identifier_;
}
@@ -145,12 +132,16 @@ class IntegerIndexStorage {
posting_list_identifier_ = posting_list_identifier;
}
+ int32_t num_data() const { return num_data_; }
+ void set_num_data(int32_t num_data) { num_data_ = num_data; }
+
private:
int64_t key_lower_;
int64_t key_upper_;
PostingListIdentifier posting_list_identifier_;
+ int32_t num_data_;
} __attribute__((packed));
- static_assert(sizeof(Bucket) == 20, "");
+ static_assert(sizeof(Bucket) == 24, "");
static_assert(sizeof(Bucket) == FileBackedVector<Bucket>::kElementTypeSize,
"Bucket type size is inconsistent with FileBackedVector "
"element type size");
@@ -160,24 +151,353 @@ class IntegerIndexStorage {
FileBackedVector<Bucket>::kElementTypeSize,
"Max # of buckets cannot fit into FileBackedVector");
+ struct Options {
+ // - According to the benchmark result, the more # of buckets, the higher
+ // latency for range query. Therefore, this number cannot be too small to
+ // avoid splitting bucket too aggressively.
+ // - We use `num_data_threshold_for_bucket_split / 2 + 5` as the cutoff
+ // threshold after splitting. This number cannot be too small (e.g. 10)
+ // because in this case we will have similar # of data in a single bucket
+ // before and after splitting, which contradicts the purpose of splitting.
+ // - For convenience, let's set 64 as the minimum value.
+ static constexpr int32_t kMinNumDataThresholdForBucketSplit = 64;
+
+ explicit Options(int32_t num_data_threshold_for_bucket_split_in,
+ bool pre_mapping_fbv_in)
+ : num_data_threshold_for_bucket_split(
+ num_data_threshold_for_bucket_split_in),
+ pre_mapping_fbv(pre_mapping_fbv_in) {}
+
+ explicit Options(std::vector<Bucket> custom_init_sorted_buckets_in,
+ std::vector<Bucket> custom_init_unsorted_buckets_in,
+ int32_t num_data_threshold_for_bucket_split_in,
+ bool pre_mapping_fbv_in)
+ : custom_init_sorted_buckets(std::move(custom_init_sorted_buckets_in)),
+ custom_init_unsorted_buckets(
+ std::move(custom_init_unsorted_buckets_in)),
+ num_data_threshold_for_bucket_split(
+ num_data_threshold_for_bucket_split_in),
+ pre_mapping_fbv(pre_mapping_fbv_in) {}
+
+ bool IsValid() const;
+
+ bool HasCustomInitBuckets() const {
+ return !custom_init_sorted_buckets.empty() ||
+ !custom_init_unsorted_buckets.empty();
+ }
+
+ // Custom buckets when initializing new files. If both are empty, then the
+ // initial bucket is (INT64_MIN, INT64_MAX). Usually we only set them in the
+ // unit test. Note that all buckets in custom_init_sorted_buckets and
+ // custom_init_unsorted_buckets should be disjoint and the range union
+ // should be [INT64_MIN, INT64_MAX].
+ std::vector<Bucket> custom_init_sorted_buckets;
+ std::vector<Bucket> custom_init_unsorted_buckets;
+
+ // Threshold for invoking bucket splitting. If # of data in a bucket exceeds
+ // this number after adding new data, then it will invoke bucket splitting
+ // logic.
+ //
+ // Note: num_data_threshold_for_bucket_split should be >=
+ // kMinNumDataThresholdForBucketSplit.
+ int32_t num_data_threshold_for_bucket_split;
+
+ // Flag indicating whether memory map max possible file size for underlying
+ // FileBackedVector before growing the actual file size.
+ bool pre_mapping_fbv;
+ };
+
+ // Metadata file layout: <Crcs><Info>
+ static constexpr int32_t kCrcsMetadataFileOffset = 0;
+ static constexpr int32_t kInfoMetadataFileOffset =
+ static_cast<int32_t>(sizeof(Crcs));
+ static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info);
+ static_assert(kMetadataFileSize == 20, "");
+
+ static constexpr WorkingPathType kWorkingPathType =
+ WorkingPathType::kDirectory;
+ static constexpr std::string_view kFilePrefix = "integer_index_storage";
+
+ // Default # of data threshold for bucket splitting during indexing (AddKeys).
+ // When # of data in a bucket reaches this number, we will try to split data
+ // into multiple buckets according to their keys.
+ static constexpr int32_t kDefaultNumDataThresholdForBucketSplit = 65536;
+
+ // # of data threshold for bucket merging during optimization (TransferIndex)
+ // = kNumDataThresholdRatioForBucketMerge *
+ // options.num_data_threshold_for_bucket_split
+ //
+ // If total # data of adjacent buckets exceed this threshold, then flush the
+ // accumulated data. Otherwise merge buckets and their data.
+ static constexpr double kNumDataThresholdRatioForBucketMerge = 0.7;
+
+ // Length threshold to sort and merge unsorted buckets into sorted buckets. If
+ // the length of unsorted_buckets exceed the threshold, then call
+ // SortBuckets().
+ // TODO(b/259743562): decide if removing unsorted buckets given that we
+ // changed bucket splitting threshold and # of buckets are small now.
+ static constexpr int32_t kUnsortedBucketsLengthThreshold = 5;
+
+ // Creates a new IntegerIndexStorage instance to index integers (for a single
+ // property). If any of the underlying file is missing, then delete the whole
+ // working_path and (re)initialize with new ones. Otherwise initialize and
+ // create the instance by existing files.
+ //
+ // filesystem: Object to make system level calls
+ // working_path: Specifies the working path for PersistentStorage.
+ // IntegerIndexStorage uses working path as working directory
+ // and all related files will be stored under this directory. It
+ // takes full ownership and of working_path_, including
+ // creation/deletion. It is the caller's responsibility to
+ // specify correct working path and avoid mixing different
+ // persistent storages together under the same path. Also the
+ // caller has the ownership for the parent directory of
+ // working_path_, and it is responsible for parent directory
+ // creation/deletion. See PersistentStorage for more details
+ // about the concept of working_path.
+ // options: Options instance.
+ // posting_list_serializer: a PostingListIntegerIndexSerializer instance to
+ // serialize/deserialize integer index data to/from
+ // posting lists.
+ //
+ // Returns:
+ // - INVALID_ARGUMENT_ERROR if any value in options is invalid.
+ // - FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
+ // checksum.
+ // - INTERNAL_ERROR on I/O errors.
+ // - Any FileBackedVector/FlashIndexStorage errors.
+ static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ Create(const Filesystem& filesystem, std::string working_path,
+ Options options,
+ PostingListIntegerIndexSerializer* posting_list_serializer);
+
+ // Deletes IntegerIndexStorage under working_path.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ static libtextclassifier3::Status Discard(const Filesystem& filesystem,
+ const std::string& working_path) {
+ return PersistentStorage::Discard(filesystem, working_path,
+ kWorkingPathType);
+ }
+
+ // Delete copy and move constructor/assignment operator.
+ IntegerIndexStorage(const IntegerIndexStorage&) = delete;
+ IntegerIndexStorage& operator=(const IntegerIndexStorage&) = delete;
+
+ IntegerIndexStorage(IntegerIndexStorage&&) = delete;
+ IntegerIndexStorage& operator=(IntegerIndexStorage&&) = delete;
+
+ ~IntegerIndexStorage() override;
+
+ // Batch adds new keys (of the same DocumentId and SectionId) into the integer
+ // index storage.
+ // Note that since we separate different property names into different integer
+ // index storages, it is impossible to have keys in a single document across
+ // multiple sections to add into the same integer index storage.
+ //
+ // Returns:
+ // - OK on success
+ // - RESOURCE_EXHAUSTED_ERROR if # of integers in this storage exceed
+ // INT_MAX after adding new_keys
+ // - Any FileBackedVector or PostingList errors
+ libtextclassifier3::Status AddKeys(DocumentId document_id,
+ SectionId section_id,
+ std::vector<int64_t>&& new_keys);
+
+ // Returns a DocHitInfoIteratorNumeric<int64_t> (in DocHitInfoIterator
+ // interface type format) for iterating through all docs which have the
+ // specified (integer) property contents in range [query_key_lower,
+ // query_key_upper].
+ // When iterating through all relevant doc hits, it:
+ // - Merges multiple SectionIds of doc hits with same DocumentId into a single
+ // SectionIdMask and constructs DocHitInfo.
+ // - Returns DocHitInfo in descending DocumentId order.
+ //
+ // Returns:
+ // - On success: a DocHitInfoIterator(Numeric)
+ // - INVALID_ARGUMENT_ERROR if query_key_lower > query_key_upper
+ // - Any FileBackedVector or PostingList errors
+ libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> GetIterator(
+ int64_t query_key_lower, int64_t query_key_upper) const;
+
+ // Transfers integer index data from the current storage to new_storage and
+ // optimizes buckets (for new_storage only), i.e. merging adjacent buckets if
+ // total # of data among them are less than or equal to
+ // kNumDataThresholdForBucketMerge.
+ //
+ // REQUIRES: new_storage should be a newly created storage instance, i.e. not
+ // contain any data. Otherwise, existing data and posting lists won't be
+ // freed and space will be wasted.
+ //
+ // Returns:
+ // - OK on success
+ // - OUT_OF_RANGE_ERROR if sorted buckets length exceeds the limit after
+ // merging
+ // - INTERNAL_ERROR on IO error
+ libtextclassifier3::Status TransferIndex(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ IntegerIndexStorage* new_storage) const;
+
+ int32_t num_data() const { return info().num_data; }
+
private:
+ static constexpr int8_t kNumDataAfterSplitAdjustment = 5;
+
explicit IntegerIndexStorage(
- const Filesystem& filesystem, std::string_view base_dir,
- PostingListUsedIntegerIndexDataSerializer* serializer,
+ const Filesystem& filesystem, std::string&& working_path,
+ Options&& options,
+ PostingListIntegerIndexSerializer* posting_list_serializer,
std::unique_ptr<MemoryMappedFile> metadata_mmapped_file,
std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
- std::unique_ptr<FlashIndexStorage> flash_index_storage);
+ std::unique_ptr<FlashIndexStorage> flash_index_storage)
+ : PersistentStorage(filesystem, std::move(working_path),
+ kWorkingPathType),
+ options_(std::move(options)),
+ posting_list_serializer_(posting_list_serializer),
+ metadata_mmapped_file_(std::move(metadata_mmapped_file)),
+ sorted_buckets_(std::move(sorted_buckets)),
+ unsorted_buckets_(std::move(unsorted_buckets)),
+ flash_index_storage_(std::move(flash_index_storage)),
+ is_info_dirty_(false),
+ is_storage_dirty_(false) {}
+
+ static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ InitializeNewFiles(
+ const Filesystem& filesystem, std::string&& working_path,
+ Options&& options,
+ PostingListIntegerIndexSerializer* posting_list_serializer);
+
+ static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ InitializeExistingFiles(
+ const Filesystem& filesystem, std::string&& working_path,
+ Options&& options,
+ PostingListIntegerIndexSerializer* posting_list_serializer);
+
+ // Flushes data into posting list(s), creates a new bucket with range
+ // [key_lower, key_upper], and appends it into sorted buckets for storage.
+ // It is a helper function for TransferIndex.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR if fails to write existing data into posting list(s)
+ // - Any FileBackedVector or PostingList errors
+ static libtextclassifier3::Status FlushDataIntoNewSortedBucket(
+ int64_t key_lower, int64_t key_upper,
+ std::vector<IntegerIndexData>&& data, IntegerIndexStorage* storage);
+
+ // Flushes contents of all storages to underlying files.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistStoragesToDisk(bool force) override;
- const Filesystem& filesystem_;
- std::string base_dir_;
+ // Flushes contents of metadata file.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistMetadataToDisk(bool force) override;
- PostingListUsedIntegerIndexDataSerializer* serializer_; // Does not own.
+ // Computes and returns Info checksum.
+ //
+ // Returns:
+ // - Crc of the Info on success
+ libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(bool force) override;
+
+ // Computes and returns all storages checksum. Checksums of sorted_buckets_,
+ // unsorted_buckets_ will be combined together by XOR.
+ // TODO(b/259744228): implement and include flash_index_storage checksum
+ //
+ // Returns:
+ // - Crc of all storages on success
+ // - INTERNAL_ERROR if any data inconsistency
+ libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) override;
+
+ // Helper function to add keys in range [it_start, it_end) into the given
+ // bucket. It handles the bucket and its corresponding posting list(s) to make
+ // searching and indexing efficient.
+ //
+ // When the (single) posting list of the bucket is full:
+ // - If the size of posting list hasn't reached the max size, then just simply
+ // add a new key into it, and PostingListAccessor mechanism will
+ // automatically double the size of the posting list.
+ // - Else:
+ // - If the bucket is splittable (i.e. key_lower < key_upper), then split it
+ // into several new buckets with new ranges, and split the data (according
+ // to their keys and the range of new buckets) of the original posting
+ // list into several new posting lists.
+ // - Otherwise, just simply add a new key into it, and PostingListAccessor
+ // mechanism will automatically create a new max size posting list and
+ // chain them.
+ //
+ // Returns:
+ // - On success: a vector of new Buckets (to add into the unsorted bucket
+ // array later)
+ // - Any FileBackedVector or PostingList errors
+ libtextclassifier3::StatusOr<std::vector<Bucket>>
+ AddKeysIntoBucketAndSplitIfNecessary(
+ DocumentId document_id, SectionId section_id,
+ const std::vector<int64_t>::const_iterator& it_start,
+ const std::vector<int64_t>::const_iterator& it_end,
+ FileBackedVector<Bucket>::MutableView& mutable_bucket);
+
+ // Merges all unsorted buckets into sorted buckets and clears unsorted
+ // buckets.
+ //
+ // Returns:
+ // - OK on success
+ // - OUT_OF_RANGE_ERROR if sorted buckets length exceeds the limit after
+ // merging
+ // - Any FileBackedVector errors
+ libtextclassifier3::Status SortBuckets();
+
+ Crcs& crcs() override {
+ return *reinterpret_cast<Crcs*>(metadata_mmapped_file_->mutable_region() +
+ kCrcsMetadataFileOffset);
+ }
+
+ const Crcs& crcs() const override {
+ return *reinterpret_cast<const Crcs*>(metadata_mmapped_file_->region() +
+ kCrcsMetadataFileOffset);
+ }
+
+ Info& info() {
+ return *reinterpret_cast<Info*>(metadata_mmapped_file_->mutable_region() +
+ kInfoMetadataFileOffset);
+ }
+
+ const Info& info() const {
+ return *reinterpret_cast<const Info*>(metadata_mmapped_file_->region() +
+ kInfoMetadataFileOffset);
+ }
+
+ void SetInfoDirty() { is_info_dirty_ = true; }
+ // When storage is dirty, we have to set info dirty as well. So just expose
+ // SetDirty to set both.
+ void SetDirty() {
+ is_info_dirty_ = true;
+ is_storage_dirty_ = true;
+ }
+
+ bool is_info_dirty() const { return is_info_dirty_; }
+ bool is_storage_dirty() const { return is_storage_dirty_; }
+
+ Options options_;
+
+ PostingListIntegerIndexSerializer* posting_list_serializer_; // Does not own.
std::unique_ptr<MemoryMappedFile> metadata_mmapped_file_;
std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets_;
std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets_;
std::unique_ptr<FlashIndexStorage> flash_index_storage_;
+
+ bool is_info_dirty_;
+ bool is_storage_dirty_;
};
} // namespace lib
diff --git a/icing/index/numeric/integer-index-storage_benchmark.cc b/icing/index/numeric/integer-index-storage_benchmark.cc
new file mode 100644
index 0000000..85d381d
--- /dev/null
+++ b/icing/index/numeric/integer-index-storage_benchmark.cc
@@ -0,0 +1,407 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "testing/base/public/benchmark.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/destructible-directory.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/integer-index-storage.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/numeric/normal-distribution-number-generator.h"
+#include "icing/testing/numeric/number-generator.h"
+#include "icing/testing/numeric/uniform-distribution-integer-generator.h"
+#include "icing/testing/tmp-directory.h"
+
+// Run on a Linux workstation:
+// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/index/numeric:integer-index-storage_benchmark
+//
+// $ blaze-bin/icing/index/numeric/integer-index-storage_benchmark
+// --benchmark_filter=all --benchmark_memory_usage
+//
+// Run on an Android device:
+// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
+// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/index/numeric:integer-index-storage_benchmark
+//
+// $ adb push
+// blaze-bin/icing/index/numeric/integer-index-storage_benchmark
+// /data/local/tmp/
+//
+// $ adb shell /data/local/tmp/integer-index-storage_benchmark
+// --benchmark_filter=all
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::SizeIs;
+
+static constexpr int32_t kNumDataThresholdForBucketSplit =
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit;
+static constexpr bool kPreMappingFbv = true;
+
+static constexpr SectionId kDefaultSectionId = 12;
+static constexpr int kDefaultSeed = 12345;
+
+enum DistributionTypeEnum {
+ kUniformDistribution,
+ kNormalDistribution,
+};
+
+class IntegerIndexStorageBenchmark {
+ public:
+ Filesystem filesystem;
+ std::string working_path;
+
+ PostingListIntegerIndexSerializer posting_list_serializer;
+
+ explicit IntegerIndexStorageBenchmark()
+ : working_path(GetTestTempDir() + "/integer_index_benchmark") {}
+
+ ~IntegerIndexStorageBenchmark() {
+ filesystem.DeleteDirectoryRecursively(working_path.c_str());
+ }
+};
+
+libtextclassifier3::StatusOr<std::unique_ptr<NumberGenerator<int64_t>>>
+CreateIntegerGenerator(DistributionTypeEnum distribution_type, int seed,
+ int num_keys) {
+ switch (distribution_type) {
+ case DistributionTypeEnum::kUniformDistribution:
+ // Since the collision # follows poisson distribution with lambda =
+ // (num_keys / range), we set the range 10x (lambda = 0.1) to avoid too
+ // many collisions.
+ //
+ // Distribution:
+ // - keys in range being picked for 0 times: 90.5%
+ // - keys in range being picked for 1 time: 9%
+ // - keys in range being picked for 2 times: 0.45%
+ // - keys in range being picked for 3 times: 0.015%
+ //
+ // For example, num_keys = 1M, range = 10M. Then there will be ~904837
+ // unique keys, 45242 keys being picked twice, 1508 keys being picked
+ // thrice ...
+ return std::make_unique<UniformDistributionIntegerGenerator<int64_t>>(
+ seed, /*range_lower=*/0,
+ /*range_upper=*/static_cast<int64_t>(num_keys) * 10 - 1);
+ case DistributionTypeEnum::kNormalDistribution:
+ // Normal distribution with mean = 0 and stddev = num_keys / 1024.
+ // - keys in range [-1 * stddev, 1 * stddev]: 68.2%
+ // - keys in range [-2 * stddev, 2 * stddev]: 95.4%
+ // - keys in range [-3 * stddev, 3 * stddev]: 99.7%
+ //
+ // - When generating num_keys integers, 68.2% of them will be in range
+ // [-num_keys / 1024, num_keys / 1024]
+ // - Each number in this range will be sampled (num_keys * 0.682) /
+ // ((num_keys / 1024) * 2) = 349 times on average and become
+ // "single-range bucket".
+ return std::make_unique<NormalDistributionNumberGenerator<int64_t>>(
+ seed, /*mean=*/0.0, /*stddev=*/num_keys / 1024.0);
+ default:
+ return absl_ports::InvalidArgumentError("Unknown type");
+ }
+}
+
+void BM_Index(benchmark::State& state) {
+ DistributionTypeEnum distribution_type =
+ static_cast<DistributionTypeEnum>(state.range(0));
+ int num_keys = state.range(1);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumberGenerator<int64_t>> generator,
+ CreateIntegerGenerator(distribution_type, kDefaultSeed, num_keys));
+ std::vector<int64_t> keys(num_keys);
+ for (int i = 0; i < num_keys; ++i) {
+ keys[i] = generator->Generate();
+ }
+
+ IntegerIndexStorageBenchmark benchmark;
+ for (auto _ : state) {
+ state.PauseTiming();
+ benchmark.filesystem.DeleteDirectoryRecursively(
+ benchmark.working_path.c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ benchmark.filesystem, benchmark.working_path,
+ IntegerIndexStorage::Options(kNumDataThresholdForBucketSplit,
+ kPreMappingFbv),
+ &benchmark.posting_list_serializer));
+ state.ResumeTiming();
+
+ for (int i = 0; i < num_keys; ++i) {
+ ICING_ASSERT_OK(storage->AddKeys(static_cast<DocumentId>(i),
+ kDefaultSectionId, {keys[i]}));
+ }
+ ICING_ASSERT_OK(storage->PersistToDisk());
+
+ state.PauseTiming();
+ storage.reset();
+ state.ResumeTiming();
+ }
+}
+BENCHMARK(BM_Index)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 20);
+
+void BM_BatchIndex(benchmark::State& state) {
+ DistributionTypeEnum distribution_type =
+ static_cast<DistributionTypeEnum>(state.range(0));
+ int num_keys = state.range(1);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumberGenerator<int64_t>> generator,
+ CreateIntegerGenerator(distribution_type, kDefaultSeed, num_keys));
+ std::vector<int64_t> keys(num_keys);
+ for (int i = 0; i < num_keys; ++i) {
+ keys[i] = generator->Generate();
+ }
+
+ IntegerIndexStorageBenchmark benchmark;
+ for (auto _ : state) {
+ state.PauseTiming();
+ benchmark.filesystem.DeleteDirectoryRecursively(
+ benchmark.working_path.c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ benchmark.filesystem, benchmark.working_path,
+ IntegerIndexStorage::Options(kNumDataThresholdForBucketSplit,
+ kPreMappingFbv),
+ &benchmark.posting_list_serializer));
+ std::vector<int64_t> keys_copy(keys);
+ state.ResumeTiming();
+
+ ICING_ASSERT_OK(storage->AddKeys(static_cast<DocumentId>(0),
+ kDefaultSectionId, std::move(keys_copy)));
+ ICING_ASSERT_OK(storage->PersistToDisk());
+
+ state.PauseTiming();
+ storage.reset();
+ state.ResumeTiming();
+ }
+}
+BENCHMARK(BM_BatchIndex)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 20);
+
+void BM_ExactQuery(benchmark::State& state) {
+ DistributionTypeEnum distribution_type =
+ static_cast<DistributionTypeEnum>(state.range(0));
+ int num_keys = state.range(1);
+
+ IntegerIndexStorageBenchmark benchmark;
+ benchmark.filesystem.DeleteDirectoryRecursively(
+ benchmark.working_path.c_str());
+ DestructibleDirectory ddir(&benchmark.filesystem, benchmark.working_path);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ benchmark.filesystem, benchmark.working_path,
+ IntegerIndexStorage::Options(kNumDataThresholdForBucketSplit,
+ kPreMappingFbv),
+ &benchmark.posting_list_serializer));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumberGenerator<int64_t>> generator,
+ CreateIntegerGenerator(distribution_type, kDefaultSeed, num_keys));
+ std::unordered_map<int64_t, std::vector<DocumentId>> keys;
+ for (int i = 0; i < num_keys; ++i) {
+ int64_t key = generator->Generate();
+ keys[key].push_back(static_cast<DocumentId>(i));
+ ICING_ASSERT_OK(
+ storage->AddKeys(static_cast<DocumentId>(i), kDefaultSectionId, {key}));
+ }
+ ICING_ASSERT_OK(storage->PersistToDisk());
+
+ for (auto _ : state) {
+ int64_t exact_query_key = generator->Generate();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> iterator,
+ storage->GetIterator(/*query_key_lower=*/exact_query_key,
+ /*query_key_upper=*/exact_query_key));
+ std::vector<DocHitInfo> data;
+ while (iterator->Advance().ok()) {
+ data.push_back(iterator->doc_hit_info());
+ }
+
+ state.PauseTiming();
+ const auto it = keys.find(exact_query_key);
+ if (it == keys.end()) {
+ ASSERT_THAT(data, IsEmpty());
+ } else {
+ ASSERT_THAT(data, SizeIs(it->second.size()));
+ std::reverse(data.begin(), data.end());
+ for (int i = 0; i < data.size(); ++i) {
+ ASSERT_THAT(data[i].document_id(), Eq(it->second[i]));
+ ASSERT_THAT(data[i].hit_section_ids_mask(), Eq(1 << kDefaultSectionId));
+ }
+ }
+ state.ResumeTiming();
+ }
+}
+BENCHMARK(BM_ExactQuery)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 20);
+
+void BM_RangeQueryAll(benchmark::State& state) {
+ DistributionTypeEnum distribution_type =
+ static_cast<DistributionTypeEnum>(state.range(0));
+ int num_keys = state.range(1);
+
+ IntegerIndexStorageBenchmark benchmark;
+ benchmark.filesystem.DeleteDirectoryRecursively(
+ benchmark.working_path.c_str());
+ DestructibleDirectory ddir(&benchmark.filesystem, benchmark.working_path);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ benchmark.filesystem, benchmark.working_path,
+ IntegerIndexStorage::Options(kNumDataThresholdForBucketSplit,
+ kPreMappingFbv),
+ &benchmark.posting_list_serializer));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumberGenerator<int64_t>> generator,
+ CreateIntegerGenerator(distribution_type, kDefaultSeed, num_keys));
+ for (int i = 0; i < num_keys; ++i) {
+ ICING_ASSERT_OK(storage->AddKeys(static_cast<DocumentId>(i),
+ kDefaultSectionId,
+ {generator->Generate()}));
+ }
+ ICING_ASSERT_OK(storage->PersistToDisk());
+
+ for (auto _ : state) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> iterator,
+ storage->GetIterator(
+ /*query_key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*query_key_upper=*/std::numeric_limits<int64_t>::max()));
+ std::vector<DocHitInfo> data;
+ while (iterator->Advance().ok()) {
+ data.push_back(iterator->doc_hit_info());
+ }
+
+ ASSERT_THAT(data, SizeIs(num_keys));
+ }
+}
+BENCHMARK(BM_RangeQueryAll)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 20);
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/numeric/integer-index-storage_test.cc b/icing/index/numeric/integer-index-storage_test.cc
new file mode 100644
index 0000000..8675172
--- /dev/null
+++ b/icing/index/numeric/integer-index-storage_test.cc
@@ -0,0 +1,2036 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/numeric/integer-index-storage.h"
+
+#include <unistd.h>
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/file-backed-vector.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/persistent-storage.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Contains;
+using ::testing::ElementsAre;
+using ::testing::ElementsAreArray;
+using ::testing::Eq;
+using ::testing::Ge;
+using ::testing::Gt;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
+using ::testing::Key;
+using ::testing::Le;
+using ::testing::Ne;
+using ::testing::Not;
+
+using Bucket = IntegerIndexStorage::Bucket;
+using Crcs = PersistentStorage::Crcs;
+using Info = IntegerIndexStorage::Info;
+using Options = IntegerIndexStorage::Options;
+
+static constexpr int32_t kCorruptedValueOffset = 3;
+static constexpr DocumentId kDefaultDocumentId = 123;
+static constexpr SectionId kDefaultSectionId = 31;
+
+class IntegerIndexStorageTest : public ::testing::TestWithParam<bool> {
+ protected:
+ void SetUp() override {
+ base_dir_ = GetTestTempDir() + "/icing";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ working_path_ = base_dir_ + "/integer_index_storage_test";
+
+ serializer_ = std::make_unique<PostingListIntegerIndexSerializer>();
+ }
+
+ void TearDown() override {
+ serializer_.reset();
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ std::string base_dir_;
+ std::string working_path_;
+ std::unique_ptr<PostingListIntegerIndexSerializer> serializer_;
+};
+
+libtextclassifier3::StatusOr<std::vector<DocHitInfo>> Query(
+ const IntegerIndexStorage* storage, int64_t key_lower, int64_t key_upper) {
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<DocHitInfoIterator> iter,
+ storage->GetIterator(key_lower, key_upper));
+ std::vector<DocHitInfo> hits;
+ while (iter->Advance().ok()) {
+ hits.push_back(iter->doc_hit_info());
+ }
+ return hits;
+}
+
+TEST_P(IntegerIndexStorageTest, OptionsEmptyCustomInitBucketsShouldBeValid) {
+ EXPECT_THAT(
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsTrue());
+}
+
+TEST_P(IntegerIndexStorageTest, OptionsInvalidNumDataThresholdForBucketSplit) {
+ EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/{},
+ /*num_data_threshold_for_bucket_split=*/-1,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+ EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/{},
+ /*num_data_threshold_for_bucket_split=*/0,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+ EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/{},
+ /*num_data_threshold_for_bucket_split=*/63,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+}
+
+TEST_P(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsRange) {
+ // Invalid custom init sorted bucket
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/
+ {Bucket(std::numeric_limits<int64_t>::min(), 5), Bucket(9, 6)},
+ /*custom_init_unsorted_buckets_in=*/
+ {Bucket(10, std::numeric_limits<int64_t>::max())},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+
+ // Invalid custom init unsorted bucket
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/
+ {Bucket(10, std::numeric_limits<int64_t>::max())},
+ /*custom_init_unsorted_buckets_in=*/
+ {Bucket(std::numeric_limits<int64_t>::min(), 5), Bucket(9, 6)},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+}
+
+TEST_P(IntegerIndexStorageTest,
+ OptionsInvalidCustomInitBucketsPostingListIdentifier) {
+ // Custom init buckets should contain invalid posting list identifier.
+ PostingListIdentifier valid_posting_list_identifier(0, 0, 0);
+ ASSERT_THAT(valid_posting_list_identifier.is_valid(), IsTrue());
+
+ // Invalid custom init sorted bucket
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/
+ {Bucket(std::numeric_limits<int64_t>::min(),
+ std::numeric_limits<int64_t>::max(),
+ valid_posting_list_identifier)},
+ /*custom_init_unsorted_buckets_in=*/{},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+
+ // Invalid custom init unsorted bucket
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/
+ {Bucket(std::numeric_limits<int64_t>::min(),
+ std::numeric_limits<int64_t>::max(),
+ valid_posting_list_identifier)},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+}
+
+TEST_P(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsOverlapping) {
+ // sorted buckets overlap
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/
+ {Bucket(std::numeric_limits<int64_t>::min(), -100),
+ Bucket(-100, std::numeric_limits<int64_t>::max())},
+ /*custom_init_unsorted_buckets_in=*/{},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+
+ // unsorted buckets overlap
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/
+ {Bucket(-100, std::numeric_limits<int64_t>::max()),
+ Bucket(std::numeric_limits<int64_t>::min(), -100)},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+
+ // Cross buckets overlap
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/
+ {Bucket(std::numeric_limits<int64_t>::min(), -100),
+ Bucket(-99, 0)},
+ /*custom_init_unsorted_buckets_in=*/
+ {Bucket(200, std::numeric_limits<int64_t>::max()), Bucket(0, 50),
+ Bucket(51, 199)},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+}
+
+TEST_P(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsUnion) {
+ // Missing INT64_MAX
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/
+ {Bucket(std::numeric_limits<int64_t>::min(), -100),
+ Bucket(-99, 0)},
+ /*custom_init_unsorted_buckets_in=*/{Bucket(1, 1000)},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+
+ // Missing INT64_MIN
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/
+ {Bucket(-200, -100), Bucket(-99, 0)},
+ /*custom_init_unsorted_buckets_in=*/
+ {Bucket(1, std::numeric_limits<int64_t>::max())},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+
+ // Missing some intermediate ranges
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/
+ {Bucket(std::numeric_limits<int64_t>::min(), -100)},
+ /*custom_init_unsorted_buckets_in=*/
+ {Bucket(1, std::numeric_limits<int64_t>::max())},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+}
+
+TEST_P(IntegerIndexStorageTest, InvalidWorkingPath) {
+ EXPECT_THAT(
+ IntegerIndexStorage::Create(
+ filesystem_, "/dev/null/integer_index_storage_test",
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_P(IntegerIndexStorageTest, CreateWithInvalidOptionsShouldFail) {
+ Options invalid_options(
+ /*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/
+ {Bucket(-100, std::numeric_limits<int64_t>::max()),
+ Bucket(std::numeric_limits<int64_t>::min(), -100)},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam());
+ ASSERT_THAT(invalid_options.IsValid(), IsFalse());
+
+ EXPECT_THAT(IntegerIndexStorage::Create(filesystem_, working_path_,
+ invalid_options, serializer_.get()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(IntegerIndexStorageTest, InitializeNewFiles) {
+ {
+ // Create new integer index storage
+ ASSERT_FALSE(filesystem_.DirectoryExists(working_path_.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ ICING_ASSERT_OK(storage->PersistToDisk());
+ }
+
+ // Metadata file should be initialized correctly for both info and crcs
+ // sections.
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_TRUE(metadata_sfd.is_valid());
+
+ // Check info section
+ Info info;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
+ IntegerIndexStorage::kInfoMetadataFileOffset));
+ EXPECT_THAT(info.magic, Eq(Info::kMagic));
+ EXPECT_THAT(info.num_data, Eq(0));
+
+ // Check crcs section
+ Crcs crcs;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
+ IntegerIndexStorage::kCrcsMetadataFileOffset));
+ // # of elements in sorted_buckets should be 1, so it should have non-zero
+ // all storages crc value.
+ EXPECT_THAT(crcs.component_crcs.storages_crc, Ne(0));
+ EXPECT_THAT(crcs.component_crcs.info_crc,
+ Eq(Crc32(std::string_view(reinterpret_cast<const char*>(&info),
+ sizeof(Info)))
+ .Get()));
+ EXPECT_THAT(crcs.all_crc,
+ Eq(Crc32(std::string_view(
+ reinterpret_cast<const char*>(&crcs.component_crcs),
+ sizeof(Crcs::ComponentCrcs)))
+ .Get()));
+}
+
+TEST_P(IntegerIndexStorageTest,
+ InitializationShouldFailWithoutPersistToDiskOrDestruction) {
+ // Create new integer index storage
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Insert some data.
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/0, /*section_id=*/20,
+ /*new_keys=*/{0, 100, -100}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/1, /*section_id=*/2,
+ /*new_keys=*/{3, -1000, 500}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/2, /*section_id=*/15,
+ /*new_keys=*/{-6, 321, 98}));
+
+ // Without calling PersistToDisk, checksums will not be recomputed or synced
+ // to disk, so initializing another instance on the same files should fail.
+ EXPECT_THAT(
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+TEST_P(IntegerIndexStorageTest, InitializationShouldSucceedWithPersistToDisk) {
+ // Create new integer index storage
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage1,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Insert some data.
+ ICING_ASSERT_OK(storage1->AddKeys(/*document_id=*/0, /*section_id=*/20,
+ /*new_keys=*/{0, 100, -100}));
+ ICING_ASSERT_OK(storage1->AddKeys(/*document_id=*/1, /*section_id=*/2,
+ /*new_keys=*/{3, -1000, 500}));
+ ICING_ASSERT_OK(storage1->AddKeys(/*document_id=*/2, /*section_id=*/15,
+ /*new_keys=*/{-6, 321, 98}));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<DocHitInfo> doc_hit_info_vec,
+ Query(storage1.get(),
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()));
+
+ // After calling PersistToDisk, all checksums should be recomputed and synced
+ // correctly to disk, so initializing another instance on the same files
+ // should succeed, and we should be able to get the same contents.
+ ICING_EXPECT_OK(storage1->PersistToDisk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage2,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ EXPECT_THAT(
+ Query(storage2.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(
+ ElementsAreArray(doc_hit_info_vec.begin(), doc_hit_info_vec.end())));
+}
+
+TEST_P(IntegerIndexStorageTest, InitializationShouldSucceedAfterDestruction) {
+ std::vector<DocHitInfo> doc_hit_info_vec;
+ {
+ // Create new integer index storage
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Insert some data.
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/0, /*section_id=*/20,
+ /*new_keys=*/{0, 100, -100}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/1, /*section_id=*/2,
+ /*new_keys=*/{3, -1000, 500}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/2, /*section_id=*/15,
+ /*new_keys=*/{-6, 321, 98}));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ doc_hit_info_vec,
+ Query(storage.get(),
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()));
+ }
+
+ {
+ // The previous instance went out of scope and was destructed. Although we
+ // didn't call PersistToDisk explicitly, the destructor should invoke it and
+ // thus initializing another instance on the same files should succeed, and
+ // we should be able to get the same contents.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(ElementsAreArray(doc_hit_info_vec.begin(),
+ doc_hit_info_vec.end())));
+ }
+}
+
+TEST_P(IntegerIndexStorageTest,
+ InitializeExistingFilesWithWrongAllCrcShouldFail) {
+ {
+ // Create new integer index storage
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId,
+ /*new_keys=*/{0, 100, -100}));
+
+ ICING_ASSERT_OK(storage->PersistToDisk());
+ }
+
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_TRUE(metadata_sfd.is_valid());
+
+ Crcs crcs;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
+ IntegerIndexStorage::kCrcsMetadataFileOffset));
+
+ // Manually corrupt all_crc
+ crcs.all_crc += kCorruptedValueOffset;
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(),
+ IntegerIndexStorage::kCrcsMetadataFileOffset,
+ &crcs, sizeof(Crcs)));
+ metadata_sfd.reset();
+
+ {
+ // Attempt to create the integer index storage with metadata containing
+ // corrupted all_crc. This should fail.
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ storage_or = IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get());
+ EXPECT_THAT(storage_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(storage_or.status().error_message(),
+ HasSubstr("Invalid all crc"));
+ }
+}
+
+TEST_P(IntegerIndexStorageTest,
+ InitializeExistingFilesWithCorruptedInfoShouldFail) {
+ {
+ // Create new integer index storage
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId,
+ /*new_keys=*/{0, 100, -100}));
+
+ ICING_ASSERT_OK(storage->PersistToDisk());
+ }
+
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_TRUE(metadata_sfd.is_valid());
+
+ Info info;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
+ IntegerIndexStorage::kInfoMetadataFileOffset));
+
+ // Modify info, but don't update the checksum. This would be similar to
+ // corruption of info.
+ info.num_data += kCorruptedValueOffset;
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(),
+ IntegerIndexStorage::kInfoMetadataFileOffset,
+ &info, sizeof(Info)));
+ metadata_sfd.reset();
+
+ {
+ // Attempt to create the integer index storage with info that doesn't match
+ // its checksum and confirm that it fails.
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ storage_or = IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get());
+ EXPECT_THAT(storage_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(storage_or.status().error_message(),
+ HasSubstr("Invalid info crc"));
+ }
+}
+
+TEST_P(IntegerIndexStorageTest,
+ InitializeExistingFilesWithCorruptedSortedBucketsShouldFail) {
+ {
+ // Create new integer index storage
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId,
+ /*new_keys=*/{0, 100, -100}));
+
+ ICING_ASSERT_OK(storage->PersistToDisk());
+ }
+
+ {
+ // Corrupt sorted buckets manually.
+ const std::string sorted_buckets_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".s");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, sorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc,
+ sorted_buckets->ComputeChecksum());
+ ICING_ASSERT_OK(sorted_buckets->Append(Bucket(
+ /*key_lower=*/0, /*key_upper=*/std::numeric_limits<int64_t>::max())));
+ ICING_ASSERT_OK(sorted_buckets->PersistToDisk());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc,
+ sorted_buckets->ComputeChecksum());
+ ASSERT_THAT(old_crc, Not(Eq(new_crc)));
+ }
+
+ {
+ // Attempt to create the integer index storage with metadata containing
+ // corrupted sorted_buckets_crc. This should fail.
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ storage_or = IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get());
+ EXPECT_THAT(storage_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(storage_or.status().error_message(),
+ HasSubstr("Invalid storages crc"));
+ }
+}
+
+TEST_P(IntegerIndexStorageTest,
+ InitializeExistingFilesWithCorruptedUnsortedBucketsShouldFail) {
+ {
+ // Create new integer index storage
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId,
+ /*new_keys=*/{0, 100, -100}));
+
+ ICING_ASSERT_OK(storage->PersistToDisk());
+ }
+
+ {
+ // Corrupt unsorted buckets manually.
+ const std::string unsorted_buckets_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".u");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, unsorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/sizeof(Bucket) * 100 +
+ FileBackedVector<Bucket>::Header::kHeaderSize));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc,
+ unsorted_buckets->ComputeChecksum());
+ ICING_ASSERT_OK(unsorted_buckets->Append(Bucket(
+ /*key_lower=*/0, /*key_upper=*/std::numeric_limits<int64_t>::max())));
+ ICING_ASSERT_OK(unsorted_buckets->PersistToDisk());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc,
+ unsorted_buckets->ComputeChecksum());
+ ASSERT_THAT(old_crc, Not(Eq(new_crc)));
+ }
+
+ {
+ // Attempt to create the integer index storage with metadata containing
+ // corrupted unsorted_buckets_crc. This should fail.
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ storage_or = IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get());
+ EXPECT_THAT(storage_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(storage_or.status().error_message(),
+ HasSubstr("Invalid storages crc"));
+ }
+}
+
+// TODO(b/259744228): add test for corrupted flash_index_storage
+
+TEST_P(IntegerIndexStorageTest, InvalidQuery) {
+ // Create new integer index storage
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ EXPECT_THAT(
+ storage->GetIterator(/*query_key_lower=*/0, /*query_key_upper=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(IntegerIndexStorageTest, AddKeysShouldUpdateNumData) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add some keys into buckets [(-1000,-100), (200,300), (-99,-1)].
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{-51, -500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{201, 209, -149}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{208}),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(6));
+
+ ICING_ASSERT_OK(storage->PersistToDisk());
+ }
+
+ // Check sorted_buckets manually.
+ const std::string sorted_buckets_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".s");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, sorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ EXPECT_THAT(sorted_buckets->num_elements(), Eq(5));
+
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* sbk1,
+ sorted_buckets->Get(/*idx=*/0));
+ EXPECT_THAT(sbk1->key_lower(), Eq(-1000));
+ EXPECT_THAT(sbk1->key_upper(), Eq(-100));
+ EXPECT_THAT(sbk1->num_data(), Eq(2));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* sbk2,
+ sorted_buckets->Get(/*idx=*/1));
+ EXPECT_THAT(sbk2->key_lower(), Eq(0));
+ EXPECT_THAT(sbk2->key_upper(), Eq(100));
+ EXPECT_THAT(sbk2->num_data(), Eq(0));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* sbk3,
+ sorted_buckets->Get(/*idx=*/2));
+ EXPECT_THAT(sbk3->key_lower(), Eq(150));
+ EXPECT_THAT(sbk3->key_upper(), Eq(199));
+ EXPECT_THAT(sbk3->num_data(), Eq(0));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* sbk4,
+ sorted_buckets->Get(/*idx=*/3));
+ EXPECT_THAT(sbk4->key_lower(), Eq(200));
+ EXPECT_THAT(sbk4->key_upper(), Eq(300));
+ EXPECT_THAT(sbk4->num_data(), Eq(3));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* sbk5,
+ sorted_buckets->Get(/*idx=*/4));
+ EXPECT_THAT(sbk5->key_lower(), Eq(301));
+ EXPECT_THAT(sbk5->key_upper(), Eq(999));
+ EXPECT_THAT(sbk5->num_data(), Eq(0));
+
+ // Check unsorted_buckets and unsorted buckets manually.
+ const std::string unsorted_buckets_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".u");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, unsorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ EXPECT_THAT(unsorted_buckets->num_elements(), Eq(4));
+
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* ubk1,
+ unsorted_buckets->Get(/*idx=*/0));
+ EXPECT_THAT(ubk1->key_lower(), Eq(1000));
+ EXPECT_THAT(ubk1->key_upper(), Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(ubk1->num_data(), Eq(0));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* ubk2,
+ unsorted_buckets->Get(/*idx=*/1));
+ EXPECT_THAT(ubk2->key_lower(), Eq(-99));
+ EXPECT_THAT(ubk2->key_upper(), Eq(-1));
+ EXPECT_THAT(ubk2->num_data(), Eq(1));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* ubk3,
+ unsorted_buckets->Get(/*idx=*/2));
+ EXPECT_THAT(ubk3->key_lower(), Eq(101));
+ EXPECT_THAT(ubk3->key_upper(), Eq(149));
+ EXPECT_THAT(ubk3->num_data(), Eq(0));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* ubk4,
+ unsorted_buckets->Get(/*idx=*/3));
+ EXPECT_THAT(ubk4->key_lower(), Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(ubk4->key_upper(), Eq(-1001));
+ EXPECT_THAT(ubk4->num_data(), Eq(0));
+}
+
+TEST_P(IntegerIndexStorageTest, ExactQuerySortedBuckets) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add some keys into sorted buckets [(-1000,-100), (200,300)].
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{-500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{208}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{-200}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{-1000}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/4, kDefaultSectionId,
+ /*new_keys=*/{300}),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(5));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ // Exact query on key in each sorted bucket should get the correct result.
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-500, /*key_upper=*/-500),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/208, /*key_upper=*/208),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-200, /*key_upper=*/-200),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-1000, /*key_upper=*/-1000),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/300, /*key_upper=*/300),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections))));
+}
+
+TEST_P(IntegerIndexStorageTest, ExactQueryUnsortedBuckets) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add some keys into unsorted buckets [(1000,INT64_MAX), (INT64_MIN,-1001)].
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{1024}),
+ IsOk());
+ EXPECT_THAT(
+ storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{std::numeric_limits<int64_t>::max()}),
+ IsOk());
+ EXPECT_THAT(
+ storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{std::numeric_limits<int64_t>::min()}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{-1500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/4, kDefaultSectionId,
+ /*new_keys=*/{2000}),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(5));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ // Exact query on key in each unsorted bucket should get the correct result.
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/1024, /*key_upper=*/1024),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::max(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(
+ ElementsAre(EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::min()),
+ IsOkAndHolds(
+ ElementsAre(EqualsDocHitInfo(/*document_id=*/2, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-1500, /*key_upper=*/-1500),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/2000, /*key_upper=*/2000),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections))));
+}
+
+TEST_P(IntegerIndexStorageTest, ExactQueryIdenticalKeys) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add some keys into buckets [(0,100), (1000,INT64_MAX)].
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{1024}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{1024}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{20}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{20}),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(4));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ // Exact query on key with multiple hits should get the correct result.
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/1024, /*key_upper=*/1024),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/20, /*key_upper=*/20),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections))));
+}
+
+TEST_P(IntegerIndexStorageTest, RangeQueryEmptyIntegerIndexStorage) {
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_P(IntegerIndexStorageTest, RangeQuerySingleEntireSortedBucket) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add some keys into sorted buckets [(-1000,-100), (200,300)].
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{-500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{208}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{-200}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{-1000}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/4, kDefaultSectionId,
+ /*new_keys=*/{300}),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(5));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ // Range query on each sorted bucket boundary should get the correct result.
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-1000, /*key_upper=*/-100),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/0, /*key_upper=*/100),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/150, /*key_upper=*/199),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/200, /*key_upper=*/300),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/301, /*key_upper=*/999),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_P(IntegerIndexStorageTest, RangeQuerySingleEntireUnsortedBucket) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add some keys into unsorted buckets [(1000,INT64_MAX), (INT64_MIN,-1001)].
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{1024}),
+ IsOk());
+ EXPECT_THAT(
+ storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{std::numeric_limits<int64_t>::max()}),
+ IsOk());
+ EXPECT_THAT(
+ storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{std::numeric_limits<int64_t>::min()}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{-1500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/4, kDefaultSectionId,
+ /*new_keys=*/{2000}),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(5));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ // Range query on each unsorted bucket boundary should get the correct result.
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/1000,
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-99, /*key_upper=*/-1),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/101, /*key_upper=*/149),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/-1001),
+ IsOkAndHolds(
+ ElementsAre(EqualsDocHitInfo(/*document_id=*/3, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections))));
+}
+
+TEST_P(IntegerIndexStorageTest, RangeQuerySinglePartialSortedBucket) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add some keys into sorted bucket (0,100).
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{43}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{30}),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(2));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ // Range query on partial range of each sorted bucket should get the correct
+ // result.
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/25, /*key_upper=*/200),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-1000, /*key_upper=*/49),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/25, /*key_upper=*/49),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/31, /*key_upper=*/49),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/25, /*key_upper=*/31),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/3, /*key_upper=*/5),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_P(IntegerIndexStorageTest, RangeQuerySinglePartialUnsortedBucket) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add some keys into unsorted buckets (-99,-1).
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{-19}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{-72}),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(2));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ // Range query on partial range of each unsorted bucket should get the correct
+ // result.
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-1000, /*key_upper=*/-15),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-80, /*key_upper=*/149),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-80, /*key_upper=*/-15),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-38, /*key_upper=*/-15),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-80, /*key_upper=*/-38),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-95, /*key_upper=*/-92),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_P(IntegerIndexStorageTest, RangeQueryMultipleBuckets) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add some keys into buckets [(-1000,-100), (200,300), (1000,INT64_MAX),
+ // (INT64_MIN,-1001)]
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{-500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{1024}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{-200}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{208}),
+ IsOk());
+ EXPECT_THAT(
+ storage->AddKeys(/*document_id=*/4, kDefaultSectionId,
+ /*new_keys=*/{std::numeric_limits<int64_t>::max()}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/5, kDefaultSectionId,
+ /*new_keys=*/{-1000}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/6, kDefaultSectionId,
+ /*new_keys=*/{300}),
+ IsOk());
+ EXPECT_THAT(
+ storage->AddKeys(/*document_id=*/7, kDefaultSectionId,
+ /*new_keys=*/{std::numeric_limits<int64_t>::min()}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/8, kDefaultSectionId,
+ /*new_keys=*/{-1500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/9, kDefaultSectionId,
+ /*new_keys=*/{2000}),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(10));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ // Range query should get the correct result.
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(
+ ElementsAre(EqualsDocHitInfo(/*document_id=*/9, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/8, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/7, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/6, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-199,
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/9, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/6, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/-200),
+ IsOkAndHolds(
+ ElementsAre(EqualsDocHitInfo(/*document_id=*/8, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/7, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+}
+
+TEST_P(IntegerIndexStorageTest, BatchAdd) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Batch add the following keys (including some edge cases) to test the
+ // correctness of the sort and binary search logic in AddKeys().
+ // clang-format off
+ std::vector<int64_t> keys = {4000, 3000, 2000, 300, 201, 200, 106, 104,
+ 100, 3, 2, 1, 0, -97, -98, -99,
+ -100, -200, -1000, -1001, -1500, -2000,
+ std::numeric_limits<int64_t>::max(),
+ std::numeric_limits<int64_t>::min()};
+ // clang-format on
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId,
+ std::vector<int64_t>(keys)),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(keys.size()));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ for (int64_t key : keys) {
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/key, /*key_upper=*/key),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(kDefaultDocumentId, expected_sections))));
+ }
+}
+
+TEST_P(IntegerIndexStorageTest, BatchAddShouldDedupeKeys) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ std::vector<int64_t> keys = {2, 3, 1, 2, 4, -1, -1, 100, 3};
+ EXPECT_THAT(
+ storage->AddKeys(kDefaultDocumentId, kDefaultSectionId, std::move(keys)),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(6));
+}
+
+TEST_P(IntegerIndexStorageTest, MultipleKeysShouldMergeAndDedupeDocHitInfo) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add some keys with same document id and section id.
+ EXPECT_THAT(
+ storage->AddKeys(
+ /*document_id=*/0, kDefaultSectionId, /*new_keys=*/
+ {-500, 1024, -200, 208, std::numeric_limits<int64_t>::max(), -1000,
+ 300, std::numeric_limits<int64_t>::min(), -1500, 2000}),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(10));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(
+ ElementsAre(EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+}
+
+TEST_P(IntegerIndexStorageTest,
+ MultipleSectionsShouldMergeSectionsAndDedupeDocHitInfo) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add some keys with same document id but different section ids.
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, /*section_id=*/63,
+ /*new_keys=*/{-500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, /*section_id=*/62,
+ /*new_keys=*/{1024}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, /*section_id=*/61,
+ /*new_keys=*/{-200}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, /*section_id=*/60,
+ /*new_keys=*/{208}),
+ IsOk());
+ EXPECT_THAT(
+ storage->AddKeys(kDefaultDocumentId, /*section_id=*/59,
+ /*new_keys=*/{std::numeric_limits<int64_t>::max()}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, /*section_id=*/58,
+ /*new_keys=*/{-1000}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, /*section_id=*/57,
+ /*new_keys=*/{300}),
+ IsOk());
+ EXPECT_THAT(
+ storage->AddKeys(kDefaultDocumentId, /*section_id=*/56,
+ /*new_keys=*/{std::numeric_limits<int64_t>::min()}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, /*section_id=*/55,
+ /*new_keys=*/{-1500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, /*section_id=*/54,
+ /*new_keys=*/{2000}),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(10));
+
+ std::vector<SectionId> expected_sections = {63, 62, 61, 60, 59,
+ 58, 57, 56, 55, 54};
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(kDefaultDocumentId, expected_sections))));
+}
+
+TEST_P(IntegerIndexStorageTest, SplitBuckets) {
+ int32_t custom_num_data_threshold_for_bucket_split = 300;
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/{},
+ custom_num_data_threshold_for_bucket_split,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add custom_num_data_threshold_for_bucket_split + 1 keys to invoke bucket
+ // splitting.
+ // - Keys: custom_num_data_threshold_for_bucket_split to 0 Document
+ // - ids: 0 to custom_num_data_threshold_for_bucket_split
+ std::unordered_map<int64_t, DocumentId> data;
+ int64_t key = custom_num_data_threshold_for_bucket_split;
+ DocumentId document_id = 0;
+ for (int i = 0; i < custom_num_data_threshold_for_bucket_split + 1; ++i) {
+ data[key] = document_id;
+ ICING_ASSERT_OK(
+ storage->AddKeys(document_id, kDefaultSectionId, /*new_keys=*/{key}));
+ ++document_id;
+ --key;
+ }
+ ICING_ASSERT_OK(storage->PersistToDisk());
+
+ // Manually check sorted and unsorted buckets.
+ {
+ // Check sorted buckets.
+ const std::string sorted_buckets_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".s");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, sorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ EXPECT_THAT(sorted_buckets->num_elements(), Eq(1));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* bucket1,
+ sorted_buckets->Get(/*idx=*/0));
+ EXPECT_THAT(bucket1->key_lower(), Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(bucket1->key_upper(), Ne(std::numeric_limits<int64_t>::max()));
+
+ int64_t sorted_bucket_key_upper = bucket1->key_upper();
+
+ // Check unsorted buckets.
+ const std::string unsorted_buckets_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".u");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, unsorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ EXPECT_THAT(unsorted_buckets->num_elements(), Ge(1));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* bucket2,
+ unsorted_buckets->Get(/*idx=*/0));
+ EXPECT_THAT(bucket2->key_lower(), Eq(sorted_bucket_key_upper + 1));
+ }
+
+ // Ensure that search works normally.
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ for (int64_t key = custom_num_data_threshold_for_bucket_split; key >= 0;
+ key--) {
+ ASSERT_THAT(data, Contains(Key(key)));
+ DocumentId expected_document_id = data[key];
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/key, /*key_upper=*/key),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(expected_document_id,
+ expected_sections))));
+ }
+}
+
+TEST_P(IntegerIndexStorageTest, SplitBucketsTriggerSortBuckets) {
+ int32_t custom_num_data_threshold_for_bucket_split = 300;
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/{},
+ custom_num_data_threshold_for_bucket_split,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add IntegerIndexStorage::kUnsortedBucketsLengthThreshold keys. For each
+ // key, add custom_num_data_threshold_for_bucket_split + 1 data. Then we will
+ // get:
+ // - Bucket splitting will create kUnsortedBucketsLengthThreshold + 1 unsorted
+ // buckets [[50, 50], [49, 49], ..., [1, 1], [51, INT64_MAX]].
+ // - Since there are kUnsortedBucketsLengthThreshold + 1 unsorted buckets, we
+ // should sort and merge buckets.
+ std::unordered_map<int64_t, std::vector<DocumentId>> data;
+ int64_t key = IntegerIndexStorage::kUnsortedBucketsLengthThreshold;
+ DocumentId document_id = 0;
+ for (int i = 0; i < IntegerIndexStorage::kUnsortedBucketsLengthThreshold;
+ ++i) {
+ for (int j = 0; j < custom_num_data_threshold_for_bucket_split + 1; ++j) {
+ data[key].push_back(document_id);
+ ICING_ASSERT_OK(
+ storage->AddKeys(document_id, kDefaultSectionId, /*new_keys=*/{key}));
+ ++document_id;
+ }
+ --key;
+ }
+ ICING_ASSERT_OK(storage->PersistToDisk());
+
+ // Manually check sorted and unsorted buckets.
+ {
+ // Check unsorted buckets.
+ const std::string unsorted_buckets_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".u");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, unsorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ EXPECT_THAT(unsorted_buckets->num_elements(), Eq(0));
+
+ // Check sorted buckets.
+ const std::string sorted_buckets_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".s");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, sorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ EXPECT_THAT(sorted_buckets->num_elements(), Gt(1));
+ }
+
+ // Ensure that search works normally.
+ for (key = 1; key <= IntegerIndexStorage::kUnsortedBucketsLengthThreshold;
+ ++key) {
+ ASSERT_THAT(data, Contains(Key(key)));
+
+ std::vector<DocHitInfo> expected_doc_hit_infos;
+ for (DocumentId doc_id : data[key]) {
+ expected_doc_hit_infos.push_back(DocHitInfo(
+ doc_id, /*hit_section_ids_mask=*/UINT64_C(1) << kDefaultSectionId));
+ }
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/key, /*key_upper=*/key),
+ IsOkAndHolds(ElementsAreArray(expected_doc_hit_infos.rbegin(),
+ expected_doc_hit_infos.rend())));
+ }
+}
+
+TEST_P(IntegerIndexStorageTest, TransferIndex) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{-500}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{1024}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{-200}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/5, kDefaultSectionId,
+ /*new_keys=*/{-60}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/8, kDefaultSectionId,
+ /*new_keys=*/{-60}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/13, kDefaultSectionId,
+ /*new_keys=*/{-500}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/21, kDefaultSectionId,
+ /*new_keys=*/{2048}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/34, kDefaultSectionId,
+ /*new_keys=*/{156}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/55, kDefaultSectionId,
+ /*new_keys=*/{20}));
+ ASSERT_THAT(storage->num_data(), Eq(9));
+
+ // Delete doc id = 5, 34, compress and keep the rest.
+ std::vector<DocumentId> document_id_old_to_new(56, kInvalidDocumentId);
+ document_id_old_to_new[1] = 8;
+ document_id_old_to_new[2] = 3;
+ document_id_old_to_new[3] = 0;
+ document_id_old_to_new[8] = 2;
+ document_id_old_to_new[13] = 6;
+ document_id_old_to_new[21] = 1;
+ document_id_old_to_new[55] = 4;
+
+ // Transfer to new storage.
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_ + "_temp",
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ EXPECT_THAT(
+ storage->TransferIndex(document_id_old_to_new, new_storage.get()),
+ IsOk());
+ ICING_ASSERT_OK(new_storage->PersistToDisk());
+ }
+
+ // Verify after transferring and reinitializing the instance.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_ + "_temp",
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(new_storage->num_data(), Eq(7));
+
+ // -500 had hits for old_docids 1 and 13, which are now 6 and 8.
+ EXPECT_THAT(Query(new_storage.get(), /*key_lower=*/-500, /*key_upper=*/-500),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/8, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/6, expected_sections))));
+
+ // 1024 had a hit for old_docid 2, which is now 3.
+ EXPECT_THAT(Query(new_storage.get(), /*key_lower=*/1024, /*key_upper=*/1024),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections))));
+
+ // -200 had a hit for old_docid 3, which is now 0.
+ EXPECT_THAT(Query(new_storage.get(), /*key_lower=*/-200, /*key_upper=*/-200),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+
+ // -60 had hits for old_docids 5 and 8, which is now only 2 (because doc 5 has
+ // been deleted).
+ EXPECT_THAT(Query(new_storage.get(), /*key_lower=*/-60, /*key_upper=*/-60),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections))));
+
+ // 2048 had a hit for old_docid 21, which is now 1.
+ EXPECT_THAT(Query(new_storage.get(), /*key_lower=*/2048, /*key_upper=*/2048),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+
+ // 156 had a hit for old_docid 34, which is not found now (because doc 34 has
+ // been deleted).
+ EXPECT_THAT(Query(new_storage.get(), /*key_lower=*/156, /*key_upper=*/156),
+ IsOkAndHolds(IsEmpty()));
+
+ // 20 had a hit for old_docid 55, which is now 4.
+ EXPECT_THAT(Query(new_storage.get(), /*key_lower=*/20, /*key_upper=*/20),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections))));
+}
+
+TEST_P(IntegerIndexStorageTest, TransferIndexOutOfRangeDocumentId) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{120}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{-2000}));
+ ASSERT_THAT(storage->num_data(), Eq(2));
+
+ // Create document_id_old_to_new with size = 2. TransferIndex should handle
+ // out of range DocumentId properly.
+ std::vector<DocumentId> document_id_old_to_new = {kInvalidDocumentId, 0};
+
+ // Transfer to new storage.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_ + "_temp",
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ EXPECT_THAT(storage->TransferIndex(document_id_old_to_new, new_storage.get()),
+ IsOk());
+
+ // Verify after transferring.
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(new_storage->num_data(), Eq(1));
+ EXPECT_THAT(Query(new_storage.get(), /*key_lower=*/120, /*key_upper=*/120),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(
+ Query(new_storage.get(), /*key_lower=*/-2000, /*key_upper=*/-2000),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_P(IntegerIndexStorageTest, TransferEmptyIndex) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ ASSERT_THAT(storage->num_data(), Eq(0));
+
+ std::vector<DocumentId> document_id_old_to_new = {kInvalidDocumentId, 0, 1,
+ kInvalidDocumentId, 2};
+
+ // Transfer to new storage.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_ + "_temp",
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ EXPECT_THAT(storage->TransferIndex(document_id_old_to_new, new_storage.get()),
+ IsOk());
+
+ // Verify after transferring.
+ EXPECT_THAT(new_storage->num_data(), Eq(0));
+ EXPECT_THAT(Query(new_storage.get(),
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_P(IntegerIndexStorageTest, TransferIndexDeleteAll) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{-500}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{1024}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{-200}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/5, kDefaultSectionId,
+ /*new_keys=*/{-60}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/8, kDefaultSectionId,
+ /*new_keys=*/{-60}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/13, kDefaultSectionId,
+ /*new_keys=*/{-500}));
+ ASSERT_THAT(storage->num_data(), Eq(6));
+
+ // Delete all documents.
+ std::vector<DocumentId> document_id_old_to_new(14, kInvalidDocumentId);
+
+ // Transfer to new storage.
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_ + "_temp",
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ EXPECT_THAT(
+ storage->TransferIndex(document_id_old_to_new, new_storage.get()),
+ IsOk());
+ ICING_ASSERT_OK(new_storage->PersistToDisk());
+ }
+
+ // Verify after transferring and reinitializing the instance.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_ + "_temp",
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(new_storage->num_data(), Eq(0));
+ EXPECT_THAT(Query(new_storage.get(),
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_P(IntegerIndexStorageTest, TransferIndexShouldInvokeMergeBuckets) {
+ int32_t custom_num_data_threshold_for_bucket_split = 300;
+ int32_t custom_num_data_threshold_for_bucket_merge =
+ IntegerIndexStorage::kNumDataThresholdRatioForBucketMerge *
+ custom_num_data_threshold_for_bucket_split;
+
+ // This test verifies that if TransferIndex invokes bucket merging logic to
+ // ensure sure we're able to avoid having mostly empty buckets after inserting
+ // and deleting data for many rounds.
+
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ custom_num_data_threshold_for_bucket_split,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{-500}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{1024}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{-200}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{-60}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/4, kDefaultSectionId,
+ /*new_keys=*/{-60}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/5, kDefaultSectionId,
+ /*new_keys=*/{-500}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/6, kDefaultSectionId,
+ /*new_keys=*/{2048}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/7, kDefaultSectionId,
+ /*new_keys=*/{156}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/8, kDefaultSectionId,
+ /*new_keys=*/{20}));
+ ASSERT_THAT(storage->num_data(), Eq(9));
+ ASSERT_THAT(storage->num_data(),
+ Le(custom_num_data_threshold_for_bucket_merge));
+
+ // Create document_id_old_to_new that keeps all existing documents.
+ std::vector<DocumentId> document_id_old_to_new(9);
+ std::iota(document_id_old_to_new.begin(), document_id_old_to_new.end(), 0);
+
+ // Transfer to new storage. It should result in 1 bucket: [INT64_MIN,
+ // INT64_MAX] after transferring.
+ const std::string new_storage_working_path = working_path_ + "_temp";
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(
+ filesystem_, new_storage_working_path,
+ Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/{},
+ custom_num_data_threshold_for_bucket_split,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ EXPECT_THAT(
+ storage->TransferIndex(document_id_old_to_new, new_storage.get()),
+ IsOk());
+ EXPECT_THAT(new_storage->num_data(), Eq(9));
+ }
+
+ // Check new_storage->sorted_bucket_ manually.
+ const std::string sorted_buckets_file_path = absl_ports::StrCat(
+ new_storage_working_path, "/", IntegerIndexStorage::kFilePrefix, ".s");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, sorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ EXPECT_THAT(sorted_buckets->num_elements(), Eq(1));
+
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* bk1, sorted_buckets->Get(/*idx=*/0));
+ EXPECT_THAT(bk1->key_lower(), Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(bk1->key_upper(), Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(bk1->num_data(), Eq(9));
+}
+
+TEST_P(IntegerIndexStorageTest, TransferIndexExceedsMergeThreshold) {
+ int32_t custom_num_data_threshold_for_bucket_split = 300;
+ int32_t custom_num_data_threshold_for_bucket_merge =
+ IntegerIndexStorage::kNumDataThresholdRatioForBucketMerge *
+ custom_num_data_threshold_for_bucket_split;
+
+ // This test verifies that if TransferIndex invokes bucket merging logic and
+ // doesn't merge buckets too aggressively to ensure we won't get a bucket with
+ // too many data.
+
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ custom_num_data_threshold_for_bucket_split,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Insert data into 2 buckets so that total # of these 2 buckets exceed
+ // custom_num_data_threshold_for_bucket_merge.
+ // - Bucket 1: [-1000, -100]
+ // - Bucket 2: [101, 149]
+ DocumentId document_id = 0;
+ int num_data_for_bucket1 = custom_num_data_threshold_for_bucket_merge - 50;
+ for (int i = 0; i < num_data_for_bucket1; ++i) {
+ ICING_ASSERT_OK(storage->AddKeys(document_id, kDefaultSectionId,
+ /*new_keys=*/{-200}));
+ ++document_id;
+ }
+
+ int num_data_for_bucket2 = 150;
+ for (int i = 0; i < num_data_for_bucket2; ++i) {
+ ICING_ASSERT_OK(storage->AddKeys(document_id, kDefaultSectionId,
+ /*new_keys=*/{120}));
+ ++document_id;
+ }
+
+ ASSERT_THAT(storage->num_data(),
+ Eq(num_data_for_bucket1 + num_data_for_bucket2));
+ ASSERT_THAT(num_data_for_bucket1 + num_data_for_bucket2,
+ Gt(custom_num_data_threshold_for_bucket_merge));
+
+ // Create document_id_old_to_new that keeps all existing documents.
+ std::vector<DocumentId> document_id_old_to_new(document_id);
+ std::iota(document_id_old_to_new.begin(), document_id_old_to_new.end(), 0);
+
+ // Transfer to new storage. This should result in 2 buckets: [INT64_MIN, 100]
+ // and [101, INT64_MAX]
+ const std::string new_storage_working_path = working_path_ + "_temp";
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(
+ filesystem_, new_storage_working_path,
+ Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/{},
+ custom_num_data_threshold_for_bucket_split,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ EXPECT_THAT(
+ storage->TransferIndex(document_id_old_to_new, new_storage.get()),
+ IsOk());
+ EXPECT_THAT(new_storage->num_data(),
+ Eq(num_data_for_bucket1 + num_data_for_bucket2));
+ }
+
+ // Check new_storage->sorted_bucket_ manually.
+ const std::string sorted_buckets_file_path = absl_ports::StrCat(
+ new_storage_working_path, "/", IntegerIndexStorage::kFilePrefix, ".s");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, sorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ EXPECT_THAT(sorted_buckets->num_elements(), Eq(2));
+
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* bk1, sorted_buckets->Get(/*idx=*/0));
+ EXPECT_THAT(bk1->key_lower(), Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(bk1->key_upper(), Eq(100));
+ EXPECT_THAT(bk1->num_data(), Eq(num_data_for_bucket1));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* bk2, sorted_buckets->Get(/*idx=*/1));
+ EXPECT_THAT(bk2->key_lower(), Eq(101));
+ EXPECT_THAT(bk2->key_upper(), Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(bk2->num_data(), Eq(num_data_for_bucket2));
+}
+
+INSTANTIATE_TEST_SUITE_P(IntegerIndexStorageTest, IntegerIndexStorageTest,
+ testing::Values(true, false));
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/numeric/integer-index.cc b/icing/index/numeric/integer-index.cc
new file mode 100644
index 0000000..b2fe159
--- /dev/null
+++ b/icing/index/numeric/integer-index.cc
@@ -0,0 +1,651 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/numeric/integer-index.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/destructible-directory.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
+#include "icing/index/numeric/doc-hit-info-iterator-numeric.h"
+#include "icing/index/numeric/integer-index-storage.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/store/document-id.h"
+#include "icing/util/crc32.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Helper function to get the file name of metadata.
+std::string GetMetadataFileName() {
+ return absl_ports::StrCat(IntegerIndex::kFilePrefix, ".m");
+}
+
+// Helper function to get the file path of metadata according to the given
+// working directory.
+std::string GetMetadataFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", GetMetadataFileName());
+}
+
+constexpr std::string_view kWildcardPropertyIndexFileName =
+ "wildcard_property_index";
+
+constexpr std::string_view kWildcardPropertyStorageFileName =
+ "wildcard_property_storage";
+
+std::string GetWildcardPropertyStorageFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/",
+ kWildcardPropertyStorageFileName);
+}
+
+// Helper function to get the sub working (directory) path of
+// IntegerIndexStorage according to the given working directory and property
+// path.
+std::string GetPropertyIndexStoragePath(std::string_view working_path,
+ std::string_view property_path) {
+ return absl_ports::StrCat(working_path, "/", property_path);
+}
+
+// Helper function to get all existing property paths by listing all
+// directories.
+libtextclassifier3::StatusOr<std::vector<std::string>>
+GetAllExistingPropertyPaths(const Filesystem& filesystem,
+ const std::string& working_path) {
+ std::vector<std::string> property_paths;
+ std::unordered_set<std::string> excludes = {
+ GetMetadataFileName(), std::string(kWildcardPropertyStorageFileName)};
+ if (!filesystem.ListDirectory(working_path.c_str(), excludes,
+ /*recursive=*/false, &property_paths)) {
+ return absl_ports::InternalError("Failed to list directory");
+ }
+ return property_paths;
+}
+
+libtextclassifier3::StatusOr<IntegerIndex::PropertyToStorageMapType>
+GetPropertyIntegerIndexStorageMap(
+ const Filesystem& filesystem, const std::string& working_path,
+ PostingListIntegerIndexSerializer* posting_list_serializer,
+ int32_t num_data_threshold_for_bucket_split, bool pre_mapping_fbv) {
+ ICING_ASSIGN_OR_RETURN(std::vector<std::string> property_paths,
+ GetAllExistingPropertyPaths(filesystem, working_path));
+
+ IntegerIndex::PropertyToStorageMapType property_to_storage_map;
+ for (const std::string& property_path : property_paths) {
+ if (property_path == kWildcardPropertyIndexFileName) {
+ continue;
+ }
+ std::string storage_working_path =
+ GetPropertyIndexStoragePath(working_path, property_path);
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem, storage_working_path,
+ IntegerIndexStorage::Options(num_data_threshold_for_bucket_split,
+ pre_mapping_fbv),
+ posting_list_serializer));
+ property_to_storage_map.insert(
+ std::make_pair(property_path, std::move(storage)));
+ }
+
+ return property_to_storage_map;
+}
+
+// RETURNS:
+// - On success, an unordered_set representing the list of property paths
+// stored in the WildcardPropertyStorage managed by property_storage
+// - INTERNAL_ERROR on any failure to successfully read the underlying proto.
+libtextclassifier3::StatusOr<std::unordered_set<std::string>> CreatePropertySet(
+ const FileBackedProto<WildcardPropertyStorage>& property_storage) {
+ std::unordered_set<std::string> wildcard_properties_set;
+ auto wildcard_properties_or = property_storage.Read();
+ if (!wildcard_properties_or.ok()) {
+ if (absl_ports::IsNotFound(wildcard_properties_or.status())) {
+ return wildcard_properties_set;
+ }
+ return wildcard_properties_or.status();
+ }
+
+ const WildcardPropertyStorage* wildcard_properties =
+ wildcard_properties_or.ValueOrDie();
+ wildcard_properties_set.reserve(wildcard_properties->property_entries_size());
+ for (const std::string& property : wildcard_properties->property_entries()) {
+ wildcard_properties_set.insert(property);
+ }
+ return wildcard_properties_set;
+}
+
+} // namespace
+
+libtextclassifier3::Status IntegerIndex::Editor::IndexAllBufferedKeys() && {
+ integer_index_.SetDirty();
+
+ auto iter = integer_index_.property_to_storage_map_.find(property_path_);
+ IntegerIndexStorage* target_storage = nullptr;
+ // 1. Check if this property already has its own individual index.
+ if (iter != integer_index_.property_to_storage_map_.end()) {
+ target_storage = iter->second.get();
+ // 2. Check if this property was added to wildcard storage.
+ } else if (integer_index_.wildcard_properties_set_.find(property_path_) !=
+ integer_index_.wildcard_properties_set_.end()) {
+ target_storage = integer_index_.wildcard_index_storage_.get();
+ // 3. Check if we've reach the limit of individual property storages.
+ } else if (integer_index_.property_to_storage_map_.size() >=
+ kMaxPropertyStorages) {
+ // 3a. Create the wildcard storage if it doesn't exist.
+ if (integer_index_.wildcard_index_storage_ == nullptr) {
+ ICING_ASSIGN_OR_RETURN(
+ integer_index_.wildcard_index_storage_,
+ IntegerIndexStorage::Create(
+ integer_index_.filesystem_,
+ GetPropertyIndexStoragePath(integer_index_.working_path_,
+ kWildcardPropertyIndexFileName),
+ IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
+ pre_mapping_fbv_),
+ integer_index_.posting_list_serializer_.get()));
+ }
+ ICING_RETURN_IF_ERROR(
+ integer_index_.AddPropertyToWildcardStorage(property_path_));
+ target_storage = integer_index_.wildcard_index_storage_.get();
+ // 4. Create a new individual storage for this new property.
+ } else {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(
+ integer_index_.filesystem_,
+ GetPropertyIndexStoragePath(integer_index_.working_path_,
+ property_path_),
+ IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
+ pre_mapping_fbv_),
+ integer_index_.posting_list_serializer_.get()));
+ target_storage = new_storage.get();
+ integer_index_.property_to_storage_map_.insert(
+ std::make_pair(property_path_, std::move(new_storage)));
+ }
+
+ return target_storage->AddKeys(document_id_, section_id_,
+ std::move(seen_keys_));
+}
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+IntegerIndex::Create(const Filesystem& filesystem, std::string working_path,
+ int32_t num_data_threshold_for_bucket_split,
+ bool pre_mapping_fbv) {
+ if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str())) {
+ // Discard working_path if metadata file is missing, and reinitialize.
+ if (filesystem.DirectoryExists(working_path.c_str())) {
+ ICING_RETURN_IF_ERROR(Discard(filesystem, working_path));
+ }
+ return InitializeNewFiles(filesystem, std::move(working_path),
+ num_data_threshold_for_bucket_split,
+ pre_mapping_fbv);
+ }
+ return InitializeExistingFiles(filesystem, std::move(working_path),
+ num_data_threshold_for_bucket_split,
+ pre_mapping_fbv);
+}
+
+IntegerIndex::~IntegerIndex() {
+ if (!PersistToDisk().ok()) {
+ ICING_LOG(WARNING)
+ << "Failed to persist integer index to disk while destructing "
+ << working_path_;
+ }
+}
+
+libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
+IntegerIndex::GetIterator(std::string_view property_path, int64_t key_lower,
+ int64_t key_upper,
+ const DocumentStore& document_store,
+ const SchemaStore& schema_store,
+ int64_t current_time_ms) const {
+ std::string property_path_str(property_path);
+ auto iter = property_to_storage_map_.find(property_path_str);
+ if (iter != property_to_storage_map_.end()) {
+ return iter->second->GetIterator(key_lower, key_upper);
+ }
+
+ if (wildcard_properties_set_.find(property_path_str) !=
+ wildcard_properties_set_.end()) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<DocHitInfoIterator> delegate,
+ wildcard_index_storage_->GetIterator(key_lower, key_upper));
+ std::set<std::string> property_paths = {std::move(property_path_str)};
+ return std::make_unique<DocHitInfoIteratorSectionRestrict>(
+ std::move(delegate), &document_store, &schema_store,
+ std::move(property_paths), current_time_ms);
+ }
+
+ // Return an empty iterator.
+ return std::make_unique<DocHitInfoIteratorNumeric<int64_t>>(
+ /*numeric_index_iter=*/nullptr);
+}
+
+libtextclassifier3::Status IntegerIndex::AddPropertyToWildcardStorage(
+ const std::string& property_path) {
+ SetDirty();
+
+ WildcardPropertyStorage wildcard_properties;
+ wildcard_properties.mutable_property_entries()->Reserve(
+ wildcard_properties_set_.size());
+ for (const std::string& property_path : wildcard_properties_set_) {
+ wildcard_properties.add_property_entries(property_path);
+ }
+ ICING_RETURN_IF_ERROR(wildcard_property_storage_->Write(
+ std::make_unique<WildcardPropertyStorage>(
+ std::move(wildcard_properties))));
+
+ wildcard_properties_set_.insert(property_path);
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IntegerIndex::Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ DocumentId new_last_added_document_id) {
+ std::string temp_working_path = working_path_ + "_temp";
+ ICING_RETURN_IF_ERROR(Discard(filesystem_, temp_working_path));
+
+ DestructibleDirectory temp_working_path_ddir(&filesystem_,
+ std::move(temp_working_path));
+ if (!temp_working_path_ddir.is_valid()) {
+ return absl_ports::InternalError(
+ "Unable to create temp directory to build new integer index");
+ }
+
+ {
+ // Transfer all indexed data from current integer index to new integer
+ // index. Also PersistToDisk and destruct the instance after finishing, so
+ // we can safely swap directories later.
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<IntegerIndex> new_integer_index,
+ Create(filesystem_, temp_working_path_ddir.dir(),
+ num_data_threshold_for_bucket_split_, pre_mapping_fbv_));
+ ICING_RETURN_IF_ERROR(
+ TransferIndex(document_id_old_to_new, new_integer_index.get()));
+ new_integer_index->set_last_added_document_id(new_last_added_document_id);
+ ICING_RETURN_IF_ERROR(new_integer_index->PersistToDisk());
+ }
+
+ // Destruct current storage instances to safely swap directories.
+ metadata_mmapped_file_.reset();
+ property_to_storage_map_.clear();
+ wildcard_index_storage_.reset();
+ wildcard_property_storage_.reset();
+ if (!filesystem_.SwapFiles(temp_working_path_ddir.dir().c_str(),
+ working_path_.c_str())) {
+ return absl_ports::InternalError(
+ "Unable to apply new integer index due to failed swap");
+ }
+
+ // Reinitialize the integer index.
+ std::string metadata_file_path = GetMetadataFilePath(working_path_);
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile metadata_mmapped_file,
+ MemoryMappedFile::Create(filesystem_, metadata_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/kMetadataFileSize,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/kMetadataFileSize));
+ if (metadata_mmapped_file.available_size() != kMetadataFileSize) {
+ return absl_ports::InternalError(
+ "Invalid metadata file size after Optimize");
+ }
+ metadata_mmapped_file_ =
+ std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file));
+
+ // Recreate all of the data structures tracking the wildcard storage.
+ std::string wildcard_property_path =
+ GetWildcardPropertyStorageFilePath(working_path_);
+ wildcard_property_storage_ =
+ std::make_unique<FileBackedProto<WildcardPropertyStorage>>(
+ filesystem_, wildcard_property_path);
+
+ ICING_ASSIGN_OR_RETURN(wildcard_properties_set_,
+ CreatePropertySet(*wildcard_property_storage_));
+ if (!wildcard_properties_set_.empty()) {
+ ICING_ASSIGN_OR_RETURN(
+ wildcard_index_storage_,
+ IntegerIndexStorage::Create(
+ filesystem_,
+ GetPropertyIndexStoragePath(working_path_,
+ kWildcardPropertyIndexFileName),
+ IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
+ pre_mapping_fbv_),
+ posting_list_serializer_.get()));
+ }
+
+ // Initialize all existing integer index storages.
+ ICING_ASSIGN_OR_RETURN(
+ property_to_storage_map_,
+ GetPropertyIntegerIndexStorageMap(
+ filesystem_, working_path_, posting_list_serializer_.get(),
+ num_data_threshold_for_bucket_split_, pre_mapping_fbv_));
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IntegerIndex::Clear() {
+ SetDirty();
+
+ // Step 1: clear property_to_storage_map_.
+ property_to_storage_map_.clear();
+ wildcard_index_storage_.reset();
+
+ // Step 2: delete all IntegerIndexStorages. It is safe because there is no
+ // active IntegerIndexStorage after clearing the map.
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<std::string> property_paths,
+ GetAllExistingPropertyPaths(filesystem_, working_path_));
+ for (const std::string& property_path : property_paths) {
+ ICING_RETURN_IF_ERROR(IntegerIndexStorage::Discard(
+ filesystem_,
+ GetPropertyIndexStoragePath(working_path_, property_path)));
+ }
+
+ // Step 3: Delete the wildcard property storage
+ std::string wildcard_property_path =
+ GetWildcardPropertyStorageFilePath(working_path_);
+ if (filesystem_.FileExists(wildcard_property_path.c_str()) ||
+ !filesystem_.DeleteFile(wildcard_property_path.c_str())) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Unable to delete file at path ", wildcard_property_path));
+ }
+
+ info().last_added_document_id = kInvalidDocumentId;
+ return libtextclassifier3::Status::OK;
+}
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+IntegerIndex::InitializeNewFiles(const Filesystem& filesystem,
+ std::string&& working_path,
+ int32_t num_data_threshold_for_bucket_split,
+ bool pre_mapping_fbv) {
+ // Create working directory.
+ if (!filesystem.CreateDirectoryRecursively(working_path.c_str())) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to create directory: ", working_path));
+ }
+
+ // Initialize metadata file. Create MemoryMappedFile with pre-mapping, and
+ // call GrowAndRemapIfNecessary to grow the underlying file.
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile metadata_mmapped_file,
+ MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/kMetadataFileSize,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/kMetadataFileSize));
+ ICING_RETURN_IF_ERROR(metadata_mmapped_file.GrowAndRemapIfNecessary(
+ /*file_offset=*/0, /*mmap_size=*/kMetadataFileSize));
+
+ std::string wildcard_property_path =
+ GetWildcardPropertyStorageFilePath(working_path);
+ auto wildcard_property_storage =
+ std::make_unique<FileBackedProto<WildcardPropertyStorage>>(
+ filesystem, wildcard_property_path);
+
+ // Create instance.
+ auto new_integer_index = std::unique_ptr<IntegerIndex>(new IntegerIndex(
+ filesystem, std::move(working_path),
+ std::make_unique<PostingListIntegerIndexSerializer>(),
+ std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
+ /*property_to_storage_map=*/{}, std::move(wildcard_property_storage),
+ /*wildcard_properties_set=*/{}, /*wildcard_index_storage=*/nullptr,
+ num_data_threshold_for_bucket_split, pre_mapping_fbv));
+
+ // Initialize info content by writing mapped memory directly.
+ Info& info_ref = new_integer_index->info();
+ info_ref.magic = Info::kMagic;
+ info_ref.last_added_document_id = kInvalidDocumentId;
+ info_ref.num_data_threshold_for_bucket_split =
+ num_data_threshold_for_bucket_split;
+ // Initialize new PersistentStorage. The initial checksums will be computed
+ // and set via InitializeNewStorage.
+ ICING_RETURN_IF_ERROR(new_integer_index->InitializeNewStorage());
+
+ return new_integer_index;
+}
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+IntegerIndex::InitializeExistingFiles(
+ const Filesystem& filesystem, std::string&& working_path,
+ int32_t num_data_threshold_for_bucket_split, bool pre_mapping_fbv) {
+ // Mmap the content of the crcs and info.
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile metadata_mmapped_file,
+ MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/kMetadataFileSize,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/kMetadataFileSize));
+ if (metadata_mmapped_file.available_size() != kMetadataFileSize) {
+ return absl_ports::FailedPreconditionError("Incorrect metadata file size");
+ }
+
+ auto posting_list_serializer =
+ std::make_unique<PostingListIntegerIndexSerializer>();
+
+ // Initialize all existing integer index storages.
+ ICING_ASSIGN_OR_RETURN(
+ PropertyToStorageMapType property_to_storage_map,
+ GetPropertyIntegerIndexStorageMap(
+ filesystem, working_path, posting_list_serializer.get(),
+ num_data_threshold_for_bucket_split, pre_mapping_fbv));
+
+ std::string wildcard_property_path =
+ GetWildcardPropertyStorageFilePath(working_path);
+ auto wildcard_property_storage =
+ std::make_unique<FileBackedProto<WildcardPropertyStorage>>(
+ filesystem, wildcard_property_path);
+
+ ICING_ASSIGN_OR_RETURN(
+ std::unordered_set<std::string> wildcard_properties_set,
+ CreatePropertySet(*wildcard_property_storage));
+
+ std::unique_ptr<IntegerIndexStorage> wildcard_index_storage;
+ if (!wildcard_properties_set.empty()) {
+ ICING_ASSIGN_OR_RETURN(
+ wildcard_index_storage,
+ IntegerIndexStorage::Create(
+ filesystem,
+ GetPropertyIndexStoragePath(working_path,
+ kWildcardPropertyIndexFileName),
+ IntegerIndexStorage::Options(num_data_threshold_for_bucket_split,
+ pre_mapping_fbv),
+ posting_list_serializer.get()));
+ }
+
+ // Create instance.
+ auto integer_index = std::unique_ptr<IntegerIndex>(new IntegerIndex(
+ filesystem, std::move(working_path), std::move(posting_list_serializer),
+ std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
+ std::move(property_to_storage_map), std::move(wildcard_property_storage),
+ std::move(wildcard_properties_set), std::move(wildcard_index_storage),
+ num_data_threshold_for_bucket_split, pre_mapping_fbv));
+ // Initialize existing PersistentStorage. Checksums will be validated.
+ ICING_RETURN_IF_ERROR(integer_index->InitializeExistingStorage());
+
+ // Validate magic.
+ if (integer_index->info().magic != Info::kMagic) {
+ return absl_ports::FailedPreconditionError("Incorrect magic value");
+ }
+
+ // If num_data_threshold_for_bucket_split mismatches, then return error to let
+ // caller rebuild.
+ if (integer_index->info().num_data_threshold_for_bucket_split !=
+ num_data_threshold_for_bucket_split) {
+ return absl_ports::FailedPreconditionError(
+ "Mismatch num_data_threshold_for_bucket_split");
+ }
+
+ return integer_index;
+}
+
+libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+IntegerIndex::TransferIntegerIndexStorage(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ const IntegerIndexStorage* old_storage, const std::string& property_path,
+ IntegerIndex* new_integer_index) const {
+ std::string new_storage_working_path = GetPropertyIndexStoragePath(
+ new_integer_index->working_path_, property_path);
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(
+ new_integer_index->filesystem_, new_storage_working_path,
+ IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
+ pre_mapping_fbv_),
+ new_integer_index->posting_list_serializer_.get()));
+
+ ICING_RETURN_IF_ERROR(
+ old_storage->TransferIndex(document_id_old_to_new, new_storage.get()));
+
+ if (new_storage->num_data() == 0) {
+ new_storage.reset();
+ ICING_RETURN_IF_ERROR(
+ IntegerIndexStorage::Discard(filesystem_, new_storage_working_path));
+ }
+ return new_storage;
+}
+
+libtextclassifier3::Status IntegerIndex::TransferWildcardStorage(
+ IntegerIndex* new_integer_index) const {
+ auto property_storage = std::make_unique<WildcardPropertyStorage>();
+ property_storage->mutable_property_entries()->Reserve(
+ wildcard_properties_set_.size());
+ for (const std::string& property : wildcard_properties_set_) {
+ property_storage->add_property_entries(property);
+ }
+
+ ICING_RETURN_IF_ERROR(new_integer_index->wildcard_property_storage_->Write(
+ std::move(property_storage)));
+ new_integer_index->wildcard_properties_set_ = wildcard_properties_set_;
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IntegerIndex::TransferIndex(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ IntegerIndex* new_integer_index) const {
+ // Transfer over the integer index storages
+ std::unique_ptr<IntegerIndexStorage> new_storage;
+ for (const auto& [property_path, old_storage] : property_to_storage_map_) {
+ ICING_ASSIGN_OR_RETURN(
+ new_storage,
+ TransferIntegerIndexStorage(document_id_old_to_new, old_storage.get(),
+ property_path, new_integer_index));
+ if (new_storage != nullptr) {
+ new_integer_index->property_to_storage_map_.insert(
+ {property_path, std::move(new_storage)});
+ }
+ }
+ if (wildcard_index_storage_ != nullptr) {
+ ICING_ASSIGN_OR_RETURN(
+ new_storage,
+ TransferIntegerIndexStorage(
+ document_id_old_to_new, wildcard_index_storage_.get(),
+ std::string(kWildcardPropertyIndexFileName), new_integer_index));
+ if (new_storage != nullptr) {
+ new_integer_index->wildcard_index_storage_ = std::move(new_storage);
+
+ // The only time we need to copy over the list of properties using
+ // wildcard storage is if wildcard_index_storage and new_storage are both
+ // non-null. Otherwise, the new wildcard index storage won't have any
+ // data.
+ ICING_RETURN_IF_ERROR(TransferWildcardStorage(new_integer_index));
+ }
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IntegerIndex::PersistStoragesToDisk(bool force) {
+ if (!force && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ for (auto& [_, storage] : property_to_storage_map_) {
+ ICING_RETURN_IF_ERROR(storage->PersistToDisk());
+ }
+ // No need to persist wildcard_properties_storage_. All calls to
+ // FileBackedProto::Write are fully written through at the time of the call.
+ if (wildcard_index_storage_) {
+ ICING_RETURN_IF_ERROR(wildcard_index_storage_->PersistToDisk());
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IntegerIndex::PersistMetadataToDisk(bool force) {
+ if (!force && !is_info_dirty() && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ // Changes should have been applied to the underlying file when using
+ // MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, but call msync() as an
+ // extra safety step to ensure they are written out.
+ return metadata_mmapped_file_->PersistToDisk();
+}
+
+libtextclassifier3::StatusOr<Crc32> IntegerIndex::ComputeInfoChecksum(
+ bool force) {
+ if (!force && !is_info_dirty()) {
+ return Crc32(crcs().component_crcs.info_crc);
+ }
+
+ return info().ComputeChecksum();
+}
+
+libtextclassifier3::StatusOr<Crc32> IntegerIndex::ComputeStoragesChecksum(
+ bool force) {
+ if (!force && !is_storage_dirty()) {
+ return Crc32(crcs().component_crcs.storages_crc);
+ }
+
+ // XOR all crcs of all storages. Since XOR is commutative and associative,
+ // the order doesn't matter.
+ uint32_t storages_checksum = 0;
+ for (auto& [property_path, storage] : property_to_storage_map_) {
+ ICING_ASSIGN_OR_RETURN(Crc32 storage_crc, storage->UpdateChecksums());
+ storage_crc.Append(property_path);
+
+ storages_checksum ^= storage_crc.Get();
+ }
+
+ if (wildcard_index_storage_ != nullptr) {
+ ICING_ASSIGN_OR_RETURN(Crc32 storage_crc,
+ wildcard_index_storage_->UpdateChecksums());
+ storages_checksum ^= storage_crc.Get();
+ }
+
+ ICING_ASSIGN_OR_RETURN(Crc32 wildcard_properties_crc,
+ wildcard_property_storage_->ComputeChecksum());
+ storages_checksum ^= wildcard_properties_crc.Get();
+
+ return Crc32(storages_checksum);
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/numeric/integer-index.h b/icing/index/numeric/integer-index.h
new file mode 100644
index 0000000..e7a3127
--- /dev/null
+++ b/icing/index/numeric/integer-index.h
@@ -0,0 +1,409 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_NUMERIC_INTEGER_INDEX_H_
+#define ICING_INDEX_NUMERIC_INTEGER_INDEX_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/file-backed-proto.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/index/numeric/integer-index-storage.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/index/numeric/wildcard-property-storage.pb.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+// IntegerIndex: a wrapper class for managing IntegerIndexStorage (a lower level
+// persistent storage class for indexing and searching contents of integer type
+// sections in documents) instances for different property paths.
+// We separate indexable integer data from different properties into different
+// storages, and IntegerIndex manages and handles indexable integer data
+// appropriately to their corresponding IntegerIndexStorage instance according
+// to the given property path.
+class IntegerIndex : public NumericIndex<int64_t> {
+ public:
+ using PropertyToStorageMapType =
+ std::unordered_map<std::string, std::unique_ptr<IntegerIndexStorage>>;
+
+ // Maximum number of individual property storages that this index will allow
+ // before falling back to placing hits for any new properties into the
+ // 'wildcard' storage.
+ static constexpr int kMaxPropertyStorages = 32;
+
+ static constexpr int32_t kDefaultNumDataThresholdForBucketSplit =
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit;
+
+ struct Info {
+ static constexpr int32_t kMagic = 0x5d8a1e8a;
+
+ int32_t magic;
+ DocumentId last_added_document_id;
+ int32_t num_data_threshold_for_bucket_split;
+
+ Crc32 ComputeChecksum() const {
+ return Crc32(
+ std::string_view(reinterpret_cast<const char*>(this), sizeof(Info)));
+ }
+ } __attribute__((packed));
+ static_assert(sizeof(Info) == 12, "");
+
+ // Metadata file layout: <Crcs><Info>
+ static constexpr int32_t kCrcsMetadataFileOffset = 0;
+ static constexpr int32_t kInfoMetadataFileOffset =
+ static_cast<int32_t>(sizeof(Crcs));
+ static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info);
+ static_assert(kMetadataFileSize == 24, "");
+
+ static constexpr WorkingPathType kWorkingPathType =
+ WorkingPathType::kDirectory;
+ static constexpr std::string_view kFilePrefix = "integer_index";
+
+ // Creates a new IntegerIndex instance to index integers. If any of the
+ // underlying file is missing, then delete the whole working_path and
+ // (re)initialize with new ones. Otherwise initialize and create the instance
+ // by existing files.
+ //
+ // filesystem: Object to make system level calls
+ // working_path: Specifies the working path for PersistentStorage.
+ // IntegerIndex uses working path as working directory and all
+ // related files will be stored under this directory. See
+ // PersistentStorage for more details about the concept of
+ // working_path.
+ // num_data_threshold_for_bucket_split: see IntegerIndexStorage::Options for
+ // more details.
+ // pre_mapping_fbv: flag indicating whether memory map max possible file size
+ // for underlying FileBackedVector before growing the actual
+ // file size.
+ //
+ // Returns:
+ // - FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
+ // checksum.
+ // - INTERNAL_ERROR on I/O errors.
+ // - Any FileBackedVector/MemoryMappedFile errors.
+ static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>> Create(
+ const Filesystem& filesystem, std::string working_path,
+ int32_t num_data_threshold_for_bucket_split, bool pre_mapping_fbv);
+
+ // Deletes IntegerIndex under working_path.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ static libtextclassifier3::Status Discard(const Filesystem& filesystem,
+ const std::string& working_path) {
+ return PersistentStorage::Discard(filesystem, working_path,
+ kWorkingPathType);
+ }
+
+ ~IntegerIndex() override;
+
+ // Returns an Editor instance for adding new records into integer index for a
+ // given property, DocumentId and SectionId. See Editor for more details.
+ std::unique_ptr<typename NumericIndex<int64_t>::Editor> Edit(
+ std::string_view property_path, DocumentId document_id,
+ SectionId section_id) override {
+ return std::make_unique<Editor>(property_path, document_id, section_id,
+ *this, num_data_threshold_for_bucket_split_,
+ pre_mapping_fbv_);
+ }
+
+ // Returns a DocHitInfoIterator for iterating through all docs which have the
+ // specified (integer) property contents in range [query_key_lower,
+ // query_key_upper].
+ // When iterating through all relevant doc hits, it:
+ // - Merges multiple SectionIds of doc hits with same DocumentId into a single
+ // SectionIdMask and constructs DocHitInfo.
+ // - Returns DocHitInfo in descending DocumentId order.
+ //
+ // Returns:
+ // - On success: a DocHitInfoIterator instance
+ // - NOT_FOUND_ERROR if the given property_path doesn't exist
+ // - Any IntegerIndexStorage errors
+ libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> GetIterator(
+ std::string_view property_path, int64_t key_lower, int64_t key_upper,
+ const DocumentStore& document_store, const SchemaStore& schema_store,
+ int64_t current_time_ms) const override;
+
+ // Reduces internal file sizes by reclaiming space and ids of deleted
+ // documents. Integer index will convert all data (hits) to the new document
+ // ids and regenerate all index files. If all data in a property path are
+ // completely deleted, then the underlying storage will be discarded as well.
+ //
+ // - document_id_old_to_new: a map for converting old document id to new
+ // document id.
+ // - new_last_added_document_id: will be used to update the last added
+ // document id in the integer index.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on IO error
+ libtextclassifier3::Status Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ DocumentId new_last_added_document_id) override;
+
+ // Clears all integer index data by discarding all existing storages, and set
+ // last_added_document_id to kInvalidDocumentId.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status Clear() override;
+
+ DocumentId last_added_document_id() const override {
+ return info().last_added_document_id;
+ }
+
+ void set_last_added_document_id(DocumentId document_id) override {
+ SetInfoDirty();
+
+ Info& info_ref = info();
+ if (info_ref.last_added_document_id == kInvalidDocumentId ||
+ document_id > info_ref.last_added_document_id) {
+ info_ref.last_added_document_id = document_id;
+ }
+ }
+
+ int num_property_indices() const override {
+ return property_to_storage_map_.size() +
+ ((wildcard_index_storage_ == nullptr) ? 0 : 1);
+ }
+
+ private:
+ class Editor : public NumericIndex<int64_t>::Editor {
+ public:
+ explicit Editor(std::string_view property_path, DocumentId document_id,
+ SectionId section_id, IntegerIndex& integer_index,
+ int32_t num_data_threshold_for_bucket_split,
+ bool pre_mapping_fbv)
+ : NumericIndex<int64_t>::Editor(property_path, document_id, section_id),
+ integer_index_(integer_index),
+ num_data_threshold_for_bucket_split_(
+ num_data_threshold_for_bucket_split),
+ pre_mapping_fbv_(pre_mapping_fbv) {}
+
+ ~Editor() override = default;
+
+ libtextclassifier3::Status BufferKey(int64_t key) override {
+ seen_keys_.push_back(key);
+ return libtextclassifier3::Status::OK;
+ }
+
+ libtextclassifier3::Status IndexAllBufferedKeys() && override;
+
+ private:
+ // Vector for caching all seen keys. Since IntegerIndexStorage::AddKeys
+ // sorts and dedupes keys, we can just simply use vector here and move it to
+ // AddKeys().
+ std::vector<int64_t> seen_keys_;
+
+ IntegerIndex& integer_index_; // Does not own.
+
+ int32_t num_data_threshold_for_bucket_split_;
+
+ // Flag indicating whether memory map max possible file size for underlying
+ // FileBackedVector before growing the actual file size.
+ bool pre_mapping_fbv_;
+ };
+
+ explicit IntegerIndex(
+ const Filesystem& filesystem, std::string&& working_path,
+ std::unique_ptr<PostingListIntegerIndexSerializer>
+ posting_list_serializer,
+ std::unique_ptr<MemoryMappedFile> metadata_mmapped_file,
+ PropertyToStorageMapType&& property_to_storage_map,
+ std::unique_ptr<FileBackedProto<WildcardPropertyStorage>>
+ wildcard_property_storage,
+ std::unordered_set<std::string> wildcard_properties_set,
+ std::unique_ptr<icing::lib::IntegerIndexStorage> wildcard_index_storage,
+ int32_t num_data_threshold_for_bucket_split, bool pre_mapping_fbv)
+ : NumericIndex<int64_t>(filesystem, std::move(working_path),
+ kWorkingPathType),
+ posting_list_serializer_(std::move(posting_list_serializer)),
+ metadata_mmapped_file_(std::move(metadata_mmapped_file)),
+ property_to_storage_map_(std::move(property_to_storage_map)),
+ wildcard_property_storage_(std::move(wildcard_property_storage)),
+ wildcard_properties_set_(std::move(wildcard_properties_set)),
+ wildcard_index_storage_(std::move(wildcard_index_storage)),
+ num_data_threshold_for_bucket_split_(
+ num_data_threshold_for_bucket_split),
+ pre_mapping_fbv_(pre_mapping_fbv),
+ is_info_dirty_(false),
+ is_storage_dirty_(false) {}
+
+ static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+ InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path,
+ int32_t num_data_threshold_for_bucket_split,
+ bool pre_mapping_fbv);
+
+ static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+ InitializeExistingFiles(const Filesystem& filesystem,
+ std::string&& working_path,
+ int32_t num_data_threshold_for_bucket_split,
+ bool pre_mapping_fbv);
+
+ // Adds the property path to the list of properties using wildcard storage.
+ // This will both update the in-memory list (wildcard_properties_set_) and
+ // the persistent list (wilcard_property_storage_).
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR if unable to successfully persist updated properties
+ // list in wildcard_property_storage_.
+ libtextclassifier3::Status AddPropertyToWildcardStorage(
+ const std::string& property_path);
+
+ // Transfers integer index data from the current integer index to
+ // new_integer_index.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error. This could potentially leave the storages
+ // in an invalid state and the caller should handle it properly (e.g.
+ // discard and rebuild)
+ libtextclassifier3::Status TransferIndex(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ IntegerIndex* new_integer_index) const;
+
+ // Transfers integer index data from old_storage to new_integer_index.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error. This could potentially leave the storages
+ // in an invalid state and the caller should handle it properly (e.g.
+ // discard and rebuild)
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ TransferIntegerIndexStorage(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ const IntegerIndexStorage* old_storage, const std::string& property_path,
+ IntegerIndex* new_integer_index) const;
+
+ // Transfers the persistent and in-memory list of properties using the
+ // wildcard storage from old_storage to new_integer_index.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR if unable to successfully persist updated properties
+ // list in new_integer_index.
+ libtextclassifier3::Status TransferWildcardStorage(
+ IntegerIndex* new_integer_index) const;
+
+ // Flushes contents of all storages to underlying files.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistStoragesToDisk(bool force) override;
+
+ // Flushes contents of metadata file.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistMetadataToDisk(bool force) override;
+
+ // Computes and returns Info checksum.
+ //
+ // Returns:
+ // - Crc of the Info on success
+ libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(bool force) override;
+
+ // Computes and returns all storages checksum. Checksums of (storage_crc,
+ // property_path) for all existing property paths will be combined together by
+ // XOR.
+ //
+ // Returns:
+ // - Crc of all storages on success
+ // - INTERNAL_ERROR if any data inconsistency
+ libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) override;
+
+ Crcs& crcs() override {
+ return *reinterpret_cast<Crcs*>(metadata_mmapped_file_->mutable_region() +
+ kCrcsMetadataFileOffset);
+ }
+
+ const Crcs& crcs() const override {
+ return *reinterpret_cast<const Crcs*>(metadata_mmapped_file_->region() +
+ kCrcsMetadataFileOffset);
+ }
+
+ Info& info() {
+ return *reinterpret_cast<Info*>(metadata_mmapped_file_->mutable_region() +
+ kInfoMetadataFileOffset);
+ }
+
+ const Info& info() const {
+ return *reinterpret_cast<const Info*>(metadata_mmapped_file_->region() +
+ kInfoMetadataFileOffset);
+ }
+
+ void SetInfoDirty() { is_info_dirty_ = true; }
+ // When storage is dirty, we have to set info dirty as well. So just expose
+ // SetDirty to set both.
+ void SetDirty() {
+ is_info_dirty_ = true;
+ is_storage_dirty_ = true;
+ }
+
+ bool is_info_dirty() const { return is_info_dirty_; }
+ bool is_storage_dirty() const { return is_storage_dirty_; }
+
+ std::unique_ptr<PostingListIntegerIndexSerializer> posting_list_serializer_;
+
+ std::unique_ptr<MemoryMappedFile> metadata_mmapped_file_;
+
+ // Property path to integer index storage map.
+ PropertyToStorageMapType property_to_storage_map_;
+
+ // Persistent list of properties that have added content to
+ // wildcard_index_storage_.
+ std::unique_ptr<FileBackedProto<WildcardPropertyStorage>>
+ wildcard_property_storage_;
+
+ // In-memory list of properties that have added content to
+ // wildcard_index_storage_.
+ std::unordered_set<std::string> wildcard_properties_set_;
+
+ // The index storage that is used once we have already created
+ // kMaxPropertyStorages in property_to_storage_map.
+ std::unique_ptr<icing::lib::IntegerIndexStorage> wildcard_index_storage_;
+
+ int32_t num_data_threshold_for_bucket_split_;
+
+ // Flag indicating whether memory map max possible file size for underlying
+ // FileBackedVector before growing the actual file size.
+ bool pre_mapping_fbv_;
+
+ bool is_info_dirty_;
+ bool is_storage_dirty_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_NUMERIC_INTEGER_INDEX_H_
diff --git a/icing/index/numeric/integer-index_test.cc b/icing/index/numeric/integer-index_test.cc
new file mode 100644
index 0000000..b2e3fbe
--- /dev/null
+++ b/icing/index/numeric/integer-index_test.cc
@@ -0,0 +1,2465 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/numeric/integer-index.h"
+
+#include <limits>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <type_traits>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/dummy-numeric-index.h"
+#include "icing/index/numeric/integer-index-storage.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::ElementsAreArray;
+using ::testing::Eq;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
+using ::testing::Lt;
+
+using Crcs = PersistentStorage::Crcs;
+using Info = IntegerIndex::Info;
+
+static constexpr int32_t kCorruptedValueOffset = 3;
+constexpr static std::string_view kDefaultTestPropertyPath = "test.property";
+
+constexpr SectionId kDefaultSectionId = 0;
+
+template <typename T>
+class NumericIndexIntegerTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ base_dir_ = GetTestTempDir() + "/icing";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ working_path_ = base_dir_ + "/numeric_index_integer_test";
+ std::string schema_dir = base_dir_ + "/schema_test";
+
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(schema_dir.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_, SchemaStore::Create(&filesystem_, schema_dir, &clock_));
+
+ std::string document_store_dir = base_dir_ + "/doc_store_test";
+ ASSERT_TRUE(
+ filesystem_.CreateDirectoryRecursively(document_store_dir.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult doc_store_create_result,
+ DocumentStore::Create(
+ &filesystem_, document_store_dir, &clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ doc_store_ = std::move(doc_store_create_result.document_store);
+ }
+
+ void TearDown() override {
+ doc_store_.reset();
+ schema_store_.reset();
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
+ template <typename UnknownIntegerIndexType>
+ libtextclassifier3::StatusOr<std::unique_ptr<NumericIndex<int64_t>>>
+ CreateIntegerIndex() {
+ return absl_ports::InvalidArgumentError("Unknown type");
+ }
+
+ template <>
+ libtextclassifier3::StatusOr<std::unique_ptr<NumericIndex<int64_t>>>
+ CreateIntegerIndex<DummyNumericIndex<int64_t>>() {
+ return DummyNumericIndex<int64_t>::Create(filesystem_, working_path_);
+ }
+
+ template <>
+ libtextclassifier3::StatusOr<std::unique_ptr<NumericIndex<int64_t>>>
+ CreateIntegerIndex<IntegerIndex>() {
+ return IntegerIndex::Create(
+ filesystem_, working_path_, /*num_data_threshold_for_bucket_split=*/
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv=*/false);
+ }
+
+ template <typename NotIntegerIndexType>
+ bool is_integer_index() const {
+ return false;
+ }
+
+ template <>
+ bool is_integer_index<IntegerIndex>() const {
+ return true;
+ }
+
+ libtextclassifier3::StatusOr<std::vector<DocumentId>> CompactDocStore() {
+ std::string document_store_dir = base_dir_ + "/doc_store_test";
+ std::string document_store_compact_dir =
+ base_dir_ + "/doc_store_compact_test";
+ if (!filesystem_.CreateDirectoryRecursively(
+ document_store_compact_dir.c_str())) {
+ return absl_ports::InternalError("Unable to create compact directory");
+ }
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<DocumentId> docid_map,
+ doc_store_->OptimizeInto(document_store_compact_dir, nullptr));
+
+ doc_store_.reset();
+ if (!filesystem_.SwapFiles(document_store_dir.c_str(),
+ document_store_compact_dir.c_str())) {
+ return absl_ports::InternalError("Unable to swap directories.");
+ }
+ if (!filesystem_.DeleteDirectoryRecursively(
+ document_store_compact_dir.c_str())) {
+ return absl_ports::InternalError("Unable to delete compact directory");
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ DocumentStore::CreateResult doc_store_create_result,
+ DocumentStore::Create(
+ &filesystem_, document_store_dir, &clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ doc_store_ = std::move(doc_store_create_result.document_store);
+ return docid_map;
+ }
+
+ libtextclassifier3::StatusOr<std::vector<DocHitInfo>> Query(
+ const NumericIndex<int64_t>* integer_index,
+ std::string_view property_path, int64_t key_lower, int64_t key_upper) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<DocHitInfoIterator> iter,
+ integer_index->GetIterator(property_path, key_lower, key_upper,
+ *doc_store_, *schema_store_,
+ clock_.GetSystemTimeMilliseconds()));
+
+ std::vector<DocHitInfo> result;
+ while (iter->Advance().ok()) {
+ result.push_back(iter->doc_hit_info());
+ }
+ return result;
+ }
+
+ Filesystem filesystem_;
+ std::string base_dir_;
+ std::string working_path_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> doc_store_;
+ Clock clock_;
+};
+
+void Index(NumericIndex<int64_t>* integer_index, std::string_view property_path,
+ DocumentId document_id, SectionId section_id,
+ std::vector<int64_t> keys) {
+ std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
+ integer_index->Edit(property_path, document_id, section_id);
+
+ for (const auto& key : keys) {
+ ICING_EXPECT_OK(editor->BufferKey(key));
+ }
+ ICING_EXPECT_OK(std::move(*editor).IndexAllBufferedKeys());
+}
+
+using TestTypes = ::testing::Types<DummyNumericIndex<int64_t>, IntegerIndex>;
+TYPED_TEST_SUITE(NumericIndexIntegerTest, TestTypes);
+
+TYPED_TEST(NumericIndexIntegerTest, SetLastAddedDocumentId) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ constexpr DocumentId kDocumentId = 100;
+ integer_index->set_last_added_document_id(kDocumentId);
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(kDocumentId));
+
+ constexpr DocumentId kNextDocumentId = 123;
+ integer_index->set_last_added_document_id(kNextDocumentId);
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(kNextDocumentId));
+}
+
+TYPED_TEST(
+ NumericIndexIntegerTest,
+ SetLastAddedDocumentIdShouldIgnoreNewDocumentIdNotGreaterThanTheCurrent) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ constexpr DocumentId kDocumentId = 123;
+ integer_index->set_last_added_document_id(kDocumentId);
+ ASSERT_THAT(integer_index->last_added_document_id(), Eq(kDocumentId));
+
+ constexpr DocumentId kNextDocumentId = 100;
+ ASSERT_THAT(kNextDocumentId, Lt(kDocumentId));
+ integer_index->set_last_added_document_id(kNextDocumentId);
+ // last_added_document_id() should remain unchanged.
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(kDocumentId));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, SingleKeyExactQuery) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{3});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/4,
+ kDefaultSectionId, /*keys=*/{4});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{2});
+
+ int64_t query_key = 2;
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/query_key, /*key_upper=*/query_key),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, SingleKeyRangeQuery) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{3});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/4,
+ kDefaultSectionId, /*keys=*/{4});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{2});
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/1, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, WildcardStorageQuery) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ // This test sets its schema assuming that max property storages == 32.
+ ASSERT_THAT(IntegerIndex::kMaxPropertyStorages, Eq(32));
+
+ PropertyConfigProto int_property_config =
+ PropertyConfigBuilder()
+ .SetName("otherProperty1")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .Build();
+ // Create a schema with two types:
+ // - TypeA has 34 properties:
+ // 'desiredProperty', 'otherProperty'*, 'undesiredProperty'
+ // - TypeB has 2 properties: 'anotherProperty', 'desiredProperty'
+ // 1. The 32 'otherProperty's will consume all of the individual storages
+ // 2. TypeA.desiredProperty and TypeB.anotherProperty will both be assigned
+ // SectionId = 0 for their respective types.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(int_property_config)
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty2"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty3"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty4"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty5"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty6"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty7"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty8"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty9"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty10"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty11"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty12"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty13"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty14"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty15"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty16"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty17"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty18"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty19"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty20"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty21"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty22"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty23"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty24"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty25"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty26"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty27"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty28"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty29"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty30"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty31"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty32"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("desiredProperty"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("undesiredProperty")))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeB")
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("anotherProperty"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("desiredProperty")))
+ .Build();
+ ICING_ASSERT_OK(this->schema_store_->SetSchema(
+ schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Put 11 docs of "TypeA" into the document store.
+ DocumentProto doc =
+ DocumentBuilder().SetKey("ns1", "uri0").SetSchema("TypeA").Build();
+ ICING_ASSERT_OK(this->doc_store_->Put(doc));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri1").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri2").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri3").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri4").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri5").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri6").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri7").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri8").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri9").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri10").Build()));
+
+ // Put 5 docs of "TypeB" into the document store.
+ doc = DocumentBuilder(doc).SetUri("uri11").SetSchema("TypeB").Build();
+ ICING_ASSERT_OK(this->doc_store_->Put(doc));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri12").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri13").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri14").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri15").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri16").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri17").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri18").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri19").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri20").Build()));
+
+ // Ids are assigned alphabetically, so the property ids are:
+ // TypeA.desiredProperty = 0
+ // TypeA.otherPropertyN = N
+ // TypeA.undesiredProperty = 33
+ // TypeB.anotherProperty = 0
+ // TypeB.desiredProperty = 1
+ SectionId typea_desired_prop_id = 0;
+ SectionId typea_undesired_prop_id = 33;
+ SectionId typeb_another_prop_id = 0;
+ SectionId typeb_desired_prop_id = 1;
+
+ // Index numeric content for other properties to force our property into the
+ // wildcard storage.
+ std::string other_property_path = "otherProperty";
+ for (int i = 1; i <= IntegerIndex::kMaxPropertyStorages; ++i) {
+ Index(integer_index.get(),
+ absl_ports::StrCat(other_property_path, std::to_string(i)),
+ /*document_id=*/0, /*section_id=*/i, /*keys=*/{i});
+ }
+
+ // Index numeric content for TypeA.desiredProperty
+ std::string desired_property = "desiredProperty";
+ Index(integer_index.get(), desired_property, /*document_id=*/0,
+ typea_desired_prop_id, /*keys=*/{1});
+ Index(integer_index.get(), desired_property, /*document_id=*/1,
+ typea_desired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), desired_property, /*document_id=*/2,
+ typea_desired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), desired_property, /*document_id=*/3,
+ typea_desired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), desired_property, /*document_id=*/4,
+ typea_desired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), desired_property, /*document_id=*/5,
+ typea_desired_prop_id, /*keys=*/{2});
+
+ // Index the same numeric content for TypeA.undesiredProperty
+ std::string undesired_property = "undesiredProperty";
+ Index(integer_index.get(), undesired_property, /*document_id=*/6,
+ typea_undesired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), undesired_property, /*document_id=*/7,
+ typea_undesired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), undesired_property, /*document_id=*/8,
+ typea_undesired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), undesired_property, /*document_id=*/9,
+ typea_undesired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), undesired_property, /*document_id=*/10,
+ typea_undesired_prop_id, /*keys=*/{2});
+
+ // Index the same numeric content for TypeB.anotherProperty
+ std::string another_property = "anotherProperty";
+ Index(integer_index.get(), another_property, /*document_id=*/11,
+ typeb_another_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), another_property, /*document_id=*/12,
+ typeb_another_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), another_property, /*document_id=*/13,
+ typeb_another_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), another_property, /*document_id=*/14,
+ typeb_another_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), another_property, /*document_id=*/15,
+ typeb_another_prop_id, /*keys=*/{2});
+
+ // Finally, index the same numeric content for TypeB.desiredProperty
+ Index(integer_index.get(), desired_property, /*document_id=*/16,
+ typeb_desired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), desired_property, /*document_id=*/17,
+ typeb_desired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), desired_property, /*document_id=*/18,
+ typeb_desired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), desired_property, /*document_id=*/19,
+ typeb_desired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), desired_property, /*document_id=*/20,
+ typeb_desired_prop_id, /*keys=*/{2});
+
+ if (this->template is_integer_index<TypeParam>()) {
+ EXPECT_THAT(integer_index->num_property_indices(), Eq(33));
+ } else {
+ EXPECT_THAT(integer_index->num_property_indices(), Eq(35));
+ }
+
+ // Only the hits for 'desired_prop_id' should be returned.
+ std::vector<SectionId> expected_sections_typea = {typea_desired_prop_id};
+ std::vector<SectionId> expected_sections_typeb = {typeb_desired_prop_id};
+ EXPECT_THAT(
+ this->Query(integer_index.get(), desired_property,
+ /*key_lower=*/2, /*key_upper=*/2),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/20, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/17, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections_typea))));
+
+ EXPECT_THAT(
+ this->Query(integer_index.get(), desired_property,
+ /*key_lower=*/1, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/20, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/17, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/16, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections_typea))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, EmptyResult) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{3});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/4,
+ kDefaultSectionId, /*keys=*/{4});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{2});
+
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/10, /*key_upper=*/10),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/100, /*key_upper=*/200),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TYPED_TEST(NumericIndexIntegerTest,
+ NonExistingPropertyPathShouldReturnEmptyResult) {
+ constexpr std::string_view kAnotherPropertyPath = "another_property";
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{1});
+
+ EXPECT_THAT(this->Query(integer_index.get(), kAnotherPropertyPath,
+ /*key_lower=*/100, /*key_upper=*/200),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TYPED_TEST(NumericIndexIntegerTest,
+ MultipleKeysShouldMergeAndDedupeDocHitInfo) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ // Construct several documents with mutiple keys under the same section.
+ // Range query [1, 3] will find hits with same (DocumentId, SectionId) for
+ // mutiple times. For example, (2, kDefaultSectionId) will be found twice
+ // (once for key = 1 and once for key = 3).
+ // Test if the iterator dedupes correctly.
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{-1000, 0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{-100, 0, 1, 2, 3, 4, 5});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{3, 1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{4, 1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/4,
+ kDefaultSectionId, /*keys=*/{1, 6});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{2, 100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/6,
+ kDefaultSectionId, /*keys=*/{1000, 2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/7,
+ kDefaultSectionId, /*keys=*/{4, -1000});
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/1, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/6, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, EdgeNumericValues) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{-100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{-80});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{std::numeric_limits<int64_t>::max()});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/4,
+ kDefaultSectionId, /*keys=*/{std::numeric_limits<int64_t>::min()});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{200});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/6,
+ kDefaultSectionId, /*keys=*/{100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/7,
+ kDefaultSectionId, /*keys=*/{std::numeric_limits<int64_t>::max()});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/8,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/9,
+ kDefaultSectionId, /*keys=*/{std::numeric_limits<int64_t>::min()});
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+
+ // Negative key
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/-100, /*key_upper=*/-70),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+
+ // INT64_MAX key
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::max(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/7, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections))));
+
+ // INT64_MIN key
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::min()),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/9, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections))));
+
+ // Key = 0
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/0, /*key_upper=*/0),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/8, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+
+ // All keys from INT64_MIN to INT64_MAX
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/9, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/8, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/7, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/6, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest,
+ MultipleSectionsShouldMergeSectionsAndDedupeDocHitInfo) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ // Construct several documents with mutiple numeric sections.
+ // Range query [1, 3] will find hits with same DocumentIds but multiple
+ // different SectionIds. For example, there will be 2 hits (1, 0), (1, 1) for
+ // DocumentId=1.
+ // Test if the iterator merges multiple sections into a single SectionIdMask
+ // correctly.
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/2, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/1, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/0, /*keys=*/{-1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/2, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/1, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/0, /*keys=*/{4});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/5, /*keys=*/{3});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/4, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/3, /*keys=*/{5});
+
+ EXPECT_THAT(
+ this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/1,
+ /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/2, std::vector<SectionId>{4, 5}),
+ EqualsDocHitInfo(/*document_id=*/1, std::vector<SectionId>{1, 2}),
+ EqualsDocHitInfo(/*document_id=*/0, std::vector<SectionId>{1}))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, NonRelevantPropertyShouldNotBeIncluded) {
+ constexpr std::string_view kNonRelevantProperty = "non_relevant_property";
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{3});
+ Index(integer_index.get(), kNonRelevantProperty, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kNonRelevantProperty, /*document_id=*/4,
+ kDefaultSectionId, /*keys=*/{4});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{2});
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/1, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest,
+ RangeQueryKeyLowerGreaterThanKeyUpperShouldReturnError) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{3});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/4,
+ kDefaultSectionId, /*keys=*/{4});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{2});
+
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/3, /*key_upper=*/1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, Optimize) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{3});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/8,
+ kDefaultSectionId, /*keys=*/{4});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/13,
+ kDefaultSectionId, /*keys=*/{2});
+
+ // Delete doc id = 3, 5, compress and keep the rest.
+ std::vector<DocumentId> document_id_old_to_new(14, kInvalidDocumentId);
+ document_id_old_to_new[1] = 0;
+ document_id_old_to_new[2] = 1;
+ document_id_old_to_new[8] = 2;
+ document_id_old_to_new[13] = 3;
+
+ DocumentId new_last_added_document_id = 3;
+ EXPECT_THAT(integer_index->Optimize(document_id_old_to_new,
+ new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(integer_index->last_added_document_id(),
+ Eq(new_last_added_document_id));
+
+ // Verify index and query API still work normally after Optimize().
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/1, /*key_upper=*/1),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/3, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/0, /*key_upper=*/0),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/4, /*key_upper=*/4),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections))));
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/2, /*key_upper=*/2),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections))));
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{123});
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/123, /*key_upper=*/123),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, OptimizeMultiplePropertyPaths) {
+ constexpr std::string_view kPropertyPath1 = "prop1";
+ constexpr SectionId kSectionId1 = 0;
+ constexpr std::string_view kPropertyPath2 = "prop2";
+ constexpr SectionId kSectionId2 = 1;
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ // Doc id = 1: insert 2 data for "prop1", "prop2"
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/1, kSectionId2,
+ /*keys=*/{1});
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/1, kSectionId1,
+ /*keys=*/{2});
+
+ // Doc id = 2: insert 1 data for "prop1".
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/2, kSectionId1,
+ /*keys=*/{3});
+
+ // Doc id = 3: insert 2 data for "prop2"
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/3, kSectionId2,
+ /*keys=*/{4});
+
+ // Doc id = 5: insert 3 data for "prop1", "prop2"
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/5, kSectionId2,
+ /*keys=*/{1});
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/5, kSectionId1,
+ /*keys=*/{2});
+
+ // Doc id = 8: insert 1 data for "prop2".
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/8, kSectionId2,
+ /*keys=*/{3});
+
+ // Doc id = 13: insert 1 data for "prop1".
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/13, kSectionId1,
+ /*keys=*/{4});
+
+ // Delete doc id = 3, 5, compress and keep the rest.
+ std::vector<DocumentId> document_id_old_to_new(14, kInvalidDocumentId);
+ document_id_old_to_new[1] = 0;
+ document_id_old_to_new[2] = 1;
+ document_id_old_to_new[8] = 2;
+ document_id_old_to_new[13] = 3;
+
+ DocumentId new_last_added_document_id = 3;
+ EXPECT_THAT(integer_index->Optimize(document_id_old_to_new,
+ new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(integer_index->last_added_document_id(),
+ Eq(new_last_added_document_id));
+
+ // Verify index and query API still work normally after Optimize().
+ // Key = 1
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath1, /*key_lower=*/1,
+ /*key_upper=*/1),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath2, /*key_lower=*/1,
+ /*key_upper=*/1),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/0, std::vector<SectionId>{kSectionId2}))));
+
+ // key = 2
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath1, /*key_lower=*/2,
+ /*key_upper=*/2),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/0, std::vector<SectionId>{kSectionId1}))));
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath2, /*key_lower=*/2,
+ /*key_upper=*/2),
+ IsOkAndHolds(IsEmpty()));
+
+ // key = 3
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath1, /*key_lower=*/3,
+ /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/1, std::vector<SectionId>{kSectionId1}))));
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath2, /*key_lower=*/3,
+ /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/2, std::vector<SectionId>{kSectionId2}))));
+
+ // key = 4
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath1, /*key_lower=*/4,
+ /*key_upper=*/4),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/3, std::vector<SectionId>{kSectionId1}))));
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath2, /*key_lower=*/4,
+ /*key_upper=*/4),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, OptimizeShouldDiscardEmptyPropertyStorage) {
+ constexpr std::string_view kPropertyPath1 = "prop1";
+ constexpr SectionId kSectionId1 = 0;
+ constexpr std::string_view kPropertyPath2 = "prop2";
+ constexpr SectionId kSectionId2 = 1;
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ // Doc id = 1: insert 2 data for "prop1", "prop2"
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/1, kSectionId2,
+ /*keys=*/{1});
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/1, kSectionId1,
+ /*keys=*/{2});
+
+ // Doc id = 2: insert 1 data for "prop1".
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/2, kSectionId1,
+ /*keys=*/{3});
+
+ // Doc id = 3: insert 2 data for "prop2"
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/3, kSectionId2,
+ /*keys=*/{4});
+
+ // Delete doc id = 1, 3, compress and keep the rest.
+ std::vector<DocumentId> document_id_old_to_new(4, kInvalidDocumentId);
+ document_id_old_to_new[2] = 0;
+
+ DocumentId new_last_added_document_id = 0;
+ EXPECT_THAT(integer_index->Optimize(document_id_old_to_new,
+ new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(integer_index->last_added_document_id(),
+ Eq(new_last_added_document_id));
+
+ // All data in "prop2" as well as the underlying storage should be deleted, so
+ // when querying "prop2", we should get empty result.
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath2,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(IsEmpty()));
+ if (std::is_same_v<IntegerIndex, TypeParam>) {
+ std::string prop2_storage_working_path =
+ absl_ports::StrCat(this->working_path_, "/", kPropertyPath2);
+ EXPECT_THAT(
+ this->filesystem_.DirectoryExists(prop2_storage_working_path.c_str()),
+ IsFalse());
+ }
+
+ // Verify we can still index and query for "prop2".
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/100, kSectionId2,
+ /*keys=*/{123});
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath2,
+ /*key_lower=*/123, /*key_upper=*/123),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/100, std::vector<SectionId>{kSectionId2}))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, OptimizeOutOfRangeDocumentId) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{3});
+
+ // Create document_id_old_to_new with size = 2. Optimize should handle out of
+ // range DocumentId properly.
+ std::vector<DocumentId> document_id_old_to_new(2, kInvalidDocumentId);
+
+ EXPECT_THAT(integer_index->Optimize(
+ document_id_old_to_new,
+ /*new_last_added_document_id=*/kInvalidDocumentId),
+ IsOk());
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ // Verify all data are discarded after Optimize().
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, OptimizeDeleteAll) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{3});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/8,
+ kDefaultSectionId, /*keys=*/{4});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/13,
+ kDefaultSectionId, /*keys=*/{2});
+
+ // Delete all documents.
+ std::vector<DocumentId> document_id_old_to_new(14, kInvalidDocumentId);
+
+ EXPECT_THAT(integer_index->Optimize(
+ document_id_old_to_new,
+ /*new_last_added_document_id=*/kInvalidDocumentId),
+ IsOk());
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ // Verify all data are discarded after Optimize().
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, Clear) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), /*property_path=*/"A", /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), /*property_path=*/"B", /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{3});
+ integer_index->set_last_added_document_id(1);
+
+ ASSERT_THAT(integer_index->last_added_document_id(), Eq(1));
+ ASSERT_THAT(
+ this->Query(integer_index.get(), /*property_path=*/"A", /*key_lower=*/1,
+ /*key_upper=*/1),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/0, std::vector<SectionId>{kDefaultSectionId}))));
+ ASSERT_THAT(
+ this->Query(integer_index.get(), /*property_path=*/"B", /*key_lower=*/3,
+ /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/1, std::vector<SectionId>{kDefaultSectionId}))));
+
+ // After resetting, last_added_document_id should be set to
+ // kInvalidDocumentId, and the previous added keys should be deleted.
+ ICING_ASSERT_OK(integer_index->Clear());
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(kInvalidDocumentId));
+ EXPECT_THAT(
+ this->Query(integer_index.get(), /*property_path=*/"A", /*key_lower=*/1,
+ /*key_upper=*/1),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ this->Query(integer_index.get(), /*property_path=*/"B", /*key_lower=*/3,
+ /*key_upper=*/3),
+ IsOkAndHolds(IsEmpty()));
+
+ // Integer index should be able to work normally after Clear().
+ Index(integer_index.get(), /*property_path=*/"A", /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{123});
+ Index(integer_index.get(), /*property_path=*/"B", /*document_id=*/4,
+ kDefaultSectionId, /*keys=*/{456});
+ integer_index->set_last_added_document_id(4);
+
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(4));
+ EXPECT_THAT(
+ this->Query(integer_index.get(), /*property_path=*/"A", /*key_lower=*/123,
+ /*key_upper=*/123),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/3, std::vector<SectionId>{kDefaultSectionId}))));
+ EXPECT_THAT(
+ this->Query(integer_index.get(), /*property_path=*/"B", /*key_lower=*/456,
+ /*key_upper=*/456),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/4, std::vector<SectionId>{kDefaultSectionId}))));
+}
+
+struct IntegerIndexTestParam {
+ int32_t num_data_threshold_for_bucket_split;
+ bool pre_mapping_fbv;
+
+ explicit IntegerIndexTestParam(int32_t num_data_threshold_for_bucket_split_in,
+ bool pre_mapping_fbv_in)
+ : num_data_threshold_for_bucket_split(
+ num_data_threshold_for_bucket_split_in),
+ pre_mapping_fbv(pre_mapping_fbv_in) {}
+};
+
+// Tests for persistent integer index only
+class IntegerIndexTest
+ : public NumericIndexIntegerTest<IntegerIndex>,
+ public ::testing::WithParamInterface<IntegerIndexTestParam> {};
+
+TEST_P(IntegerIndexTest, InvalidWorkingPath) {
+ EXPECT_THAT(
+ IntegerIndex::Create(filesystem_, "/dev/null/integer_index_test",
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_P(IntegerIndexTest, InitializeNewFiles) {
+ {
+ ASSERT_FALSE(filesystem_.DirectoryExists(working_path_.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ ICING_ASSERT_OK(integer_index->PersistToDisk());
+ }
+
+ // Metadata file should be initialized correctly for both info and crcs
+ // sections.
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/", IntegerIndex::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_TRUE(metadata_sfd.is_valid());
+
+ // Check info section
+ Info info;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
+ IntegerIndex::kInfoMetadataFileOffset));
+ EXPECT_THAT(info.magic, Eq(Info::kMagic));
+ EXPECT_THAT(info.last_added_document_id, Eq(kInvalidDocumentId));
+ EXPECT_THAT(info.num_data_threshold_for_bucket_split,
+ Eq(GetParam().num_data_threshold_for_bucket_split));
+
+ // Check crcs section
+ Crcs crcs;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
+ IntegerIndex::kCrcsMetadataFileOffset));
+ // There are no storages initially, so storages_crc should be 0.
+ EXPECT_THAT(crcs.component_crcs.storages_crc, Eq(0));
+ EXPECT_THAT(crcs.component_crcs.info_crc,
+ Eq(Crc32(std::string_view(reinterpret_cast<const char*>(&info),
+ sizeof(Info)))
+ .Get()));
+ EXPECT_THAT(crcs.all_crc,
+ Eq(Crc32(std::string_view(
+ reinterpret_cast<const char*>(&crcs.component_crcs),
+ sizeof(Crcs::ComponentCrcs)))
+ .Get()));
+}
+
+TEST_P(IntegerIndexTest,
+ InitializationShouldFailWithoutPersistToDiskOrDestruction) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ // Insert some data.
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/20, /*keys=*/{0, 100, -100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/2, /*keys=*/{3, -1000, 500});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/15, /*keys=*/{-6, 321, 98});
+
+ // Without calling PersistToDisk, checksums will not be recomputed or synced
+ // to disk, so initializing another instance on the same files should fail.
+ EXPECT_THAT(
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+TEST_P(IntegerIndexTest, InitializationShouldSucceedWithPersistToDisk) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index1,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ // Insert some data.
+ Index(integer_index1.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/20, /*keys=*/{0, 100, -100});
+ Index(integer_index1.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/2, /*keys=*/{3, -1000, 500});
+ Index(integer_index1.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/15, /*keys=*/{-6, 321, 98});
+ integer_index1->set_last_added_document_id(2);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<DocHitInfo> doc_hit_info_vec,
+ Query(integer_index1.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()));
+
+ // After calling PersistToDisk, all checksums should be recomputed and synced
+ // correctly to disk, so initializing another instance on the same files
+ // should succeed, and we should be able to get the same contents.
+ ICING_EXPECT_OK(integer_index1->PersistToDisk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index2,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+ EXPECT_THAT(integer_index2->last_added_document_id(), Eq(2));
+ EXPECT_THAT(Query(integer_index2.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(ElementsAreArray(doc_hit_info_vec.begin(),
+ doc_hit_info_vec.end())));
+}
+
+TEST_P(IntegerIndexTest, InitializationShouldSucceedAfterDestruction) {
+ std::vector<DocHitInfo> doc_hit_info_vec;
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ // Insert some data.
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/20, /*keys=*/{0, 100, -100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/2, /*keys=*/{3, -1000, 500});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/15, /*keys=*/{-6, 321, 98});
+ integer_index->set_last_added_document_id(2);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ doc_hit_info_vec,
+ Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()));
+ }
+
+ {
+ // The previous instance went out of scope and was destructed. Although we
+ // didn't call PersistToDisk explicitly, the destructor should invoke it and
+ // thus initializing another instance on the same files should succeed, and
+ // we should be able to get the same contents.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(2));
+ EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(ElementsAreArray(doc_hit_info_vec.begin(),
+ doc_hit_info_vec.end())));
+ }
+}
+
+TEST_P(IntegerIndexTest, InitializeExistingFilesWithWrongAllCrcShouldFail) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+ // Insert some data.
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/20, /*keys=*/{0, 100, -100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/2, /*keys=*/{3, -1000, 500});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/15, /*keys=*/{-6, 321, 98});
+
+ ICING_ASSERT_OK(integer_index->PersistToDisk());
+ }
+
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/", IntegerIndex::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_TRUE(metadata_sfd.is_valid());
+
+ Crcs crcs;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
+ IntegerIndex::kCrcsMetadataFileOffset));
+
+ // Manually corrupt all_crc
+ crcs.all_crc += kCorruptedValueOffset;
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(),
+ IntegerIndexStorage::kCrcsMetadataFileOffset,
+ &crcs, sizeof(Crcs)));
+ metadata_sfd.reset();
+
+ {
+ // Attempt to create the integer index with metadata containing corrupted
+ // all_crc. This should fail.
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+ integer_index_or =
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv);
+ EXPECT_THAT(integer_index_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(integer_index_or.status().error_message(),
+ HasSubstr("Invalid all crc"));
+ }
+}
+
+TEST_P(IntegerIndexTest, InitializeExistingFilesWithCorruptedInfoShouldFail) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+ // Insert some data.
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/20, /*keys=*/{0, 100, -100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/2, /*keys=*/{3, -1000, 500});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/15, /*keys=*/{-6, 321, 98});
+
+ ICING_ASSERT_OK(integer_index->PersistToDisk());
+ }
+
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/", IntegerIndex::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_TRUE(metadata_sfd.is_valid());
+
+ Info info;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
+ IntegerIndex::kInfoMetadataFileOffset));
+
+ // Modify info, but don't update the checksum. This would be similar to
+ // corruption of info.
+ info.last_added_document_id += kCorruptedValueOffset;
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(),
+ IntegerIndex::kInfoMetadataFileOffset, &info,
+ sizeof(Info)));
+ metadata_sfd.reset();
+
+ {
+ // Attempt to create the integer index with info that doesn't match its
+ // checksum and confirm that it fails.
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+ integer_index_or =
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv);
+ EXPECT_THAT(integer_index_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(integer_index_or.status().error_message(),
+ HasSubstr("Invalid info crc"));
+ }
+}
+
+TEST_P(IntegerIndexTest,
+ InitializeExistingFilesWithCorruptedStoragesShouldFail) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+ // Insert some data.
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/20, /*keys=*/{0, 100, -100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/2, /*keys=*/{3, -1000, 500});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/15, /*keys=*/{-6, 321, 98});
+
+ ICING_ASSERT_OK(integer_index->PersistToDisk());
+ }
+
+ {
+ // Corrupt integer index storage for kDefaultTestPropertyPath manually.
+ PostingListIntegerIndexSerializer posting_list_integer_index_serializer;
+ std::string storage_working_path =
+ absl_ports::StrCat(working_path_, "/", kDefaultTestPropertyPath);
+ ASSERT_TRUE(filesystem_.DirectoryExists(storage_working_path.c_str()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, std::move(storage_working_path),
+ IntegerIndexStorage::Options(
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv),
+ &posting_list_integer_index_serializer));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/3, /*section_id=*/4,
+ /*new_keys=*/{3, 4, 5}));
+
+ ICING_ASSERT_OK(storage->PersistToDisk());
+ }
+
+ {
+ // Attempt to create the integer index with corrupted storages. This should
+ // fail.
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+ integer_index_or =
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv);
+ EXPECT_THAT(integer_index_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(integer_index_or.status().error_message(),
+ HasSubstr("Invalid storages crc"));
+ }
+}
+
+TEST_P(
+ IntegerIndexTest,
+ InitializeExistingFilesWithMismatchNumDataThresholdForBucketSplitShouldFail) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+ // Insert some data.
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/20, /*keys=*/{0, 100, -100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/2, /*keys=*/{3, -1000, 500});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/15, /*keys=*/{-6, 321, 98});
+
+ ICING_ASSERT_OK(integer_index->PersistToDisk());
+ }
+
+ {
+ // Attempt to create the integer index with different
+ // num_data_threshold_for_bucket_split. This should fail.
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+ integer_index_or = IntegerIndex::Create(
+ filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split + 1,
+ GetParam().pre_mapping_fbv);
+ EXPECT_THAT(integer_index_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(integer_index_or.status().error_message(),
+ HasSubstr("Mismatch num_data_threshold_for_bucket_split"));
+ }
+}
+
+TEST_P(IntegerIndexTest, WildcardStoragePersistenceQuery) {
+ // This test sets its schema assuming that max property storages == 32.
+ ASSERT_THAT(IntegerIndex::kMaxPropertyStorages, Eq(32));
+
+ PropertyConfigProto int_property_config =
+ PropertyConfigBuilder()
+ .SetName("otherProperty1")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .Build();
+ // Create a schema with two types:
+ // - TypeA has 34 properties:
+ // 'desiredProperty', 'otherProperty'*, 'undesiredProperty'
+ // - TypeB has 2 properties: 'anotherProperty', 'desiredProperty'
+ // 1. The 32 'otherProperty's will consume all of the individual storages
+ // 2. TypeA.desiredProperty and TypeB.anotherProperty will both be assigned
+ // SectionId = 0 for their respective types.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(int_property_config)
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty2"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty3"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty4"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty5"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty6"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty7"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty8"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty9"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty10"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty11"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty12"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty13"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty14"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty15"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty16"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty17"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty18"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty19"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty20"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty21"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty22"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty23"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty24"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty25"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty26"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty27"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty28"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty29"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty30"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty31"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty32"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("desiredProperty"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("undesiredProperty")))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeB")
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("anotherProperty"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("desiredProperty")))
+ .Build();
+ ICING_ASSERT_OK(this->schema_store_->SetSchema(
+ schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Ids are assigned alphabetically, so the property ids are:
+ // TypeA.desiredProperty = 0
+ // TypeA.otherPropertyN = N
+ // TypeA.undesiredProperty = 33
+ // TypeB.anotherProperty = 0
+ // TypeB.desiredProperty = 1
+ SectionId typea_desired_prop_id = 0;
+ SectionId typea_undesired_prop_id = 33;
+ SectionId typeb_another_prop_id = 0;
+ SectionId typeb_desired_prop_id = 1;
+ std::string desired_property = "desiredProperty";
+ std::string undesired_property = "undesiredProperty";
+ std::string another_property = "anotherProperty";
+
+ // Put 11 docs of "TypeA" into the document store.
+ DocumentProto doc =
+ DocumentBuilder().SetKey("ns1", "uri0").SetSchema("TypeA").Build();
+ ICING_ASSERT_OK(this->doc_store_->Put(doc));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri1").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri2").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri3").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri4").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri5").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri6").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri7").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri8").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri9").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri10").Build()));
+
+ // Put 10 docs of "TypeB" into the document store.
+ doc = DocumentBuilder(doc).SetUri("uri11").SetSchema("TypeB").Build();
+ ICING_ASSERT_OK(this->doc_store_->Put(doc));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri12").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri13").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri14").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri15").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri16").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri17").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri18").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri19").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri20").Build()));
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ // Index numeric content for other properties to force our property into the
+ // wildcard storage.
+ std::string other_property_path = "otherProperty";
+ for (int i = 1; i <= IntegerIndex::kMaxPropertyStorages; ++i) {
+ Index(integer_index.get(),
+ absl_ports::StrCat(other_property_path, std::to_string(i)),
+ /*document_id=*/0, /*section_id=*/i, /*keys=*/{i});
+ }
+
+ // Index numeric content for TypeA.desiredProperty
+ Index(integer_index.get(), desired_property, /*document_id=*/0,
+ typea_desired_prop_id, /*keys=*/{1});
+ Index(integer_index.get(), desired_property, /*document_id=*/1,
+ typea_desired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), desired_property, /*document_id=*/2,
+ typea_desired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), desired_property, /*document_id=*/3,
+ typea_desired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), desired_property, /*document_id=*/4,
+ typea_desired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), desired_property, /*document_id=*/5,
+ typea_desired_prop_id, /*keys=*/{2});
+
+ // Index the same numeric content for TypeA.undesiredProperty
+ Index(integer_index.get(), undesired_property, /*document_id=*/6,
+ typea_undesired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), undesired_property, /*document_id=*/7,
+ typea_undesired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), undesired_property, /*document_id=*/8,
+ typea_undesired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), undesired_property, /*document_id=*/9,
+ typea_undesired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), undesired_property, /*document_id=*/10,
+ typea_undesired_prop_id, /*keys=*/{2});
+
+ // Index the same numeric content for TypeB.undesiredProperty
+ Index(integer_index.get(), another_property, /*document_id=*/11,
+ typeb_another_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), another_property, /*document_id=*/12,
+ typeb_another_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), another_property, /*document_id=*/13,
+ typeb_another_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), another_property, /*document_id=*/14,
+ typeb_another_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), another_property, /*document_id=*/15,
+ typeb_another_prop_id, /*keys=*/{2});
+
+ // Finally, index the same numeric content for TypeB.desiredProperty
+ Index(integer_index.get(), desired_property, /*document_id=*/16,
+ typeb_desired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), desired_property, /*document_id=*/17,
+ typeb_desired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), desired_property, /*document_id=*/18,
+ typeb_desired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), desired_property, /*document_id=*/19,
+ typeb_desired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), desired_property, /*document_id=*/20,
+ typeb_desired_prop_id, /*keys=*/{2});
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ EXPECT_THAT(integer_index->num_property_indices(), Eq(33));
+
+ // Only the hits for 'desired_prop_id' should be returned.
+ std::vector<SectionId> expected_sections_typea = {typea_desired_prop_id};
+ std::vector<SectionId> expected_sections_typeb = {typeb_desired_prop_id};
+ EXPECT_THAT(
+ Query(integer_index.get(), desired_property,
+ /*key_lower=*/2, /*key_upper=*/2),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/20, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/17, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections_typea))));
+
+ EXPECT_THAT(
+ Query(integer_index.get(), desired_property,
+ /*key_lower=*/1, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/20, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/17, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/16, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections_typea))));
+}
+
+TEST_P(IntegerIndexTest,
+ IntegerIndexShouldWorkAfterOptimizeAndReinitialization) {
+ constexpr std::string_view kPropertyPath1 = "prop1";
+ constexpr SectionId kSectionId1 = 0;
+ constexpr std::string_view kPropertyPath2 = "prop2";
+ constexpr SectionId kSectionId2 = 1;
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ // Doc id = 1: insert 2 data for "prop1", "prop2"
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/1, kSectionId2,
+ /*keys=*/{1});
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/1, kSectionId1,
+ /*keys=*/{2});
+
+ // Doc id = 2: insert 1 data for "prop1".
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/2, kSectionId1,
+ /*keys=*/{3});
+
+ // Doc id = 3: insert 2 data for "prop2"
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/3, kSectionId2,
+ /*keys=*/{4});
+
+ // Doc id = 5: insert 3 data for "prop1", "prop2"
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/5, kSectionId2,
+ /*keys=*/{1});
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/5, kSectionId1,
+ /*keys=*/{2});
+
+ // Doc id = 8: insert 1 data for "prop2".
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/8, kSectionId2,
+ /*keys=*/{3});
+
+ // Doc id = 13: insert 1 data for "prop1".
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/13, kSectionId1,
+ /*keys=*/{4});
+
+ // Delete doc id = 3, 5, compress and keep the rest.
+ std::vector<DocumentId> document_id_old_to_new(14, kInvalidDocumentId);
+ document_id_old_to_new[1] = 0;
+ document_id_old_to_new[2] = 1;
+ document_id_old_to_new[8] = 2;
+ document_id_old_to_new[13] = 3;
+
+ DocumentId new_last_added_document_id = 3;
+ EXPECT_THAT(integer_index->Optimize(document_id_old_to_new,
+ new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(integer_index->last_added_document_id(),
+ Eq(new_last_added_document_id));
+ }
+
+ {
+ // Reinitialize IntegerIndex and verify index and query API still work
+ // normally.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ // Key = 1
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath1, /*key_lower=*/1,
+ /*key_upper=*/1),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath2, /*key_lower=*/1,
+ /*key_upper=*/1),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/0, std::vector<SectionId>{kSectionId2}))));
+
+ // key = 2
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath1, /*key_lower=*/2,
+ /*key_upper=*/2),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/0, std::vector<SectionId>{kSectionId1}))));
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath2, /*key_lower=*/2,
+ /*key_upper=*/2),
+ IsOkAndHolds(IsEmpty()));
+
+ // key = 3
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath1, /*key_lower=*/3,
+ /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/1, std::vector<SectionId>{kSectionId1}))));
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath2, /*key_lower=*/3,
+ /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/2, std::vector<SectionId>{kSectionId2}))));
+
+ // key = 4
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath1, /*key_lower=*/4,
+ /*key_upper=*/4),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/3, std::vector<SectionId>{kSectionId1}))));
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath2, /*key_lower=*/4,
+ /*key_upper=*/4),
+ IsOkAndHolds(IsEmpty()));
+
+ // Index new data.
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/100, kSectionId2,
+ /*keys=*/{123});
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/100, kSectionId1,
+ /*keys=*/{456});
+ EXPECT_THAT(
+ Query(integer_index.get(), kPropertyPath2, /*key_lower=*/123,
+ /*key_upper=*/456),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/100, std::vector<SectionId>{kSectionId2}))));
+ EXPECT_THAT(
+ Query(integer_index.get(), kPropertyPath1, /*key_lower=*/123,
+ /*key_upper=*/456),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/100, std::vector<SectionId>{kSectionId1}))));
+ }
+}
+
+TEST_P(IntegerIndexTest, WildcardStorageWorksAfterOptimize) {
+ // This test sets its schema assuming that max property storages == 32.
+ ASSERT_THAT(IntegerIndex::kMaxPropertyStorages, Eq(32));
+
+ PropertyConfigProto int_property_config =
+ PropertyConfigBuilder()
+ .SetName("otherProperty1")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .Build();
+ // Create a schema with two types:
+ // - TypeA has 34 properties:
+ // 'desiredProperty', 'otherProperty'*, 'undesiredProperty'
+ // - TypeB has 2 properties: 'anotherProperty', 'desiredProperty'
+ // 1. The 32 'otherProperty's will consume all of the individual storages
+ // 2. TypeA.desiredProperty and TypeB.anotherProperty will both be assigned
+ // SectionId = 0 for their respective types.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(int_property_config)
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty2"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty3"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty4"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty5"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty6"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty7"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty8"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty9"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty10"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty11"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty12"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty13"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty14"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty15"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty16"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty17"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty18"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty19"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty20"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty21"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty22"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty23"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty24"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty25"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty26"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty27"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty28"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty29"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty30"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty31"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty32"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("desiredProperty"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("undesiredProperty")))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeB")
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("anotherProperty"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("desiredProperty")))
+ .Build();
+ ICING_ASSERT_OK(this->schema_store_->SetSchema(
+ schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Ids are assigned alphabetically, so the property ids are:
+ // TypeA.desiredProperty = 0
+ // TypeA.otherPropertyN = N
+ // TypeA.undesiredProperty = 33
+ // TypeB.anotherProperty = 0
+ // TypeB.desiredProperty = 1
+ SectionId typea_desired_prop_id = 0;
+ SectionId typea_undesired_prop_id = 33;
+ SectionId typeb_another_prop_id = 0;
+ SectionId typeb_desired_prop_id = 1;
+ std::string desired_property = "desiredProperty";
+ std::string undesired_property = "undesiredProperty";
+ std::string another_property = "anotherProperty";
+
+ // Only the hits for 'desired_prop_id' should be returned.
+ std::vector<SectionId> expected_sections_typea = {typea_desired_prop_id};
+ std::vector<SectionId> expected_sections_typeb = {typeb_desired_prop_id};
+
+ // Put 11 docs of "TypeA" into the document store.
+ DocumentProto doc =
+ DocumentBuilder().SetKey("ns1", "uri0").SetSchema("TypeA").Build();
+ ICING_ASSERT_OK(this->doc_store_->Put(doc));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri1").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri2").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri3").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri4").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri5").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri6").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri7").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri8").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri9").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri10").Build()));
+
+ // Put 10 docs of "TypeB" into the document store.
+ doc = DocumentBuilder(doc).SetUri("uri11").SetSchema("TypeB").Build();
+ ICING_ASSERT_OK(this->doc_store_->Put(doc));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri12").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri13").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri14").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri15").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri16").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri17").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri18").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri19").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri20").Build()));
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ // Index numeric content for other properties to force our property into the
+ // wildcard storage.
+ std::string other_property_path = "otherProperty";
+ for (int i = 1; i <= IntegerIndex::kMaxPropertyStorages; ++i) {
+ Index(integer_index.get(),
+ absl_ports::StrCat(other_property_path, std::to_string(i)),
+ /*document_id=*/0, /*section_id=*/i, /*keys=*/{i});
+ }
+
+ // Index numeric content for TypeA.desiredProperty
+ Index(integer_index.get(), desired_property, /*document_id=*/0,
+ typea_desired_prop_id, /*keys=*/{1});
+ Index(integer_index.get(), desired_property, /*document_id=*/1,
+ typea_desired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), desired_property, /*document_id=*/2,
+ typea_desired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), desired_property, /*document_id=*/3,
+ typea_desired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), desired_property, /*document_id=*/4,
+ typea_desired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), desired_property, /*document_id=*/5,
+ typea_desired_prop_id, /*keys=*/{2});
+
+ // Index the same numeric content for TypeA.undesiredProperty
+ Index(integer_index.get(), undesired_property, /*document_id=*/6,
+ typea_undesired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), undesired_property, /*document_id=*/7,
+ typea_undesired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), undesired_property, /*document_id=*/8,
+ typea_undesired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), undesired_property, /*document_id=*/9,
+ typea_undesired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), undesired_property, /*document_id=*/10,
+ typea_undesired_prop_id, /*keys=*/{2});
+
+ // Index the same numeric content for TypeB.undesiredProperty
+ Index(integer_index.get(), another_property, /*document_id=*/11,
+ typeb_another_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), another_property, /*document_id=*/12,
+ typeb_another_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), another_property, /*document_id=*/13,
+ typeb_another_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), another_property, /*document_id=*/14,
+ typeb_another_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), another_property, /*document_id=*/15,
+ typeb_another_prop_id, /*keys=*/{2});
+
+ // Finally, index the same numeric content for TypeB.desiredProperty
+ Index(integer_index.get(), desired_property, /*document_id=*/16,
+ typeb_desired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), desired_property, /*document_id=*/17,
+ typeb_desired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), desired_property, /*document_id=*/18,
+ typeb_desired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), desired_property, /*document_id=*/19,
+ typeb_desired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), desired_property, /*document_id=*/20,
+ typeb_desired_prop_id, /*keys=*/{2});
+
+ ICING_ASSERT_OK(doc_store_->Delete(/*document_id=*/3,
+ clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK(doc_store_->Delete(/*document_id=*/5,
+ clock_.GetSystemTimeMilliseconds()));
+ // Delete doc id = 3, 5, compress and keep the rest.
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<DocumentId> document_id_old_to_new,
+ CompactDocStore());
+
+ DocumentId new_last_added_document_id = 18;
+ EXPECT_THAT(integer_index->Optimize(document_id_old_to_new,
+ new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(integer_index->last_added_document_id(),
+ Eq(new_last_added_document_id));
+
+ EXPECT_THAT(
+ Query(integer_index.get(), desired_property,
+ /*key_lower=*/2, /*key_upper=*/2),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/20 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/17 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections_typea))));
+
+ EXPECT_THAT(
+ Query(integer_index.get(), desired_property,
+ /*key_lower=*/1, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/20 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/17 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/16 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections_typea))));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ EXPECT_THAT(integer_index->num_property_indices(), Eq(33));
+
+ EXPECT_THAT(
+ Query(integer_index.get(), desired_property,
+ /*key_lower=*/2, /*key_upper=*/2),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/20 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/17 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections_typea))));
+
+ EXPECT_THAT(
+ Query(integer_index.get(), desired_property,
+ /*key_lower=*/1, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/20 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/17 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/16 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections_typea))));
+}
+
+// This test covers the situation where Optimize causes us to throw out some of
+// the individual index storages (because they don't have any hits anymore).
+// In this case, any properties that added content to the wildcard storage (even
+// if all of their content was also deleted) should still be placed in the
+// wildcard storage.
+TEST_P(IntegerIndexTest, WildcardStorageAvailableIndicesAfterOptimize) {
+ // This test sets its schema assuming that max property storages == 32.
+ ASSERT_THAT(IntegerIndex::kMaxPropertyStorages, Eq(32));
+
+ PropertyConfigProto int_property_config =
+ PropertyConfigBuilder()
+ .SetName("otherProperty1")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .Build();
+ // Create a schema with two types:
+ // - TypeA has 34 properties:
+ // 'desiredProperty', 'otherProperty'*, 'undesiredProperty'
+ // - TypeB has 2 properties: 'anotherProperty', 'desiredProperty'
+ // 1. The 32 'otherProperty's will consume all of the individual storages
+ // 2. TypeA.desiredProperty and TypeB.anotherProperty will both be assigned
+ // SectionId = 0 for their respective types.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(int_property_config)
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty2"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty3"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty4"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty5"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty6"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty7"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty8"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty9"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty10"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty11"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty12"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty13"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty14"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty15"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty16"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty17"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty18"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty19"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty20"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty21"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty22"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty23"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty24"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty25"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty26"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty27"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty28"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty29"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty30"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty31"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty32"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("desiredProperty"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("undesiredProperty")))
+ .Build();
+ ICING_ASSERT_OK(this->schema_store_->SetSchema(
+ schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Ids are assigned alphabetically, so the property ids are:
+ // TypeA.desiredProperty = 0
+ // TypeA.otherPropertyN = N
+ // TypeA.undesiredProperty = 33
+ // TypeB.anotherProperty = 0
+ // TypeB.desiredProperty = 1
+ SectionId typea_desired_prop_id = 0;
+ SectionId typea_undesired_prop_id = 33;
+ SectionId typea_other1_prop_id = 1;
+ std::string desired_property = "desiredProperty";
+ std::string undesired_property = "undesiredProperty";
+ std::string another_property = "anotherProperty";
+ std::string other_property_1 = "otherProperty1";
+
+ // Only the hits for 'desired_prop_id' should be returned.
+ std::vector<SectionId> expected_sections_typea = {typea_desired_prop_id};
+
+ // Put 11 docs of "TypeA" into the document store.
+ DocumentProto doc =
+ DocumentBuilder().SetKey("ns1", "uri0").SetSchema("TypeA").Build();
+ ICING_ASSERT_OK(this->doc_store_->Put(doc));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri1").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri2").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri3").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri4").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri5").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri6").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri7").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri8").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri9").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri10").Build()));
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ // Index numeric content for other properties to force our property into the
+ // wildcard storage.
+ std::string other_property_path = "otherProperty";
+ for (int i = 1; i <= IntegerIndex::kMaxPropertyStorages; ++i) {
+ Index(integer_index.get(),
+ absl_ports::StrCat(other_property_path, std::to_string(i)),
+ /*document_id=*/0, /*section_id=*/i, /*keys=*/{i});
+ }
+
+ // Index numeric content for TypeA.desiredProperty
+ Index(integer_index.get(), desired_property, /*document_id=*/0,
+ typea_desired_prop_id, /*keys=*/{1});
+ Index(integer_index.get(), desired_property, /*document_id=*/1,
+ typea_desired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), desired_property, /*document_id=*/2,
+ typea_desired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), desired_property, /*document_id=*/3,
+ typea_desired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), desired_property, /*document_id=*/4,
+ typea_desired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), desired_property, /*document_id=*/5,
+ typea_desired_prop_id, /*keys=*/{2});
+
+ // Index the same numeric content for TypeA.undesiredProperty
+ Index(integer_index.get(), undesired_property, /*document_id=*/6,
+ typea_undesired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), undesired_property, /*document_id=*/7,
+ typea_undesired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), undesired_property, /*document_id=*/8,
+ typea_undesired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), undesired_property, /*document_id=*/9,
+ typea_undesired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), undesired_property, /*document_id=*/10,
+ typea_undesired_prop_id, /*keys=*/{2});
+
+ // Delete all the docs that had hits in otherProperty* and
+ // undesiredProperty.
+ ICING_ASSERT_OK(doc_store_->Delete(/*document_id=*/0,
+ clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK(doc_store_->Delete(/*document_id=*/6,
+ clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK(doc_store_->Delete(/*document_id=*/7,
+ clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK(doc_store_->Delete(/*document_id=*/8,
+ clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK(doc_store_->Delete(/*document_id=*/9,
+ clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK(doc_store_->Delete(/*document_id=*/10,
+ clock_.GetSystemTimeMilliseconds()));
+ // Delete doc id = 0, 6, 7, 8, 9, 10. Compress and keep the rest.
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<DocumentId> document_id_old_to_new,
+ CompactDocStore());
+
+ DocumentId new_last_added_document_id = 5 - 1;
+ EXPECT_THAT(integer_index->Optimize(document_id_old_to_new,
+ new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(integer_index->last_added_document_id(),
+ Eq(new_last_added_document_id));
+
+ EXPECT_THAT(
+ Query(integer_index.get(), desired_property,
+ /*key_lower=*/2, /*key_upper=*/2),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/5 - 1, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/2 - 1, expected_sections_typea))));
+
+ EXPECT_THAT(
+ Query(integer_index.get(), desired_property,
+ /*key_lower=*/1, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/5 - 1, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/2 - 1, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/1 - 1, expected_sections_typea))));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ EXPECT_THAT(integer_index->num_property_indices(), Eq(1));
+
+ // Add a new doc (docid==5) and a hit for desiredProperty. This should still
+ // be placed into the wildcard integer storage.
+ doc = DocumentBuilder().SetKey("ns1", "uri11").SetSchema("TypeA").Build();
+ ICING_ASSERT_OK(this->doc_store_->Put(doc));
+ Index(integer_index.get(), desired_property, /*document_id=*/5,
+ typea_desired_prop_id, /*keys=*/{12});
+ EXPECT_THAT(integer_index->num_property_indices(), Eq(1));
+
+ EXPECT_THAT(Query(integer_index.get(), desired_property,
+ /*key_lower=*/12, /*key_upper=*/12),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/5, expected_sections_typea))));
+
+ // Add a new doc (docid==6) and a hit for undesiredProperty. This should still
+ // be placed into the wildcard integer storage.
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri12").Build()));
+ Index(integer_index.get(), undesired_property, /*document_id=*/6,
+ typea_undesired_prop_id, /*keys=*/{3});
+ EXPECT_THAT(integer_index->num_property_indices(), Eq(1));
+
+ expected_sections_typea = {typea_undesired_prop_id};
+ EXPECT_THAT(Query(integer_index.get(), undesired_property,
+ /*key_lower=*/3, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/6, expected_sections_typea))));
+
+ // Add a new doc (docid==7) and a hit for otherProperty1. This should be given
+ // its own individual storage.
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri13").Build()));
+ Index(integer_index.get(), other_property_1, /*document_id=*/7,
+ typea_other1_prop_id, /*keys=*/{3});
+ EXPECT_THAT(integer_index->num_property_indices(), Eq(2));
+
+ expected_sections_typea = {typea_other1_prop_id};
+ EXPECT_THAT(Query(integer_index.get(), other_property_1,
+ /*key_lower=*/3, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/7, expected_sections_typea))));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ IntegerIndexTest, IntegerIndexTest,
+ testing::Values(
+ IntegerIndexTestParam(/*num_data_threshold_for_bucket_split_in=*/341,
+ /*pre_mapping_fbv_in=*/false),
+ IntegerIndexTestParam(/*num_data_threshold_for_bucket_split_in=*/341,
+ /*pre_mapping_fbv_in=*/true),
+
+ IntegerIndexTestParam(/*num_data_threshold_for_bucket_split_in=*/16384,
+ /*pre_mapping_fbv_in=*/false),
+ IntegerIndexTestParam(/*num_data_threshold_for_bucket_split_in=*/32768,
+ /*pre_mapping_fbv_in=*/false),
+ IntegerIndexTestParam(/*num_data_threshold_for_bucket_split_in=*/65536,
+ /*pre_mapping_fbv_in=*/false)));
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/numeric/numeric-index.h b/icing/index/numeric/numeric-index.h
index 6798f8d..57911de 100644
--- a/icing/index/numeric/numeric-index.h
+++ b/icing/index/numeric/numeric-index.h
@@ -21,15 +21,18 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/persistent-storage.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
namespace icing {
namespace lib {
template <typename T>
-class NumericIndex {
+class NumericIndex : public PersistentStorage {
public:
using value_type = T;
@@ -46,9 +49,9 @@ class NumericIndex {
// add these records into numeric index.
class Editor {
public:
- explicit Editor(std::string_view property_name, DocumentId document_id,
+ explicit Editor(std::string_view property_path, DocumentId document_id,
SectionId section_id)
- : property_name_(property_name),
+ : property_path_(property_path),
document_id_(document_id),
section_id_(section_id) {}
@@ -66,10 +69,10 @@ class NumericIndex {
// Returns:
// - OK on success
// - Any other errors, depending on the actual implementation
- virtual libtextclassifier3::Status IndexAllBufferedKeys() = 0;
+ virtual libtextclassifier3::Status IndexAllBufferedKeys() && = 0;
protected:
- std::string property_name_;
+ std::string property_path_;
DocumentId document_id_;
SectionId section_id_;
};
@@ -106,7 +109,7 @@ class NumericIndex {
// Returns an Editor instance for adding new records into numeric index for a
// given property, DocumentId and SectionId. See Editor for more details.
- virtual std::unique_ptr<Editor> Edit(std::string_view property_name,
+ virtual std::unique_ptr<Editor> Edit(std::string_view property_path,
DocumentId document_id,
SectionId section_id) = 0;
@@ -121,23 +124,73 @@ class NumericIndex {
//
// Returns:
// - std::unique_ptr<DocHitInfoIterator> on success
- // - NOT_FOUND_ERROR if there is no numeric index for property_name
+ // - NOT_FOUND_ERROR if there is no numeric index for property_path
// - INVALID_ARGUMENT_ERROR if key_lower > key_upper
// - Any other errors, depending on the actual implementation
virtual libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
- GetIterator(std::string_view property_name, T key_lower,
- T key_upper) const = 0;
-
- // Clears all files created by the index. Returns OK if all files were
- // cleared.
- virtual libtextclassifier3::Status Reset() = 0;
+ GetIterator(std::string_view property_path, T key_lower, T key_upper,
+ const DocumentStore& document_store,
+ const SchemaStore& schema_store,
+ int64_t current_time_ms) const = 0;
+
+ // Reduces internal file sizes by reclaiming space and ids of deleted
+ // documents. Numeric index will convert all data (hits) to the new document
+ // ids and regenerate all index files. If all data in a property path are
+ // completely deleted, then the underlying storage must be discarded as well.
+ //
+ // - document_id_old_to_new: a map for converting old document id to new
+ // document id.
+ // - new_last_added_document_id: will be used to update the last added
+ // document id in the numeric index.
+ //
+ // Returns:
+ // - OK on success
+ // - Any other errors, depending on the actual implementation
+ virtual libtextclassifier3::Status Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ DocumentId new_last_added_document_id) = 0;
- // Syncs all the data and metadata changes to disk.
+ // Clears all data in the integer index and set last_added_document_id to
+ // kInvalidDocumentId.
//
// Returns:
- // OK on success
- // INTERNAL_ERROR on I/O errors
- virtual libtextclassifier3::Status PersistToDisk() = 0;
+ // - OK on success
+ // - Any other errors, depending on the actual implementation
+ virtual libtextclassifier3::Status Clear() = 0;
+
+ // Returns the largest document_id added to the index. Note that DocumentIds
+ // are always inserted in increasing order.
+ virtual DocumentId last_added_document_id() const = 0;
+
+ // Sets last_added_document_id to document_id so long as document_id >
+ // last_added_document_id() or last_added_document_id() is invalid.
+ virtual void set_last_added_document_id(DocumentId document_id) = 0;
+
+ // The number of individual indices that the NumericIndex has created to
+ // search over all indexed properties thus far.
+ virtual int num_property_indices() const = 0;
+
+ protected:
+ explicit NumericIndex(const Filesystem& filesystem,
+ std::string&& working_path,
+ PersistentStorage::WorkingPathType working_path_type)
+ : PersistentStorage(filesystem, std::move(working_path),
+ working_path_type) {}
+
+ virtual libtextclassifier3::Status PersistStoragesToDisk(
+ bool force) override = 0;
+
+ virtual libtextclassifier3::Status PersistMetadataToDisk(
+ bool force) override = 0;
+
+ virtual libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(
+ bool force) override = 0;
+
+ virtual libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) override = 0;
+
+ virtual Crcs& crcs() override = 0;
+ virtual const Crcs& crcs() const override = 0;
};
} // namespace lib
diff --git a/icing/index/numeric/numeric-index_test.cc b/icing/index/numeric/numeric-index_test.cc
deleted file mode 100644
index 38769f6..0000000
--- a/icing/index/numeric/numeric-index_test.cc
+++ /dev/null
@@ -1,361 +0,0 @@
-// Copyright (C) 2022 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "icing/index/numeric/numeric-index.h"
-
-#include <limits>
-#include <string>
-#include <string_view>
-#include <vector>
-
-#include "icing/text_classifier/lib3/utils/base/status.h"
-#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "gmock/gmock.h"
-#include "gtest/gtest.h"
-#include "icing/index/hit/doc-hit-info.h"
-#include "icing/index/iterator/doc-hit-info-iterator.h"
-#include "icing/index/numeric/dummy-numeric-index.h"
-#include "icing/schema/section.h"
-#include "icing/store/document-id.h"
-#include "icing/testing/common-matchers.h"
-
-namespace icing {
-namespace lib {
-
-namespace {
-
-using ::testing::ElementsAre;
-using ::testing::IsEmpty;
-using ::testing::NotNull;
-
-constexpr static std::string_view kDefaultTestPropertyName = "test";
-
-constexpr SectionId kDefaultSectionId = 0;
-
-template <typename T>
-class NumericIndexTest : public ::testing::Test {
- protected:
- using INDEX_IMPL_TYPE = T;
-
- void SetUp() override {
- if (std::is_same_v<
- INDEX_IMPL_TYPE,
- DummyNumericIndex<typename INDEX_IMPL_TYPE::value_type>>) {
- numeric_index_ = std::make_unique<
- DummyNumericIndex<typename INDEX_IMPL_TYPE::value_type>>();
- }
-
- ASSERT_THAT(numeric_index_, NotNull());
- }
-
- void Index(std::string_view property_name, DocumentId document_id,
- SectionId section_id,
- std::vector<typename INDEX_IMPL_TYPE::value_type> keys) {
- std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
- this->numeric_index_->Edit(property_name, document_id, section_id);
-
- for (const auto& key : keys) {
- ICING_EXPECT_OK(editor->BufferKey(key));
- }
- ICING_EXPECT_OK(editor->IndexAllBufferedKeys());
- }
-
- libtextclassifier3::StatusOr<std::vector<DocHitInfo>> Query(
- std::string_view property_name,
- typename INDEX_IMPL_TYPE::value_type key_lower,
- typename INDEX_IMPL_TYPE::value_type key_upper) {
- ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<DocHitInfoIterator> iter,
- this->numeric_index_->GetIterator(property_name, key_lower, key_upper));
-
- std::vector<DocHitInfo> result;
- while (iter->Advance().ok()) {
- result.push_back(iter->doc_hit_info());
- }
- return result;
- }
-
- std::unique_ptr<NumericIndex<typename INDEX_IMPL_TYPE::value_type>>
- numeric_index_;
-};
-
-using TestTypes = ::testing::Types<DummyNumericIndex<int64_t>>;
-TYPED_TEST_SUITE(NumericIndexTest, TestTypes);
-
-TYPED_TEST(NumericIndexTest, SingleKeyExactQuery) {
- this->Index(kDefaultTestPropertyName, /*document_id=*/0, kDefaultSectionId,
- /*keys=*/{1});
- this->Index(kDefaultTestPropertyName, /*document_id=*/1, kDefaultSectionId,
- /*keys=*/{3});
- this->Index(kDefaultTestPropertyName, /*document_id=*/2, kDefaultSectionId,
- /*keys=*/{2});
- this->Index(kDefaultTestPropertyName, /*document_id=*/3, kDefaultSectionId,
- /*keys=*/{0});
- this->Index(kDefaultTestPropertyName, /*document_id=*/4, kDefaultSectionId,
- /*keys=*/{4});
- this->Index(kDefaultTestPropertyName, /*document_id=*/5, kDefaultSectionId,
- /*keys=*/{2});
-
- int64_t query_key = 2;
- std::vector<SectionId> expected_sections{kDefaultSectionId};
- EXPECT_THAT(this->Query(kDefaultTestPropertyName, /*key_lower=*/query_key,
- /*key_upper=*/query_key),
- IsOkAndHolds(ElementsAre(
- EqualsDocHitInfo(/*document_id=*/5, expected_sections),
- EqualsDocHitInfo(/*document_id=*/2, expected_sections))));
-}
-
-TYPED_TEST(NumericIndexTest, SingleKeyRangeQuery) {
- this->Index(kDefaultTestPropertyName, /*document_id=*/0, kDefaultSectionId,
- /*keys=*/{1});
- this->Index(kDefaultTestPropertyName, /*document_id=*/1, kDefaultSectionId,
- /*keys=*/{3});
- this->Index(kDefaultTestPropertyName, /*document_id=*/2, kDefaultSectionId,
- /*keys=*/{2});
- this->Index(kDefaultTestPropertyName, /*document_id=*/3, kDefaultSectionId,
- /*keys=*/{0});
- this->Index(kDefaultTestPropertyName, /*document_id=*/4, kDefaultSectionId,
- /*keys=*/{4});
- this->Index(kDefaultTestPropertyName, /*document_id=*/5, kDefaultSectionId,
- /*keys=*/{2});
-
- std::vector<SectionId> expected_sections{kDefaultSectionId};
- EXPECT_THAT(this->Query(kDefaultTestPropertyName, /*key_lower=*/1,
- /*key_upper=*/3),
- IsOkAndHolds(ElementsAre(
- EqualsDocHitInfo(/*document_id=*/5, expected_sections),
- EqualsDocHitInfo(/*document_id=*/2, expected_sections),
- EqualsDocHitInfo(/*document_id=*/1, expected_sections),
- EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
-}
-
-TYPED_TEST(NumericIndexTest, EmptyResult) {
- this->Index(kDefaultTestPropertyName, /*document_id=*/0, kDefaultSectionId,
- /*keys=*/{1});
- this->Index(kDefaultTestPropertyName, /*document_id=*/1, kDefaultSectionId,
- /*keys=*/{3});
- this->Index(kDefaultTestPropertyName, /*document_id=*/2, kDefaultSectionId,
- /*keys=*/{2});
- this->Index(kDefaultTestPropertyName, /*document_id=*/3, kDefaultSectionId,
- /*keys=*/{0});
- this->Index(kDefaultTestPropertyName, /*document_id=*/4, kDefaultSectionId,
- /*keys=*/{4});
- this->Index(kDefaultTestPropertyName, /*document_id=*/5, kDefaultSectionId,
- /*keys=*/{2});
-
- EXPECT_THAT(this->Query(kDefaultTestPropertyName, /*key_lower=*/100,
- /*key_upper=*/200),
- IsOkAndHolds(IsEmpty()));
-}
-
-TYPED_TEST(NumericIndexTest, MultipleKeysShouldMergeAndDedupeDocHitInfo) {
- // Construct several documents with mutiple keys under the same section.
- // Range query [1, 3] will find hits with same (DocumentId, SectionId) for
- // mutiple times. For example, (2, kDefaultSectionId) will be found twice
- // (once for key = 1 and once for key = 3).
- // Test if the iterator dedupes correctly.
- this->Index(kDefaultTestPropertyName, /*document_id=*/0, kDefaultSectionId,
- /*keys=*/{-1000, 0});
- this->Index(kDefaultTestPropertyName, /*document_id=*/1, kDefaultSectionId,
- /*keys=*/{-100, 0, 1, 2, 3, 4, 5});
- this->Index(kDefaultTestPropertyName, /*document_id=*/2, kDefaultSectionId,
- /*keys=*/{3, 1});
- this->Index(kDefaultTestPropertyName, /*document_id=*/3, kDefaultSectionId,
- /*keys=*/{4, 1});
- this->Index(kDefaultTestPropertyName, /*document_id=*/4, kDefaultSectionId,
- /*keys=*/{1, 6});
- this->Index(kDefaultTestPropertyName, /*document_id=*/5, kDefaultSectionId,
- /*keys=*/{2, 100});
- this->Index(kDefaultTestPropertyName, /*document_id=*/6, kDefaultSectionId,
- /*keys=*/{1000, 2});
- this->Index(kDefaultTestPropertyName, /*document_id=*/7, kDefaultSectionId,
- /*keys=*/{4, -1000});
-
- std::vector<SectionId> expected_sections{kDefaultSectionId};
- EXPECT_THAT(this->Query(kDefaultTestPropertyName, /*key_lower=*/1,
- /*key_upper=*/3),
- IsOkAndHolds(ElementsAre(
- EqualsDocHitInfo(/*document_id=*/6, expected_sections),
- EqualsDocHitInfo(/*document_id=*/5, expected_sections),
- EqualsDocHitInfo(/*document_id=*/4, expected_sections),
- EqualsDocHitInfo(/*document_id=*/3, expected_sections),
- EqualsDocHitInfo(/*document_id=*/2, expected_sections),
- EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
-}
-
-TYPED_TEST(NumericIndexTest, EdgeNumericValues) {
- this->Index(kDefaultTestPropertyName, /*document_id=*/0, kDefaultSectionId,
- /*keys=*/{0});
- this->Index(kDefaultTestPropertyName, /*document_id=*/1, kDefaultSectionId,
- /*keys=*/{-100});
- this->Index(kDefaultTestPropertyName, /*document_id=*/2, kDefaultSectionId,
- /*keys=*/{-80});
- this->Index(
- kDefaultTestPropertyName, /*document_id=*/3, kDefaultSectionId,
- /*keys=*/{std::numeric_limits<typename TypeParam::value_type>::max()});
- this->Index(
- kDefaultTestPropertyName, /*document_id=*/4, kDefaultSectionId,
- /*keys=*/{std::numeric_limits<typename TypeParam::value_type>::min()});
- this->Index(kDefaultTestPropertyName, /*document_id=*/5, kDefaultSectionId,
- /*keys=*/{200});
- this->Index(kDefaultTestPropertyName, /*document_id=*/6, kDefaultSectionId,
- /*keys=*/{100});
- this->Index(
- kDefaultTestPropertyName, /*document_id=*/7, kDefaultSectionId,
- /*keys=*/{std::numeric_limits<typename TypeParam::value_type>::max()});
- this->Index(kDefaultTestPropertyName, /*document_id=*/8, kDefaultSectionId,
- /*keys=*/{0});
- this->Index(
- kDefaultTestPropertyName, /*document_id=*/9, kDefaultSectionId,
- /*keys=*/{std::numeric_limits<typename TypeParam::value_type>::min()});
-
- std::vector<SectionId> expected_sections{kDefaultSectionId};
-
- // Negative key
- EXPECT_THAT(this->Query(kDefaultTestPropertyName, /*key_lower=*/-100,
- /*key_upper=*/-70),
- IsOkAndHolds(ElementsAre(
- EqualsDocHitInfo(/*document_id=*/2, expected_sections),
- EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
-
- // value_type max key
- EXPECT_THAT(
- this->Query(kDefaultTestPropertyName, /*key_lower=*/
- std::numeric_limits<typename TypeParam::value_type>::max(),
- /*key_upper=*/
- std::numeric_limits<typename TypeParam::value_type>::max()),
- IsOkAndHolds(
- ElementsAre(EqualsDocHitInfo(/*document_id=*/7, expected_sections),
- EqualsDocHitInfo(/*document_id=*/3, expected_sections))));
-
- // value_type min key
- EXPECT_THAT(
- this->Query(kDefaultTestPropertyName, /*key_lower=*/
- std::numeric_limits<typename TypeParam::value_type>::min(),
- /*key_upper=*/
- std::numeric_limits<typename TypeParam::value_type>::min()),
- IsOkAndHolds(
- ElementsAre(EqualsDocHitInfo(/*document_id=*/9, expected_sections),
- EqualsDocHitInfo(/*document_id=*/4, expected_sections))));
-
- // Key = 0
- EXPECT_THAT(
- this->Query(kDefaultTestPropertyName, /*key_lower=*/0, /*key_upper=*/0),
- IsOkAndHolds(
- ElementsAre(EqualsDocHitInfo(/*document_id=*/8, expected_sections),
- EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
-
- // All keys from value_type min to value_type max
- EXPECT_THAT(
- this->Query(kDefaultTestPropertyName, /*key_lower=*/
- std::numeric_limits<typename TypeParam::value_type>::min(),
- /*key_upper=*/
- std::numeric_limits<typename TypeParam::value_type>::max()),
- IsOkAndHolds(
- ElementsAre(EqualsDocHitInfo(/*document_id=*/9, expected_sections),
- EqualsDocHitInfo(/*document_id=*/8, expected_sections),
- EqualsDocHitInfo(/*document_id=*/7, expected_sections),
- EqualsDocHitInfo(/*document_id=*/6, expected_sections),
- EqualsDocHitInfo(/*document_id=*/5, expected_sections),
- EqualsDocHitInfo(/*document_id=*/4, expected_sections),
- EqualsDocHitInfo(/*document_id=*/3, expected_sections),
- EqualsDocHitInfo(/*document_id=*/2, expected_sections),
- EqualsDocHitInfo(/*document_id=*/1, expected_sections),
- EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
-}
-
-TYPED_TEST(NumericIndexTest,
- MultipleSectionsShouldMergeSectionsAndDedupeDocHitInfo) {
- // Construct several documents with mutiple numeric sections.
- // Range query [1, 3] will find hits with same DocumentIds but multiple
- // different SectionIds. For example, there will be 2 hits (1, 0), (1, 1) for
- // DocumentId=1.
- // Test if the iterator merges multiple sections into a single SectionIdMask
- // correctly.
- this->Index(kDefaultTestPropertyName, /*document_id=*/0, /*section_id=*/0,
- /*keys=*/{0});
- this->Index(kDefaultTestPropertyName, /*document_id=*/0, /*section_id=*/1,
- /*keys=*/{1});
- this->Index(kDefaultTestPropertyName, /*document_id=*/0, /*section_id=*/2,
- /*keys=*/{-1});
- this->Index(kDefaultTestPropertyName, /*document_id=*/1, /*section_id=*/0,
- /*keys=*/{2});
- this->Index(kDefaultTestPropertyName, /*document_id=*/1, /*section_id=*/1,
- /*keys=*/{1});
- this->Index(kDefaultTestPropertyName, /*document_id=*/1, /*section_id=*/2,
- /*keys=*/{4});
- this->Index(kDefaultTestPropertyName, /*document_id=*/2, /*section_id=*/3,
- /*keys=*/{3});
- this->Index(kDefaultTestPropertyName, /*document_id=*/2, /*section_id=*/4,
- /*keys=*/{2});
- this->Index(kDefaultTestPropertyName, /*document_id=*/2, /*section_id=*/5,
- /*keys=*/{5});
-
- EXPECT_THAT(
- this->Query(kDefaultTestPropertyName, /*key_lower=*/1,
- /*key_upper=*/3),
- IsOkAndHolds(ElementsAre(
- EqualsDocHitInfo(/*document_id=*/2, std::vector<SectionId>{3, 4}),
- EqualsDocHitInfo(/*document_id=*/1, std::vector<SectionId>{0, 1}),
- EqualsDocHitInfo(/*document_id=*/0, std::vector<SectionId>{1}))));
-}
-
-TYPED_TEST(NumericIndexTest, NonRelevantPropertyShouldNotBeIncluded) {
- constexpr std::string_view kNonRelevantProperty = "non_relevant_property";
- this->Index(kDefaultTestPropertyName, /*document_id=*/0, kDefaultSectionId,
- /*keys=*/{1});
- this->Index(kDefaultTestPropertyName, /*document_id=*/1, kDefaultSectionId,
- /*keys=*/{3});
- this->Index(kNonRelevantProperty, /*document_id=*/2, kDefaultSectionId,
- /*keys=*/{2});
- this->Index(kDefaultTestPropertyName, /*document_id=*/3, kDefaultSectionId,
- /*keys=*/{0});
- this->Index(kNonRelevantProperty, /*document_id=*/4, kDefaultSectionId,
- /*keys=*/{4});
- this->Index(kDefaultTestPropertyName, /*document_id=*/5, kDefaultSectionId,
- /*keys=*/{2});
-
- std::vector<SectionId> expected_sections{kDefaultSectionId};
- EXPECT_THAT(this->Query(kDefaultTestPropertyName, /*key_lower=*/1,
- /*key_upper=*/3),
- IsOkAndHolds(ElementsAre(
- EqualsDocHitInfo(/*document_id=*/5, expected_sections),
- EqualsDocHitInfo(/*document_id=*/1, expected_sections),
- EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
-}
-
-TYPED_TEST(NumericIndexTest,
- RangeQueryKeyLowerGreaterThanKeyUpperShouldReturnError) {
- this->Index(kDefaultTestPropertyName, /*document_id=*/0, kDefaultSectionId,
- /*keys=*/{1});
- this->Index(kDefaultTestPropertyName, /*document_id=*/1, kDefaultSectionId,
- /*keys=*/{3});
- this->Index(kDefaultTestPropertyName, /*document_id=*/2, kDefaultSectionId,
- /*keys=*/{2});
- this->Index(kDefaultTestPropertyName, /*document_id=*/3, kDefaultSectionId,
- /*keys=*/{0});
- this->Index(kDefaultTestPropertyName, /*document_id=*/4, kDefaultSectionId,
- /*keys=*/{4});
- this->Index(kDefaultTestPropertyName, /*document_id=*/5, kDefaultSectionId,
- /*keys=*/{2});
-
- EXPECT_THAT(this->Query(kDefaultTestPropertyName, /*key_lower=*/3,
- /*key_upper=*/1),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-}
-
-} // namespace
-
-} // namespace lib
-} // namespace icing
diff --git a/icing/index/numeric/posting-list-integer-index-data-accessor.cc b/icing/index/numeric/posting-list-integer-index-accessor.cc
index 73b48e2..af2aea4 100644
--- a/icing/index/numeric/posting-list-integer-index-data-accessor.cc
+++ b/icing/index/numeric/posting-list-integer-index-accessor.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/index/numeric/posting-list-integer-index-data-accessor.h"
+#include "icing/index/numeric/posting-list-integer-index-accessor.h"
#include <cstdint>
#include <memory>
@@ -26,39 +26,33 @@
#include "icing/file/posting_list/posting-list-identifier.h"
#include "icing/file/posting_list/posting-list-used.h"
#include "icing/index/numeric/integer-index-data.h"
-#include "icing/index/numeric/posting-list-used-integer-index-data-serializer.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
/* static */ libtextclassifier3::StatusOr<
- std::unique_ptr<PostingListIntegerIndexDataAccessor>>
-PostingListIntegerIndexDataAccessor::Create(
- FlashIndexStorage* storage,
- PostingListUsedIntegerIndexDataSerializer* serializer) {
+ std::unique_ptr<PostingListIntegerIndexAccessor>>
+PostingListIntegerIndexAccessor::Create(
+ FlashIndexStorage* storage, PostingListIntegerIndexSerializer* serializer) {
uint32_t max_posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
storage->block_size(), serializer->GetDataTypeBytes());
- std::unique_ptr<uint8_t[]> posting_list_buffer_array =
- std::make_unique<uint8_t[]>(max_posting_list_bytes);
- ICING_ASSIGN_OR_RETURN(
- PostingListUsed posting_list_buffer,
- PostingListUsed::CreateFromUnitializedRegion(
- serializer, posting_list_buffer_array.get(), max_posting_list_bytes));
- return std::unique_ptr<PostingListIntegerIndexDataAccessor>(
- new PostingListIntegerIndexDataAccessor(
- storage, std::move(posting_list_buffer_array),
- std::move(posting_list_buffer), serializer));
+ ICING_ASSIGN_OR_RETURN(PostingListUsed in_memory_posting_list,
+ PostingListUsed::CreateFromUnitializedRegion(
+ serializer, max_posting_list_bytes));
+ return std::unique_ptr<PostingListIntegerIndexAccessor>(
+ new PostingListIntegerIndexAccessor(
+ storage, std::move(in_memory_posting_list), serializer));
}
/* static */ libtextclassifier3::StatusOr<
- std::unique_ptr<PostingListIntegerIndexDataAccessor>>
-PostingListIntegerIndexDataAccessor::CreateFromExisting(
- FlashIndexStorage* storage,
- PostingListUsedIntegerIndexDataSerializer* serializer,
+ std::unique_ptr<PostingListIntegerIndexAccessor>>
+PostingListIntegerIndexAccessor::CreateFromExisting(
+ FlashIndexStorage* storage, PostingListIntegerIndexSerializer* serializer,
PostingListIdentifier existing_posting_list_id) {
ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<PostingListIntegerIndexDataAccessor> pl_accessor,
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
Create(storage, serializer));
ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
storage->GetPostingList(existing_posting_list_id));
@@ -69,32 +63,92 @@ PostingListIntegerIndexDataAccessor::CreateFromExisting(
// Returns the next batch of integer index data for the provided posting list.
libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
-PostingListIntegerIndexDataAccessor::GetNextDataBatch() {
+PostingListIntegerIndexAccessor::GetNextDataBatch() {
+ return GetNextDataBatchImpl(/*free_posting_list=*/false);
+}
+
+libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
+PostingListIntegerIndexAccessor::GetAllDataAndFree() {
+ if (preexisting_posting_list_ == nullptr) {
+ return absl_ports::FailedPreconditionError(
+ "Cannot retrieve data from a PostingListIntegerIndexAccessor that "
+ "was not created from a preexisting posting list.");
+ }
+
+ std::vector<IntegerIndexData> all_data;
+ while (true) {
+ ICING_ASSIGN_OR_RETURN(std::vector<IntegerIndexData> batch,
+ GetNextDataBatchImpl(/*free_posting_list=*/true));
+ if (batch.empty()) {
+ break;
+ }
+ std::move(batch.begin(), batch.end(), std::back_inserter(all_data));
+ }
+
+ return all_data;
+}
+
+libtextclassifier3::Status PostingListIntegerIndexAccessor::PrependData(
+ const IntegerIndexData& data) {
+ PostingListUsed& active_pl = (preexisting_posting_list_ != nullptr)
+ ? preexisting_posting_list_->posting_list
+ : in_memory_posting_list_;
+ libtextclassifier3::Status status =
+ serializer_->PrependData(&active_pl, data);
+ if (!absl_ports::IsResourceExhausted(status)) {
+ return status;
+ }
+ // There is no more room to add data to this current posting list! Therefore,
+ // we need to either move those data to a larger posting list or flush this
+ // posting list and create another max-sized posting list in the chain.
+ if (preexisting_posting_list_ != nullptr) {
+ ICING_RETURN_IF_ERROR(FlushPreexistingPostingList());
+ } else {
+ ICING_RETURN_IF_ERROR(FlushInMemoryPostingList());
+ }
+
+ // Re-add data. Should always fit since we just cleared
+ // in_memory_posting_list_. It's fine to explicitly reference
+ // in_memory_posting_list_ here because there's no way of reaching this line
+ // while preexisting_posting_list_ is still in use.
+ return serializer_->PrependData(&in_memory_posting_list_, data);
+}
+
+libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
+PostingListIntegerIndexAccessor::GetNextDataBatchImpl(bool free_posting_list) {
if (preexisting_posting_list_ == nullptr) {
if (has_reached_posting_list_chain_end_) {
return std::vector<IntegerIndexData>();
}
return absl_ports::FailedPreconditionError(
- "Cannot retrieve data from a PostingListIntegerIndexDataAccessor that "
+ "Cannot retrieve data from a PostingListIntegerIndexAccessor that "
"was not created from a preexisting posting list.");
}
ICING_ASSIGN_OR_RETURN(
std::vector<IntegerIndexData> batch,
serializer_->GetData(&preexisting_posting_list_->posting_list));
- uint32_t next_block_index;
+ uint32_t next_block_index = kInvalidBlockIndex;
// Posting lists will only be chained when they are max-sized, in which case
- // block.next_block_index() will point to the next block for the next posting
- // list. Otherwise, block.next_block_index() can be kInvalidBlockIndex or be
- // used to point to the next free list block, which is not relevant here.
- if (preexisting_posting_list_->block.max_num_posting_lists() == 1) {
- next_block_index = preexisting_posting_list_->block.next_block_index();
- } else {
- next_block_index = kInvalidBlockIndex;
+ // next_block_index will point to the next block for the next posting list.
+ // Otherwise, next_block_index can be kInvalidBlockIndex or be used to point
+ // to the next free list block, which is not relevant here.
+ if (preexisting_posting_list_->posting_list.size_in_bytes() ==
+ storage_->max_posting_list_bytes()) {
+ next_block_index = preexisting_posting_list_->next_block_index;
+ }
+
+ if (free_posting_list) {
+ ICING_RETURN_IF_ERROR(
+ storage_->FreePostingList(std::move(*preexisting_posting_list_)));
}
+
if (next_block_index != kInvalidBlockIndex) {
+ // Since we only have to deal with next block for max-sized posting list
+ // block, max_num_posting_lists is 1 and posting_list_index_bits is
+ // BitsToStore(1).
PostingListIdentifier next_posting_list_id(
next_block_index, /*posting_list_index=*/0,
- preexisting_posting_list_->block.posting_list_index_bits());
+ /*posting_list_index_bits=*/BitsToStore(1));
ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
storage_->GetPostingList(next_posting_list_id));
preexisting_posting_list_ =
@@ -106,31 +160,5 @@ PostingListIntegerIndexDataAccessor::GetNextDataBatch() {
return batch;
}
-libtextclassifier3::Status PostingListIntegerIndexDataAccessor::PrependData(
- const IntegerIndexData& data) {
- PostingListUsed& active_pl = (preexisting_posting_list_ != nullptr)
- ? preexisting_posting_list_->posting_list
- : posting_list_buffer_;
- libtextclassifier3::Status status =
- serializer_->PrependData(&active_pl, data);
- if (!absl_ports::IsResourceExhausted(status)) {
- return status;
- }
- // There is no more room to add data to this current posting list! Therefore,
- // we need to either move those data to a larger posting list or flush this
- // posting list and create another max-sized posting list in the chain.
- if (preexisting_posting_list_ != nullptr) {
- FlushPreexistingPostingList();
- } else {
- ICING_RETURN_IF_ERROR(FlushInMemoryPostingList());
- }
-
- // Re-add data. Should always fit since we just cleared posting_list_buffer_.
- // It's fine to explicitly reference posting_list_buffer_ here because there's
- // no way of reaching this line while preexisting_posting_list_ is still in
- // use.
- return serializer_->PrependData(&posting_list_buffer_, data);
-}
-
} // namespace lib
} // namespace icing
diff --git a/icing/index/numeric/posting-list-integer-index-accessor.h b/icing/index/numeric/posting-list-integer-index-accessor.h
new file mode 100644
index 0000000..4f667a0
--- /dev/null
+++ b/icing/index/numeric/posting-list-integer-index-accessor.h
@@ -0,0 +1,130 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_NUMERIC_POSTING_LIST_INTEGER_INDEX_ACCESSOR_H_
+#define ICING_INDEX_NUMERIC_POSTING_LIST_INTEGER_INDEX_ACCESSOR_H_
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/posting-list-accessor.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/index/numeric/integer-index-data.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+
+namespace icing {
+namespace lib {
+
+// TODO(b/259743562): Refactor PostingListAccessor derived classes
+
+// This class is used to provide a simple abstraction for adding integer index
+// data to posting lists. PostingListIntegerIndexAccessor handles:
+// 1) selection of properly-sized posting lists for the accumulated integer
+// index data during Finalize()
+// 2) chaining of max-sized posting lists.
+class PostingListIntegerIndexAccessor : public PostingListAccessor {
+ public:
+ // Creates an empty PostingListIntegerIndexAccessor.
+ //
+ // RETURNS:
+ // - On success, a valid instance of PostingListIntegerIndexAccessor
+ // - INVALID_ARGUMENT error if storage has an invalid block_size.
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<PostingListIntegerIndexAccessor>>
+ Create(FlashIndexStorage* storage,
+ PostingListIntegerIndexSerializer* serializer);
+
+ // Creates a PostingListIntegerIndexAccessor with an existing posting list
+ // identified by existing_posting_list_id.
+ //
+ // RETURNS:
+ // - On success, a valid instance of PostingListIntegerIndexAccessor
+ // - INVALID_ARGUMENT if storage has an invalid block_size.
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<PostingListIntegerIndexAccessor>>
+ CreateFromExisting(FlashIndexStorage* storage,
+ PostingListIntegerIndexSerializer* serializer,
+ PostingListIdentifier existing_posting_list_id);
+
+ PostingListSerializer* GetSerializer() override { return serializer_; }
+
+ // Retrieves the next batch of data in the posting list chain.
+ //
+ // RETURNS:
+ // - On success, a vector of integer index data in the posting list chain
+ // - FAILED_PRECONDITION_ERROR if called on an instance that was created via
+ // Create.
+ // - INTERNAL_ERROR if unable to read the next posting list in the chain or
+ // if the posting list has been corrupted somehow.
+ libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
+ GetNextDataBatch();
+
+ // Retrieves all data from the posting list chain and frees all posting
+ // list(s).
+ //
+ // RETURNS:
+ // - On success, a vector of integer index data in the posting list chain
+ // - FAILED_PRECONDITION_ERROR if called on an instance that was created via
+ // Create.
+ // - INTERNAL_ERROR if unable to read the next posting list in the chain or
+ // if the posting list has been corrupted somehow.
+ libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
+ GetAllDataAndFree();
+
+ // Prepends one data. This may result in flushing the posting list to disk (if
+ // the PostingListIntegerIndexAccessor holds a max-sized posting list that
+ // is full) or freeing a pre-existing posting list if it is too small to fit
+ // all data necessary.
+ //
+ // RETURNS:
+ // - OK, on success
+ // - INVALID_ARGUMENT if !data.is_valid() or if data is greater than the
+ // previously added data.
+ // - RESOURCE_EXHAUSTED error if unable to grow the index to allocate a new
+ // posting list.
+ libtextclassifier3::Status PrependData(const IntegerIndexData& data);
+
+ private:
+ explicit PostingListIntegerIndexAccessor(
+ FlashIndexStorage* storage, PostingListUsed in_memory_posting_list,
+ PostingListIntegerIndexSerializer* serializer)
+ : PostingListAccessor(storage, std::move(in_memory_posting_list)),
+ serializer_(serializer) {}
+
+ // Retrieves the next batch of data in the posting list chain.
+ //
+ // - free_posting_list: a boolean flag indicating whether freeing all posting
+ // lists after retrieving batch data.
+ //
+ // RETURNS:
+ // - On success, a vector of integer index data in the posting list chain
+ // - FAILED_PRECONDITION_ERROR if called on an instance that was created via
+ // Create.
+ // - INTERNAL_ERROR if unable to read the next posting list in the chain or
+ // if the posting list has been corrupted somehow.
+ libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
+ GetNextDataBatchImpl(bool free_posting_list);
+
+ PostingListIntegerIndexSerializer* serializer_; // Does not own.
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_NUMERIC_POSTING_LIST_INTEGER_INDEX_ACCESSOR_H_
diff --git a/icing/index/numeric/posting-list-integer-index-data-accessor_test.cc b/icing/index/numeric/posting-list-integer-index-accessor_test.cc
index ca0804e..f655fea 100644
--- a/icing/index/numeric/posting-list-integer-index-data-accessor_test.cc
+++ b/icing/index/numeric/posting-list-integer-index-accessor_test.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/index/numeric/posting-list-integer-index-data-accessor.h"
+#include "icing/index/numeric/posting-list-integer-index-accessor.h"
#include <cstdint>
#include <memory>
@@ -25,9 +25,10 @@
#include "gtest/gtest.h"
#include "icing/file/filesystem.h"
#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/posting-list-common.h"
#include "icing/file/posting_list/posting-list-identifier.h"
#include "icing/index/numeric/integer-index-data.h"
-#include "icing/index/numeric/posting-list-used-integer-index-data-serializer.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/testing/common-matchers.h"
@@ -42,9 +43,10 @@ using ::testing::ElementsAre;
using ::testing::ElementsAreArray;
using ::testing::Eq;
using ::testing::Lt;
+using ::testing::Ne;
using ::testing::SizeIs;
-class PostingListIntegerIndexDataAccessorTest : public ::testing::Test {
+class PostingListIntegerIndexAccessorTest : public ::testing::Test {
protected:
void SetUp() override {
test_dir_ = GetTestTempDir() + "/test_dir";
@@ -53,7 +55,7 @@ class PostingListIntegerIndexDataAccessorTest : public ::testing::Test {
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(test_dir_.c_str()));
- serializer_ = std::make_unique<PostingListUsedIntegerIndexDataSerializer>();
+ serializer_ = std::make_unique<PostingListIntegerIndexSerializer>();
ICING_ASSERT_OK_AND_ASSIGN(
FlashIndexStorage flash_index_storage,
@@ -71,7 +73,7 @@ class PostingListIntegerIndexDataAccessorTest : public ::testing::Test {
Filesystem filesystem_;
std::string test_dir_;
std::string file_name_;
- std::unique_ptr<PostingListUsedIntegerIndexDataSerializer> serializer_;
+ std::unique_ptr<PostingListIntegerIndexSerializer> serializer_;
std::unique_ptr<FlashIndexStorage> flash_index_storage_;
};
@@ -96,11 +98,11 @@ std::vector<IntegerIndexData> CreateData(int num_data,
return data;
}
-TEST_F(PostingListIntegerIndexDataAccessorTest, DataAddAndRetrieveProperly) {
+TEST_F(PostingListIntegerIndexAccessorTest, DataAddAndRetrieveProperly) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<PostingListIntegerIndexDataAccessor> pl_accessor,
- PostingListIntegerIndexDataAccessor::Create(flash_index_storage_.get(),
- serializer_.get()));
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
// Add some integer index data
std::vector<IntegerIndexData> data_vec =
CreateData(/*num_data=*/5, /*start_document_id=*/0, /*start_key=*/819);
@@ -119,14 +121,14 @@ TEST_F(PostingListIntegerIndexDataAccessorTest, DataAddAndRetrieveProperly) {
EXPECT_THAT(
serializer_->GetData(&pl_holder.posting_list),
IsOkAndHolds(ElementsAreArray(data_vec.rbegin(), data_vec.rend())));
- EXPECT_THAT(pl_holder.block.next_block_index(), Eq(kInvalidBlockIndex));
+ EXPECT_THAT(pl_holder.next_block_index, Eq(kInvalidBlockIndex));
}
-TEST_F(PostingListIntegerIndexDataAccessorTest, PreexistingPLKeepOnSameBlock) {
+TEST_F(PostingListIntegerIndexAccessorTest, PreexistingPLKeepOnSameBlock) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<PostingListIntegerIndexDataAccessor> pl_accessor,
- PostingListIntegerIndexDataAccessor::Create(flash_index_storage_.get(),
- serializer_.get()));
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
// Add a single data. This will fit in a min-sized posting list.
IntegerIndexData data1(/*section_id=*/1, /*document_id=*/0, /*key=*/12345);
ICING_ASSERT_OK(pl_accessor->PrependData(data1));
@@ -141,7 +143,7 @@ TEST_F(PostingListIntegerIndexDataAccessorTest, PreexistingPLKeepOnSameBlock) {
// two data, so this should NOT cause the previous pl to be reallocated.
ICING_ASSERT_OK_AND_ASSIGN(
pl_accessor,
- PostingListIntegerIndexDataAccessor::CreateFromExisting(
+ PostingListIntegerIndexAccessor::CreateFromExisting(
flash_index_storage_.get(), serializer_.get(), result1.id));
IntegerIndexData data2(/*section_id=*/1, /*document_id=*/1, /*key=*/23456);
ICING_ASSERT_OK(pl_accessor->PrependData(data2));
@@ -159,12 +161,11 @@ TEST_F(PostingListIntegerIndexDataAccessorTest, PreexistingPLKeepOnSameBlock) {
IsOkAndHolds(ElementsAre(data2, data1)));
}
-TEST_F(PostingListIntegerIndexDataAccessorTest,
- PreexistingPLReallocateToLargerPL) {
+TEST_F(PostingListIntegerIndexAccessorTest, PreexistingPLReallocateToLargerPL) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<PostingListIntegerIndexDataAccessor> pl_accessor,
- PostingListIntegerIndexDataAccessor::Create(flash_index_storage_.get(),
- serializer_.get()));
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
// Adding 3 data should cause Finalize allocating a 48-byte posting list,
// which can store at most 4 data.
std::vector<IntegerIndexData> data_vec1 =
@@ -182,7 +183,7 @@ TEST_F(PostingListIntegerIndexDataAccessorTest,
// Now add more data.
ICING_ASSERT_OK_AND_ASSIGN(
pl_accessor,
- PostingListIntegerIndexDataAccessor::CreateFromExisting(
+ PostingListIntegerIndexAccessor::CreateFromExisting(
flash_index_storage_.get(), serializer_.get(), result1.id));
// The current posting list can fit 1 more data. Adding 12 more data should
// result in these data being moved to a larger posting list. Also the total
@@ -217,12 +218,11 @@ TEST_F(PostingListIntegerIndexDataAccessorTest,
all_data_vec.rend())));
}
-TEST_F(PostingListIntegerIndexDataAccessorTest,
- MultiBlockChainsBlocksProperly) {
+TEST_F(PostingListIntegerIndexAccessorTest, MultiBlockChainsBlocksProperly) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<PostingListIntegerIndexDataAccessor> pl_accessor,
- PostingListIntegerIndexDataAccessor::Create(flash_index_storage_.get(),
- serializer_.get()));
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
// Block size is 4096, sizeof(BlockHeader) is 12 and sizeof(IntegerIndexData)
// is 12, so the max size posting list can store (4096 - 12) / 12 = 340 data.
// Adding 341 data should cause:
@@ -256,7 +256,7 @@ TEST_F(PostingListIntegerIndexDataAccessorTest,
ElementsAreArray(data_vec.rbegin(), first_block_data_start));
// Now retrieve all of the data that were on the first block.
- uint32_t first_block_id = pl_holder.block.next_block_index();
+ uint32_t first_block_id = pl_holder.next_block_index;
EXPECT_THAT(first_block_id, Eq(1));
PostingListIdentifier pl_id(first_block_id, /*posting_list_index=*/0,
@@ -268,12 +268,12 @@ TEST_F(PostingListIntegerIndexDataAccessorTest,
IsOkAndHolds(ElementsAreArray(first_block_data_start, data_vec.rend())));
}
-TEST_F(PostingListIntegerIndexDataAccessorTest,
+TEST_F(PostingListIntegerIndexAccessorTest,
PreexistingMultiBlockReusesBlocksProperly) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<PostingListIntegerIndexDataAccessor> pl_accessor,
- PostingListIntegerIndexDataAccessor::Create(flash_index_storage_.get(),
- serializer_.get()));
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
// Block size is 4096, sizeof(BlockHeader) is 12 and sizeof(IntegerIndexData)
// is 12, so the max size posting list can store (4096 - 12) / 12 = 340 data.
// Adding 341 data will cause:
@@ -296,7 +296,7 @@ TEST_F(PostingListIntegerIndexDataAccessorTest,
// fill it up.
ICING_ASSERT_OK_AND_ASSIGN(
pl_accessor,
- PostingListIntegerIndexDataAccessor::CreateFromExisting(
+ PostingListIntegerIndexAccessor::CreateFromExisting(
flash_index_storage_.get(), serializer_.get(), first_add_id));
std::vector<IntegerIndexData> data_vec2 = CreateData(
/*num_data=*/10,
@@ -330,7 +330,7 @@ TEST_F(PostingListIntegerIndexDataAccessorTest,
ElementsAreArray(all_data_vec.rbegin(), first_block_data_start));
// Now retrieve all of the data that were on the first block.
- uint32_t first_block_id = pl_holder.block.next_block_index();
+ uint32_t first_block_id = pl_holder.next_block_index;
EXPECT_THAT(first_block_id, Eq(1));
PostingListIdentifier pl_id(first_block_id, /*posting_list_index=*/0,
@@ -342,23 +342,23 @@ TEST_F(PostingListIntegerIndexDataAccessorTest,
all_data_vec.rend())));
}
-TEST_F(PostingListIntegerIndexDataAccessorTest,
+TEST_F(PostingListIntegerIndexAccessorTest,
InvalidDataShouldReturnInvalidArgument) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<PostingListIntegerIndexDataAccessor> pl_accessor,
- PostingListIntegerIndexDataAccessor::Create(flash_index_storage_.get(),
- serializer_.get()));
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
IntegerIndexData invalid_data;
EXPECT_THAT(pl_accessor->PrependData(invalid_data),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(PostingListIntegerIndexDataAccessorTest,
+TEST_F(PostingListIntegerIndexAccessorTest,
BasicHitIncreasingShouldReturnInvalidArgument) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<PostingListIntegerIndexDataAccessor> pl_accessor,
- PostingListIntegerIndexDataAccessor::Create(flash_index_storage_.get(),
- serializer_.get()));
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
IntegerIndexData data1(/*section_id=*/3, /*document_id=*/1, /*key=*/12345);
ICING_ASSERT_OK(pl_accessor->PrependData(data1));
@@ -371,24 +371,24 @@ TEST_F(PostingListIntegerIndexDataAccessorTest,
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(PostingListIntegerIndexDataAccessorTest,
+TEST_F(PostingListIntegerIndexAccessorTest,
NewPostingListNoDataAddedShouldReturnInvalidArgument) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<PostingListIntegerIndexDataAccessor> pl_accessor,
- PostingListIntegerIndexDataAccessor::Create(flash_index_storage_.get(),
- serializer_.get()));
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
PostingListAccessor::FinalizeResult result =
std::move(*pl_accessor).Finalize();
EXPECT_THAT(result.status,
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(PostingListIntegerIndexDataAccessorTest,
+TEST_F(PostingListIntegerIndexAccessorTest,
PreexistingPostingListNoDataAddedShouldSucceed) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<PostingListIntegerIndexDataAccessor> pl_accessor1,
- PostingListIntegerIndexDataAccessor::Create(flash_index_storage_.get(),
- serializer_.get()));
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor1,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
IntegerIndexData data1(/*section_id=*/3, /*document_id=*/1, /*key=*/12345);
ICING_ASSERT_OK(pl_accessor1->PrependData(data1));
PostingListAccessor::FinalizeResult result1 =
@@ -396,14 +396,139 @@ TEST_F(PostingListIntegerIndexDataAccessorTest,
ICING_ASSERT_OK(result1.status);
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<PostingListIntegerIndexDataAccessor> pl_accessor2,
- PostingListIntegerIndexDataAccessor::CreateFromExisting(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor2,
+ PostingListIntegerIndexAccessor::CreateFromExisting(
flash_index_storage_.get(), serializer_.get(), result1.id));
PostingListAccessor::FinalizeResult result2 =
std::move(*pl_accessor2).Finalize();
EXPECT_THAT(result2.status, IsOk());
}
+TEST_F(PostingListIntegerIndexAccessorTest, GetAllDataAndFree) {
+ IntegerIndexData data1(/*section_id=*/3, /*document_id=*/1, /*key=*/123);
+ IntegerIndexData data2(/*section_id=*/3, /*document_id=*/2, /*key=*/456);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor1,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ // Add 2 data.
+ ICING_ASSERT_OK(pl_accessor1->PrependData(data1));
+ ICING_ASSERT_OK(pl_accessor1->PrependData(data2));
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor1).Finalize();
+ ICING_ASSERT_OK(result1.status);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor2,
+ PostingListIntegerIndexAccessor::CreateFromExisting(
+ flash_index_storage_.get(), serializer_.get(), result1.id));
+ EXPECT_THAT(pl_accessor2->GetAllDataAndFree(),
+ IsOkAndHolds(ElementsAre(data2, data1)));
+
+ // Allocate a new posting list with same size again.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor3,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ // Add 2 data.
+ ICING_ASSERT_OK(pl_accessor3->PrependData(data1));
+ ICING_ASSERT_OK(pl_accessor3->PrependData(data2));
+ PostingListAccessor::FinalizeResult result3 =
+ std::move(*pl_accessor3).Finalize();
+ ICING_ASSERT_OK(result3.status);
+ // We should get the same id if the previous one has been freed correctly by
+ // GetAllDataAndFree.
+ EXPECT_THAT(result3.id, Eq(result1.id));
+}
+
+TEST_F(PostingListIntegerIndexAccessorTest, GetAllDataAndFreePostingListChain) {
+ uint32_t block_size = FlashIndexStorage::SelectBlockSize();
+ uint32_t max_posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
+ block_size, serializer_->GetDataTypeBytes());
+ uint32_t max_num_data_single_posting_list =
+ max_posting_list_bytes / serializer_->GetDataTypeBytes();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor1,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+
+ // Prepend max_num_data_single_posting_list + 1 data.
+ std::vector<IntegerIndexData> data_vec;
+ for (uint32_t i = 0; i < max_num_data_single_posting_list + 1; ++i) {
+ IntegerIndexData data(/*section_id=*/3, static_cast<DocumentId>(i),
+ /*key=*/i);
+ ICING_ASSERT_OK(pl_accessor1->PrependData(data));
+ data_vec.push_back(data);
+ }
+
+ // This will cause:
+ // - Allocate the first max-sized posting list at block index = 1, storing
+ // max_num_data_single_posting_list data.
+ // - Allocate the second max-sized posting list at block index = 2, storing 1
+ // data. Also its next_block_index is 1.
+ // - IOW, we will get 2 -> 1 and result1.id points to 2.
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor1).Finalize();
+ ICING_ASSERT_OK(result1.status);
+
+ uint32_t first_pl_block_index = kInvalidBlockIndex;
+ {
+ // result1.id points at the second (max-sized) PL, and next_block_index of
+ // the second PL points to the first PL's block. Fetch the first PL's block
+ // index manually.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(result1.id));
+ first_pl_block_index = pl_holder.next_block_index;
+ }
+ ASSERT_THAT(first_pl_block_index, Ne(kInvalidBlockIndex));
+
+ // Call GetAllDataAndFree. This will free block 2 and block 1.
+ // Free block list: 1 -> 2 (since free block list is LIFO).
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor2,
+ PostingListIntegerIndexAccessor::CreateFromExisting(
+ flash_index_storage_.get(), serializer_.get(), result1.id));
+ EXPECT_THAT(
+ pl_accessor2->GetAllDataAndFree(),
+ IsOkAndHolds(ElementsAreArray(data_vec.rbegin(), data_vec.rend())));
+ pl_accessor2.reset();
+
+ // Allocate a new posting list with same size again.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor3,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ // Add same set of data.
+ for (uint32_t i = 0; i < max_num_data_single_posting_list + 1; ++i) {
+ ICING_ASSERT_OK(pl_accessor3->PrependData(data_vec[i]));
+ }
+
+ // This will cause:
+ // - Allocate the first max-sized posting list from the free block list, which
+ // is block index = 1, storing max_num_data_single_posting_list data.
+ // - Allocate the second max-sized posting list from the next block in free
+ // block list, which is block index = 2, storing 1 data. Also its
+ // next_block_index should be 1.
+ PostingListAccessor::FinalizeResult result3 =
+ std::move(*pl_accessor3).Finalize();
+ ICING_ASSERT_OK(result3.status);
+ // We should get the same id if the previous one has been freed correctly by
+ // GetAllDataAndFree.
+ EXPECT_THAT(result3.id, Eq(result1.id));
+ // Also the first PL should be the same if it has been freed correctly by
+ // GetAllDataAndFree. Since it is a max-sized posting list, we just need to
+ // verify the block index.
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(result3.id));
+ EXPECT_THAT(pl_holder.next_block_index, Eq(first_pl_block_index));
+ }
+}
+
} // namespace
} // namespace lib
diff --git a/icing/index/numeric/posting-list-integer-index-data-accessor.h b/icing/index/numeric/posting-list-integer-index-data-accessor.h
deleted file mode 100644
index 7835bf9..0000000
--- a/icing/index/numeric/posting-list-integer-index-data-accessor.h
+++ /dev/null
@@ -1,108 +0,0 @@
-// Copyright (C) 2022 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef ICING_INDEX_NUMERIC_POSTING_LIST_INTEGER_INDEX_DATA_ACCESSOR_H_
-#define ICING_INDEX_NUMERIC_POSTING_LIST_INTEGER_INDEX_DATA_ACCESSOR_H_
-
-#include <cstdint>
-#include <memory>
-#include <vector>
-
-#include "icing/text_classifier/lib3/utils/base/status.h"
-#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "icing/file/posting_list/flash-index-storage.h"
-#include "icing/file/posting_list/posting-list-accessor.h"
-#include "icing/file/posting_list/posting-list-identifier.h"
-#include "icing/file/posting_list/posting-list-used.h"
-#include "icing/index/numeric/integer-index-data.h"
-#include "icing/index/numeric/posting-list-used-integer-index-data-serializer.h"
-
-namespace icing {
-namespace lib {
-
-// TODO(b/259743562): Refactor PostingListAccessor derived classes
-
-// This class is used to provide a simple abstraction for adding integer index
-// data to posting lists. PostingListIntegerIndexDataAccessor handles:
-// 1) selection of properly-sized posting lists for the accumulated integer
-// index data during Finalize()
-// 2) chaining of max-sized posting lists.
-class PostingListIntegerIndexDataAccessor : public PostingListAccessor {
- public:
- // Creates an empty PostingListIntegerIndexDataAccessor.
- //
- // RETURNS:
- // - On success, a valid instance of PostingListIntegerIndexDataAccessor
- // - INVALID_ARGUMENT error if storage has an invalid block_size.
- static libtextclassifier3::StatusOr<
- std::unique_ptr<PostingListIntegerIndexDataAccessor>>
- Create(FlashIndexStorage* storage,
- PostingListUsedIntegerIndexDataSerializer* serializer);
-
- // Create a PostingListIntegerIndexDataAccessor with an existing posting list
- // identified by existing_posting_list_id.
- //
- // RETURNS:
- // - On success, a valid instance of PostingListIntegerIndexDataAccessor
- // - INVALID_ARGUMENT if storage has an invalid block_size.
- static libtextclassifier3::StatusOr<
- std::unique_ptr<PostingListIntegerIndexDataAccessor>>
- CreateFromExisting(FlashIndexStorage* storage,
- PostingListUsedIntegerIndexDataSerializer* serializer,
- PostingListIdentifier existing_posting_list_id);
-
- PostingListUsedSerializer* GetSerializer() override { return serializer_; }
-
- // Retrieve the next batch of data in the posting list chain
- //
- // RETURNS:
- // - On success, a vector of integer index data in the posting list chain
- // - INTERNAL if called on an instance that was created via Create, if
- // unable to read the next posting list in the chain or if the posting
- // list has been corrupted somehow.
- libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
- GetNextDataBatch();
-
- // Prepend one data. This may result in flushing the posting list to disk (if
- // the PostingListIntegerIndexDataAccessor holds a max-sized posting list that
- // is full) or freeing a pre-existing posting list if it is too small to fit
- // all data necessary.
- //
- // RETURNS:
- // - OK, on success
- // - INVALID_ARGUMENT if !data.is_valid() or if data is greater than the
- // previously added data.
- // - RESOURCE_EXHAUSTED error if unable to grow the index to allocate a new
- // posting list.
- libtextclassifier3::Status PrependData(const IntegerIndexData& data);
-
- // TODO(b/259743562): [Optimization 1] add GetAndClear, IsFull for split
-
- private:
- explicit PostingListIntegerIndexDataAccessor(
- FlashIndexStorage* storage,
- std::unique_ptr<uint8_t[]> posting_list_buffer_array,
- PostingListUsed posting_list_buffer,
- PostingListUsedIntegerIndexDataSerializer* serializer)
- : PostingListAccessor(storage, std::move(posting_list_buffer_array),
- std::move(posting_list_buffer)),
- serializer_(serializer) {}
-
- PostingListUsedIntegerIndexDataSerializer* serializer_; // Does not own.
-};
-
-} // namespace lib
-} // namespace icing
-
-#endif // ICING_INDEX_NUMERIC_POSTING_LIST_INTEGER_INDEX_DATA_ACCESSOR_H_
diff --git a/icing/index/numeric/posting-list-used-integer-index-data-serializer.cc b/icing/index/numeric/posting-list-integer-index-serializer.cc
index 800fd6b..6556451 100644
--- a/icing/index/numeric/posting-list-used-integer-index-data-serializer.cc
+++ b/icing/index/numeric/posting-list-integer-index-serializer.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/index/numeric/posting-list-used-integer-index-data-serializer.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
#include <cstdint>
#include <vector>
@@ -29,7 +29,7 @@
namespace icing {
namespace lib {
-uint32_t PostingListUsedIntegerIndexDataSerializer::GetBytesUsed(
+uint32_t PostingListIntegerIndexSerializer::GetBytesUsed(
const PostingListUsed* posting_list_used) const {
// The special data will be included if they represent actual data. If they
// represent the data start offset or the invalid data sentinel, they are not
@@ -38,7 +38,7 @@ uint32_t PostingListUsedIntegerIndexDataSerializer::GetBytesUsed(
GetStartByteOffset(posting_list_used);
}
-uint32_t PostingListUsedIntegerIndexDataSerializer::GetMinPostingListSizeToFit(
+uint32_t PostingListIntegerIndexSerializer::GetMinPostingListSizeToFit(
const PostingListUsed* posting_list_used) const {
if (IsFull(posting_list_used) || IsAlmostFull(posting_list_used)) {
// If in either the FULL state or ALMOST_FULL state, this posting list *is*
@@ -57,7 +57,7 @@ uint32_t PostingListUsedIntegerIndexDataSerializer::GetMinPostingListSizeToFit(
return GetBytesUsed(posting_list_used) + GetDataTypeBytes();
}
-void PostingListUsedIntegerIndexDataSerializer::Clear(
+void PostingListIntegerIndexSerializer::Clear(
PostingListUsed* posting_list_used) const {
// Safe to ignore return value because posting_list_used->size_in_bytes() is
// a valid argument.
@@ -65,7 +65,7 @@ void PostingListUsedIntegerIndexDataSerializer::Clear(
/*offset=*/posting_list_used->size_in_bytes());
}
-libtextclassifier3::Status PostingListUsedIntegerIndexDataSerializer::MoveFrom(
+libtextclassifier3::Status PostingListIntegerIndexSerializer::MoveFrom(
PostingListUsed* dst, PostingListUsed* src) const {
ICING_RETURN_ERROR_IF_NULL(dst);
ICING_RETURN_ERROR_IF_NULL(src);
@@ -121,7 +121,7 @@ libtextclassifier3::Status PostingListUsedIntegerIndexDataSerializer::MoveFrom(
}
libtextclassifier3::Status
-PostingListUsedIntegerIndexDataSerializer::PrependDataToAlmostFull(
+PostingListIntegerIndexSerializer::PrependDataToAlmostFull(
PostingListUsed* posting_list_used, const IntegerIndexData& data) const {
SpecialDataType special_data = GetSpecialData(posting_list_used, /*index=*/1);
if (special_data.data().basic_hit() < data.basic_hit()) {
@@ -139,7 +139,7 @@ PostingListUsedIntegerIndexDataSerializer::PrependDataToAlmostFull(
return libtextclassifier3::Status::OK;
}
-void PostingListUsedIntegerIndexDataSerializer::PrependDataToEmpty(
+void PostingListIntegerIndexSerializer::PrependDataToEmpty(
PostingListUsed* posting_list_used, const IntegerIndexData& data) const {
// First data to be added. Just add verbatim, no compression.
if (posting_list_used->size_in_bytes() == kSpecialDataSize) {
@@ -165,7 +165,7 @@ void PostingListUsedIntegerIndexDataSerializer::PrependDataToEmpty(
}
libtextclassifier3::Status
-PostingListUsedIntegerIndexDataSerializer::PrependDataToNotFull(
+PostingListIntegerIndexSerializer::PrependDataToNotFull(
PostingListUsed* posting_list_used, const IntegerIndexData& data,
uint32_t offset) const {
IntegerIndexData cur;
@@ -193,8 +193,7 @@ PostingListUsedIntegerIndexDataSerializer::PrependDataToNotFull(
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status
-PostingListUsedIntegerIndexDataSerializer::PrependData(
+libtextclassifier3::Status PostingListIntegerIndexSerializer::PrependData(
PostingListUsed* posting_list_used, const IntegerIndexData& data) const {
static_assert(
sizeof(BasicHit::Value) <= sizeof(uint64_t),
@@ -223,7 +222,7 @@ PostingListUsedIntegerIndexDataSerializer::PrependData(
}
}
-uint32_t PostingListUsedIntegerIndexDataSerializer::PrependDataArray(
+uint32_t PostingListIntegerIndexSerializer::PrependDataArray(
PostingListUsed* posting_list_used, const IntegerIndexData* array,
uint32_t num_data, bool keep_prepended) const {
if (!IsPostingListValid(posting_list_used)) {
@@ -248,14 +247,14 @@ uint32_t PostingListUsedIntegerIndexDataSerializer::PrependDataArray(
}
libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
-PostingListUsedIntegerIndexDataSerializer::GetData(
+PostingListIntegerIndexSerializer::GetData(
const PostingListUsed* posting_list_used) const {
std::vector<IntegerIndexData> data_arr_out;
ICING_RETURN_IF_ERROR(GetData(posting_list_used, &data_arr_out));
return data_arr_out;
}
-libtextclassifier3::Status PostingListUsedIntegerIndexDataSerializer::GetData(
+libtextclassifier3::Status PostingListIntegerIndexSerializer::GetData(
const PostingListUsed* posting_list_used,
std::vector<IntegerIndexData>* data_arr_out) const {
return GetDataInternal(posting_list_used,
@@ -263,8 +262,7 @@ libtextclassifier3::Status PostingListUsedIntegerIndexDataSerializer::GetData(
/*pop=*/false, data_arr_out);
}
-libtextclassifier3::Status
-PostingListUsedIntegerIndexDataSerializer::PopFrontData(
+libtextclassifier3::Status PostingListIntegerIndexSerializer::PopFrontData(
PostingListUsed* posting_list_used, uint32_t num_data) const {
if (num_data == 1 && IsFull(posting_list_used)) {
// The PL is in FULL state which means that we save 2 uncompressed data in
@@ -345,8 +343,7 @@ PostingListUsedIntegerIndexDataSerializer::PopFrontData(
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status
-PostingListUsedIntegerIndexDataSerializer::GetDataInternal(
+libtextclassifier3::Status PostingListIntegerIndexSerializer::GetDataInternal(
const PostingListUsed* posting_list_used, uint32_t limit, bool pop,
std::vector<IntegerIndexData>* out) const {
// TODO(b/259743562): [Optimization 2] handle compressed data
@@ -404,8 +401,8 @@ PostingListUsedIntegerIndexDataSerializer::GetDataInternal(
return libtextclassifier3::Status::OK;
}
-PostingListUsedIntegerIndexDataSerializer::SpecialDataType
-PostingListUsedIntegerIndexDataSerializer::GetSpecialData(
+PostingListIntegerIndexSerializer::SpecialDataType
+PostingListIntegerIndexSerializer::GetSpecialData(
const PostingListUsed* posting_list_used, uint32_t index) const {
// It is ok to temporarily construct a SpecialData with offset = 0 since we're
// going to overwrite it by memcpy.
@@ -417,7 +414,7 @@ PostingListUsedIntegerIndexDataSerializer::GetSpecialData(
return special_data;
}
-void PostingListUsedIntegerIndexDataSerializer::SetSpecialData(
+void PostingListIntegerIndexSerializer::SetSpecialData(
PostingListUsed* posting_list_used, uint32_t index,
const SpecialDataType& special_data) const {
memcpy(posting_list_used->posting_list_buffer() +
@@ -425,7 +422,7 @@ void PostingListUsedIntegerIndexDataSerializer::SetSpecialData(
&special_data, sizeof(SpecialDataType));
}
-bool PostingListUsedIntegerIndexDataSerializer::IsPostingListValid(
+bool PostingListIntegerIndexSerializer::IsPostingListValid(
const PostingListUsed* posting_list_used) const {
if (IsAlmostFull(posting_list_used)) {
// Special data 1 should hold a valid data.
@@ -449,7 +446,7 @@ bool PostingListUsedIntegerIndexDataSerializer::IsPostingListValid(
return true;
}
-uint32_t PostingListUsedIntegerIndexDataSerializer::GetStartByteOffset(
+uint32_t PostingListIntegerIndexSerializer::GetStartByteOffset(
const PostingListUsed* posting_list_used) const {
if (IsFull(posting_list_used)) {
return 0;
@@ -460,7 +457,7 @@ uint32_t PostingListUsedIntegerIndexDataSerializer::GetStartByteOffset(
}
}
-bool PostingListUsedIntegerIndexDataSerializer::SetStartByteOffset(
+bool PostingListIntegerIndexSerializer::SetStartByteOffset(
PostingListUsed* posting_list_used, uint32_t offset) const {
if (offset > posting_list_used->size_in_bytes()) {
ICING_LOG(ERROR) << "offset cannot be a value greater than size "
@@ -497,7 +494,7 @@ bool PostingListUsedIntegerIndexDataSerializer::SetStartByteOffset(
}
libtextclassifier3::StatusOr<uint32_t>
-PostingListUsedIntegerIndexDataSerializer::PrependDataUncompressed(
+PostingListIntegerIndexSerializer::PrependDataUncompressed(
PostingListUsed* posting_list_used, const IntegerIndexData& data,
uint32_t offset) const {
if (offset < kSpecialDataSize + sizeof(IntegerIndexData)) {
diff --git a/icing/index/numeric/posting-list-used-integer-index-data-serializer.h b/icing/index/numeric/posting-list-integer-index-serializer.h
index 49007e3..ea2f2da 100644
--- a/icing/index/numeric/posting-list-used-integer-index-data-serializer.h
+++ b/icing/index/numeric/posting-list-integer-index-serializer.h
@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#ifndef ICING_INDEX_NUMERIC_POSTING_LIST_USED_INTEGER_INDEX_DATA_SERIALIZER_H_
-#define ICING_INDEX_NUMERIC_POSTING_LIST_USED_INTEGER_INDEX_DATA_SERIALIZER_H_
+#ifndef ICING_INDEX_NUMERIC_POSTING_LIST_INTEGER_INDEX_SERIALIZER_H_
+#define ICING_INDEX_NUMERIC_POSTING_LIST_INTEGER_INDEX_SERIALIZER_H_
#include <cstdint>
#include <vector>
@@ -28,8 +28,7 @@ namespace icing {
namespace lib {
// A serializer class to serialize IntegerIndexData to PostingListUsed.
-class PostingListUsedIntegerIndexDataSerializer
- : public PostingListUsedSerializer {
+class PostingListIntegerIndexSerializer : public PostingListSerializer {
public:
using SpecialDataType = SpecialData<IntegerIndexData>;
static_assert(sizeof(SpecialDataType) == sizeof(IntegerIndexData), "");
@@ -112,6 +111,12 @@ class PostingListUsedIntegerIndexDataSerializer
libtextclassifier3::Status PopFrontData(PostingListUsed* posting_list_used,
uint32_t num_data) const;
+ // Helper function to determine if posting list is full.
+ bool IsFull(const PostingListUsed* posting_list_used) const {
+ return GetSpecialData(posting_list_used, /*index=*/0).data().is_valid() &&
+ GetSpecialData(posting_list_used, /*index=*/1).data().is_valid();
+ }
+
private:
// Posting list layout formats:
//
@@ -229,11 +234,6 @@ class PostingListUsedIntegerIndexDataSerializer
// +-----------------+-----------------+---+--------+-----+--------+--------+
// Helpers to determine what state the posting list is in.
- bool IsFull(const PostingListUsed* posting_list_used) const {
- return GetSpecialData(posting_list_used, /*index=*/0).data().is_valid() &&
- GetSpecialData(posting_list_used, /*index=*/1).data().is_valid();
- }
-
bool IsAlmostFull(const PostingListUsed* posting_list_used) const {
return !GetSpecialData(posting_list_used, /*index=*/0).data().is_valid() &&
GetSpecialData(posting_list_used, /*index=*/1).data().is_valid();
@@ -335,4 +335,4 @@ class PostingListUsedIntegerIndexDataSerializer
} // namespace lib
} // namespace icing
-#endif // ICING_INDEX_NUMERIC_POSTING_LIST_USED_INTEGER_INDEX_DATA_SERIALIZER_H_
+#endif // ICING_INDEX_NUMERIC_POSTING_LIST_INTEGER_INDEX_SERIALIZER_H_
diff --git a/icing/index/numeric/posting-list-used-integer-index-data-serializer_test.cc b/icing/index/numeric/posting-list-integer-index-serializer_test.cc
index c270137..bfb4e71 100644
--- a/icing/index/numeric/posting-list-used-integer-index-data-serializer_test.cc
+++ b/icing/index/numeric/posting-list-integer-index-serializer_test.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/index/numeric/posting-list-used-integer-index-data-serializer.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
#include <memory>
#include <vector>
@@ -39,16 +39,13 @@ namespace {
// without ALMOST_FULL) test cases, including for
// PopFrontData.
-TEST(PostingListUsedIntegerIndexDataSerializerTest,
- GetMinPostingListSizeToFitNotNull) {
- PostingListUsedIntegerIndexDataSerializer serializer;
+TEST(PostingListIntegerIndexSerializerTest, GetMinPostingListSizeToFitNotNull) {
+ PostingListIntegerIndexSerializer serializer;
int size = 2551 * sizeof(IntegerIndexData);
- std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
ASSERT_THAT(serializer.PrependData(
&pl_used, IntegerIndexData(/*section_id=*/0,
@@ -65,16 +62,14 @@ TEST(PostingListUsedIntegerIndexDataSerializerTest,
Eq(3 * sizeof(IntegerIndexData)));
}
-TEST(PostingListUsedIntegerIndexDataSerializerTest,
+TEST(PostingListIntegerIndexSerializerTest,
GetMinPostingListSizeToFitAlmostFull) {
- PostingListUsedIntegerIndexDataSerializer serializer;
+ PostingListIntegerIndexSerializer serializer;
int size = 3 * sizeof(IntegerIndexData);
- std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
ASSERT_THAT(serializer.PrependData(
&pl_used, IntegerIndexData(/*section_id=*/0,
@@ -87,16 +82,13 @@ TEST(PostingListUsedIntegerIndexDataSerializerTest,
EXPECT_THAT(serializer.GetMinPostingListSizeToFit(&pl_used), Eq(size));
}
-TEST(PostingListUsedIntegerIndexDataSerializerTest,
- GetMinPostingListSizeToFitFull) {
- PostingListUsedIntegerIndexDataSerializer serializer;
+TEST(PostingListIntegerIndexSerializerTest, GetMinPostingListSizeToFitFull) {
+ PostingListIntegerIndexSerializer serializer;
int size = 3 * sizeof(IntegerIndexData);
- std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
ASSERT_THAT(serializer.PrependData(
&pl_used, IntegerIndexData(/*section_id=*/0,
@@ -113,15 +105,13 @@ TEST(PostingListUsedIntegerIndexDataSerializerTest,
EXPECT_THAT(serializer.GetMinPostingListSizeToFit(&pl_used), Eq(size));
}
-TEST(PostingListUsedIntegerIndexDataSerializerTest, PrependDataNotFull) {
- PostingListUsedIntegerIndexDataSerializer serializer;
+TEST(PostingListIntegerIndexSerializerTest, PrependDataNotFull) {
+ PostingListIntegerIndexSerializer serializer;
int size = 2551 * sizeof(IntegerIndexData);
- std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
// Make used.
IntegerIndexData data0(/*section_id=*/0, /*document_id=*/0, /*key=*/2);
@@ -151,15 +141,13 @@ TEST(PostingListUsedIntegerIndexDataSerializerTest, PrependDataNotFull) {
IsOkAndHolds(ElementsAre(data2, data1, data0)));
}
-TEST(PostingListUsedIntegerIndexDataSerializerTest, PrependDataAlmostFull) {
- PostingListUsedIntegerIndexDataSerializer serializer;
+TEST(PostingListIntegerIndexSerializerTest, PrependDataAlmostFull) {
+ PostingListIntegerIndexSerializer serializer;
int size = 4 * sizeof(IntegerIndexData);
- std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
// Fill up the compressed region.
// Transitions:
@@ -195,16 +183,13 @@ TEST(PostingListUsedIntegerIndexDataSerializerTest, PrependDataAlmostFull) {
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
-TEST(PostingListUsedIntegerIndexDataSerializerTest,
- PrependDataPostingListUsedMinSize) {
- PostingListUsedIntegerIndexDataSerializer serializer;
+TEST(PostingListIntegerIndexSerializerTest, PrependDataPostingListUsedMinSize) {
+ PostingListIntegerIndexSerializer serializer;
int size = serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
// PL State: EMPTY
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(0));
@@ -233,16 +218,14 @@ TEST(PostingListUsedIntegerIndexDataSerializerTest,
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
-TEST(PostingListUsedIntegerIndexDataSerializerTest,
+TEST(PostingListIntegerIndexSerializerTest,
PrependDataArrayDoNotKeepPrepended) {
- PostingListUsedIntegerIndexDataSerializer serializer;
+ PostingListIntegerIndexSerializer serializer;
int size = 6 * sizeof(IntegerIndexData);
- std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<IntegerIndexData> data_in;
std::vector<IntegerIndexData> data_pushed;
@@ -314,16 +297,13 @@ TEST(PostingListUsedIntegerIndexDataSerializerTest,
IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
}
-TEST(PostingListUsedIntegerIndexDataSerializerTest,
- PrependDataArrayKeepPrepended) {
- PostingListUsedIntegerIndexDataSerializer serializer;
+TEST(PostingListIntegerIndexSerializerTest, PrependDataArrayKeepPrepended) {
+ PostingListIntegerIndexSerializer serializer;
int size = 6 * sizeof(IntegerIndexData);
- std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<IntegerIndexData> data_in;
std::vector<IntegerIndexData> data_pushed;
@@ -371,15 +351,13 @@ TEST(PostingListUsedIntegerIndexDataSerializerTest,
IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
}
-TEST(PostingListUsedIntegerIndexDataSerializerTest, MoveFrom) {
- PostingListUsedIntegerIndexDataSerializer serializer;
+TEST(PostingListIntegerIndexSerializerTest, MoveFrom) {
+ PostingListIntegerIndexSerializer serializer;
int size = 3 * serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> buf1 = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used1,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf1.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<IntegerIndexData> data_arr1 = {
IntegerIndexData(/*section_id=*/0, /*document_id=*/0, /*key=*/2),
@@ -389,11 +367,9 @@ TEST(PostingListUsedIntegerIndexDataSerializerTest, MoveFrom) {
/*keep_prepended=*/false),
Eq(data_arr1.size()));
- std::unique_ptr<char[]> buf2 = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used2,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf2.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<IntegerIndexData> data_arr2 = {
IntegerIndexData(/*section_id=*/0, /*document_id=*/2, /*key=*/0),
IntegerIndexData(/*section_id=*/0, /*document_id=*/3, /*key=*/-3),
@@ -412,16 +388,14 @@ TEST(PostingListUsedIntegerIndexDataSerializerTest, MoveFrom) {
EXPECT_THAT(serializer.GetData(&pl_used1), IsOkAndHolds(IsEmpty()));
}
-TEST(PostingListUsedIntegerIndexDataSerializerTest,
+TEST(PostingListIntegerIndexSerializerTest,
MoveToNullReturnsFailedPrecondition) {
- PostingListUsedIntegerIndexDataSerializer serializer;
+ PostingListIntegerIndexSerializer serializer;
int size = 3 * serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<IntegerIndexData> data_arr = {
IntegerIndexData(/*section_id=*/0, /*document_id=*/0, /*key=*/2),
IntegerIndexData(/*section_id=*/0, /*document_id=*/1, /*key=*/5)};
@@ -443,15 +417,13 @@ TEST(PostingListUsedIntegerIndexDataSerializerTest,
IsOkAndHolds(ElementsAreArray(data_arr.rbegin(), data_arr.rend())));
}
-TEST(PostingListUsedIntegerIndexDataSerializerTest, MoveToPostingListTooSmall) {
- PostingListUsedIntegerIndexDataSerializer serializer;
+TEST(PostingListIntegerIndexSerializerTest, MoveToPostingListTooSmall) {
+ PostingListIntegerIndexSerializer serializer;
int size1 = 3 * serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> buf1 = std::make_unique<char[]>(size1);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used1,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf1.get()), size1));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size1));
std::vector<IntegerIndexData> data_arr1 = {
IntegerIndexData(/*section_id=*/0, /*document_id=*/0, /*key=*/2),
IntegerIndexData(/*section_id=*/0, /*document_id=*/1, /*key=*/5),
@@ -464,11 +436,9 @@ TEST(PostingListUsedIntegerIndexDataSerializerTest, MoveToPostingListTooSmall) {
Eq(data_arr1.size()));
int size2 = serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> buf2 = std::make_unique<char[]>(size2);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used2,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf2.get()), size2));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size2));
std::vector<IntegerIndexData> data_arr2 = {
IntegerIndexData(/*section_id=*/0, /*document_id=*/5, /*key=*/-200)};
ASSERT_THAT(
@@ -486,15 +456,13 @@ TEST(PostingListUsedIntegerIndexDataSerializerTest, MoveToPostingListTooSmall) {
IsOkAndHolds(ElementsAreArray(data_arr2.rbegin(), data_arr2.rend())));
}
-TEST(PostingListUsedIntegerIndexDataSerializerTest, PopFrontData) {
- PostingListUsedIntegerIndexDataSerializer serializer;
+TEST(PostingListIntegerIndexSerializerTest, PopFrontData) {
+ PostingListIntegerIndexSerializer serializer;
int size = 2 * serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<IntegerIndexData> data_arr = {
IntegerIndexData(/*section_id=*/0, /*document_id=*/0, /*key=*/2),
diff --git a/icing/index/string-section-indexing-handler.cc b/icing/index/string-section-indexing-handler.cc
index 9b1db7e..f5e06ad 100644
--- a/icing/index/string-section-indexing-handler.cc
+++ b/icing/index/string-section-indexing-handler.cc
@@ -20,6 +20,7 @@
#include <string_view>
#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/index/index.h"
#include "icing/legacy/core/icing-string-util.h"
@@ -29,25 +30,44 @@
#include "icing/store/document-id.h"
#include "icing/transform/normalizer.h"
#include "icing/util/clock.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
#include "icing/util/tokenized-document.h"
namespace icing {
namespace lib {
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<StringSectionIndexingHandler>>
+StringSectionIndexingHandler::Create(const Clock* clock,
+ const Normalizer* normalizer,
+ Index* index) {
+ ICING_RETURN_ERROR_IF_NULL(clock);
+ ICING_RETURN_ERROR_IF_NULL(normalizer);
+ ICING_RETURN_ERROR_IF_NULL(index);
+
+ return std::unique_ptr<StringSectionIndexingHandler>(
+ new StringSectionIndexingHandler(clock, normalizer, index));
+}
+
libtextclassifier3::Status StringSectionIndexingHandler::Handle(
const TokenizedDocument& tokenized_document, DocumentId document_id,
- PutDocumentStatsProto* put_document_stats) {
+ bool recovery_mode, PutDocumentStatsProto* put_document_stats) {
std::unique_ptr<Timer> index_timer = clock_.GetNewTimer();
if (index_.last_added_document_id() != kInvalidDocumentId &&
document_id <= index_.last_added_document_id()) {
+ if (recovery_mode) {
+ // Skip the document if document_id <= last_added_document_id in recovery
+ // mode without returning an error.
+ return libtextclassifier3::Status::OK;
+ }
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
"DocumentId %d must be greater than last added document_id %d",
document_id, index_.last_added_document_id()));
}
- // TODO(b/259744228): revisit last_added_document_id with numeric index for
- // index rebuilding before rollout.
index_.set_last_added_document_id(document_id);
+
uint32_t num_tokens = 0;
libtextclassifier3::Status status;
for (const TokenizedSection& section :
@@ -102,9 +122,19 @@ libtextclassifier3::Status StringSectionIndexingHandler::Handle(
}
}
+ // Check and sort the LiteIndex HitBuffer if we're successful.
+ if (status.ok() && index_.LiteIndexNeedSort()) {
+ std::unique_ptr<Timer> sort_timer = clock_.GetNewTimer();
+ index_.SortLiteIndex();
+
+ if (put_document_stats != nullptr) {
+ put_document_stats->set_lite_index_sort_latency_ms(
+ sort_timer->GetElapsedMilliseconds());
+ }
+ }
+
if (put_document_stats != nullptr) {
- // TODO(b/259744228): switch to set individual index latency.
- put_document_stats->set_index_latency_ms(
+ put_document_stats->set_term_index_latency_ms(
index_timer->GetElapsedMilliseconds());
put_document_stats->mutable_tokenization_stats()->set_num_tokens_indexed(
num_tokens);
@@ -114,7 +144,7 @@ libtextclassifier3::Status StringSectionIndexingHandler::Handle(
// merge.
if ((status.ok() || absl_ports::IsResourceExhausted(status)) &&
index_.WantsMerge()) {
- ICING_LOG(ERROR) << "Merging the index at docid " << document_id << ".";
+ ICING_LOG(INFO) << "Merging the index at docid " << document_id << ".";
std::unique_ptr<Timer> merge_timer = clock_.GetNewTimer();
libtextclassifier3::Status merge_status = index_.Merge();
diff --git a/icing/index/string-section-indexing-handler.h b/icing/index/string-section-indexing-handler.h
index 4906f97..6abfba5 100644
--- a/icing/index/string-section-indexing-handler.h
+++ b/icing/index/string-section-indexing-handler.h
@@ -15,9 +15,12 @@
#ifndef ICING_INDEX_STRING_SECTION_INDEXING_HANDLER_H_
#define ICING_INDEX_STRING_SECTION_INDEXING_HANDLER_H_
+#include <memory>
+
#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/index/data-indexing-handler.h"
#include "icing/index/index.h"
-#include "icing/index/section-indexing-handler.h"
#include "icing/proto/logging.pb.h"
#include "icing/store/document-id.h"
#include "icing/transform/normalizer.h"
@@ -27,25 +30,29 @@
namespace icing {
namespace lib {
-class StringSectionIndexingHandler : public SectionIndexingHandler {
+class StringSectionIndexingHandler : public DataIndexingHandler {
public:
- explicit StringSectionIndexingHandler(const Clock* clock,
- const Normalizer* normalizer,
- Index* index)
- : SectionIndexingHandler(clock),
- normalizer_(*normalizer),
- index_(*index) {}
+ // Creates a StringSectionIndexingHandler instance which does not take
+ // ownership of any input components. All pointers must refer to valid objects
+ // that outlive the created StringSectionIndexingHandler instance.
+ //
+ // Returns:
+ // - A StringSectionIndexingHandler instance on success
+ // - FAILED_PRECONDITION_ERROR if any of the input pointer is null
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<StringSectionIndexingHandler>>
+ Create(const Clock* clock, const Normalizer* normalizer, Index* index);
~StringSectionIndexingHandler() override = default;
- // Handles the string indexing process: add hits into the lite index for all
- // contents in tokenized_document.tokenized_string_sections and merge lite
+ // Handles the string term indexing process: add hits into the lite index for
+ // all contents in tokenized_document.tokenized_string_sections and merge lite
// index into main index if necessary.
//
/// Returns:
// - OK on success
- // - INVALID_ARGUMENT_ERROR if document_id is less than the document_id of a
- // previously indexed document.
+ // - INVALID_ARGUMENT_ERROR if document_id is less than or equal to the
+ // document_id of a previously indexed document in non recovery mode.
// - RESOURCE_EXHAUSTED_ERROR if the index is full and can't add anymore
// content.
// - DATA_LOSS_ERROR if an attempt to merge the index fails and both indices
@@ -54,11 +61,16 @@ class StringSectionIndexingHandler : public SectionIndexingHandler {
// - Any main/lite index errors.
libtextclassifier3::Status Handle(
const TokenizedDocument& tokenized_document, DocumentId document_id,
- PutDocumentStatsProto* put_document_stats) override;
+ bool recovery_mode, PutDocumentStatsProto* put_document_stats) override;
private:
- const Normalizer& normalizer_;
- Index& index_;
+ explicit StringSectionIndexingHandler(const Clock* clock,
+ const Normalizer* normalizer,
+ Index* index)
+ : DataIndexingHandler(clock), normalizer_(*normalizer), index_(*index) {}
+
+ const Normalizer& normalizer_; // Does not own.
+ Index& index_; // Does not own.
};
} // namespace lib
diff --git a/icing/index/string-section-indexing-handler_test.cc b/icing/index/string-section-indexing-handler_test.cc
new file mode 100644
index 0000000..2c7f5e3
--- /dev/null
+++ b/icing/index/string-section-indexing-handler_test.cc
@@ -0,0 +1,587 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/string-section-indexing-handler.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/index.h"
+#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/transform/normalizer-factory.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/tokenized-document.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
+using ::testing::Test;
+
+// Schema type with indexable properties and section Id.
+// Section Id is determined by the lexicographical order of indexable property
+// path.
+// Section id = 0: body
+// Section id = 1: title
+constexpr std::string_view kFakeType = "FakeType";
+constexpr std::string_view kPropertyBody = "body";
+constexpr std::string_view kPropertyTitle = "title";
+
+constexpr SectionId kSectionIdBody = 0;
+constexpr SectionId kSectionIdTitle = 1;
+
+// Schema type with nested indexable properties and section Id.
+// Section id = 0: "name"
+// Section id = 1: "nested.body"
+// Section id = 3: "nested.title"
+// Section id = 4: "subject"
+constexpr std::string_view kNestedType = "NestedType";
+constexpr std::string_view kPropertyName = "name";
+constexpr std::string_view kPropertyNestedDoc = "nested";
+constexpr std::string_view kPropertySubject = "subject";
+
+constexpr SectionId kSectionIdNestedBody = 1;
+
+class StringSectionIndexingHandlerTest : public Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ base_dir_ = GetTestTempDir() + "/icing_test";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ index_dir_ = base_dir_ + "/index";
+ schema_store_dir_ = base_dir_ + "/schema_store";
+ document_store_dir_ = base_dir_ + "/document_store";
+
+ language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ lang_segmenter_,
+ language_segmenter_factory::Create(std::move(segmenter_options)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ normalizer_,
+ normalizer_factory::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int32_t>::max()));
+
+ ASSERT_THAT(
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()),
+ IsTrue());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kFakeType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyTitle)
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyBody)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kNestedType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyNestedDoc)
+ .SetDataTypeDocument(
+ kFakeType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertySubject)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ ASSERT_TRUE(
+ filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult doc_store_create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ document_store_ = std::move(doc_store_create_result.document_store);
+ }
+
+ void TearDown() override {
+ document_store_.reset();
+ schema_store_.reset();
+ normalizer_.reset();
+ lang_segmenter_.reset();
+
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ IcingFilesystem icing_filesystem_;
+ FakeClock fake_clock_;
+ std::string base_dir_;
+ std::string index_dir_;
+ std::string schema_store_dir_;
+ std::string document_store_dir_;
+
+ std::unique_ptr<LanguageSegmenter> lang_segmenter_;
+ std::unique_ptr<Normalizer> normalizer_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> document_store_;
+};
+
+std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> iterator) {
+ std::vector<DocHitInfo> infos;
+ while (iterator->Advance().ok()) {
+ infos.push_back(iterator->doc_hit_info());
+ }
+ return infos;
+}
+
+std::vector<DocHitInfoTermFrequencyPair> GetHitsWithTermFrequency(
+ std::unique_ptr<DocHitInfoIterator> iterator) {
+ std::vector<DocHitInfoTermFrequencyPair> infos;
+ while (iterator->Advance().ok()) {
+ std::vector<TermMatchInfo> matched_terms_stats;
+ iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ for (const TermMatchInfo& term_match_info : matched_terms_stats) {
+ infos.push_back(DocHitInfoTermFrequencyPair(
+ iterator->doc_hit_info(), term_match_info.term_frequencies));
+ }
+ }
+ return infos;
+}
+
+TEST_F(StringSectionIndexingHandlerTest,
+ HandleIntoLiteIndex_sortInIndexingNotTriggered) {
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "foo")
+ .AddStringProperty(std::string(kPropertyBody), "foo bar baz")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(tokenized_document.document()));
+
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<StringSectionIndexingHandler> handler,
+ StringSectionIndexingHandler::Create(&fake_clock_, normalizer_.get(),
+ index.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, document_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+
+ EXPECT_THAT(index->last_added_document_id(), Eq(document_id));
+
+ // Query 'foo'
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ std::vector<DocHitInfoTermFrequencyPair> hits =
+ GetHitsWithTermFrequency(std::move(itr));
+ std::unordered_map<SectionId, Hit::TermFrequency> expected_map{
+ {kSectionIdTitle, 1}, {kSectionIdBody, 1}};
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ document_id, expected_map)));
+
+ // Query 'foo' with sectionId mask that masks all results
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, 1U << 2,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+}
+
+TEST_F(StringSectionIndexingHandlerTest,
+ HandleIntoLiteIndex_sortInIndexingTriggered) {
+ // Create the LiteIndex with a smaller sort threshold. At 64 bytes we sort the
+ // HitBuffer after inserting 8 hits
+ Index::Options options(index_dir_,
+ /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ DocumentProto document0 =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "foo foo foo")
+ .AddStringProperty(std::string(kPropertyBody), "foo bar baz")
+ .Build();
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "bar baz baz")
+ .AddStringProperty(std::string(kPropertyBody), "foo foo baz")
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("icing", "nested_type/0")
+ .SetSchema(std::string(kNestedType))
+ .AddDocumentProperty(std::string(kPropertyNestedDoc), document1)
+ .AddStringProperty(std::string(kPropertyName), "qux")
+ .AddStringProperty(std::string(kPropertySubject), "bar bar")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document0,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document0)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id0,
+ document_store_->Put(tokenized_document0.document()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document1,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(tokenized_document1.document()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document2,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(tokenized_document2.document()));
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<StringSectionIndexingHandler> handler,
+ StringSectionIndexingHandler::Create(&fake_clock_, normalizer_.get(),
+ index.get()));
+
+ // Handle doc0 and doc1. The LiteIndex should sort and merge after adding
+ // these
+ EXPECT_THAT(handler->Handle(tokenized_document0, document_id0,
+ /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(handler->Handle(tokenized_document1, document_id1,
+ /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(index->last_added_document_id(), Eq(document_id1));
+ EXPECT_THAT(index->LiteIndexNeedSort(), IsFalse());
+
+ // Handle doc2. The LiteIndex should have an unsorted portion after adding
+ EXPECT_THAT(handler->Handle(tokenized_document2, document_id2,
+ /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(index->last_added_document_id(), Eq(document_id2));
+
+ // Hits in the hit buffer:
+ // <term>: {(docId, sectionId, term_freq)...}
+ // foo: {(0, kSectionIdTitle, 3); (0, kSectionIdBody, 1);
+ // (1, kSectionIdBody, 2);
+ // (2, kSectionIdNestedBody, 2)}
+ // bar: {(0, kSectionIdBody, 1);
+ // (1, kSectionIdTitle, 1);
+ // (2, kSectionIdNestedTitle, 1); (2, kSectionIdSubject, 2)}
+ // baz: {(0, kSectionIdBody, 1);
+ // (1, kSectionIdTitle, 2); (1, kSectionIdBody, 1),
+ // (2, kSectionIdNestedTitle, 2); (2, kSectionIdNestedBody, 1)}
+ // qux: {(2, kSectionIdName, 1)}
+
+ // Query 'foo'
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+
+ // Advance the iterator and verify that we're returning hits in the correct
+ // order (i.e. in descending order of DocId)
+ ASSERT_THAT(itr->Advance(), IsOk());
+ EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(2));
+ EXPECT_THAT(itr->doc_hit_info().hit_section_ids_mask(),
+ Eq(1U << kSectionIdNestedBody));
+ std::vector<TermMatchInfo> matched_terms_stats;
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map2 = {{kSectionIdNestedBody, 2}};
+ itr->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map2)));
+
+ ASSERT_THAT(itr->Advance(), IsOk());
+ EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(1));
+ EXPECT_THAT(itr->doc_hit_info().hit_section_ids_mask(),
+ Eq(1U << kSectionIdBody));
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map1 = {{kSectionIdBody, 2}};
+ matched_terms_stats.clear();
+ itr->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map1)));
+
+ ASSERT_THAT(itr->Advance(), IsOk());
+ EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(0));
+ EXPECT_THAT(itr->doc_hit_info().hit_section_ids_mask(),
+ Eq(1U << kSectionIdTitle | 1U << kSectionIdBody));
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map0 = {{kSectionIdTitle, 3},
+ {kSectionIdBody, 1}};
+ matched_terms_stats.clear();
+ itr->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map0)));
+}
+
+TEST_F(StringSectionIndexingHandlerTest,
+ HandleIntoLiteIndex_enableSortInIndexing) {
+ // Create the LiteIndex with a smaller sort threshold. At 64 bytes we sort the
+ // HitBuffer after inserting 8 hits
+ Index::Options options(index_dir_,
+ /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/false,
+ /*lite_index_sort_size=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ DocumentProto document0 =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "foo foo foo")
+ .AddStringProperty(std::string(kPropertyBody), "foo bar baz")
+ .Build();
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "bar baz baz")
+ .AddStringProperty(std::string(kPropertyBody), "foo foo baz")
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("icing", "nested_type/0")
+ .SetSchema(std::string(kNestedType))
+ .AddDocumentProperty(std::string(kPropertyNestedDoc), document1)
+ .AddStringProperty(std::string(kPropertyName), "qux")
+ .AddStringProperty(std::string(kPropertySubject), "bar bar")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document0,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document0)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id0,
+ document_store_->Put(tokenized_document0.document()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document1,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(tokenized_document1.document()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document2,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(tokenized_document2.document()));
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<StringSectionIndexingHandler> handler,
+ StringSectionIndexingHandler::Create(&fake_clock_, normalizer_.get(),
+ index.get()));
+
+ // Handle all docs
+ EXPECT_THAT(handler->Handle(tokenized_document0, document_id0,
+ /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(handler->Handle(tokenized_document1, document_id1,
+ /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(handler->Handle(tokenized_document2, document_id2,
+ /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(index->last_added_document_id(), Eq(document_id2));
+
+ // We've disabled sorting during indexing so the HitBuffer's unsorted section
+ // should exceed the sort threshold. PersistToDisk and reinitialize the
+ // LiteIndex with sort_at_indexing=true.
+ ASSERT_THAT(index->PersistToDisk(), IsOk());
+ options = Index::Options(index_dir_,
+ /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index, Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ // Verify that the HitBuffer has been sorted after initializing with
+ // sort_at_indexing enabled.
+ EXPECT_THAT(index->LiteIndexNeedSort(), IsFalse());
+
+ // Hits in the hit buffer:
+ // <term>: {(docId, sectionId, term_freq)...}
+ // foo: {(0, kSectionIdTitle, 3); (0, kSectionIdBody, 1);
+ // (1, kSectionIdBody, 2);
+ // (2, kSectionIdNestedBody, 2)}
+ // bar: {(0, kSectionIdBody, 1);
+ // (1, kSectionIdTitle, 1);
+ // (2, kSectionIdNestedTitle, 1); (2, kSectionIdSubject, 2)}
+ // baz: {(0, kSectionIdBody, 1);
+ // (1, kSectionIdTitle, 2); (1, kSectionIdBody, 1),
+ // (2, kSectionIdNestedTitle, 2); (2, kSectionIdNestedBody, 1)}
+ // qux: {(2, kSectionIdName, 1)}
+
+ // Query 'foo'
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+
+ // Advance the iterator and verify that we're returning hits in the correct
+ // order (i.e. in descending order of DocId)
+ ASSERT_THAT(itr->Advance(), IsOk());
+ EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(2));
+ EXPECT_THAT(itr->doc_hit_info().hit_section_ids_mask(),
+ Eq(1U << kSectionIdNestedBody));
+ std::vector<TermMatchInfo> matched_terms_stats;
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map2 = {{kSectionIdNestedBody, 2}};
+ itr->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map2)));
+
+ ASSERT_THAT(itr->Advance(), IsOk());
+ EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(1));
+ EXPECT_THAT(itr->doc_hit_info().hit_section_ids_mask(),
+ Eq(1U << kSectionIdBody));
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map1 = {{kSectionIdBody, 2}};
+ matched_terms_stats.clear();
+ itr->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map1)));
+
+ ASSERT_THAT(itr->Advance(), IsOk());
+ EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(0));
+ EXPECT_THAT(itr->doc_hit_info().hit_section_ids_mask(),
+ Eq(1U << kSectionIdTitle | 1U << kSectionIdBody));
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map0 = {{kSectionIdTitle, 3},
+ {kSectionIdBody, 1}};
+ matched_terms_stats.clear();
+ itr->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map0)));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/jni.lds b/icing/jni.lds
index 401682a..64fae36 100644
--- a/icing/jni.lds
+++ b/icing/jni.lds
@@ -1,7 +1,6 @@
VERS_1.0 {
# Export JNI symbols.
global:
- Java_*;
JNI_OnLoad;
# Hide everything else
diff --git a/icing/jni/icing-search-engine-jni.cc b/icing/jni/icing-search-engine-jni.cc
index 51f3106..a0883fa 100644
--- a/icing/jni/icing-search-engine-jni.cc
+++ b/icing/jni/icing-search-engine-jni.cc
@@ -17,7 +17,6 @@
#include <string>
#include <utility>
-#include <google/protobuf/message_lite.h>
#include "icing/icing-search-engine.h"
#include "icing/jni/jni-cache.h"
#include "icing/jni/scoped-primitive-array-critical.h"
@@ -33,21 +32,18 @@
#include "icing/proto/usage.pb.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
+#include <google/protobuf/message_lite.h>
namespace {
-// JNI string constants
-// Matches field name of IcingSearchEngine#nativePointer.
-const char kNativePointerField[] = "nativePointer";
-
bool ParseProtoFromJniByteArray(JNIEnv* env, jbyteArray bytes,
google::protobuf::MessageLite* protobuf) {
icing::lib::ScopedPrimitiveArrayCritical<uint8_t> scoped_array(env, bytes);
return protobuf->ParseFromArray(scoped_array.data(), scoped_array.size());
}
-jbyteArray SerializeProtoToJniByteArray(
- JNIEnv* env, const google::protobuf::MessageLite& protobuf) {
+jbyteArray SerializeProtoToJniByteArray(JNIEnv* env,
+ const google::protobuf::MessageLite& protobuf) {
int size = protobuf.ByteSizeLong();
jbyteArray ret = env->NewByteArray(size);
if (ret == nullptr) {
@@ -61,11 +57,14 @@ jbyteArray SerializeProtoToJniByteArray(
return ret;
}
+struct {
+ jfieldID native_pointer;
+} JavaIcingSearchEngineImpl;
+
icing::lib::IcingSearchEngine* GetIcingSearchEnginePointer(JNIEnv* env,
jobject object) {
- jclass cls = env->GetObjectClass(object);
- jfieldID field_id = env->GetFieldID(cls, kNativePointerField, "J");
- jlong native_pointer = env->GetLongField(object, field_id);
+ jlong native_pointer =
+ env->GetLongField(object, JavaIcingSearchEngineImpl.native_pointer);
return reinterpret_cast<icing::lib::IcingSearchEngine*>(native_pointer);
}
@@ -73,19 +72,8 @@ icing::lib::IcingSearchEngine* GetIcingSearchEnginePointer(JNIEnv* env,
extern "C" {
-jint JNI_OnLoad(JavaVM* vm, void* reserved) {
- JNIEnv* env;
- if (vm->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION_1_6) != JNI_OK) {
- ICING_LOG(icing::lib::ERROR) << "ERROR: GetEnv failed";
- return JNI_ERR;
- }
-
- return JNI_VERSION_1_6;
-}
-
-JNIEXPORT jlong JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeCreate(
- JNIEnv* env, jclass clazz, jbyteArray icing_search_engine_options_bytes) {
+jlong nativeCreate(JNIEnv* env, jclass clazz,
+ jbyteArray icing_search_engine_options_bytes) {
icing::lib::IcingSearchEngineOptions options;
if (!ParseProtoFromJniByteArray(env, icing_search_engine_options_bytes,
&options)) {
@@ -103,17 +91,13 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeCreate(
return reinterpret_cast<jlong>(icing);
}
-JNIEXPORT void JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeDestroy(
- JNIEnv* env, jclass clazz, jobject object) {
+void nativeDestroy(JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
delete icing;
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeInitialize(
- JNIEnv* env, jclass clazz, jobject object) {
+jbyteArray nativeInitialize(JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -123,10 +107,9 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeInitialize(
return SerializeProtoToJniByteArray(env, initialize_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeSetSchema(
- JNIEnv* env, jclass clazz, jobject object, jbyteArray schema_bytes,
- jboolean ignore_errors_and_delete_documents) {
+jbyteArray nativeSetSchema(JNIEnv* env, jclass clazz, jobject object,
+ jbyteArray schema_bytes,
+ jboolean ignore_errors_and_delete_documents) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -143,9 +126,7 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeSetSchema(
return SerializeProtoToJniByteArray(env, set_schema_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetSchema(
- JNIEnv* env, jclass clazz, jobject object) {
+jbyteArray nativeGetSchema(JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -154,9 +135,8 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetSchema(
return SerializeProtoToJniByteArray(env, get_schema_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetSchemaType(
- JNIEnv* env, jclass clazz, jobject object, jstring schema_type) {
+jbyteArray nativeGetSchemaType(JNIEnv* env, jclass clazz, jobject object,
+ jstring schema_type) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -167,9 +147,8 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetSchemaType(
return SerializeProtoToJniByteArray(env, get_schema_type_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativePut(
- JNIEnv* env, jclass clazz, jobject object, jbyteArray document_bytes) {
+jbyteArray nativePut(JNIEnv* env, jclass clazz, jobject object,
+ jbyteArray document_bytes) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -186,10 +165,9 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativePut(
return SerializeProtoToJniByteArray(env, put_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeGet(
- JNIEnv* env, jclass clazz, jobject object, jstring name_space, jstring uri,
- jbyteArray result_spec_bytes) {
+jbyteArray nativeGet(JNIEnv* env, jclass clazz, jobject object,
+ jstring name_space, jstring uri,
+ jbyteArray result_spec_bytes) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -208,9 +186,8 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeGet(
return SerializeProtoToJniByteArray(env, get_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeReportUsage(
- JNIEnv* env, jclass clazz, jobject object, jbyteArray usage_report_bytes) {
+jbyteArray nativeReportUsage(JNIEnv* env, jclass clazz, jobject object,
+ jbyteArray usage_report_bytes) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -227,9 +204,7 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeReportUsage(
return SerializeProtoToJniByteArray(env, report_usage_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetAllNamespaces(
- JNIEnv* env, jclass clazz, jobject object) {
+jbyteArray nativeGetAllNamespaces(JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -239,10 +214,9 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetAllNamespaces(
return SerializeProtoToJniByteArray(env, get_all_namespaces_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetNextPage(
- JNIEnv* env, jclass clazz, jobject object, jlong next_page_token,
- jlong java_to_native_start_timestamp_ms) {
+jbyteArray nativeGetNextPage(JNIEnv* env, jclass clazz, jobject object,
+ jlong next_page_token,
+ jlong java_to_native_start_timestamp_ms) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -263,9 +237,8 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetNextPage(
return SerializeProtoToJniByteArray(env, next_page_result_proto);
}
-JNIEXPORT void JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeInvalidateNextPageToken(
- JNIEnv* env, jclass clazz, jobject object, jlong next_page_token) {
+void nativeInvalidateNextPageToken(JNIEnv* env, jclass clazz, jobject object,
+ jlong next_page_token) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -274,11 +247,11 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeInvalidateNextPageToke
return;
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeSearch(
- JNIEnv* env, jclass clazz, jobject object, jbyteArray search_spec_bytes,
- jbyteArray scoring_spec_bytes, jbyteArray result_spec_bytes,
- jlong java_to_native_start_timestamp_ms) {
+jbyteArray nativeSearch(JNIEnv* env, jclass clazz, jobject object,
+ jbyteArray search_spec_bytes,
+ jbyteArray scoring_spec_bytes,
+ jbyteArray result_spec_bytes,
+ jlong java_to_native_start_timestamp_ms) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -321,10 +294,8 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeSearch(
return SerializeProtoToJniByteArray(env, search_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeDelete(
- JNIEnv* env, jclass clazz, jobject object, jstring name_space,
- jstring uri) {
+jbyteArray nativeDelete(JNIEnv* env, jclass clazz, jobject object,
+ jstring name_space, jstring uri) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -336,9 +307,8 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeDelete(
return SerializeProtoToJniByteArray(env, delete_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeDeleteByNamespace(
- JNIEnv* env, jclass clazz, jobject object, jstring name_space) {
+jbyteArray nativeDeleteByNamespace(JNIEnv* env, jclass clazz, jobject object,
+ jstring name_space) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -349,9 +319,8 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeDeleteByNamespace(
return SerializeProtoToJniByteArray(env, delete_by_namespace_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeDeleteBySchemaType(
- JNIEnv* env, jclass clazz, jobject object, jstring schema_type) {
+jbyteArray nativeDeleteBySchemaType(JNIEnv* env, jclass clazz, jobject object,
+ jstring schema_type) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -362,10 +331,9 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeDeleteBySchemaType(
return SerializeProtoToJniByteArray(env, delete_by_schema_type_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeDeleteByQuery(
- JNIEnv* env, jclass clazz, jobject object, jbyteArray search_spec_bytes,
- jboolean return_deleted_document_info) {
+jbyteArray nativeDeleteByQuery(JNIEnv* env, jclass clazz, jobject object,
+ jbyteArray search_spec_bytes,
+ jboolean return_deleted_document_info) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -381,9 +349,8 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeDeleteByQuery(
return SerializeProtoToJniByteArray(env, delete_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativePersistToDisk(
- JNIEnv* env, jclass clazz, jobject object, jint persist_type_code) {
+jbyteArray nativePersistToDisk(JNIEnv* env, jclass clazz, jobject object,
+ jint persist_type_code) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -400,9 +367,7 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativePersistToDisk(
return SerializeProtoToJniByteArray(env, persist_to_disk_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeOptimize(
- JNIEnv* env, jclass clazz, jobject object) {
+jbyteArray nativeOptimize(JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -411,9 +376,7 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeOptimize(
return SerializeProtoToJniByteArray(env, optimize_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetOptimizeInfo(
- JNIEnv* env, jclass clazz, jobject object) {
+jbyteArray nativeGetOptimizeInfo(JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -423,9 +386,7 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetOptimizeInfo(
return SerializeProtoToJniByteArray(env, get_optimize_info_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetStorageInfo(
- JNIEnv* env, jclass clazz, jobject object) {
+jbyteArray nativeGetStorageInfo(JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -435,9 +396,7 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetStorageInfo(
return SerializeProtoToJniByteArray(env, storage_info_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeReset(
- JNIEnv* env, jclass clazz, jobject object) {
+jbyteArray nativeReset(JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -446,10 +405,8 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeReset(
return SerializeProtoToJniByteArray(env, reset_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeSearchSuggestions(
- JNIEnv* env, jclass clazz, jobject object,
- jbyteArray suggestion_spec_bytes) {
+jbyteArray nativeSearchSuggestions(JNIEnv* env, jclass clazz, jobject object,
+ jbyteArray suggestion_spec_bytes) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -466,9 +423,8 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeSearchSuggestions(
return SerializeProtoToJniByteArray(env, suggestionResponse);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetDebugInfo(
- JNIEnv* env, jclass clazz, jobject object, jint verbosity) {
+jbyteArray nativeGetDebugInfo(JNIEnv* env, jclass clazz, jobject object,
+ jint verbosity) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -485,9 +441,8 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetDebugInfo(
return SerializeProtoToJniByteArray(env, debug_info_result_proto);
}
-JNIEXPORT jboolean JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeShouldLog(
- JNIEnv* env, jclass clazz, jshort severity, jshort verbosity) {
+jboolean nativeShouldLog(JNIEnv* env, jclass clazz, jshort severity,
+ jshort verbosity) {
if (!icing::lib::LogSeverity::Code_IsValid(severity)) {
ICING_LOG(icing::lib::ERROR)
<< "Invalid value for logging severity: " << severity;
@@ -497,9 +452,8 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeShouldLog(
static_cast<icing::lib::LogSeverity::Code>(severity), verbosity);
}
-JNIEXPORT jboolean JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeSetLoggingLevel(
- JNIEnv* env, jclass clazz, jshort severity, jshort verbosity) {
+jboolean nativeSetLoggingLevel(JNIEnv* env, jclass clazz, jshort severity,
+ jshort verbosity) {
if (!icing::lib::LogSeverity::Code_IsValid(severity)) {
ICING_LOG(icing::lib::ERROR)
<< "Invalid value for logging severity: " << severity;
@@ -509,216 +463,111 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeSetLoggingLevel(
static_cast<icing::lib::LogSeverity::Code>(severity), verbosity);
}
-JNIEXPORT jstring JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetLoggingTag(
- JNIEnv* env, jclass clazz) {
+jstring nativeGetLoggingTag(JNIEnv* env, jclass clazz) {
return env->NewStringUTF(icing::lib::kIcingLoggingTag);
}
-// TODO(b/240333360) Remove the methods below for IcingSearchEngine once we have
-// a sync from Jetpack to g3 to contain the refactored IcingSearchEngine(with
-// IcingSearchEngineImpl).
-JNIEXPORT jlong JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeCreate(
- JNIEnv* env, jclass clazz, jbyteArray icing_search_engine_options_bytes) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeCreate(
- env, clazz, icing_search_engine_options_bytes);
-}
-
-JNIEXPORT void JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeDestroy(JNIEnv* env,
- jclass clazz,
- jobject object) {
- Java_com_google_android_icing_IcingSearchEngineImpl_nativeDestroy(env, clazz,
- object);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeInitialize(
- JNIEnv* env, jclass clazz, jobject object) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeInitialize(
- env, clazz, object);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeSetSchema(
- JNIEnv* env, jclass clazz, jobject object, jbyteArray schema_bytes,
- jboolean ignore_errors_and_delete_documents) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeSetSchema(
- env, clazz, object, schema_bytes, ignore_errors_and_delete_documents);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeGetSchema(
- JNIEnv* env, jclass clazz, jobject object) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetSchema(
- env, clazz, object);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeGetSchemaType(
- JNIEnv* env, jclass clazz, jobject object, jstring schema_type) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetSchemaType(
- env, clazz, object, schema_type);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativePut(
- JNIEnv* env, jclass clazz, jobject object, jbyteArray document_bytes) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativePut(
- env, clazz, object, document_bytes);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeGet(
- JNIEnv* env, jclass clazz, jobject object, jstring name_space, jstring uri,
- jbyteArray result_spec_bytes) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeGet(
- env, clazz, object, name_space, uri, result_spec_bytes);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeReportUsage(
- JNIEnv* env, jclass clazz, jobject object, jbyteArray usage_report_bytes) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeReportUsage(
- env, clazz, object, usage_report_bytes);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeGetAllNamespaces(
- JNIEnv* env, jclass clazz, jobject object) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetAllNamespaces(
- env, clazz, object);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeGetNextPage(
- JNIEnv* env, jclass clazz, jobject object, jlong next_page_token,
- jlong java_to_native_start_timestamp_ms) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetNextPage(
- env, clazz, object, next_page_token, java_to_native_start_timestamp_ms);
-}
-
-JNIEXPORT void JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeInvalidateNextPageToken(
- JNIEnv* env, jclass clazz, jobject object, jlong next_page_token) {
- Java_com_google_android_icing_IcingSearchEngineImpl_nativeInvalidateNextPageToken(
- env, clazz, object, next_page_token);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeSearch(
- JNIEnv* env, jclass clazz, jobject object, jbyteArray search_spec_bytes,
- jbyteArray scoring_spec_bytes, jbyteArray result_spec_bytes,
- jlong java_to_native_start_timestamp_ms) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeSearch(
- env, clazz, object, search_spec_bytes, scoring_spec_bytes,
- result_spec_bytes, java_to_native_start_timestamp_ms);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeDelete(JNIEnv* env,
- jclass clazz,
- jobject object,
- jstring name_space,
- jstring uri) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeDelete(
- env, clazz, object, name_space, uri);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeDeleteByNamespace(
- JNIEnv* env, jclass clazz, jobject object, jstring name_space) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeDeleteByNamespace(
- env, clazz, object, name_space);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeDeleteBySchemaType(
- JNIEnv* env, jclass clazz, jobject object, jstring schema_type) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeDeleteBySchemaType(
- env, clazz, object, schema_type);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeDeleteByQuery(
- JNIEnv* env, jclass clazz, jobject object, jbyteArray search_spec_bytes,
- jboolean return_deleted_document_info) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeDeleteByQuery(
- env, clazz, object, search_spec_bytes, return_deleted_document_info);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativePersistToDisk(
- JNIEnv* env, jclass clazz, jobject object, jint persist_type_code) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativePersistToDisk(
- env, clazz, object, persist_type_code);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeOptimize(JNIEnv* env,
- jclass clazz,
- jobject object) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeOptimize(
- env, clazz, object);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeGetOptimizeInfo(
- JNIEnv* env, jclass clazz, jobject object) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetOptimizeInfo(
- env, clazz, object);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeGetStorageInfo(
- JNIEnv* env, jclass clazz, jobject object) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetStorageInfo(
- env, clazz, object);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeReset(JNIEnv* env,
- jclass clazz,
- jobject object) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeReset(
- env, clazz, object);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeSearchSuggestions(
- JNIEnv* env, jclass clazz, jobject object,
- jbyteArray suggestion_spec_bytes) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeSearchSuggestions(
- env, clazz, object, suggestion_spec_bytes);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeGetDebugInfo(
- JNIEnv* env, jclass clazz, jobject object, jint verbosity) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetDebugInfo(
- env, clazz, object, verbosity);
-}
-
-JNIEXPORT jboolean JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeShouldLog(
- JNIEnv* env, jclass clazz, jshort severity, jshort verbosity) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeShouldLog(
- env, clazz, severity, verbosity);
-}
+#pragma clang diagnostic ignored "-Wwrite-strings"
+jint JNI_OnLoad(JavaVM* vm, void* reserved) {
+ JNIEnv* env;
+ if (vm->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION_1_6) != JNI_OK) {
+ ICING_LOG(icing::lib::ERROR) << "ERROR: GetEnv failed";
+ return JNI_ERR;
+ }
-JNIEXPORT jboolean JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeSetLoggingLevel(
- JNIEnv* env, jclass clazz, jshort severity, jshort verbosity) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeSetLoggingLevel(
- env, clazz, severity, verbosity);
-}
+ // Find your class. JNI_OnLoad is called from the correct class loader context
+ // for this to work.
+ jclass java_class =
+ env->FindClass("com/google/android/icing/IcingSearchEngineImpl");
+ if (java_class == nullptr) {
+ return JNI_ERR;
+ }
+ JavaIcingSearchEngineImpl.native_pointer =
+ env->GetFieldID(java_class, "nativePointer", "J");
+
+ // Register your class' native methods.
+ static const JNINativeMethod methods[] = {
+ {"nativeCreate", "([B)J", reinterpret_cast<void*>(nativeCreate)},
+ {"nativeDestroy", "(Lcom/google/android/icing/IcingSearchEngineImpl;)V",
+ reinterpret_cast<void*>(nativeDestroy)},
+ {"nativeInitialize",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;)[B",
+ reinterpret_cast<void*>(nativeInitialize)},
+ {"nativeSetSchema",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;[BZ)[B",
+ reinterpret_cast<void*>(nativeSetSchema)},
+ {"nativeGetSchema",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;)[B",
+ reinterpret_cast<void*>(nativeGetSchema)},
+ {"nativeGetSchemaType",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;Ljava/lang/String;)[B",
+ reinterpret_cast<void*>(nativeGetSchemaType)},
+ {"nativePut", "(Lcom/google/android/icing/IcingSearchEngineImpl;[B)[B",
+ reinterpret_cast<void*>(nativePut)},
+ {"nativeGet",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;Ljava/lang/"
+ "String;Ljava/lang/String;[B)[B",
+ reinterpret_cast<void*>(nativeGet)},
+ {"nativeReportUsage",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;[B)[B",
+ reinterpret_cast<void*>(nativeReportUsage)},
+ {"nativeGetAllNamespaces",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;)[B",
+ reinterpret_cast<void*>(nativeGetAllNamespaces)},
+ {"nativeGetNextPage",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;JJ)[B",
+ reinterpret_cast<void*>(nativeGetNextPage)},
+ {"nativeInvalidateNextPageToken",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;J)V",
+ reinterpret_cast<void*>(nativeInvalidateNextPageToken)},
+ {"nativeSearch",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;[B[B[BJ)[B",
+ reinterpret_cast<void*>(nativeSearch)},
+ {"nativeDelete",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;Ljava/lang/"
+ "String;Ljava/lang/String;)[B",
+ reinterpret_cast<void*>(nativeDelete)},
+ {"nativeDeleteByNamespace",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;Ljava/lang/String;)[B",
+ reinterpret_cast<void*>(nativeDeleteByNamespace)},
+ {"nativeDeleteBySchemaType",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;Ljava/lang/String;)[B",
+ reinterpret_cast<void*>(nativeDeleteBySchemaType)},
+ {"nativeDeleteByQuery",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;[BZ)[B",
+ reinterpret_cast<void*>(nativeDeleteByQuery)},
+ {"nativePersistToDisk",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;I)[B",
+ reinterpret_cast<void*>(nativePersistToDisk)},
+ {"nativeOptimize", "(Lcom/google/android/icing/IcingSearchEngineImpl;)[B",
+ reinterpret_cast<void*>(nativeOptimize)},
+ {"nativeGetOptimizeInfo",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;)[B",
+ reinterpret_cast<void*>(nativeGetOptimizeInfo)},
+ {"nativeGetStorageInfo",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;)[B",
+ reinterpret_cast<void*>(nativeGetStorageInfo)},
+ {"nativeReset", "(Lcom/google/android/icing/IcingSearchEngineImpl;)[B",
+ reinterpret_cast<void*>(nativeReset)},
+ {"nativeSearchSuggestions",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;[B)[B",
+ reinterpret_cast<void*>(nativeSearchSuggestions)},
+ {"nativeGetDebugInfo",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;I)[B",
+ reinterpret_cast<void*>(nativeGetDebugInfo)},
+ {"nativeShouldLog", "(SS)Z", reinterpret_cast<void*>(nativeShouldLog)},
+ {"nativeSetLoggingLevel", "(SS)Z",
+ reinterpret_cast<void*>(nativeSetLoggingLevel)},
+ {"nativeGetLoggingTag", "()Ljava/lang/String;",
+ reinterpret_cast<void*>(nativeGetLoggingTag)},
+ };
+ int register_natives_success = env->RegisterNatives(
+ java_class, methods, sizeof(methods) / sizeof(JNINativeMethod));
+ if (register_natives_success != JNI_OK) {
+ return register_natives_success;
+ }
-JNIEXPORT jstring JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeGetLoggingTag(
- JNIEnv* env, jclass clazz) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetLoggingTag(
- env, clazz);
+ return JNI_VERSION_1_6;
}
} // extern "C"
diff --git a/icing/join/doc-join-info.cc b/icing/join/doc-join-info.cc
new file mode 100644
index 0000000..3b06f01
--- /dev/null
+++ b/icing/join/doc-join-info.cc
@@ -0,0 +1,49 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/doc-join-info.h"
+
+#include <cstdint>
+
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-id.h"
+#include "icing/util/bit-util.h"
+
+namespace icing {
+namespace lib {
+
+DocJoinInfo::DocJoinInfo(DocumentId document_id,
+ JoinablePropertyId joinable_property_id) {
+ Value temp_value = 0;
+ bit_util::BitfieldSet(/*new_value=*/document_id,
+ /*lsb_offset=*/kJoinablePropertyIdBits,
+ /*len=*/kDocumentIdBits, &temp_value);
+ bit_util::BitfieldSet(/*new_value=*/joinable_property_id,
+ /*lsb_offset=*/0,
+ /*len=*/kJoinablePropertyIdBits, &temp_value);
+ value_ = temp_value;
+}
+
+DocumentId DocJoinInfo::document_id() const {
+ return bit_util::BitfieldGet(value_, /*lsb_offset=*/kJoinablePropertyIdBits,
+ /*len=*/kDocumentIdBits);
+}
+
+JoinablePropertyId DocJoinInfo::joinable_property_id() const {
+ return bit_util::BitfieldGet(value_, /*lsb_offset=*/0,
+ /*len=*/kJoinablePropertyIdBits);
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/doc-join-info.h b/icing/join/doc-join-info.h
new file mode 100644
index 0000000..7696b92
--- /dev/null
+++ b/icing/join/doc-join-info.h
@@ -0,0 +1,66 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_DOC_JOIN_INFO
+#define ICING_JOIN_DOC_JOIN_INFO
+
+#include <cstdint>
+#include <limits>
+
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// DocJoinInfo is composed of document_id and joinable_property_id.
+class DocJoinInfo {
+ public:
+ // The datatype used to encode DocJoinInfo information: the document_id and
+ // joinable_property_id.
+ using Value = uint32_t;
+
+ static_assert(kDocumentIdBits + kJoinablePropertyIdBits <= sizeof(Value) * 8,
+ "Cannot encode document id and joinable property id in "
+ "DocJoinInfo::Value");
+
+ // All bits of kInvalidValue are 1, and it contains:
+ // - 0b1 for 4 unused bits.
+ // - kInvalidDocumentId (2^22-1).
+ // - JoinablePropertyId 2^6-1 (valid), which is ok because kInvalidDocumentId
+ // has already invalidated the value. In fact, we currently use all 2^6
+ // joinable property ids and there is no "invalid joinable property id", so
+ // it doesn't matter what JoinablePropertyId we set for kInvalidValue.
+ static constexpr Value kInvalidValue = std::numeric_limits<Value>::max();
+
+ explicit DocJoinInfo(DocumentId document_id,
+ JoinablePropertyId joinable_property_id);
+
+ explicit DocJoinInfo(Value value = kInvalidValue) : value_(value) {}
+
+ bool is_valid() const { return value_ != kInvalidValue; }
+ Value value() const { return value_; }
+ DocumentId document_id() const;
+ JoinablePropertyId joinable_property_id() const;
+
+ private:
+ // Value bits layout: 4 unused + 22 document_id + 6 joinable_property_id.
+ Value value_;
+} __attribute__((packed));
+static_assert(sizeof(DocJoinInfo) == 4, "");
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JOIN_DOC_JOIN_INFO
diff --git a/icing/join/doc-join-info_test.cc b/icing/join/doc-join-info_test.cc
new file mode 100644
index 0000000..7025473
--- /dev/null
+++ b/icing/join/doc-join-info_test.cc
@@ -0,0 +1,96 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/doc-join-info.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
+
+static constexpr DocumentId kSomeDocumentId = 24;
+static constexpr JoinablePropertyId kSomeJoinablePropertyId = 5;
+
+TEST(DocJoinInfoTest, Accessors) {
+ DocJoinInfo doc_join_info(kSomeDocumentId, kSomeJoinablePropertyId);
+ EXPECT_THAT(doc_join_info.document_id(), Eq(kSomeDocumentId));
+ EXPECT_THAT(doc_join_info.joinable_property_id(),
+ Eq(kSomeJoinablePropertyId));
+}
+
+TEST(DocJoinInfoTest, Invalid) {
+ DocJoinInfo default_invalid;
+ EXPECT_THAT(default_invalid.is_valid(), IsFalse());
+
+ // Also make sure the invalid DocJoinInfo contains an invalid document id.
+ EXPECT_THAT(default_invalid.document_id(), Eq(kInvalidDocumentId));
+ EXPECT_THAT(default_invalid.joinable_property_id(),
+ Eq(kMaxJoinablePropertyId));
+}
+
+TEST(DocJoinInfoTest, Valid) {
+ DocJoinInfo maximum_document_id_info(kMaxDocumentId, kSomeJoinablePropertyId);
+ EXPECT_THAT(maximum_document_id_info.is_valid(), IsTrue());
+ EXPECT_THAT(maximum_document_id_info.document_id(), Eq(kMaxDocumentId));
+ EXPECT_THAT(maximum_document_id_info.joinable_property_id(),
+ Eq(kSomeJoinablePropertyId));
+
+ DocJoinInfo maximum_joinable_property_id_info(kSomeDocumentId,
+ kMaxJoinablePropertyId);
+ EXPECT_THAT(maximum_joinable_property_id_info.is_valid(), IsTrue());
+ EXPECT_THAT(maximum_joinable_property_id_info.document_id(),
+ Eq(kSomeDocumentId));
+ EXPECT_THAT(maximum_joinable_property_id_info.joinable_property_id(),
+ Eq(kMaxJoinablePropertyId));
+
+ DocJoinInfo minimum_document_id_info(kMinDocumentId, kSomeJoinablePropertyId);
+ EXPECT_THAT(minimum_document_id_info.is_valid(), IsTrue());
+ EXPECT_THAT(minimum_document_id_info.document_id(), Eq(kMinDocumentId));
+ EXPECT_THAT(minimum_document_id_info.joinable_property_id(),
+ Eq(kSomeJoinablePropertyId));
+
+ DocJoinInfo minimum_joinable_property_id_info(kSomeDocumentId,
+ kMinJoinablePropertyId);
+ EXPECT_THAT(minimum_joinable_property_id_info.is_valid(), IsTrue());
+ EXPECT_THAT(minimum_joinable_property_id_info.document_id(),
+ Eq(kSomeDocumentId));
+ EXPECT_THAT(minimum_joinable_property_id_info.joinable_property_id(),
+ Eq(kMinJoinablePropertyId));
+
+ DocJoinInfo all_maximum_info(kMaxDocumentId, kMaxJoinablePropertyId);
+ EXPECT_THAT(all_maximum_info.is_valid(), IsTrue());
+ EXPECT_THAT(all_maximum_info.document_id(), Eq(kMaxDocumentId));
+ EXPECT_THAT(all_maximum_info.joinable_property_id(),
+ Eq(kMaxJoinablePropertyId));
+
+ DocJoinInfo all_minimum_info(kMinDocumentId, kMinJoinablePropertyId);
+ EXPECT_THAT(all_minimum_info.is_valid(), IsTrue());
+ EXPECT_THAT(all_minimum_info.document_id(), Eq(kMinDocumentId));
+ EXPECT_THAT(all_minimum_info.joinable_property_id(),
+ Eq(kMinJoinablePropertyId));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/join-children-fetcher.cc b/icing/join/join-children-fetcher.cc
new file mode 100644
index 0000000..c6d1b97
--- /dev/null
+++ b/icing/join/join-children-fetcher.cc
@@ -0,0 +1,39 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/join-children-fetcher.h"
+
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+
+namespace icing {
+namespace lib {
+
+libtextclassifier3::StatusOr<std::vector<ScoredDocumentHit>>
+JoinChildrenFetcher::GetChildren(DocumentId parent_doc_id) const {
+ if (join_spec_.parent_property_expression() == kQualifiedIdExpr) {
+ if (auto iter = map_joinable_qualified_id_.find(parent_doc_id);
+ iter != map_joinable_qualified_id_.end()) {
+ return iter->second;
+ }
+ return std::vector<ScoredDocumentHit>();
+ }
+ // TODO(b/256022027): So far we only support kQualifiedIdExpr for
+ // parent_property_expression, we could support more.
+ return absl_ports::UnimplementedError(absl_ports::StrCat(
+ "Parent property expression must be ", kQualifiedIdExpr));
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/join-children-fetcher.h b/icing/join/join-children-fetcher.h
new file mode 100644
index 0000000..1b875bc
--- /dev/null
+++ b/icing/join/join-children-fetcher.h
@@ -0,0 +1,73 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_JOIN_CHILDREN_FETCHER_H_
+#define ICING_JOIN_JOIN_CHILDREN_FETCHER_H_
+
+#include <unordered_map>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/proto/search.pb.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// A class that provides the GetChildren method for joins to fetch all children
+// documents given a parent document id.
+//
+// Internally, the class maintains a map for each joinable value type that
+// groups children according to the joinable values. Currently we only support
+// QUALIFIED_ID joining, in which the joinable value type is document id.
+class JoinChildrenFetcher {
+ public:
+ explicit JoinChildrenFetcher(
+ const JoinSpecProto& join_spec,
+ std::unordered_map<DocumentId, std::vector<ScoredDocumentHit>>&&
+ map_joinable_qualified_id)
+ : join_spec_(join_spec),
+ map_joinable_qualified_id_(std::move(map_joinable_qualified_id)) {}
+
+ // Get a vector of children ScoredDocumentHit by parent document id.
+ //
+ // TODO(b/256022027): Implement property value joins with types of string and
+ // int. In these cases, GetChildren should look up join index to fetch
+ // joinable property value of the given parent_doc_id according to
+ // join_spec_.parent_property_expression, and then fetch children by the
+ // corresponding map in this class using the joinable property value.
+ //
+ // Returns:
+ // The vector of results on success.
+ // UNIMPLEMENTED_ERROR if the join type specified by join_spec is not
+ // supported.
+ libtextclassifier3::StatusOr<std::vector<ScoredDocumentHit>> GetChildren(
+ DocumentId parent_doc_id) const;
+
+ private:
+ static constexpr std::string_view kQualifiedIdExpr = "this.qualifiedId()";
+
+ const JoinSpecProto& join_spec_; // Does not own!
+
+ // The map that groups children by qualified id used to support QualifiedId
+ // joining. The joining type is document id.
+ std::unordered_map<DocumentId, std::vector<ScoredDocumentHit>>
+ map_joinable_qualified_id_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JOIN_JOIN_CHILDREN_FETCHER_H_
diff --git a/icing/join/join-children-fetcher_test.cc b/icing/join/join-children-fetcher_test.cc
new file mode 100644
index 0000000..92a7a81
--- /dev/null
+++ b/icing/join/join-children-fetcher_test.cc
@@ -0,0 +1,83 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/join-children-fetcher.h"
+
+#include <unordered_map>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/join/join-processor.h"
+#include "icing/proto/search.pb.h"
+#include "icing/schema/section.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::IsEmpty;
+
+TEST(JoinChildrenFetcherTest, FetchQualifiedIdJoinChildren) {
+ JoinSpecProto join_spec;
+ join_spec.set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec.set_child_property_expression("sender");
+
+ std::unordered_map<DocumentId, std::vector<ScoredDocumentHit>>
+ map_joinable_qualified_id;
+ DocumentId parent_doc_id = 0;
+ ScoredDocumentHit child1(/*document_id=*/1, kSectionIdMaskNone,
+ /*score=*/1.0);
+ ScoredDocumentHit child2(/*document_id=*/2, kSectionIdMaskNone,
+ /*score=*/2.0);
+ map_joinable_qualified_id[parent_doc_id].push_back(child1);
+ map_joinable_qualified_id[parent_doc_id].push_back(child2);
+
+ JoinChildrenFetcher fetcher(join_spec, std::move(map_joinable_qualified_id));
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<ScoredDocumentHit> children,
+ fetcher.GetChildren(parent_doc_id));
+ EXPECT_THAT(children, ElementsAre(EqualsScoredDocumentHit(child1),
+ EqualsScoredDocumentHit(child2)));
+}
+
+TEST(JoinChildrenFetcherTest, FetchJoinEmptyChildren) {
+ JoinSpecProto join_spec;
+ join_spec.set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec.set_child_property_expression("sender");
+
+ DocumentId parent_doc_id = 0;
+
+ JoinChildrenFetcher fetcher(join_spec, /*map_joinable_qualified_id=*/{});
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<ScoredDocumentHit> children,
+ fetcher.GetChildren(parent_doc_id));
+ EXPECT_THAT(children, IsEmpty());
+}
+
+TEST(JoinChildrenFetcherTest, UnsupportedJoin) {
+ JoinSpecProto join_spec;
+ join_spec.set_parent_property_expression("name");
+ join_spec.set_child_property_expression("sender");
+ JoinChildrenFetcher fetcher(join_spec, /*map_joinable_qualified_id=*/{});
+ EXPECT_THAT(fetcher.GetChildren(0),
+ StatusIs(libtextclassifier3::StatusCode::UNIMPLEMENTED));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/join-processor.cc b/icing/join/join-processor.cc
index 7700397..e27b1ea 100644
--- a/icing/join/join-processor.cc
+++ b/icing/join/join-processor.cc
@@ -15,30 +15,44 @@
#include "icing/join/join-processor.h"
#include <algorithm>
-#include <functional>
+#include <memory>
+#include <optional>
#include <string>
#include <string_view>
+#include <unordered_map>
+#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/join/aggregation-scorer.h"
+#include "icing/join/doc-join-info.h"
+#include "icing/join/join-children-fetcher.h"
#include "icing/join/qualified-id.h"
+#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
+#include "icing/schema/joinable-property.h"
#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
-#include "icing/util/snippet-helpers.h"
+#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
-libtextclassifier3::StatusOr<std::vector<JoinedScoredDocumentHit>>
-JoinProcessor::Join(
+libtextclassifier3::StatusOr<JoinChildrenFetcher>
+JoinProcessor::GetChildrenFetcher(
const JoinSpecProto& join_spec,
- std::vector<ScoredDocumentHit>&& parent_scored_document_hits,
std::vector<ScoredDocumentHit>&& child_scored_document_hits) {
+ if (join_spec.parent_property_expression() != kQualifiedIdExpr) {
+ // TODO(b/256022027): So far we only support kQualifiedIdExpr for
+ // parent_property_expression, we could support more.
+ return absl_ports::UnimplementedError(absl_ports::StrCat(
+ "Parent property expression must be ", kQualifiedIdExpr));
+ }
+
std::sort(
child_scored_document_hits.begin(), child_scored_document_hits.end(),
ScoredDocumentHitComparator(
@@ -59,39 +73,26 @@ JoinProcessor::Join(
// ScoredDocumentHits refer to. The values in this map are vectors of child
// ScoredDocumentHits that refer to a parent DocumentId.
std::unordered_map<DocumentId, std::vector<ScoredDocumentHit>>
- parent_id_to_child_map;
+ map_joinable_qualified_id;
for (const ScoredDocumentHit& child : child_scored_document_hits) {
- std::string property_content = FetchPropertyExpressionValue(
- child.document_id(), join_spec.child_property_expression());
-
- // Parse qualified id.
- libtextclassifier3::StatusOr<QualifiedId> qualified_id_or =
- QualifiedId::Parse(property_content);
- if (!qualified_id_or.ok()) {
- ICING_VLOG(2) << "Skip content with invalid format of QualifiedId";
- continue;
- }
- QualifiedId qualified_id = std::move(qualified_id_or).ValueOrDie();
-
- // Lookup parent DocumentId.
- libtextclassifier3::StatusOr<DocumentId> parent_doc_id_or =
- doc_store_->GetDocumentId(qualified_id.name_space(),
- qualified_id.uri());
- if (!parent_doc_id_or.ok()) {
- // Skip the document if getting errors.
+ ICING_ASSIGN_OR_RETURN(
+ DocumentId ref_doc_id,
+ FetchReferencedQualifiedId(child.document_id(),
+ join_spec.child_property_expression()));
+ if (ref_doc_id == kInvalidDocumentId) {
continue;
}
- DocumentId parent_doc_id = std::move(parent_doc_id_or).ValueOrDie();
-
- // Since we've already sorted child_scored_document_hits, just simply omit
- // if the parent_id_to_child_map[parent_doc_id].size() has reached max
- // joined child count.
- if (parent_id_to_child_map[parent_doc_id].size() <
- join_spec.max_joined_child_count()) {
- parent_id_to_child_map[parent_doc_id].push_back(child);
- }
+
+ map_joinable_qualified_id[ref_doc_id].push_back(child);
}
+ return JoinChildrenFetcher(join_spec, std::move(map_joinable_qualified_id));
+}
+libtextclassifier3::StatusOr<std::vector<JoinedScoredDocumentHit>>
+JoinProcessor::Join(
+ const JoinSpecProto& join_spec,
+ std::vector<ScoredDocumentHit>&& parent_scored_document_hits,
+ const JoinChildrenFetcher& join_children_fetcher) {
std::unique_ptr<AggregationScorer> aggregation_scorer =
AggregationScorer::Create(join_spec);
@@ -100,23 +101,11 @@ JoinProcessor::Join(
// Step 2: iterate through all parent documentIds and construct
// JoinedScoredDocumentHit for each by looking up
- // parent_id_to_child_map.
+ // join_children_fetcher.
for (ScoredDocumentHit& parent : parent_scored_document_hits) {
- DocumentId parent_doc_id = kInvalidDocumentId;
- if (join_spec.parent_property_expression() == kQualifiedIdExpr) {
- parent_doc_id = parent.document_id();
- } else {
- // TODO(b/256022027): So far we only support kQualifiedIdExpr for
- // parent_property_expression, we could support more.
- return absl_ports::UnimplementedError(absl_ports::StrCat(
- "Parent property expression must be ", kQualifiedIdExpr));
- }
-
- std::vector<ScoredDocumentHit> children;
- if (auto iter = parent_id_to_child_map.find(parent_doc_id);
- iter != parent_id_to_child_map.end()) {
- children = std::move(iter->second);
- }
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<ScoredDocumentHit> children,
+ join_children_fetcher.GetChildren(parent.document_id()));
double final_score = aggregation_scorer->GetScore(parent, children);
joined_scored_document_hits.emplace_back(final_score, std::move(parent),
@@ -126,20 +115,49 @@ JoinProcessor::Join(
return joined_scored_document_hits;
}
-std::string JoinProcessor::FetchPropertyExpressionValue(
- const DocumentId& document_id,
- const std::string& property_expression) const {
- // TODO(b/256022027): Add caching of document_id -> {expression -> value}
- libtextclassifier3::StatusOr<DocumentProto> document_or =
- doc_store_->Get(document_id);
- if (!document_or.ok()) {
- // Skip the document if getting errors.
- return "";
+libtextclassifier3::StatusOr<DocumentId>
+JoinProcessor::FetchReferencedQualifiedId(
+ const DocumentId& document_id, const std::string& property_path) const {
+ std::optional<DocumentFilterData> filter_data =
+ doc_store_->GetAliveDocumentFilterData(document_id, current_time_ms_);
+ if (!filter_data) {
+ return kInvalidDocumentId;
+ }
+
+ ICING_ASSIGN_OR_RETURN(const JoinablePropertyMetadata* metadata,
+ schema_store_->GetJoinablePropertyMetadata(
+ filter_data->schema_type_id(), property_path));
+ if (metadata == nullptr ||
+ metadata->value_type != JoinableConfig::ValueType::QUALIFIED_ID) {
+ // Currently we only support qualified id.
+ return kInvalidDocumentId;
+ }
+
+ DocJoinInfo info(document_id, metadata->id);
+ libtextclassifier3::StatusOr<std::string_view> ref_qualified_id_str_or =
+ qualified_id_join_index_->Get(info);
+ if (!ref_qualified_id_str_or.ok()) {
+ if (absl_ports::IsNotFound(ref_qualified_id_str_or.status())) {
+ return kInvalidDocumentId;
+ }
+ return std::move(ref_qualified_id_str_or).status();
}
- DocumentProto document = std::move(document_or).ValueOrDie();
+ libtextclassifier3::StatusOr<QualifiedId> ref_qualified_id_or =
+ QualifiedId::Parse(std::move(ref_qualified_id_str_or).ValueOrDie());
+ if (!ref_qualified_id_or.ok()) {
+ // This shouldn't happen because we've validated it during indexing and only
+ // put valid qualified id strings into qualified id join index.
+ return kInvalidDocumentId;
+ }
+ QualifiedId qualified_id = std::move(ref_qualified_id_or).ValueOrDie();
- return std::string(GetString(&document, property_expression));
+ libtextclassifier3::StatusOr<DocumentId> ref_document_id_or =
+ doc_store_->GetDocumentId(qualified_id.name_space(), qualified_id.uri());
+ if (!ref_document_id_or.ok()) {
+ return kInvalidDocumentId;
+ }
+ return std::move(ref_document_id_or).ValueOrDie();
}
} // namespace lib
diff --git a/icing/join/join-processor.h b/icing/join/join-processor.h
index 65c9e5f..517e9db 100644
--- a/icing/join/join-processor.h
+++ b/icing/join/join-processor.h
@@ -15,12 +15,16 @@
#ifndef ICING_JOIN_JOIN_PROCESSOR_H_
#define ICING_JOIN_JOIN_PROCESSOR_H_
+#include <cstdint>
#include <string>
#include <string_view>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/join/join-children-fetcher.h"
+#include "icing/join/qualified-id-join-index.h"
#include "icing/proto/search.pb.h"
+#include "icing/schema/schema-store.h"
#include "icing/scoring/scored-document-hit.h"
#include "icing/store/document-store.h"
@@ -31,32 +35,51 @@ class JoinProcessor {
public:
static constexpr std::string_view kQualifiedIdExpr = "this.qualifiedId()";
- explicit JoinProcessor(const DocumentStore* doc_store)
- : doc_store_(doc_store) {}
+ explicit JoinProcessor(const DocumentStore* doc_store,
+ const SchemaStore* schema_store,
+ const QualifiedIdJoinIndex* qualified_id_join_index,
+ int64_t current_time_ms)
+ : doc_store_(doc_store),
+ schema_store_(schema_store),
+ qualified_id_join_index_(qualified_id_join_index),
+ current_time_ms_(current_time_ms) {}
+
+ // Get a JoinChildrenFetcher used to fetch all children documents by a parent
+ // document id.
+ //
+ // Returns:
+ // A JoinChildrenFetcher instance on success.
+ // UNIMPLEMENTED_ERROR if the join type specified by join_spec is not
+ // supported.
+ libtextclassifier3::StatusOr<JoinChildrenFetcher> GetChildrenFetcher(
+ const JoinSpecProto& join_spec,
+ std::vector<ScoredDocumentHit>&& child_scored_document_hits);
libtextclassifier3::StatusOr<std::vector<JoinedScoredDocumentHit>> Join(
const JoinSpecProto& join_spec,
std::vector<ScoredDocumentHit>&& parent_scored_document_hits,
- std::vector<ScoredDocumentHit>&& child_scored_document_hits);
+ const JoinChildrenFetcher& join_children_fetcher);
private:
- // Loads a document and uses a property expression to fetch the value of the
- // property from the document. The property expression may refer to nested
- // document properties.
- // Note: currently we only support single joining, so we use the first element
- // (index 0) for any repeated values.
+ // Fetches referenced document id of the given document under the given
+ // property path.
//
// TODO(b/256022027): validate joinable property (and its upper-level) should
// not have REPEATED cardinality.
//
// Returns:
- // "" on document load error.
- // "" if the property path is not found in the document.
- std::string FetchPropertyExpressionValue(
- const DocumentId& document_id,
- const std::string& property_expression) const;
+ // - A valid referenced document id on success
+ // - kInvalidDocumentId if the given document is not found, doesn't have
+ // qualified id joinable type for the given property_path, or doesn't have
+ // joinable value (an optional property)
+ // - Any other QualifiedIdJoinIndex errors
+ libtextclassifier3::StatusOr<DocumentId> FetchReferencedQualifiedId(
+ const DocumentId& document_id, const std::string& property_path) const;
const DocumentStore* doc_store_; // Does not own.
+ const SchemaStore* schema_store_; // Does not own.
+ const QualifiedIdJoinIndex* qualified_id_join_index_; // Does not own.
+ int64_t current_time_ms_;
};
} // namespace lib
diff --git a/icing/join/join-processor_test.cc b/icing/join/join-processor_test.cc
index 70eaf3f..f503442 100644
--- a/icing/join/join-processor_test.cc
+++ b/icing/join/join-processor_test.cc
@@ -16,13 +16,20 @@
#include <memory>
#include <string>
+#include <utility>
#include <vector>
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/join/qualified-id-join-indexing-handler.h"
+#include "icing/portable/platform.h"
#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
@@ -31,9 +38,17 @@
#include "icing/schema/section.h"
#include "icing/scoring/scored-document-hit.h"
#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/util/status-macros.h"
+#include "icing/util/tokenized-document.h"
+#include "unicode/uloc.h"
namespace icing {
namespace lib {
@@ -41,16 +56,37 @@ namespace lib {
namespace {
using ::testing::ElementsAre;
+using ::testing::IsTrue;
class JoinProcessorTest : public ::testing::Test {
protected:
void SetUp() override {
test_dir_ = GetTestTempDir() + "/icing_join_processor_test";
- filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(test_dir_.c_str()),
+ IsTrue());
+ schema_store_dir_ = test_dir_ + "/schema_store";
+ doc_store_dir_ = test_dir_ + "/doc_store";
+ qualified_id_join_index_dir_ = test_dir_ + "/qualified_id_join_index";
+
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ lang_segmenter_,
+ language_segmenter_factory::Create(std::move(options)));
+
+ ASSERT_THAT(
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()),
+ IsTrue());
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
SchemaProto schema =
SchemaBuilder()
@@ -73,25 +109,85 @@ class JoinProcessorTest : public ::testing::Test {
JOINABLE_VALUE_TYPE_QUALIFIED_ID)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str()),
+ IsTrue());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::Create(
+ &filesystem_, doc_store_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
doc_store_ = std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ qualified_id_join_index_,
+ QualifiedIdJoinIndex::Create(filesystem_, qualified_id_join_index_dir_,
+ /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false));
}
void TearDown() override {
+ qualified_id_join_index_.reset();
doc_store_.reset();
schema_store_.reset();
+ lang_segmenter_.reset();
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
}
+ libtextclassifier3::StatusOr<DocumentId> PutAndIndexDocument(
+ const DocumentProto& document) {
+ ICING_ASSIGN_OR_RETURN(DocumentId document_id, doc_store_->Put(document));
+ ICING_ASSIGN_OR_RETURN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ qualified_id_join_index_.get()));
+ ICING_RETURN_IF_ERROR(handler->Handle(tokenized_document, document_id,
+ /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr));
+ return document_id;
+ }
+
+ libtextclassifier3::StatusOr<std::vector<JoinedScoredDocumentHit>> Join(
+ const JoinSpecProto& join_spec,
+ std::vector<ScoredDocumentHit>&& parent_scored_document_hits,
+ std::vector<ScoredDocumentHit>&& child_scored_document_hits) {
+ JoinProcessor join_processor(
+ doc_store_.get(), schema_store_.get(), qualified_id_join_index_.get(),
+ /*current_time_ms=*/fake_clock_.GetSystemTimeMilliseconds());
+ ICING_ASSIGN_OR_RETURN(
+ JoinChildrenFetcher join_children_fetcher,
+ join_processor.GetChildrenFetcher(
+ join_spec, std::move(child_scored_document_hits)));
+ return join_processor.Join(join_spec,
+ std::move(parent_scored_document_hits),
+ join_children_fetcher);
+ }
+
Filesystem filesystem_;
std::string test_dir_;
+ std::string schema_store_dir_;
+ std::string doc_store_dir_;
+ std::string qualified_id_join_index_dir_;
+
+ std::unique_ptr<LanguageSegmenter> lang_segmenter_;
std::unique_ptr<SchemaStore> schema_store_;
std::unique_ptr<DocumentStore> doc_store_;
+ std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index_;
+
FakeClock fake_clock_;
};
@@ -130,11 +226,16 @@ TEST_F(JoinProcessorTest, JoinByQualifiedId) {
.AddStringProperty("sender", "pkg$db/namespace#person1")
.Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, doc_store_->Put(person1));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, doc_store_->Put(person2));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, doc_store_->Put(email1));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4, doc_store_->Put(email2));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5, doc_store_->Put(email3));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ PutAndIndexDocument(person1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ PutAndIndexDocument(person2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ PutAndIndexDocument(email1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ PutAndIndexDocument(email2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
+ PutAndIndexDocument(email3));
ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
/*score=*/0.0);
@@ -156,7 +257,6 @@ TEST_F(JoinProcessorTest, JoinByQualifiedId) {
scored_doc_hit5, scored_doc_hit4, scored_doc_hit3};
JoinSpecProto join_spec;
- join_spec.set_max_joined_child_count(100);
join_spec.set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec.set_child_property_expression("sender");
@@ -165,11 +265,10 @@ TEST_F(JoinProcessorTest, JoinByQualifiedId) {
join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
ScoringSpecProto::Order::DESC);
- JoinProcessor join_processor(doc_store_.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
- join_processor.Join(join_spec, std::move(parent_scored_document_hits),
- std::move(child_scored_document_hits)));
+ Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
EXPECT_THAT(
joined_result_document_hits,
ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
@@ -203,9 +302,12 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithoutJoiningProperty) {
.AddStringProperty("subject", "test subject 2")
.Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, doc_store_->Put(person1));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, doc_store_->Put(email1));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, doc_store_->Put(email2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ PutAndIndexDocument(person1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ PutAndIndexDocument(email1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ PutAndIndexDocument(email2));
ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
/*score=*/0.0);
@@ -223,7 +325,6 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithoutJoiningProperty) {
scored_doc_hit3};
JoinSpecProto join_spec;
- join_spec.set_max_joined_child_count(100);
join_spec.set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec.set_child_property_expression("sender");
@@ -232,11 +333,10 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithoutJoiningProperty) {
join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
ScoringSpecProto::Order::DESC);
- JoinProcessor join_processor(doc_store_.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
- join_processor.Join(join_spec, std::move(parent_scored_document_hits),
- std::move(child_scored_document_hits)));
+ Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
// Since Email2 doesn't have "sender" property, it should be ignored.
EXPECT_THAT(
joined_result_document_hits,
@@ -278,10 +378,14 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithInvalidQualifiedId) {
R"(pkg$db/namespace\#person1)") // invalid format
.Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, doc_store_->Put(person1));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, doc_store_->Put(email1));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, doc_store_->Put(email2));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4, doc_store_->Put(email3));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ PutAndIndexDocument(person1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ PutAndIndexDocument(email1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ PutAndIndexDocument(email2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ PutAndIndexDocument(email3));
ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
/*score=*/0.0);
@@ -301,7 +405,6 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithInvalidQualifiedId) {
scored_doc_hit2, scored_doc_hit3, scored_doc_hit4};
JoinSpecProto join_spec;
- join_spec.set_max_joined_child_count(100);
join_spec.set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec.set_child_property_expression("sender");
@@ -310,11 +413,10 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithInvalidQualifiedId) {
join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
ScoringSpecProto::Order::DESC);
- JoinProcessor join_processor(doc_store_.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
- join_processor.Join(join_spec, std::move(parent_scored_document_hits),
- std::move(child_scored_document_hits)));
+ Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
// Email 2 and email 3 (document id 3 and 4) contain invalid qualified ids.
// Join processor should ignore them.
EXPECT_THAT(joined_result_document_hits,
@@ -345,9 +447,12 @@ TEST_F(JoinProcessorTest, LeftJoinShouldReturnParentWithoutChildren) {
R"(pkg$db/name\#space\\\\#person2)") // escaped
.Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, doc_store_->Put(person1));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, doc_store_->Put(person2));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, doc_store_->Put(email1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ PutAndIndexDocument(person1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ PutAndIndexDocument(person2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ PutAndIndexDocument(email1));
ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
/*score=*/0.0);
@@ -364,7 +469,6 @@ TEST_F(JoinProcessorTest, LeftJoinShouldReturnParentWithoutChildren) {
std::vector<ScoredDocumentHit> child_scored_document_hits = {scored_doc_hit3};
JoinSpecProto join_spec;
- join_spec.set_max_joined_child_count(100);
join_spec.set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec.set_child_property_expression("sender");
@@ -373,11 +477,10 @@ TEST_F(JoinProcessorTest, LeftJoinShouldReturnParentWithoutChildren) {
join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
ScoringSpecProto::Order::DESC);
- JoinProcessor join_processor(doc_store_.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
- join_processor.Join(join_spec, std::move(parent_scored_document_hits),
- std::move(child_scored_document_hits)));
+ Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
// Person1 has no child documents, but left join should also include it.
EXPECT_THAT(
joined_result_document_hits,
@@ -420,10 +523,14 @@ TEST_F(JoinProcessorTest, ShouldSortChildDocumentsByRankingStrategy) {
.AddStringProperty("sender", "pkg$db/namespace#person1")
.Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, doc_store_->Put(person1));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, doc_store_->Put(email1));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, doc_store_->Put(email2));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4, doc_store_->Put(email3));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ PutAndIndexDocument(person1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ PutAndIndexDocument(email1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ PutAndIndexDocument(email2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ PutAndIndexDocument(email3));
ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
/*score=*/0.0);
@@ -443,7 +550,6 @@ TEST_F(JoinProcessorTest, ShouldSortChildDocumentsByRankingStrategy) {
scored_doc_hit2, scored_doc_hit3, scored_doc_hit4};
JoinSpecProto join_spec;
- join_spec.set_max_joined_child_count(100);
join_spec.set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec.set_child_property_expression("sender");
@@ -452,11 +558,10 @@ TEST_F(JoinProcessorTest, ShouldSortChildDocumentsByRankingStrategy) {
join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
ScoringSpecProto::Order::DESC);
- JoinProcessor join_processor(doc_store_.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
- join_processor.Join(join_spec, std::move(parent_scored_document_hits),
- std::move(child_scored_document_hits)));
+ Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
// Child documents should be sorted according to the (nested) ranking
// strategy.
EXPECT_THAT(
@@ -467,109 +572,6 @@ TEST_F(JoinProcessorTest, ShouldSortChildDocumentsByRankingStrategy) {
{scored_doc_hit3, scored_doc_hit4, scored_doc_hit2}))));
}
-TEST_F(JoinProcessorTest,
- ShouldTruncateByRankingStrategyIfExceedingMaxJoinedChildCount) {
- DocumentProto person1 = DocumentBuilder()
- .SetKey("pkg$db/namespace", "person1")
- .SetSchema("Person")
- .AddStringProperty("Name", "Alice")
- .Build();
- DocumentProto person2 = DocumentBuilder()
- .SetKey(R"(pkg$db/name#space\\)", "person2")
- .SetSchema("Person")
- .AddStringProperty("Name", "Bob")
- .Build();
-
- DocumentProto email1 =
- DocumentBuilder()
- .SetKey("pkg$db/namespace", "email1")
- .SetSchema("Email")
- .AddStringProperty("subject", "test subject 1")
- .AddStringProperty("sender", "pkg$db/namespace#person1")
- .Build();
- DocumentProto email2 =
- DocumentBuilder()
- .SetKey("pkg$db/namespace", "email2")
- .SetSchema("Email")
- .AddStringProperty("subject", "test subject 2")
- .AddStringProperty("sender", "pkg$db/namespace#person1")
- .Build();
- DocumentProto email3 =
- DocumentBuilder()
- .SetKey("pkg$db/namespace", "email3")
- .SetSchema("Email")
- .AddStringProperty("subject", "test subject 3")
- .AddStringProperty("sender", "pkg$db/namespace#person1")
- .Build();
- DocumentProto email4 =
- DocumentBuilder()
- .SetKey("pkg$db/namespace", "email4")
- .SetSchema("Email")
- .AddStringProperty("subject", "test subject 4")
- .AddStringProperty("sender",
- R"(pkg$db/name\#space\\\\#person2)") // escaped
- .Build();
-
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, doc_store_->Put(person1));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, doc_store_->Put(person2));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, doc_store_->Put(email1));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4, doc_store_->Put(email2));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5, doc_store_->Put(email3));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id6, doc_store_->Put(email4));
-
- ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
- /*score=*/0.0);
- ScoredDocumentHit scored_doc_hit2(document_id2, kSectionIdMaskNone,
- /*score=*/0.0);
- ScoredDocumentHit scored_doc_hit3(document_id3, kSectionIdMaskNone,
- /*score=*/2.0);
- ScoredDocumentHit scored_doc_hit4(document_id4, kSectionIdMaskNone,
- /*score=*/5.0);
- ScoredDocumentHit scored_doc_hit5(document_id5, kSectionIdMaskNone,
- /*score=*/3.0);
- ScoredDocumentHit scored_doc_hit6(document_id6, kSectionIdMaskNone,
- /*score=*/1.0);
-
- // Parent ScoredDocumentHits: all Person documents
- std::vector<ScoredDocumentHit> parent_scored_document_hits = {
- scored_doc_hit1, scored_doc_hit2};
-
- // Child ScoredDocumentHits: all Email documents
- std::vector<ScoredDocumentHit> child_scored_document_hits = {
- scored_doc_hit3, scored_doc_hit4, scored_doc_hit5, scored_doc_hit6};
-
- JoinSpecProto join_spec;
- join_spec.set_max_joined_child_count(2);
- join_spec.set_parent_property_expression(
- std::string(JoinProcessor::kQualifiedIdExpr));
- join_spec.set_child_property_expression("sender");
- join_spec.set_aggregation_scoring_strategy(
- JoinSpecProto::AggregationScoringStrategy::COUNT);
- join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
- ScoringSpecProto::Order::DESC);
-
- JoinProcessor join_processor(doc_store_.get());
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
- join_processor.Join(join_spec, std::move(parent_scored_document_hits),
- std::move(child_scored_document_hits)));
- // Since we set max_joind_child_count as 2 and use DESC as the (nested)
- // ranking strategy, parent document with # of child documents more than 2
- // should only keep 2 child documents with higher scores and the rest should
- // be truncated.
- EXPECT_THAT(
- joined_result_document_hits,
- ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
- /*final_score=*/2.0,
- /*parent_scored_document_hit=*/scored_doc_hit1,
- /*child_scored_document_hits=*/
- {scored_doc_hit4, scored_doc_hit5})),
- EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
- /*final_score=*/1.0,
- /*parent_scored_document_hit=*/scored_doc_hit2,
- /*child_scored_document_hits=*/{scored_doc_hit6}))));
-}
-
TEST_F(JoinProcessorTest, ShouldAllowSelfJoining) {
DocumentProto email1 =
DocumentBuilder()
@@ -579,7 +581,8 @@ TEST_F(JoinProcessorTest, ShouldAllowSelfJoining) {
.AddStringProperty("sender", "pkg$db/namespace#email1")
.Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, doc_store_->Put(email1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ PutAndIndexDocument(email1));
ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
/*score=*/0.0);
@@ -592,7 +595,6 @@ TEST_F(JoinProcessorTest, ShouldAllowSelfJoining) {
std::vector<ScoredDocumentHit> child_scored_document_hits = {scored_doc_hit1};
JoinSpecProto join_spec;
- join_spec.set_max_joined_child_count(100);
join_spec.set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec.set_child_property_expression("sender");
@@ -601,11 +603,10 @@ TEST_F(JoinProcessorTest, ShouldAllowSelfJoining) {
join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
ScoringSpecProto::Order::DESC);
- JoinProcessor join_processor(doc_store_.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
- join_processor.Join(join_spec, std::move(parent_scored_document_hits),
- std::move(child_scored_document_hits)));
+ Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
EXPECT_THAT(joined_result_document_hits,
ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
/*final_score=*/1.0,
diff --git a/icing/join/qualified-id-join-index.cc b/icing/join/qualified-id-join-index.cc
new file mode 100644
index 0000000..07b5627
--- /dev/null
+++ b/icing/join/qualified-id-join-index.cc
@@ -0,0 +1,467 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/qualified-id-join-index.h"
+
+#include <cstring>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/destructible-directory.h"
+#include "icing/file/file-backed-vector.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/join/doc-join-info.h"
+#include "icing/store/document-id.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
+#include "icing/store/key-mapper.h"
+#include "icing/store/persistent-hash-map-key-mapper.h"
+#include "icing/util/crc32.h"
+#include "icing/util/encode-util.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Set 1M for max # of qualified id entries and 10 bytes for key-value bytes.
+// This will take at most 23 MiB disk space and mmap for persistent hash map.
+static constexpr int32_t kDocJoinInfoMapperMaxNumEntries = 1 << 20;
+static constexpr int32_t kDocJoinInfoMapperAverageKVByteSize = 10;
+
+static constexpr int32_t kDocJoinInfoMapperDynamicTrieMaxSize =
+ 128 * 1024 * 1024; // 128 MiB
+
+DocumentId GetNewDocumentId(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ DocumentId old_document_id) {
+ if (old_document_id >= document_id_old_to_new.size()) {
+ return kInvalidDocumentId;
+ }
+ return document_id_old_to_new[old_document_id];
+}
+
+std::string GetMetadataFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/metadata");
+}
+
+std::string GetDocJoinInfoMapperPath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/doc_join_info_mapper");
+}
+
+std::string GetQualifiedIdStoragePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/qualified_id_storage");
+}
+
+} // namespace
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
+QualifiedIdJoinIndex::Create(const Filesystem& filesystem,
+ std::string working_path, bool pre_mapping_fbv,
+ bool use_persistent_hash_map) {
+ if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str()) ||
+ !filesystem.DirectoryExists(
+ GetDocJoinInfoMapperPath(working_path).c_str()) ||
+ !filesystem.FileExists(GetQualifiedIdStoragePath(working_path).c_str())) {
+ // Discard working_path if any file/directory is missing, and reinitialize.
+ if (filesystem.DirectoryExists(working_path.c_str())) {
+ ICING_RETURN_IF_ERROR(Discard(filesystem, working_path));
+ }
+ return InitializeNewFiles(filesystem, std::move(working_path),
+ pre_mapping_fbv, use_persistent_hash_map);
+ }
+ return InitializeExistingFiles(filesystem, std::move(working_path),
+ pre_mapping_fbv, use_persistent_hash_map);
+}
+
+QualifiedIdJoinIndex::~QualifiedIdJoinIndex() {
+ if (!PersistToDisk().ok()) {
+ ICING_LOG(WARNING) << "Failed to persist qualified id type joinable index "
+ "to disk while destructing "
+ << working_path_;
+ }
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndex::Put(
+ const DocJoinInfo& doc_join_info, std::string_view ref_qualified_id_str) {
+ SetDirty();
+
+ if (!doc_join_info.is_valid()) {
+ return absl_ports::InvalidArgumentError(
+ "Cannot put data for an invalid DocJoinInfo");
+ }
+
+ int32_t qualified_id_index = qualified_id_storage_->num_elements();
+ ICING_ASSIGN_OR_RETURN(
+ FileBackedVector<char>::MutableArrayView mutable_arr,
+ qualified_id_storage_->Allocate(ref_qualified_id_str.size() + 1));
+ mutable_arr.SetArray(/*idx=*/0, ref_qualified_id_str.data(),
+ ref_qualified_id_str.size());
+ mutable_arr.SetArray(/*idx=*/ref_qualified_id_str.size(), /*arr=*/"\0",
+ /*arr_len=*/1);
+
+ ICING_RETURN_IF_ERROR(doc_join_info_mapper_->Put(
+ encode_util::EncodeIntToCString(doc_join_info.value()),
+ qualified_id_index));
+
+ // TODO(b/268521214): add data into delete propagation storage
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<std::string_view> QualifiedIdJoinIndex::Get(
+ const DocJoinInfo& doc_join_info) const {
+ if (!doc_join_info.is_valid()) {
+ return absl_ports::InvalidArgumentError(
+ "Cannot get data for an invalid DocJoinInfo");
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ int32_t qualified_id_index,
+ doc_join_info_mapper_->Get(
+ encode_util::EncodeIntToCString(doc_join_info.value())));
+
+ const char* data = qualified_id_storage_->array() + qualified_id_index;
+ return std::string_view(data, strlen(data));
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndex::Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ DocumentId new_last_added_document_id) {
+ std::string temp_working_path = working_path_ + "_temp";
+ ICING_RETURN_IF_ERROR(Discard(filesystem_, temp_working_path));
+
+ DestructibleDirectory temp_working_path_ddir(&filesystem_,
+ std::move(temp_working_path));
+ if (!temp_working_path_ddir.is_valid()) {
+ return absl_ports::InternalError(
+ "Unable to create temp directory to build new qualified id type "
+ "joinable index");
+ }
+
+ {
+ // Transfer all data from the current to new qualified id type joinable
+ // index. Also PersistToDisk and destruct the instance after finishing, so
+ // we can safely swap directories later.
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<QualifiedIdJoinIndex> new_index,
+ Create(filesystem_, temp_working_path_ddir.dir(),
+ pre_mapping_fbv_, use_persistent_hash_map_));
+ ICING_RETURN_IF_ERROR(
+ TransferIndex(document_id_old_to_new, new_index.get()));
+ new_index->set_last_added_document_id(new_last_added_document_id);
+ ICING_RETURN_IF_ERROR(new_index->PersistToDisk());
+ }
+
+ // Destruct current index's storage instances to safely swap directories.
+ // TODO(b/268521214): handle delete propagation storage
+ doc_join_info_mapper_.reset();
+ qualified_id_storage_.reset();
+
+ if (!filesystem_.SwapFiles(temp_working_path_ddir.dir().c_str(),
+ working_path_.c_str())) {
+ return absl_ports::InternalError(
+ "Unable to apply new qualified id type joinable index due to failed "
+ "swap");
+ }
+
+ // Reinitialize qualified id type joinable index.
+ if (!filesystem_.PRead(GetMetadataFilePath(working_path_).c_str(),
+ metadata_buffer_.get(), kMetadataFileSize,
+ /*offset=*/0)) {
+ return absl_ports::InternalError("Fail to read metadata file");
+ }
+ if (use_persistent_hash_map_) {
+ ICING_ASSIGN_OR_RETURN(
+ doc_join_info_mapper_,
+ PersistentHashMapKeyMapper<int32_t>::Create(
+ filesystem_, GetDocJoinInfoMapperPath(working_path_),
+ pre_mapping_fbv_,
+ /*max_num_entries=*/kDocJoinInfoMapperMaxNumEntries,
+ /*average_kv_byte_size=*/kDocJoinInfoMapperAverageKVByteSize));
+ } else {
+ ICING_ASSIGN_OR_RETURN(
+ doc_join_info_mapper_,
+ DynamicTrieKeyMapper<int32_t>::Create(
+ filesystem_, GetDocJoinInfoMapperPath(working_path_),
+ kDocJoinInfoMapperDynamicTrieMaxSize));
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ qualified_id_storage_,
+ FileBackedVector<char>::Create(
+ filesystem_, GetQualifiedIdStoragePath(working_path_),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ FileBackedVector<char>::kMaxFileSize,
+ /*pre_mapping_mmap_size=*/pre_mapping_fbv_ ? 1024 * 1024 : 0));
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndex::Clear() {
+ SetDirty();
+
+ doc_join_info_mapper_.reset();
+ // Discard and reinitialize doc join info mapper.
+ std::string doc_join_info_mapper_path =
+ GetDocJoinInfoMapperPath(working_path_);
+ if (use_persistent_hash_map_) {
+ ICING_RETURN_IF_ERROR(PersistentHashMapKeyMapper<int32_t>::Delete(
+ filesystem_, doc_join_info_mapper_path));
+ ICING_ASSIGN_OR_RETURN(
+ doc_join_info_mapper_,
+ PersistentHashMapKeyMapper<int32_t>::Create(
+ filesystem_, std::move(doc_join_info_mapper_path), pre_mapping_fbv_,
+ /*max_num_entries=*/kDocJoinInfoMapperMaxNumEntries,
+ /*average_kv_byte_size=*/kDocJoinInfoMapperAverageKVByteSize));
+ } else {
+ ICING_RETURN_IF_ERROR(DynamicTrieKeyMapper<int32_t>::Delete(
+ filesystem_, doc_join_info_mapper_path));
+ ICING_ASSIGN_OR_RETURN(doc_join_info_mapper_,
+ DynamicTrieKeyMapper<int32_t>::Create(
+ filesystem_, doc_join_info_mapper_path,
+ kDocJoinInfoMapperDynamicTrieMaxSize));
+ }
+
+ // Clear qualified_id_storage_.
+ if (qualified_id_storage_->num_elements() > 0) {
+ ICING_RETURN_IF_ERROR(qualified_id_storage_->TruncateTo(0));
+ }
+
+ // TODO(b/268521214): clear delete propagation storage
+
+ info().last_added_document_id = kInvalidDocumentId;
+ return libtextclassifier3::Status::OK;
+}
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
+QualifiedIdJoinIndex::InitializeNewFiles(const Filesystem& filesystem,
+ std::string&& working_path,
+ bool pre_mapping_fbv,
+ bool use_persistent_hash_map) {
+ // Create working directory.
+ if (!filesystem.CreateDirectoryRecursively(working_path.c_str())) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to create directory: ", working_path));
+ }
+
+ // Initialize doc_join_info_mapper
+ std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper;
+ if (use_persistent_hash_map) {
+ // TODO(b/263890397): decide PersistentHashMapKeyMapper size
+ ICING_ASSIGN_OR_RETURN(
+ doc_join_info_mapper,
+ PersistentHashMapKeyMapper<int32_t>::Create(
+ filesystem, GetDocJoinInfoMapperPath(working_path), pre_mapping_fbv,
+ /*max_num_entries=*/kDocJoinInfoMapperMaxNumEntries,
+ /*average_kv_byte_size=*/kDocJoinInfoMapperAverageKVByteSize));
+ } else {
+ ICING_ASSIGN_OR_RETURN(
+ doc_join_info_mapper,
+ DynamicTrieKeyMapper<int32_t>::Create(
+ filesystem, GetDocJoinInfoMapperPath(working_path),
+ kDocJoinInfoMapperDynamicTrieMaxSize));
+ }
+
+ // Initialize qualified_id_storage
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<FileBackedVector<char>> qualified_id_storage,
+ FileBackedVector<char>::Create(
+ filesystem, GetQualifiedIdStoragePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ FileBackedVector<char>::kMaxFileSize,
+ /*pre_mapping_mmap_size=*/pre_mapping_fbv ? 1024 * 1024 : 0));
+
+ // Create instance.
+ auto new_index =
+ std::unique_ptr<QualifiedIdJoinIndex>(new QualifiedIdJoinIndex(
+ filesystem, std::move(working_path),
+ /*metadata_buffer=*/std::make_unique<uint8_t[]>(kMetadataFileSize),
+ std::move(doc_join_info_mapper), std::move(qualified_id_storage),
+ pre_mapping_fbv, use_persistent_hash_map));
+ // Initialize info content.
+ new_index->info().magic = Info::kMagic;
+ new_index->info().last_added_document_id = kInvalidDocumentId;
+ // Initialize new PersistentStorage. The initial checksums will be computed
+ // and set via InitializeNewStorage.
+ ICING_RETURN_IF_ERROR(new_index->InitializeNewStorage());
+
+ return new_index;
+}
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
+QualifiedIdJoinIndex::InitializeExistingFiles(const Filesystem& filesystem,
+ std::string&& working_path,
+ bool pre_mapping_fbv,
+ bool use_persistent_hash_map) {
+ // PRead metadata file.
+ auto metadata_buffer = std::make_unique<uint8_t[]>(kMetadataFileSize);
+ if (!filesystem.PRead(GetMetadataFilePath(working_path).c_str(),
+ metadata_buffer.get(), kMetadataFileSize,
+ /*offset=*/0)) {
+ return absl_ports::InternalError("Fail to read metadata file");
+ }
+
+ // Initialize doc_join_info_mapper
+ bool dynamic_trie_key_mapper_dir_exists = filesystem.DirectoryExists(
+ absl_ports::StrCat(GetDocJoinInfoMapperPath(working_path),
+ "/key_mapper_dir")
+ .c_str());
+ if ((use_persistent_hash_map && dynamic_trie_key_mapper_dir_exists) ||
+ (!use_persistent_hash_map && !dynamic_trie_key_mapper_dir_exists)) {
+ // Return a failure here so that the caller can properly delete and rebuild
+ // this component.
+ return absl_ports::FailedPreconditionError("Key mapper type mismatch");
+ }
+
+ std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper;
+ if (use_persistent_hash_map) {
+ ICING_ASSIGN_OR_RETURN(
+ doc_join_info_mapper,
+ PersistentHashMapKeyMapper<int32_t>::Create(
+ filesystem, GetDocJoinInfoMapperPath(working_path), pre_mapping_fbv,
+ /*max_num_entries=*/kDocJoinInfoMapperMaxNumEntries,
+ /*average_kv_byte_size=*/kDocJoinInfoMapperAverageKVByteSize));
+ } else {
+ ICING_ASSIGN_OR_RETURN(
+ doc_join_info_mapper,
+ DynamicTrieKeyMapper<int32_t>::Create(
+ filesystem, GetDocJoinInfoMapperPath(working_path),
+ kDocJoinInfoMapperDynamicTrieMaxSize));
+ }
+
+ // Initialize qualified_id_storage
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<FileBackedVector<char>> qualified_id_storage,
+ FileBackedVector<char>::Create(
+ filesystem, GetQualifiedIdStoragePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ FileBackedVector<char>::kMaxFileSize,
+ /*pre_mapping_mmap_size=*/pre_mapping_fbv ? 1024 * 1024 : 0));
+
+ // Create instance.
+ auto type_joinable_index =
+ std::unique_ptr<QualifiedIdJoinIndex>(new QualifiedIdJoinIndex(
+ filesystem, std::move(working_path), std::move(metadata_buffer),
+ std::move(doc_join_info_mapper), std::move(qualified_id_storage),
+ pre_mapping_fbv, use_persistent_hash_map));
+ // Initialize existing PersistentStorage. Checksums will be validated.
+ ICING_RETURN_IF_ERROR(type_joinable_index->InitializeExistingStorage());
+
+ // Validate magic.
+ if (type_joinable_index->info().magic != Info::kMagic) {
+ return absl_ports::FailedPreconditionError("Incorrect magic value");
+ }
+
+ return type_joinable_index;
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndex::TransferIndex(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ QualifiedIdJoinIndex* new_index) const {
+ std::unique_ptr<KeyMapper<int32_t>::Iterator> iter =
+ doc_join_info_mapper_->GetIterator();
+ while (iter->Advance()) {
+ DocJoinInfo old_doc_join_info(
+ encode_util::DecodeIntFromCString(iter->GetKey()));
+ int32_t qualified_id_index = iter->GetValue();
+
+ const char* data = qualified_id_storage_->array() + qualified_id_index;
+ std::string_view ref_qualified_id_str(data, strlen(data));
+
+ // Translate to new doc id.
+ DocumentId new_document_id = GetNewDocumentId(
+ document_id_old_to_new, old_doc_join_info.document_id());
+
+ if (new_document_id != kInvalidDocumentId) {
+ ICING_RETURN_IF_ERROR(
+ new_index->Put(DocJoinInfo(new_document_id,
+ old_doc_join_info.joinable_property_id()),
+ ref_qualified_id_str));
+ }
+ }
+
+ // TODO(b/268521214): transfer delete propagation storage
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndex::PersistMetadataToDisk(
+ bool force) {
+ if (!force && !is_info_dirty() && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ std::string metadata_file_path = GetMetadataFilePath(working_path_);
+
+ ScopedFd sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ if (!sfd.is_valid()) {
+ return absl_ports::InternalError("Fail to open metadata file for write");
+ }
+
+ if (!filesystem_.PWrite(sfd.get(), /*offset=*/0, metadata_buffer_.get(),
+ kMetadataFileSize)) {
+ return absl_ports::InternalError("Fail to write metadata file");
+ }
+
+ if (!filesystem_.DataSync(sfd.get())) {
+ return absl_ports::InternalError("Fail to sync metadata to disk");
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndex::PersistStoragesToDisk(
+ bool force) {
+ if (!force && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ ICING_RETURN_IF_ERROR(doc_join_info_mapper_->PersistToDisk());
+ ICING_RETURN_IF_ERROR(qualified_id_storage_->PersistToDisk());
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<Crc32> QualifiedIdJoinIndex::ComputeInfoChecksum(
+ bool force) {
+ if (!force && !is_info_dirty()) {
+ return Crc32(crcs().component_crcs.info_crc);
+ }
+
+ return info().ComputeChecksum();
+}
+
+libtextclassifier3::StatusOr<Crc32>
+QualifiedIdJoinIndex::ComputeStoragesChecksum(bool force) {
+ if (!force && !is_storage_dirty()) {
+ return Crc32(crcs().component_crcs.storages_crc);
+ }
+
+ ICING_ASSIGN_OR_RETURN(Crc32 doc_join_info_mapper_crc,
+ doc_join_info_mapper_->ComputeChecksum());
+ ICING_ASSIGN_OR_RETURN(Crc32 qualified_id_storage_crc,
+ qualified_id_storage_->ComputeChecksum());
+
+ return Crc32(doc_join_info_mapper_crc.Get() ^ qualified_id_storage_crc.Get());
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/qualified-id-join-index.h b/icing/join/qualified-id-join-index.h
new file mode 100644
index 0000000..86297cd
--- /dev/null
+++ b/icing/join/qualified-id-join-index.h
@@ -0,0 +1,308 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_H_
+#define ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/file-backed-vector.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/persistent-storage.h"
+#include "icing/join/doc-join-info.h"
+#include "icing/store/document-id.h"
+#include "icing/store/key-mapper.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+// QualifiedIdJoinIndex: a class to maintain data mapping DocJoinInfo to
+// joinable qualified ids and delete propagation info.
+class QualifiedIdJoinIndex : public PersistentStorage {
+ public:
+ struct Info {
+ static constexpr int32_t kMagic = 0x48cabdc6;
+
+ int32_t magic;
+ DocumentId last_added_document_id;
+
+ Crc32 ComputeChecksum() const {
+ return Crc32(
+ std::string_view(reinterpret_cast<const char*>(this), sizeof(Info)));
+ }
+ } __attribute__((packed));
+ static_assert(sizeof(Info) == 8, "");
+
+ // Metadata file layout: <Crcs><Info>
+ static constexpr int32_t kCrcsMetadataBufferOffset = 0;
+ static constexpr int32_t kInfoMetadataBufferOffset =
+ static_cast<int32_t>(sizeof(Crcs));
+ static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info);
+ static_assert(kMetadataFileSize == 20, "");
+
+ static constexpr WorkingPathType kWorkingPathType =
+ WorkingPathType::kDirectory;
+
+ // Creates a QualifiedIdJoinIndex instance to store qualified ids for future
+ // joining search. If any of the underlying file is missing, then delete the
+ // whole working_path and (re)initialize with new ones. Otherwise initialize
+ // and create the instance by existing files.
+ //
+ // filesystem: Object to make system level calls
+ // working_path: Specifies the working path for PersistentStorage.
+ // QualifiedIdJoinIndex uses working path as working directory
+ // and all related files will be stored under this directory. It
+ // takes full ownership and of working_path_, including
+ // creation/deletion. It is the caller's responsibility to
+ // specify correct working path and avoid mixing different
+ // persistent storages together under the same path. Also the
+ // caller has the ownership for the parent directory of
+ // working_path_, and it is responsible for parent directory
+ // creation/deletion. See PersistentStorage for more details
+ // about the concept of working_path.
+ // pre_mapping_fbv: flag indicating whether memory map max possible file size
+ // for underlying FileBackedVector before growing the actual
+ // file size.
+ // use_persistent_hash_map: flag indicating whether use persistent hash map as
+ // the key mapper (if false, then fall back to
+ // dynamic trie key mapper).
+ //
+ // Returns:
+ // - FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
+ // checksum
+ // - INTERNAL_ERROR on I/O errors
+ // - Any KeyMapper errors
+ static libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
+ Create(const Filesystem& filesystem, std::string working_path,
+ bool pre_mapping_fbv, bool use_persistent_hash_map);
+
+ // Deletes QualifiedIdJoinIndex under working_path.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ static libtextclassifier3::Status Discard(const Filesystem& filesystem,
+ const std::string& working_path) {
+ return PersistentStorage::Discard(filesystem, working_path,
+ kWorkingPathType);
+ }
+
+ // Delete copy and move constructor/assignment operator.
+ QualifiedIdJoinIndex(const QualifiedIdJoinIndex&) = delete;
+ QualifiedIdJoinIndex& operator=(const QualifiedIdJoinIndex&) = delete;
+
+ QualifiedIdJoinIndex(QualifiedIdJoinIndex&&) = delete;
+ QualifiedIdJoinIndex& operator=(QualifiedIdJoinIndex&&) = delete;
+
+ ~QualifiedIdJoinIndex() override;
+
+ // Puts a new data into index: DocJoinInfo (DocumentId, JoinablePropertyId)
+ // references to ref_qualified_id_str (the identifier of another document).
+ //
+ // REQUIRES: ref_qualified_id_str contains no '\0'.
+ //
+ // Returns:
+ // - OK on success
+ // - INVALID_ARGUMENT_ERROR if doc_join_info is invalid
+ // - Any KeyMapper errors
+ libtextclassifier3::Status Put(const DocJoinInfo& doc_join_info,
+ std::string_view ref_qualified_id_str);
+
+ // Gets the referenced document's qualified id string by DocJoinInfo.
+ //
+ // Returns:
+ // - A qualified id string referenced by the given DocJoinInfo (DocumentId,
+ // JoinablePropertyId) on success
+ // - INVALID_ARGUMENT_ERROR if doc_join_info is invalid
+ // - NOT_FOUND_ERROR if doc_join_info doesn't exist
+ // - Any KeyMapper errors
+ libtextclassifier3::StatusOr<std::string_view> Get(
+ const DocJoinInfo& doc_join_info) const;
+
+ // Reduces internal file sizes by reclaiming space and ids of deleted
+ // documents. Qualified id type joinable index will convert all entries to the
+ // new document ids.
+ //
+ // - document_id_old_to_new: a map for converting old document id to new
+ // document id.
+ // - new_last_added_document_id: will be used to update the last added
+ // document id in the qualified id type joinable
+ // index.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error. This could potentially leave the index in
+ // an invalid state and the caller should handle it properly (e.g. discard
+ // and rebuild)
+ libtextclassifier3::Status Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ DocumentId new_last_added_document_id);
+
+ // Clears all data and set last_added_document_id to kInvalidDocumentId.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status Clear();
+
+ int32_t size() const { return doc_join_info_mapper_->num_keys(); }
+
+ bool empty() const { return size() == 0; }
+
+ DocumentId last_added_document_id() const {
+ return info().last_added_document_id;
+ }
+
+ void set_last_added_document_id(DocumentId document_id) {
+ SetInfoDirty();
+
+ Info& info_ref = info();
+ if (info_ref.last_added_document_id == kInvalidDocumentId ||
+ document_id > info_ref.last_added_document_id) {
+ info_ref.last_added_document_id = document_id;
+ }
+ }
+
+ private:
+ explicit QualifiedIdJoinIndex(
+ const Filesystem& filesystem, std::string&& working_path,
+ std::unique_ptr<uint8_t[]> metadata_buffer,
+ std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper,
+ std::unique_ptr<FileBackedVector<char>> qualified_id_storage,
+ bool pre_mapping_fbv, bool use_persistent_hash_map)
+ : PersistentStorage(filesystem, std::move(working_path),
+ kWorkingPathType),
+ metadata_buffer_(std::move(metadata_buffer)),
+ doc_join_info_mapper_(std::move(doc_join_info_mapper)),
+ qualified_id_storage_(std::move(qualified_id_storage)),
+ pre_mapping_fbv_(pre_mapping_fbv),
+ use_persistent_hash_map_(use_persistent_hash_map),
+ is_info_dirty_(false),
+ is_storage_dirty_(false) {}
+
+ static libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
+ InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path,
+ bool pre_mapping_fbv, bool use_persistent_hash_map);
+
+ static libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
+ InitializeExistingFiles(const Filesystem& filesystem,
+ std::string&& working_path, bool pre_mapping_fbv,
+ bool use_persistent_hash_map);
+
+ // Transfers qualified id type joinable index data from the current to
+ // new_index and convert to new document id according to
+ // document_id_old_to_new. It is a helper function for Optimize.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status TransferIndex(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ QualifiedIdJoinIndex* new_index) const;
+
+ // Flushes contents of metadata file.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistMetadataToDisk(bool force) override;
+
+ // Flushes contents of all storages to underlying files.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistStoragesToDisk(bool force) override;
+
+ // Computes and returns Info checksum.
+ //
+ // Returns:
+ // - Crc of the Info on success
+ libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(bool force) override;
+
+ // Computes and returns all storages checksum.
+ //
+ // Returns:
+ // - Crc of all storages on success
+ // - INTERNAL_ERROR if any data inconsistency
+ libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) override;
+
+ Crcs& crcs() override {
+ return *reinterpret_cast<Crcs*>(metadata_buffer_.get() +
+ kCrcsMetadataBufferOffset);
+ }
+
+ const Crcs& crcs() const override {
+ return *reinterpret_cast<const Crcs*>(metadata_buffer_.get() +
+ kCrcsMetadataBufferOffset);
+ }
+
+ Info& info() {
+ return *reinterpret_cast<Info*>(metadata_buffer_.get() +
+ kInfoMetadataBufferOffset);
+ }
+
+ const Info& info() const {
+ return *reinterpret_cast<const Info*>(metadata_buffer_.get() +
+ kInfoMetadataBufferOffset);
+ }
+
+ void SetInfoDirty() { is_info_dirty_ = true; }
+ // When storage is dirty, we have to set info dirty as well. So just expose
+ // SetDirty to set both.
+ void SetDirty() {
+ is_info_dirty_ = true;
+ is_storage_dirty_ = true;
+ }
+
+ bool is_info_dirty() const { return is_info_dirty_; }
+ bool is_storage_dirty() const { return is_storage_dirty_; }
+
+ // Metadata buffer
+ std::unique_ptr<uint8_t[]> metadata_buffer_;
+
+ // Persistent KeyMapper for mapping (encoded) DocJoinInfo (DocumentId,
+ // JoinablePropertyId) to another referenced document's qualified id string
+ // index in qualified_id_storage_.
+ std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper_;
+
+ // Storage for qualified id strings.
+ std::unique_ptr<FileBackedVector<char>> qualified_id_storage_;
+
+ // TODO(b/268521214): add delete propagation storage
+
+ // Flag indicating whether memory map max possible file size for underlying
+ // FileBackedVector before growing the actual file size.
+ bool pre_mapping_fbv_;
+
+ // Flag indicating whether use persistent hash map as the key mapper (if
+ // false, then fall back to dynamic trie key mapper).
+ bool use_persistent_hash_map_;
+
+ bool is_info_dirty_;
+ bool is_storage_dirty_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_H_
diff --git a/icing/join/qualified-id-join-index_test.cc b/icing/join/qualified-id-join-index_test.cc
new file mode 100644
index 0000000..3d59f4b
--- /dev/null
+++ b/icing/join/qualified-id-join-index_test.cc
@@ -0,0 +1,922 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/qualified-id-join-index.h"
+
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/file-backed-vector.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/persistent-storage.h"
+#include "icing/join/doc-join-info.h"
+#include "icing/store/document-id.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
+#include "icing/store/key-mapper.h"
+#include "icing/store/persistent-hash-map-key-mapper.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::IsTrue;
+using ::testing::Lt;
+using ::testing::Ne;
+using ::testing::Not;
+using ::testing::Pointee;
+using ::testing::SizeIs;
+
+using Crcs = PersistentStorage::Crcs;
+using Info = QualifiedIdJoinIndex::Info;
+
+static constexpr int32_t kCorruptedValueOffset = 3;
+
+struct QualifiedIdJoinIndexTestParam {
+ bool pre_mapping_fbv;
+ bool use_persistent_hash_map;
+
+ explicit QualifiedIdJoinIndexTestParam(bool pre_mapping_fbv_in,
+ bool use_persistent_hash_map_in)
+ : pre_mapping_fbv(pre_mapping_fbv_in),
+ use_persistent_hash_map(use_persistent_hash_map_in) {}
+};
+
+class QualifiedIdJoinIndexTest
+ : public ::testing::TestWithParam<QualifiedIdJoinIndexTestParam> {
+ protected:
+ void SetUp() override {
+ base_dir_ = GetTestTempDir() + "/icing";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ working_path_ = base_dir_ + "/qualified_id_join_index_test";
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ std::string base_dir_;
+ std::string working_path_;
+};
+
+TEST_P(QualifiedIdJoinIndexTest, InvalidWorkingPath) {
+ const QualifiedIdJoinIndexTestParam& param = GetParam();
+
+ EXPECT_THAT(QualifiedIdJoinIndex::Create(
+ filesystem_, "/dev/null/qualified_id_join_index_test",
+ param.pre_mapping_fbv, param.use_persistent_hash_map),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_P(QualifiedIdJoinIndexTest, InitializeNewFiles) {
+ const QualifiedIdJoinIndexTestParam& param = GetParam();
+
+ {
+ // Create new qualified id join index
+ ASSERT_FALSE(filesystem_.DirectoryExists(working_path_.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> index,
+ QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+ EXPECT_THAT(index, Pointee(IsEmpty()));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ // Metadata file should be initialized correctly for both info and crcs
+ // sections.
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/metadata");
+ auto metadata_buffer =
+ std::make_unique<uint8_t[]>(QualifiedIdJoinIndex::kMetadataFileSize);
+ ASSERT_THAT(
+ filesystem_.PRead(metadata_file_path.c_str(), metadata_buffer.get(),
+ QualifiedIdJoinIndex::kMetadataFileSize,
+ /*offset=*/0),
+ IsTrue());
+
+ // Check info section
+ const Info* info = reinterpret_cast<const Info*>(
+ metadata_buffer.get() + QualifiedIdJoinIndex::kInfoMetadataBufferOffset);
+ EXPECT_THAT(info->magic, Eq(Info::kMagic));
+ EXPECT_THAT(info->last_added_document_id, Eq(kInvalidDocumentId));
+
+ // Check crcs section
+ const Crcs* crcs = reinterpret_cast<const Crcs*>(
+ metadata_buffer.get() + QualifiedIdJoinIndex::kCrcsMetadataBufferOffset);
+ // There are some initial info in KeyMapper, so storages_crc should be
+ // non-zero.
+ EXPECT_THAT(crcs->component_crcs.storages_crc, Ne(0));
+ EXPECT_THAT(crcs->component_crcs.info_crc,
+ Eq(Crc32(std::string_view(reinterpret_cast<const char*>(info),
+ sizeof(Info)))
+ .Get()));
+ EXPECT_THAT(crcs->all_crc,
+ Eq(Crc32(std::string_view(
+ reinterpret_cast<const char*>(&crcs->component_crcs),
+ sizeof(Crcs::ComponentCrcs)))
+ .Get()));
+}
+
+TEST_P(QualifiedIdJoinIndexTest,
+ InitializationShouldFailWithoutPersistToDiskOrDestruction) {
+ const QualifiedIdJoinIndexTestParam& param = GetParam();
+
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> index,
+ QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+
+ // Insert some data.
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+ ICING_ASSERT_OK(index->PersistToDisk());
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriB"));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/5, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriC"));
+
+ // Without calling PersistToDisk, checksums will not be recomputed or synced
+ // to disk, so initializing another instance on the same files should fail.
+ EXPECT_THAT(QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map),
+ StatusIs(param.use_persistent_hash_map
+ ? libtextclassifier3::StatusCode::FAILED_PRECONDITION
+ : libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_P(QualifiedIdJoinIndexTest, InitializationShouldSucceedWithPersistToDisk) {
+ const QualifiedIdJoinIndexTestParam& param = GetParam();
+
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> index1,
+ QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+
+ // Insert some data.
+ ICING_ASSERT_OK(
+ index1->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+ ICING_ASSERT_OK(
+ index1->Put(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriB"));
+ ICING_ASSERT_OK(
+ index1->Put(DocJoinInfo(/*document_id=*/5, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriC"));
+ ASSERT_THAT(index1, Pointee(SizeIs(3)));
+
+ // After calling PersistToDisk, all checksums should be recomputed and synced
+ // correctly to disk, so initializing another instance on the same files
+ // should succeed, and we should be able to get the same contents.
+ ICING_EXPECT_OK(index1->PersistToDisk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> index2,
+ QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+ EXPECT_THAT(index2, Pointee(SizeIs(3)));
+ EXPECT_THAT(
+ index2->Get(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20)),
+ IsOkAndHolds(/*ref_qualified_id_str=*/"namespace#uriA"));
+ EXPECT_THAT(
+ index2->Get(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/20)),
+ IsOkAndHolds(/*ref_qualified_id_str=*/"namespace#uriB"));
+ EXPECT_THAT(
+ index2->Get(DocJoinInfo(/*document_id=*/5, /*joinable_property_id=*/20)),
+ IsOkAndHolds(/*ref_qualified_id_str=*/"namespace#uriC"));
+}
+
+TEST_P(QualifiedIdJoinIndexTest, InitializationShouldSucceedAfterDestruction) {
+ const QualifiedIdJoinIndexTestParam& param = GetParam();
+
+ {
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> index,
+ QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+
+ // Insert some data.
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriB"));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/5, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriC"));
+ ASSERT_THAT(index, Pointee(SizeIs(3)));
+ }
+
+ {
+ // The previous instance went out of scope and was destructed. Although we
+ // didn't call PersistToDisk explicitly, the destructor should invoke it and
+ // thus initializing another instance on the same files should succeed, and
+ // we should be able to get the same contents.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> index,
+ QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+ EXPECT_THAT(index, Pointee(SizeIs(3)));
+ EXPECT_THAT(index->Get(DocJoinInfo(/*document_id=*/1,
+ /*joinable_property_id=*/20)),
+ IsOkAndHolds("namespace#uriA"));
+ EXPECT_THAT(index->Get(DocJoinInfo(/*document_id=*/3,
+ /*joinable_property_id=*/20)),
+ IsOkAndHolds("namespace#uriB"));
+ EXPECT_THAT(index->Get(DocJoinInfo(/*document_id=*/5,
+ /*joinable_property_id=*/20)),
+ IsOkAndHolds("namespace#uriC"));
+ }
+}
+
+TEST_P(QualifiedIdJoinIndexTest,
+ InitializeExistingFilesWithDifferentMagicShouldFail) {
+ const QualifiedIdJoinIndexTestParam& param = GetParam();
+
+ {
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> index,
+ QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ {
+ // Manually change magic and update checksum
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/metadata");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
+
+ auto metadata_buffer =
+ std::make_unique<uint8_t[]>(QualifiedIdJoinIndex::kMetadataFileSize);
+ ASSERT_THAT(filesystem_.PRead(metadata_sfd.get(), metadata_buffer.get(),
+ QualifiedIdJoinIndex::kMetadataFileSize,
+ /*offset=*/0),
+ IsTrue());
+
+ // Manually change magic and update checksums.
+ Crcs* crcs = reinterpret_cast<Crcs*>(
+ metadata_buffer.get() +
+ QualifiedIdJoinIndex::kCrcsMetadataBufferOffset);
+ Info* info = reinterpret_cast<Info*>(
+ metadata_buffer.get() +
+ QualifiedIdJoinIndex::kInfoMetadataBufferOffset);
+ info->magic += kCorruptedValueOffset;
+ crcs->component_crcs.info_crc = info->ComputeChecksum().Get();
+ crcs->all_crc = crcs->component_crcs.ComputeChecksum().Get();
+ ASSERT_THAT(filesystem_.PWrite(metadata_sfd.get(), /*offset=*/0,
+ metadata_buffer.get(),
+ QualifiedIdJoinIndex::kMetadataFileSize),
+ IsTrue());
+ }
+
+ // Attempt to create the qualified id join index with different magic. This
+ // should fail.
+ EXPECT_THAT(QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+ HasSubstr("Incorrect magic value")));
+}
+
+TEST_P(QualifiedIdJoinIndexTest,
+ InitializeExistingFilesWithWrongAllCrcShouldFail) {
+ const QualifiedIdJoinIndexTestParam& param = GetParam();
+
+ {
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> index,
+ QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ {
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/metadata");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
+
+ auto metadata_buffer =
+ std::make_unique<uint8_t[]>(QualifiedIdJoinIndex::kMetadataFileSize);
+ ASSERT_THAT(filesystem_.PRead(metadata_sfd.get(), metadata_buffer.get(),
+ QualifiedIdJoinIndex::kMetadataFileSize,
+ /*offset=*/0),
+ IsTrue());
+
+ // Manually corrupt all_crc
+ Crcs* crcs = reinterpret_cast<Crcs*>(
+ metadata_buffer.get() +
+ QualifiedIdJoinIndex::kCrcsMetadataBufferOffset);
+ crcs->all_crc += kCorruptedValueOffset;
+
+ ASSERT_THAT(filesystem_.PWrite(metadata_sfd.get(), /*offset=*/0,
+ metadata_buffer.get(),
+ QualifiedIdJoinIndex::kMetadataFileSize),
+ IsTrue());
+ }
+
+ // Attempt to create the qualified id join index with metadata containing
+ // corrupted all_crc. This should fail.
+ EXPECT_THAT(QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+ HasSubstr("Invalid all crc")));
+}
+
+TEST_P(QualifiedIdJoinIndexTest,
+ InitializeExistingFilesWithCorruptedInfoShouldFail) {
+ const QualifiedIdJoinIndexTestParam& param = GetParam();
+
+ {
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> index,
+ QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ {
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/metadata");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
+
+ auto metadata_buffer =
+ std::make_unique<uint8_t[]>(QualifiedIdJoinIndex::kMetadataFileSize);
+ ASSERT_THAT(filesystem_.PRead(metadata_sfd.get(), metadata_buffer.get(),
+ QualifiedIdJoinIndex::kMetadataFileSize,
+ /*offset=*/0),
+ IsTrue());
+
+ // Modify info, but don't update the checksum. This would be similar to
+ // corruption of info.
+ Info* info = reinterpret_cast<Info*>(
+ metadata_buffer.get() +
+ QualifiedIdJoinIndex::kInfoMetadataBufferOffset);
+ info->last_added_document_id += kCorruptedValueOffset;
+
+ ASSERT_THAT(filesystem_.PWrite(metadata_sfd.get(), /*offset=*/0,
+ metadata_buffer.get(),
+ QualifiedIdJoinIndex::kMetadataFileSize),
+ IsTrue());
+ }
+
+ // Attempt to create the qualified id join index with info that doesn't match
+ // its checksum. This should fail.
+ EXPECT_THAT(QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+ HasSubstr("Invalid info crc")));
+}
+
+TEST_P(QualifiedIdJoinIndexTest,
+ InitializeExistingFilesWithCorruptedDocJoinInfoMapperShouldFail) {
+ const QualifiedIdJoinIndexTestParam& param = GetParam();
+
+ {
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> index,
+ QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ // Corrupt doc_join_info_mapper manually.
+ {
+ std::string mapper_working_path =
+ absl_ports::StrCat(working_path_, "/doc_join_info_mapper");
+ std::unique_ptr<KeyMapper<int32_t>> mapper;
+ if (param.use_persistent_hash_map) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ mapper, PersistentHashMapKeyMapper<int32_t>::Create(
+ filesystem_, std::move(mapper_working_path),
+ param.pre_mapping_fbv));
+ } else {
+ ICING_ASSERT_OK_AND_ASSIGN(mapper,
+ DynamicTrieKeyMapper<int32_t>::Create(
+ filesystem_, mapper_working_path,
+ /*maximum_size_bytes=*/128 * 1024 * 1024));
+ }
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc, mapper->ComputeChecksum());
+ ICING_ASSERT_OK(mapper->Put("foo", 12345));
+ ICING_ASSERT_OK(mapper->PersistToDisk());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc, mapper->ComputeChecksum());
+ ASSERT_THAT(old_crc, Not(Eq(new_crc)));
+ }
+
+ // Attempt to create the qualified id join index with corrupted
+ // doc_join_info_mapper. This should fail.
+ EXPECT_THAT(QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+ HasSubstr("Invalid storages crc")));
+}
+
+TEST_P(QualifiedIdJoinIndexTest,
+ InitializeExistingFilesWithCorruptedQualifiedIdStorageShouldFail) {
+ const QualifiedIdJoinIndexTestParam& param = GetParam();
+
+ {
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> index,
+ QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ {
+ // Corrupt qualified_id_storage manually.
+ std::string qualified_id_storage_path =
+ absl_ports::StrCat(working_path_, "/qualified_id_storage");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> qualified_id_storage,
+ FileBackedVector<char>::Create(
+ filesystem_, qualified_id_storage_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc,
+ qualified_id_storage->ComputeChecksum());
+ ICING_ASSERT_OK(qualified_id_storage->Append('a'));
+ ICING_ASSERT_OK(qualified_id_storage->Append('b'));
+ ICING_ASSERT_OK(qualified_id_storage->PersistToDisk());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc,
+ qualified_id_storage->ComputeChecksum());
+ ASSERT_THAT(old_crc, Not(Eq(new_crc)));
+ }
+
+ // Attempt to create the qualified id join index with corrupted
+ // qualified_id_storage. This should fail.
+ EXPECT_THAT(QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+ HasSubstr("Invalid storages crc")));
+}
+
+TEST_P(QualifiedIdJoinIndexTest, InvalidPut) {
+ const QualifiedIdJoinIndexTestParam& param = GetParam();
+
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> index,
+ QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+
+ DocJoinInfo default_invalid;
+ EXPECT_THAT(
+ index->Put(default_invalid, /*ref_qualified_id_str=*/"namespace#uriA"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(QualifiedIdJoinIndexTest, InvalidGet) {
+ const QualifiedIdJoinIndexTestParam& param = GetParam();
+
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> index,
+ QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+
+ DocJoinInfo default_invalid;
+ EXPECT_THAT(index->Get(default_invalid),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(QualifiedIdJoinIndexTest, PutAndGet) {
+ const QualifiedIdJoinIndexTestParam& param = GetParam();
+
+ DocJoinInfo target_info1(/*document_id=*/1, /*joinable_property_id=*/20);
+ std::string_view ref_qualified_id_str_a = "namespace#uriA";
+
+ DocJoinInfo target_info2(/*document_id=*/3, /*joinable_property_id=*/13);
+ std::string_view ref_qualified_id_str_b = "namespace#uriB";
+
+ DocJoinInfo target_info3(/*document_id=*/4, /*joinable_property_id=*/4);
+ std::string_view ref_qualified_id_str_c = "namespace#uriC";
+
+ {
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> index,
+ QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+
+ EXPECT_THAT(index->Put(target_info1, ref_qualified_id_str_a), IsOk());
+ EXPECT_THAT(index->Put(target_info2, ref_qualified_id_str_b), IsOk());
+ EXPECT_THAT(index->Put(target_info3, ref_qualified_id_str_c), IsOk());
+ EXPECT_THAT(index, Pointee(SizeIs(3)));
+
+ EXPECT_THAT(index->Get(target_info1), IsOkAndHolds(ref_qualified_id_str_a));
+ EXPECT_THAT(index->Get(target_info2), IsOkAndHolds(ref_qualified_id_str_b));
+ EXPECT_THAT(index->Get(target_info3), IsOkAndHolds(ref_qualified_id_str_c));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ // Verify we can get all of them after destructing and re-initializing.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> index,
+ QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+ EXPECT_THAT(index, Pointee(SizeIs(3)));
+ EXPECT_THAT(index->Get(target_info1), IsOkAndHolds(ref_qualified_id_str_a));
+ EXPECT_THAT(index->Get(target_info2), IsOkAndHolds(ref_qualified_id_str_b));
+ EXPECT_THAT(index->Get(target_info3), IsOkAndHolds(ref_qualified_id_str_c));
+}
+
+TEST_P(QualifiedIdJoinIndexTest, GetShouldReturnNotFoundErrorIfNotExist) {
+ const QualifiedIdJoinIndexTestParam& param = GetParam();
+
+ DocJoinInfo target_info(/*document_id=*/1, /*joinable_property_id=*/20);
+ std::string_view ref_qualified_id_str = "namespace#uriA";
+
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> index,
+ QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+
+ // Verify entry is not found in the beginning.
+ EXPECT_THAT(index->Get(target_info),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ ICING_ASSERT_OK(index->Put(target_info, ref_qualified_id_str));
+ ASSERT_THAT(index->Get(target_info), IsOkAndHolds(ref_qualified_id_str));
+
+ // Get another non-existing entry. This should get NOT_FOUND_ERROR.
+ DocJoinInfo another_target_info(/*document_id=*/2,
+ /*joinable_property_id=*/20);
+ EXPECT_THAT(index->Get(another_target_info),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_P(QualifiedIdJoinIndexTest, SetLastAddedDocumentId) {
+ const QualifiedIdJoinIndexTestParam& param = GetParam();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> index,
+ QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ constexpr DocumentId kDocumentId = 100;
+ index->set_last_added_document_id(kDocumentId);
+ EXPECT_THAT(index->last_added_document_id(), Eq(kDocumentId));
+
+ constexpr DocumentId kNextDocumentId = 123;
+ index->set_last_added_document_id(kNextDocumentId);
+ EXPECT_THAT(index->last_added_document_id(), Eq(kNextDocumentId));
+}
+
+TEST_P(
+ QualifiedIdJoinIndexTest,
+ SetLastAddedDocumentIdShouldIgnoreNewDocumentIdNotGreaterThanTheCurrent) {
+ const QualifiedIdJoinIndexTestParam& param = GetParam();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> index,
+ QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+
+ constexpr DocumentId kDocumentId = 123;
+ index->set_last_added_document_id(kDocumentId);
+ ASSERT_THAT(index->last_added_document_id(), Eq(kDocumentId));
+
+ constexpr DocumentId kNextDocumentId = 100;
+ ASSERT_THAT(kNextDocumentId, Lt(kDocumentId));
+ index->set_last_added_document_id(kNextDocumentId);
+ // last_added_document_id() should remain unchanged.
+ EXPECT_THAT(index->last_added_document_id(), Eq(kDocumentId));
+}
+
+TEST_P(QualifiedIdJoinIndexTest, Optimize) {
+ const QualifiedIdJoinIndexTestParam& param = GetParam();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> index,
+ QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/10),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/5, /*joinable_property_id=*/3),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/8, /*joinable_property_id=*/9),
+ /*ref_qualified_id_str=*/"namespace#uriB"));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/13, /*joinable_property_id=*/4),
+ /*ref_qualified_id_str=*/"namespace#uriC"));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/21, /*joinable_property_id=*/12),
+ /*ref_qualified_id_str=*/"namespace#uriC"));
+ index->set_last_added_document_id(21);
+
+ ASSERT_THAT(index, Pointee(SizeIs(5)));
+
+ // Delete doc id = 5, 8, compress and keep the rest.
+ std::vector<DocumentId> document_id_old_to_new(22, kInvalidDocumentId);
+ document_id_old_to_new[3] = 0;
+ document_id_old_to_new[13] = 1;
+ document_id_old_to_new[21] = 2;
+
+ DocumentId new_last_added_document_id = 2;
+ EXPECT_THAT(
+ index->Optimize(document_id_old_to_new, new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(index, Pointee(SizeIs(3)));
+ EXPECT_THAT(index->last_added_document_id(), Eq(new_last_added_document_id));
+
+ // Verify Put and Get API still work normally after Optimize().
+ // (old_doc_id=3, joinable_property_id=10), which is now (doc_id=0,
+ // joinable_property_id=10), has referenced qualified id str =
+ // "namespace#uriA".
+ EXPECT_THAT(
+ index->Get(DocJoinInfo(/*document_id=*/0, /*joinable_property_id=*/10)),
+ IsOkAndHolds("namespace#uriA"));
+
+ // (old_doc_id=5, joinable_property_id=3) and (old_doc_id=8,
+ // joinable_property_id=9) are now not found since we've deleted old_doc_id =
+ // 5, 8. It is not testable via Get() because there is no valid doc_id mapping
+ // for old_doc_id = 5, 8 and we cannot generate a valid DocJoinInfo for it.
+
+ // (old_doc_id=13, joinable_property_id=4), which is now (doc_id=1,
+ // joinable_property_id=4), has referenced qualified id str =
+ // "namespace#uriC".
+ EXPECT_THAT(
+ index->Get(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/4)),
+ IsOkAndHolds("namespace#uriC"));
+
+ // (old_doc_id=21, joinable_property_id=12), which is now (doc_id=2,
+ // joinable_property_id=12), has referenced qualified id str =
+ // "namespace#uriC".
+ EXPECT_THAT(
+ index->Get(DocJoinInfo(/*document_id=*/2, /*joinable_property_id=*/12)),
+ IsOkAndHolds("namespace#uriC"));
+
+ // Joinable index should be able to work normally after Optimize().
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/99, /*joinable_property_id=*/2),
+ /*ref_qualified_id_str=*/"namespace#uriD"));
+ index->set_last_added_document_id(99);
+
+ EXPECT_THAT(index, Pointee(SizeIs(4)));
+ EXPECT_THAT(index->last_added_document_id(), Eq(99));
+ EXPECT_THAT(index->Get(DocJoinInfo(/*document_id=*/99,
+ /*joinable_property_id=*/2)),
+ IsOkAndHolds("namespace#uriD"));
+}
+
+TEST_P(QualifiedIdJoinIndexTest, OptimizeOutOfRangeDocumentId) {
+ const QualifiedIdJoinIndexTestParam& param = GetParam();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> index,
+ QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/99, /*joinable_property_id=*/10),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+ index->set_last_added_document_id(99);
+
+ // Create document_id_old_to_new with size = 1. Optimize should handle out of
+ // range DocumentId properly.
+ std::vector<DocumentId> document_id_old_to_new = {kInvalidDocumentId};
+
+ // There shouldn't be any error due to vector index.
+ EXPECT_THAT(
+ index->Optimize(document_id_old_to_new,
+ /*new_last_added_document_id=*/kInvalidDocumentId),
+ IsOk());
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ // Verify all data are discarded after Optimize().
+ EXPECT_THAT(index, Pointee(IsEmpty()));
+}
+
+TEST_P(QualifiedIdJoinIndexTest, OptimizeDeleteAll) {
+ const QualifiedIdJoinIndexTestParam& param = GetParam();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> index,
+ QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/10),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/5, /*joinable_property_id=*/3),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/8, /*joinable_property_id=*/9),
+ /*ref_qualified_id_str=*/"namespace#uriB"));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/13, /*joinable_property_id=*/4),
+ /*ref_qualified_id_str=*/"namespace#uriC"));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/21, /*joinable_property_id=*/12),
+ /*ref_qualified_id_str=*/"namespace#uriC"));
+ index->set_last_added_document_id(21);
+
+ // Delete all documents.
+ std::vector<DocumentId> document_id_old_to_new(22, kInvalidDocumentId);
+
+ EXPECT_THAT(
+ index->Optimize(document_id_old_to_new,
+ /*new_last_added_document_id=*/kInvalidDocumentId),
+ IsOk());
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ // Verify all data are discarded after Optimize().
+ EXPECT_THAT(index, Pointee(IsEmpty()));
+}
+
+TEST_P(QualifiedIdJoinIndexTest, Clear) {
+ const QualifiedIdJoinIndexTestParam& param = GetParam();
+
+ DocJoinInfo target_info1(/*document_id=*/1, /*joinable_property_id=*/20);
+ DocJoinInfo target_info2(/*document_id=*/3, /*joinable_property_id=*/5);
+ DocJoinInfo target_info3(/*document_id=*/6, /*joinable_property_id=*/13);
+
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> index,
+ QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+ ICING_ASSERT_OK(
+ index->Put(target_info1, /*ref_qualified_id_str=*/"namespace#uriA"));
+ ICING_ASSERT_OK(
+ index->Put(target_info2, /*ref_qualified_id_str=*/"namespace#uriB"));
+ ICING_ASSERT_OK(
+ index->Put(target_info3, /*ref_qualified_id_str=*/"namespace#uriC"));
+ ASSERT_THAT(index, Pointee(SizeIs(3)));
+ index->set_last_added_document_id(6);
+ ASSERT_THAT(index->last_added_document_id(), Eq(6));
+
+ // After resetting, last_added_document_id should be set to
+ // kInvalidDocumentId, and the previous added data should be deleted.
+ EXPECT_THAT(index->Clear(), IsOk());
+ EXPECT_THAT(index, Pointee(IsEmpty()));
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+ EXPECT_THAT(index->Get(target_info1),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(index->Get(target_info2),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(index->Get(target_info3),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // Join index should be able to work normally after Clear().
+ DocJoinInfo target_info4(/*document_id=*/2, /*joinable_property_id=*/19);
+ ICING_ASSERT_OK(
+ index->Put(target_info4, /*ref_qualified_id_str=*/"namespace#uriD"));
+ index->set_last_added_document_id(2);
+
+ EXPECT_THAT(index->last_added_document_id(), Eq(2));
+ EXPECT_THAT(index->Get(target_info4), IsOkAndHolds("namespace#uriD"));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ index.reset();
+
+ // Verify index after reconstructing.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index, QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+ EXPECT_THAT(index->last_added_document_id(), Eq(2));
+ EXPECT_THAT(index->Get(target_info1),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(index->Get(target_info2),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(index->Get(target_info3),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(index->Get(target_info4), IsOkAndHolds("namespace#uriD"));
+}
+
+TEST_P(QualifiedIdJoinIndexTest, SwitchKeyMapperTypeShouldReturnError) {
+ const QualifiedIdJoinIndexTestParam& param = GetParam();
+
+ {
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> index,
+ QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ bool switch_key_mapper_flag = !param.use_persistent_hash_map;
+ EXPECT_THAT(QualifiedIdJoinIndex::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ switch_key_mapper_flag),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ QualifiedIdJoinIndexTest, QualifiedIdJoinIndexTest,
+ testing::Values(
+ QualifiedIdJoinIndexTestParam(/*pre_mapping_fbv_in=*/true,
+ /*use_persistent_hash_map_in=*/true),
+ QualifiedIdJoinIndexTestParam(/*pre_mapping_fbv_in=*/true,
+ /*use_persistent_hash_map_in=*/false),
+ QualifiedIdJoinIndexTestParam(/*pre_mapping_fbv_in=*/false,
+ /*use_persistent_hash_map_in=*/true),
+ QualifiedIdJoinIndexTestParam(/*pre_mapping_fbv_in=*/false,
+ /*use_persistent_hash_map_in=*/false)));
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/qualified-id-join-indexing-handler.cc b/icing/join/qualified-id-join-indexing-handler.cc
new file mode 100644
index 0000000..344cf41
--- /dev/null
+++ b/icing/join/qualified-id-join-indexing-handler.cc
@@ -0,0 +1,108 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/qualified-id-join-indexing-handler.h"
+
+#include <memory>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/join/doc-join-info.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/join/qualified-id.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-id.h"
+#include "icing/util/clock.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+#include "icing/util/tokenized-document.h"
+
+namespace icing {
+namespace lib {
+
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexingHandler>>
+QualifiedIdJoinIndexingHandler::Create(
+ const Clock* clock, QualifiedIdJoinIndex* qualified_id_join_index) {
+ ICING_RETURN_ERROR_IF_NULL(clock);
+ ICING_RETURN_ERROR_IF_NULL(qualified_id_join_index);
+
+ return std::unique_ptr<QualifiedIdJoinIndexingHandler>(
+ new QualifiedIdJoinIndexingHandler(clock, qualified_id_join_index));
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexingHandler::Handle(
+ const TokenizedDocument& tokenized_document, DocumentId document_id,
+ bool recovery_mode, PutDocumentStatsProto* put_document_stats) {
+ std::unique_ptr<Timer> index_timer = clock_.GetNewTimer();
+
+ if (!IsDocumentIdValid(document_id)) {
+ return absl_ports::InvalidArgumentError(
+ IcingStringUtil::StringPrintf("Invalid DocumentId %d", document_id));
+ }
+
+ if (qualified_id_join_index_.last_added_document_id() != kInvalidDocumentId &&
+ document_id <= qualified_id_join_index_.last_added_document_id()) {
+ if (recovery_mode) {
+ // Skip the document if document_id <= last_added_document_id in recovery
+ // mode without returning an error.
+ return libtextclassifier3::Status::OK;
+ }
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "DocumentId %d must be greater than last added document_id %d",
+ document_id, qualified_id_join_index_.last_added_document_id()));
+ }
+ qualified_id_join_index_.set_last_added_document_id(document_id);
+
+ for (const JoinableProperty<std::string_view>& qualified_id_property :
+ tokenized_document.qualified_id_join_properties()) {
+ if (qualified_id_property.values.empty()) {
+ continue;
+ }
+
+ DocJoinInfo info(document_id, qualified_id_property.metadata.id);
+ // Currently we only support single (non-repeated) joinable value under a
+ // property.
+ std::string_view ref_qualified_id_str = qualified_id_property.values[0];
+
+ // Attempt to parse qualified id string to make sure the format is correct.
+ if (!QualifiedId::Parse(ref_qualified_id_str).ok()) {
+ // Skip incorrect format of qualified id string to save disk space.
+ continue;
+ }
+
+ libtextclassifier3::Status status =
+ qualified_id_join_index_.Put(info, ref_qualified_id_str);
+ if (!status.ok()) {
+ ICING_LOG(WARNING)
+ << "Failed to add data into qualified id join index due to: "
+ << status.error_message();
+ return status;
+ }
+ }
+
+ if (put_document_stats != nullptr) {
+ put_document_stats->set_qualified_id_join_index_latency_ms(
+ index_timer->GetElapsedMilliseconds());
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/qualified-id-join-indexing-handler.h b/icing/join/qualified-id-join-indexing-handler.h
new file mode 100644
index 0000000..f44e45d
--- /dev/null
+++ b/icing/join/qualified-id-join-indexing-handler.h
@@ -0,0 +1,70 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_QUALIFIED_ID_JOIN_INDEXING_HANDLER_H_
+#define ICING_JOIN_QUALIFIED_ID_JOIN_INDEXING_HANDLER_H_
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/index/data-indexing-handler.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/store/document-id.h"
+#include "icing/util/clock.h"
+#include "icing/util/tokenized-document.h"
+
+namespace icing {
+namespace lib {
+
+class QualifiedIdJoinIndexingHandler : public DataIndexingHandler {
+ public:
+ // Creates a QualifiedIdJoinIndexingHandler instance which does not take
+ // ownership of any input components. All pointers must refer to valid objects
+ // that outlive the created QualifiedIdJoinIndexingHandler instance.
+ //
+ // Returns:
+ // - A QualifiedIdJoinIndexingHandler instance on success
+ // - FAILED_PRECONDITION_ERROR if any of the input pointer is null
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexingHandler>>
+ Create(const Clock* clock, QualifiedIdJoinIndex* qualified_id_join_index);
+
+ ~QualifiedIdJoinIndexingHandler() override = default;
+
+ // Handles the joinable qualified id data indexing process: add data into the
+ // qualified id join index.
+ //
+ /// Returns:
+ // - OK on success.
+ // - INVALID_ARGUMENT_ERROR if document_id is invalid OR document_id is less
+ // than or equal to the document_id of a previously indexed document in
+ // non recovery mode.
+ // - INTERNAL_ERROR if any other errors occur.
+ // - Any QualifiedIdJoinIndex errors.
+ libtextclassifier3::Status Handle(
+ const TokenizedDocument& tokenized_document, DocumentId document_id,
+ bool recovery_mode, PutDocumentStatsProto* put_document_stats) override;
+
+ private:
+ explicit QualifiedIdJoinIndexingHandler(
+ const Clock* clock, QualifiedIdJoinIndex* qualified_id_join_index)
+ : DataIndexingHandler(clock),
+ qualified_id_join_index_(*qualified_id_join_index) {}
+
+ QualifiedIdJoinIndex& qualified_id_join_index_; // Does not own.
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JOIN_QUALIFIED_ID_JOIN_INDEXING_HANDLER_H_
diff --git a/icing/join/qualified-id-join-indexing-handler_test.cc b/icing/join/qualified-id-join-indexing-handler_test.cc
new file mode 100644
index 0000000..7e89dfa
--- /dev/null
+++ b/icing/join/qualified-id-join-indexing-handler_test.cc
@@ -0,0 +1,526 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/qualified-id-join-indexing-handler.h"
+
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/join/qualified-id.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/util/tokenized-document.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::IsTrue;
+
+// Schema type for referenced documents: ReferencedType
+static constexpr std::string_view kReferencedType = "ReferencedType";
+static constexpr std::string_view kPropertyName = "name";
+
+// Joinable properties and joinable property id. Joinable property id is
+// determined by the lexicographical order of joinable property path.
+// Schema type with joinable property: FakeType
+static constexpr std::string_view kFakeType = "FakeType";
+static constexpr std::string_view kPropertyQualifiedId = "qualifiedId";
+
+static constexpr JoinablePropertyId kQualifiedIdJoinablePropertyId = 0;
+
+// Schema type with nested joinable properties: NestedType
+static constexpr std::string_view kNestedType = "NestedType";
+static constexpr std::string_view kPropertyNestedDoc = "nested";
+static constexpr std::string_view kPropertyQualifiedId2 = "qualifiedId2";
+
+static constexpr JoinablePropertyId kNestedQualifiedIdJoinablePropertyId = 0;
+static constexpr JoinablePropertyId kQualifiedId2JoinablePropertyId = 1;
+
+static constexpr DocumentId kDefaultDocumentId = 3;
+
+class QualifiedIdJoinIndexingHandlerTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ base_dir_ = GetTestTempDir() + "/icing_test";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ qualified_id_join_index_dir_ = base_dir_ + "/qualified_id_join_index";
+ schema_store_dir_ = base_dir_ + "/schema_store";
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ qualified_id_join_index_,
+ QualifiedIdJoinIndex::Create(filesystem_, qualified_id_join_index_dir_,
+ /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false));
+
+ language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ lang_segmenter_,
+ language_segmenter_factory::Create(std::move(segmenter_options)));
+
+ ASSERT_THAT(
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()),
+ IsTrue());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kReferencedType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType(kFakeType).AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyQualifiedId)
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kNestedType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyNestedDoc)
+ .SetDataTypeDocument(
+ kFakeType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyQualifiedId2)
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+ }
+
+ void TearDown() override {
+ schema_store_.reset();
+ lang_segmenter_.reset();
+ qualified_id_join_index_.reset();
+
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ FakeClock fake_clock_;
+ std::string base_dir_;
+ std::string qualified_id_join_index_dir_;
+ std::string schema_store_dir_;
+
+ std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index_;
+ std::unique_ptr<LanguageSegmenter> lang_segmenter_;
+ std::unique_ptr<SchemaStore> schema_store_;
+};
+
+TEST_F(QualifiedIdJoinIndexingHandlerTest, CreationWithNullPointerShouldFail) {
+ EXPECT_THAT(QualifiedIdJoinIndexingHandler::Create(
+ /*clock=*/nullptr, qualified_id_join_index_.get()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+
+ EXPECT_THAT(QualifiedIdJoinIndexingHandler::Create(
+ &fake_clock_, /*qualified_id_join_index=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleJoinableProperty) {
+ DocumentProto referenced_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/1")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "one")
+ .Build();
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ "pkg$db/ns#ref_type/1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kInvalidDocumentId));
+ // Handle document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ qualified_id_join_index_.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ IsOk());
+
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+ IsOkAndHolds("pkg$db/ns#ref_type/1"));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleNestedJoinableProperty) {
+ DocumentProto referenced_document1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/1")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "one")
+ .Build();
+ DocumentProto referenced_document2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/2")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "two")
+ .Build();
+
+ DocumentProto nested_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "nested_type/1")
+ .SetSchema(std::string(kNestedType))
+ .AddDocumentProperty(
+ std::string(kPropertyNestedDoc),
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "nested_fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ "pkg$db/ns#ref_type/2")
+ .Build())
+ .AddStringProperty(std::string(kPropertyQualifiedId2),
+ "pkg$db/ns#ref_type/1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ nested_document));
+
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kInvalidDocumentId));
+ // Handle nested_document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ qualified_id_join_index_.get()));
+ EXPECT_THAT(handler->Handle(tokenized_document, kDefaultDocumentId,
+ /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kNestedQualifiedIdJoinablePropertyId)),
+ IsOkAndHolds("pkg$db/ns#ref_type/2"));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedId2JoinablePropertyId)),
+ IsOkAndHolds("pkg$db/ns#ref_type/1"));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerTest,
+ HandleShouldSkipInvalidFormatQualifiedId) {
+ static constexpr std::string_view kInvalidFormatQualifiedId =
+ "invalid_format_qualified_id";
+ ASSERT_THAT(QualifiedId::Parse(kInvalidFormatQualifiedId),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ std::string(kInvalidFormatQualifiedId))
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kInvalidDocumentId));
+ // Handle document. Should ignore invalid format qualified id.
+ // Index data should remain unchanged since there is no valid qualified id,
+ // but last_added_document_id should be updated.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ qualified_id_join_index_.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleShouldSkipEmptyQualifiedId) {
+ // Create a document without any qualified id.
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ ASSERT_THAT(tokenized_document.qualified_id_join_properties(), IsEmpty());
+
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kInvalidDocumentId));
+ // Handle document. Index data should remain unchanged since there is no
+ // qualified id, but last_added_document_id should be updated.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ qualified_id_join_index_.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerTest,
+ HandleInvalidDocumentIdShouldReturnInvalidArgumentError) {
+ DocumentProto referenced_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/1")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "one")
+ .Build();
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ "pkg$db/ns#ref_type/1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ qualified_id_join_index_->set_last_added_document_id(kDefaultDocumentId);
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ qualified_id_join_index_.get()));
+
+ // Handling document with kInvalidDocumentId should cause a failure, and both
+ // index data and last_added_document_id should remain unchanged.
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kInvalidDocumentId,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kInvalidDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // Recovery mode should get the same result.
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kInvalidDocumentId,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kInvalidDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerTest,
+ HandleOutOfOrderDocumentIdShouldReturnInvalidArgumentError) {
+ DocumentProto referenced_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/1")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "one")
+ .Build();
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ "pkg$db/ns#ref_type/1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ qualified_id_join_index_->set_last_added_document_id(kDefaultDocumentId);
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ qualified_id_join_index_.get()));
+
+ // Handling document with document_id < last_added_document_id should cause a
+ // failure, and both index data and last_added_document_id should remain
+ // unchanged.
+ ASSERT_THAT(IsDocumentIdValid(kDefaultDocumentId - 1), IsTrue());
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId - 1,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // Handling document with document_id == last_added_document_id should cause a
+ // failure, and both index data and last_added_document_id should remain
+ // unchanged.
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerTest,
+ HandleRecoveryModeShouldIgnoreDocsLELastAddedDocId) {
+ DocumentProto referenced_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/1")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "one")
+ .Build();
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ "pkg$db/ns#ref_type/1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ qualified_id_join_index_->set_last_added_document_id(kDefaultDocumentId);
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ qualified_id_join_index_.get()));
+
+ // Handle document with document_id < last_added_document_id in recovery mode.
+ // We should not get any error, but the handler should ignore the document, so
+ // both index data and last_added_document_id should remain unchanged.
+ ASSERT_THAT(IsDocumentIdValid(kDefaultDocumentId - 1), IsTrue());
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId - 1,
+ /*recovery_mode=*/true, /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // Handle document with document_id == last_added_document_id in recovery
+ // mode. We should not get any error, but the handler should ignore the
+ // document, so both index data and last_added_document_id should remain
+ // unchanged.
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId,
+ /*recovery_mode=*/true, /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // Handle document with document_id > last_added_document_id in recovery mode.
+ // The handler should index this document and update last_added_document_id.
+ ASSERT_THAT(IsDocumentIdValid(kDefaultDocumentId + 1), IsTrue());
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId + 1,
+ /*recovery_mode=*/true, /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId + 1));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId + 1, kQualifiedIdJoinablePropertyId)),
+ IsOkAndHolds("pkg$db/ns#ref_type/1"));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/qualified-id.cc b/icing/join/qualified-id.cc
index 2a30c44..42e080c 100644
--- a/icing/join/qualified-id.cc
+++ b/icing/join/qualified-id.cc
@@ -40,9 +40,14 @@ bool IsSpecialCharacter(char c) {
// A valid index of the separator on success.
// std::string::npos if the escape format of content is incorrect.
// std::string::npos if the content contains 0 or more than 1 separators.
+// std::string::npos if the content contains '\0'.
size_t VerifyFormatAndGetSeparatorPosition(std::string_view content) {
size_t separator_pos = std::string::npos;
for (size_t i = 0; i < content.length(); ++i) {
+ if (content[i] == '\0') {
+ return std::string::npos;
+ }
+
if (content[i] == QualifiedId::kEscapeChar) {
// Advance to the next character.
++i;
diff --git a/icing/join/qualified-id_test.cc b/icing/join/qualified-id_test.cc
index 0c3750a..92bf63e 100644
--- a/icing/join/qualified-id_test.cc
+++ b/icing/join/qualified-id_test.cc
@@ -135,6 +135,24 @@ TEST(QualifiedIdTest, InvalidQualifiedIdWithWrongNumberOfSeparators) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
+TEST(QualifiedIdTest, InvalidQualifiedIdWithStringTerminator) {
+ const char invalid_qualified_id1[] = "names\0pace#uri";
+ EXPECT_THAT(QualifiedId::Parse(std::string_view(invalid_qualified_id1, 14)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ const char invalid_qualified_id2[] = "namespace#ur\0i";
+ EXPECT_THAT(QualifiedId::Parse(std::string_view(invalid_qualified_id2, 14)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ const char invalid_qualified_id3[] = "\0namespace#uri";
+ EXPECT_THAT(QualifiedId::Parse(std::string_view(invalid_qualified_id3, 14)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ const char invalid_qualified_id4[] = "namespace#uri\0";
+ EXPECT_THAT(QualifiedId::Parse(std::string_view(invalid_qualified_id4, 14)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/monkey_test/icing-monkey-test-runner.cc b/icing/monkey_test/icing-monkey-test-runner.cc
index e7c0bdf..558da1c 100644
--- a/icing/monkey_test/icing-monkey-test-runner.cc
+++ b/icing/monkey_test/icing-monkey-test-runner.cc
@@ -113,7 +113,22 @@ ResultSpecProto::SnippetSpecProto GenerateRandomSnippetSpecProto(
return snippet_spec;
}
-ResultSpecProto GenerateRandomResultSpecProto(MonkeyTestRandomEngine* random) {
+TypePropertyMask GenerateTypePropertyMask(
+ MonkeyTestRandomEngine* random, const SchemaTypeConfigProto& type_config) {
+ TypePropertyMask type_property_mask;
+ type_property_mask.set_schema_type(type_config.schema_type());
+ for (const auto& properties : type_config.properties()) {
+ // 25% chance of adding the current property to the mask.
+ std::uniform_int_distribution<> dist(0, 3);
+ if (dist(*random) == 0) {
+ type_property_mask.add_paths(properties.property_name());
+ }
+ }
+ return type_property_mask;
+}
+
+ResultSpecProto GenerateRandomResultSpecProto(MonkeyTestRandomEngine* random,
+ const SchemaProto* schema) {
std::uniform_int_distribution<> dist(0, 4);
ResultSpecProto result_spec;
// 1/5 chance of getting one of 1, 4, 16, 64, 256
@@ -121,6 +136,18 @@ ResultSpecProto GenerateRandomResultSpecProto(MonkeyTestRandomEngine* random) {
result_spec.set_num_per_page(num_per_page);
*result_spec.mutable_snippet_spec() =
GenerateRandomSnippetSpecProto(random, result_spec);
+
+ // 1/5 chance of enabling projection.
+ if (dist(*random) == 0) {
+ for (const SchemaTypeConfigProto& type_config : schema->types()) {
+ // 25% chance of adding the current type to the projection.
+ std::uniform_int_distribution<> dist(0, 3);
+ if (dist(*random) == 0) {
+ *result_spec.add_type_property_masks() =
+ GenerateTypePropertyMask(random, type_config);
+ }
+ }
+ }
return result_spec;
}
@@ -338,26 +365,37 @@ void IcingMonkeyTestRunner::DoDeleteByQuery() {
}
void IcingMonkeyTestRunner::DoSearch() {
- SearchSpecProto search_spec =
- GenerateRandomSearchSpecProto(&random_, document_generator_.get());
- ScoringSpecProto scoring_spec = GenerateRandomScoringSpec(&random_);
- ResultSpecProto result_spec = GenerateRandomResultSpecProto(&random_);
- const ResultSpecProto::SnippetSpecProto& snippet_spec =
- result_spec.snippet_spec();
-
- ICING_LOG(INFO) << "Monkey searching by query: " << search_spec.query()
- << ", term_match_type: " << search_spec.term_match_type();
- ICING_VLOG(1) << "search_spec:\n" << search_spec.DebugString();
- ICING_VLOG(1) << "scoring_spec:\n" << scoring_spec.DebugString();
- ICING_VLOG(1) << "result_spec:\n" << result_spec.DebugString();
+ std::unique_ptr<SearchSpecProto> search_spec =
+ std::make_unique<SearchSpecProto>(
+ GenerateRandomSearchSpecProto(&random_, document_generator_.get()));
+ std::unique_ptr<ScoringSpecProto> scoring_spec =
+ std::make_unique<ScoringSpecProto>(GenerateRandomScoringSpec(&random_));
+ std::unique_ptr<ResultSpecProto> result_spec =
+ std::make_unique<ResultSpecProto>(GenerateRandomResultSpecProto(
+ &random_, in_memory_icing_->GetSchema()));
+ const ResultSpecProto::SnippetSpecProto snippet_spec =
+ result_spec->snippet_spec();
+ bool is_projection_enabled = !result_spec->type_property_masks().empty();
+
+ ICING_LOG(INFO) << "Monkey searching by query: " << search_spec->query()
+ << ", term_match_type: " << search_spec->term_match_type();
+ ICING_VLOG(1) << "search_spec:\n" << search_spec->DebugString();
+ ICING_VLOG(1) << "scoring_spec:\n" << scoring_spec->DebugString();
+ ICING_VLOG(1) << "result_spec:\n" << result_spec->DebugString();
std::vector<DocumentProto> exp_documents =
- in_memory_icing_->Search(search_spec);
+ in_memory_icing_->Search(*search_spec);
SearchResultProto search_result =
- icing_->Search(search_spec, scoring_spec, result_spec);
+ icing_->Search(*search_spec, *scoring_spec, *result_spec);
ASSERT_THAT(search_result.status(), ProtoIsOk());
+ // Delete all of the specs used in the search. GetNextPage should have no
+ // problem because it shouldn't be keeping any references to them.
+ search_spec.reset();
+ scoring_spec.reset();
+ result_spec.reset();
+
std::vector<DocumentProto> actual_documents;
int num_snippeted = 0;
while (true) {
@@ -382,7 +420,7 @@ void IcingMonkeyTestRunner::DoSearch() {
if (exp_documents.size() >= 30000) {
return;
}
- if (snippet_spec.num_matches_per_property() > 0) {
+ if (snippet_spec.num_matches_per_property() > 0 && !is_projection_enabled) {
ASSERT_THAT(num_snippeted,
Eq(std::min<uint32_t>(exp_documents.size(),
snippet_spec.num_to_snippet())));
@@ -391,6 +429,12 @@ void IcingMonkeyTestRunner::DoSearch() {
SortDocuments(actual_documents);
ASSERT_THAT(actual_documents, SizeIs(exp_documents.size()));
for (int i = 0; i < exp_documents.size(); ++i) {
+ if (is_projection_enabled) {
+ ASSERT_THAT(actual_documents[i].namespace_(),
+ Eq(exp_documents[i].namespace_()));
+ ASSERT_THAT(actual_documents[i].uri(), Eq(exp_documents[i].uri()));
+ continue;
+ }
ASSERT_THAT(actual_documents[i], EqualsProto(exp_documents[i]));
}
ICING_LOG(INFO) << exp_documents.size() << " documents found by query.";
@@ -409,9 +453,21 @@ void IcingMonkeyTestRunner::DoOptimize() {
}
void IcingMonkeyTestRunner::CreateIcingSearchEngine() {
+ std::uniform_int_distribution<> dist(0, 1);
+
+ bool always_rebuild_index_optimize = dist(random_);
+ float optimize_rebuild_index_threshold =
+ always_rebuild_index_optimize ? 0.0 : 0.9;
+
IcingSearchEngineOptions icing_options;
icing_options.set_index_merge_size(config_.index_merge_size);
icing_options.set_base_dir(icing_dir_->dir());
+ icing_options.set_optimize_rebuild_index_threshold(
+ optimize_rebuild_index_threshold);
+ // The method will be called every time when we ReloadFromDisk(), so randomly
+ // flip this flag to test document store's compatibility.
+ icing_options.set_document_store_namespace_id_fingerprint(
+ (bool)dist(random_));
icing_ = std::make_unique<IcingSearchEngine>(icing_options);
ASSERT_THAT(icing_->Initialize().status(), ProtoIsOk());
}
diff --git a/icing/performance-configuration.cc b/icing/performance-configuration.cc
index 07ff9bc..1518381 100644
--- a/icing/performance-configuration.cc
+++ b/icing/performance-configuration.cc
@@ -38,20 +38,17 @@ namespace {
// rendering 2 frames.
//
// With the information above, we then try to choose default values for
-// query_length and num_to_score so that the overall time can comfortably fit
-// in with our goal.
+// query_length so that the overall time can comfortably fit in with our goal
+// (note that num_to_score will be decided by the client, which is specified in
+// ResultSpecProto).
// 1. Set query_length to 23000 so that any query can be executed by
// QueryProcessor within 15 ms on a Pixel 3 XL according to results of
// //icing/query:query-processor_benchmark.
-// 2. Set num_to_score to 30000 so that results can be scored and ranked within
-// 3 ms on a Pixel 3 XL according to results of
-// //icing/scoring:score-and-rank_benchmark.
//
// In the worse-case scenario, we still have [33 ms - 15 ms - 3 ms] = 15 ms left
// for all the other things like proto parsing, document fetching, and even
// Android Binder calls if Icing search engine runs in a separate process.
constexpr int kMaxQueryLength = 23000;
-constexpr int kDefaultNumToScore = 30000;
// New Android devices nowadays all allow more than 16 MB memory per app. Using
// that as a guideline and being more conservative, we set 4 MB as the safe
@@ -67,8 +64,7 @@ constexpr int kMaxNumTotalHits = kSafeMemoryUsage / sizeof(ScoredDocumentHit);
} // namespace
PerformanceConfiguration::PerformanceConfiguration()
- : PerformanceConfiguration(kMaxQueryLength, kDefaultNumToScore,
- kMaxNumTotalHits) {}
+ : PerformanceConfiguration(kMaxQueryLength, kMaxNumTotalHits) {}
} // namespace lib
} // namespace icing
diff --git a/icing/performance-configuration.h b/icing/performance-configuration.h
index b9282ca..3ec67f3 100644
--- a/icing/performance-configuration.h
+++ b/icing/performance-configuration.h
@@ -23,10 +23,8 @@ struct PerformanceConfiguration {
// Loads default configuration.
PerformanceConfiguration();
- PerformanceConfiguration(int max_query_length_in, int num_to_score_in,
- int max_num_total_hits)
+ PerformanceConfiguration(int max_query_length_in, int max_num_total_hits)
: max_query_length(max_query_length_in),
- num_to_score(num_to_score_in),
max_num_total_hits(max_num_total_hits) {}
// Search performance
@@ -34,9 +32,6 @@ struct PerformanceConfiguration {
// Maximum length of query to execute in IndexProcessor.
int max_query_length;
- // Number of results to score in ScoringProcessor for every query.
- int num_to_score;
-
// Memory
// Maximum number of ScoredDocumentHits to cache in the ResultStateManager at
diff --git a/icing/portable/equals-proto.h b/icing/portable/equals-proto.h
index 6a600be..8bb835e 100644
--- a/icing/portable/equals-proto.h
+++ b/icing/portable/equals-proto.h
@@ -20,8 +20,8 @@
#ifndef ICING_PORTABLE_EQUALS_PROTO_H_
#define ICING_PORTABLE_EQUALS_PROTO_H_
+#include "gmock/gmock.h" // IWYU pragma: export
#include <google/protobuf/message_lite.h> // IWYU pragma: export
-#include "gmock/gmock.h" // IWYU pragma: export
#if defined(__ANDROID__) || defined(__APPLE__)
namespace icing {
diff --git a/icing/portable/gzip_stream.h b/icing/portable/gzip_stream.h
index 602093f..8008a55 100644
--- a/icing/portable/gzip_stream.h
+++ b/icing/portable/gzip_stream.h
@@ -27,8 +27,8 @@
#ifndef GOOGLE3_ICING_PORTABLE_GZIP_STREAM_H_
#define GOOGLE3_ICING_PORTABLE_GZIP_STREAM_H_
-#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
#include "icing/portable/zlib.h"
+#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
namespace icing {
namespace lib {
@@ -50,9 +50,8 @@ class GzipInputStream : public google::protobuf::io::ZeroCopyInputStream {
};
// buffer_size and format may be -1 for default of 64kB and GZIP format
- explicit GzipInputStream(
- google::protobuf::io::ZeroCopyInputStream* sub_stream,
- Format format = AUTO, int buffer_size = -1);
+ explicit GzipInputStream(google::protobuf::io::ZeroCopyInputStream* sub_stream,
+ Format format = AUTO, int buffer_size = -1);
virtual ~GzipInputStream();
// Return last error message or NULL if no error.
@@ -113,13 +112,11 @@ class GzipOutputStream : public google::protobuf::io::ZeroCopyOutputStream {
};
// Create a GzipOutputStream with default options.
- explicit GzipOutputStream(
- google::protobuf::io::ZeroCopyOutputStream* sub_stream);
+ explicit GzipOutputStream(google::protobuf::io::ZeroCopyOutputStream* sub_stream);
// Create a GzipOutputStream with the given options.
- GzipOutputStream(
- google::protobuf::io::ZeroCopyOutputStream* sub_stream,
- const Options& options);
+ GzipOutputStream(google::protobuf::io::ZeroCopyOutputStream* sub_stream,
+ const Options& options);
virtual ~GzipOutputStream();
@@ -164,9 +161,8 @@ class GzipOutputStream : public google::protobuf::io::ZeroCopyOutputStream {
size_t input_buffer_length_;
// Shared constructor code.
- void Init(
- google::protobuf::io::ZeroCopyOutputStream* sub_stream,
- const Options& options);
+ void Init(google::protobuf::io::ZeroCopyOutputStream* sub_stream,
+ const Options& options);
// Do some compression.
// Takes zlib flush mode.
diff --git a/icing/query/advanced_query_parser/abstract-syntax-tree.h b/icing/query/advanced_query_parser/abstract-syntax-tree.h
index dc28ab6..67049ad 100644
--- a/icing/query/advanced_query_parser/abstract-syntax-tree.h
+++ b/icing/query/advanced_query_parser/abstract-syntax-tree.h
@@ -17,6 +17,7 @@
#include <memory>
#include <string>
+#include <string_view>
#include <utility>
#include <vector>
@@ -52,18 +53,29 @@ class Node {
class TerminalNode : public Node {
public:
- explicit TerminalNode(std::string value) : value_(std::move(value)) {}
+ explicit TerminalNode(std::string value, std::string_view raw_value,
+ bool is_prefix)
+ : value_(std::move(value)),
+ raw_value_(raw_value),
+ is_prefix_(is_prefix) {}
- const std::string& value() const { return value_; }
+ const std::string& value() const& { return value_; }
+ std::string value() && { return std::move(value_); }
+
+ bool is_prefix() const { return is_prefix_; }
+
+ std::string_view raw_value() const { return raw_value_; }
private:
std::string value_;
+ std::string_view raw_value_;
+ bool is_prefix_;
};
class FunctionNameNode : public TerminalNode {
public:
explicit FunctionNameNode(std::string value)
- : TerminalNode(std::move(value)) {}
+ : TerminalNode(std::move(value), /*raw_value=*/"", /*is_prefix=*/false) {}
void Accept(AbstractSyntaxTreeVisitor* visitor) const override {
visitor->VisitFunctionName(this);
}
@@ -71,7 +83,9 @@ class FunctionNameNode : public TerminalNode {
class StringNode : public TerminalNode {
public:
- explicit StringNode(std::string value) : TerminalNode(std::move(value)) {}
+ explicit StringNode(std::string value, std::string_view raw_value,
+ bool is_prefix = false)
+ : TerminalNode(std::move(value), raw_value, is_prefix) {}
void Accept(AbstractSyntaxTreeVisitor* visitor) const override {
visitor->VisitString(this);
}
@@ -79,7 +93,9 @@ class StringNode : public TerminalNode {
class TextNode : public TerminalNode {
public:
- explicit TextNode(std::string value) : TerminalNode(std::move(value)) {}
+ explicit TextNode(std::string value, std::string_view raw_value,
+ bool is_prefix = false)
+ : TerminalNode(std::move(value), raw_value, is_prefix) {}
void Accept(AbstractSyntaxTreeVisitor* visitor) const override {
visitor->VisitText(this);
}
diff --git a/icing/query/advanced_query_parser/abstract-syntax-tree_test.cc b/icing/query/advanced_query_parser/abstract-syntax-tree_test.cc
index a8599fd..5e28278 100644
--- a/icing/query/advanced_query_parser/abstract-syntax-tree_test.cc
+++ b/icing/query/advanced_query_parser/abstract-syntax-tree_test.cc
@@ -27,8 +27,8 @@ namespace {
using ::testing::ElementsAre;
TEST(AbstractSyntaxTreeTest, Simple) {
- // foo
- std::unique_ptr<Node> root = std::make_unique<TextNode>("foo");
+ std::string_view query = "foo";
+ std::unique_ptr<Node> root = std::make_unique<TextNode>("foo", query);
SimpleVisitor visitor;
root->Accept(&visitor);
@@ -37,16 +37,16 @@ TEST(AbstractSyntaxTreeTest, Simple) {
}
TEST(AbstractSyntaxTreeTest, Composite) {
- // (foo bar) OR baz
+ std::string_view query = "(foo bar) OR baz";
std::vector<std::unique_ptr<Node>> and_args;
- and_args.push_back(std::make_unique<TextNode>("foo"));
- and_args.push_back(std::make_unique<TextNode>("bar"));
+ and_args.push_back(std::make_unique<TextNode>("foo", query.substr(1, 3)));
+ and_args.push_back(std::make_unique<TextNode>("bar", query.substr(5, 3)));
auto and_node =
std::make_unique<NaryOperatorNode>("AND", std::move(and_args));
std::vector<std::unique_ptr<Node>> or_args;
or_args.push_back(std::move(and_node));
- or_args.push_back(std::make_unique<TextNode>("baz"));
+ or_args.push_back(std::make_unique<TextNode>("baz", query.substr(13, 3)));
std::unique_ptr<Node> root =
std::make_unique<NaryOperatorNode>("OR", std::move(or_args));
@@ -72,9 +72,9 @@ TEST(AbstractSyntaxTreeTest, Function) {
ElementsAre(EqualsNodeInfo("foo", NodeType::kFunctionName),
EqualsNodeInfo("", NodeType::kFunction)));
- // foo("bar")
+ std::string_view query = "foo(\"bar\")";
std::vector<std::unique_ptr<Node>> args;
- args.push_back(std::make_unique<StringNode>("bar"));
+ args.push_back(std::make_unique<StringNode>("bar", query.substr(5, 3)));
root = std::make_unique<FunctionNode>(
std::make_unique<FunctionNameNode>("foo"), std::move(args));
visitor = SimpleVisitor();
@@ -85,9 +85,9 @@ TEST(AbstractSyntaxTreeTest, Function) {
EqualsNodeInfo("bar", NodeType::kString),
EqualsNodeInfo("", NodeType::kFunction)));
- // foo(bar("baz"))
+ query = "foo(bar(\"baz\"))";
std::vector<std::unique_ptr<Node>> inner_args;
- inner_args.push_back(std::make_unique<StringNode>("baz"));
+ inner_args.push_back(std::make_unique<StringNode>("baz", query.substr(9, 3)));
args.clear();
args.push_back(std::make_unique<FunctionNode>(
std::make_unique<FunctionNameNode>("bar"), std::move(inner_args)));
@@ -105,14 +105,16 @@ TEST(AbstractSyntaxTreeTest, Function) {
}
TEST(AbstractSyntaxTreeTest, Restriction) {
- // sender.name:(IMPORTANT OR URGENT)
+ std::string_view query = "sender.name:(IMPORTANT OR URGENT)";
std::vector<std::unique_ptr<TextNode>> member_args;
- member_args.push_back(std::make_unique<TextNode>("sender"));
- member_args.push_back(std::make_unique<TextNode>("name"));
+ member_args.push_back(
+ std::make_unique<TextNode>("sender", query.substr(0, 6)));
+ member_args.push_back(std::make_unique<TextNode>("name", query.substr(7, 4)));
std::vector<std::unique_ptr<Node>> or_args;
- or_args.push_back(std::make_unique<TextNode>("IMPORTANT"));
- or_args.push_back(std::make_unique<TextNode>("URGENT"));
+ or_args.push_back(
+ std::make_unique<TextNode>("IMPORTANT", query.substr(13, 9)));
+ or_args.push_back(std::make_unique<TextNode>("URGENT", query.substr(26, 6)));
std::vector<std::unique_ptr<Node>> has_args;
has_args.push_back(std::make_unique<MemberNode>(std::move(member_args),
diff --git a/icing/query/advanced_query_parser/function.cc b/icing/query/advanced_query_parser/function.cc
new file mode 100644
index 0000000..e7938db
--- /dev/null
+++ b/icing/query/advanced_query_parser/function.cc
@@ -0,0 +1,77 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "icing/query/advanced_query_parser/function.h"
+
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+/*static*/ libtextclassifier3::StatusOr<Function> Function::Create(
+ DataType return_type, std::string name, std::vector<Param> params,
+ Function::EvalFunction eval) {
+ bool has_had_optional = false;
+ for (int i = 0; i < params.size(); ++i) {
+ switch (params.at(i).cardinality) {
+ case Cardinality::kVariable:
+ if (i != params.size() - 1) {
+ return absl_ports::InvalidArgumentError(
+ "Can only specify a variable param as the final param.");
+ }
+ break;
+ case Cardinality::kOptional:
+ has_had_optional = true;
+ break;
+ case Cardinality::kRequired:
+ if (has_had_optional) {
+ return absl_ports::InvalidArgumentError(
+ "Can't specify optional params followed by required params.");
+ }
+ break;
+ }
+ }
+ return Function(return_type, std::move(name), std::move(params),
+ std::move(eval));
+}
+
+libtextclassifier3::StatusOr<PendingValue> Function::Eval(
+ std::vector<PendingValue>&& args) const {
+ for (int i = 0; i < params_.size() || i < args.size(); ++i) {
+ if (i < args.size() && i < params_.size()) {
+ ICING_RETURN_IF_ERROR(params_.at(i).Matches(args.at(i)));
+ } else if (i >= params_.size()) {
+ // There are remaining args. This would happen if the final arg is
+ // kVariable.
+ if (params_.empty() ||
+ params_.rbegin()->cardinality != Cardinality::kVariable) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Expected to find only ", std::to_string(params_.size()),
+ " arguments, but found ", std::to_string(args.size())));
+ }
+ ICING_RETURN_IF_ERROR(params_.rbegin()->Matches(args.at(i)));
+ } else if (params_.at(i).cardinality == Cardinality::kRequired) {
+ // There are no more args, but there are still params to check for. If
+ // These params are kRequired, then there is an error.
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Expected to find ", std::to_string(i + 1), "th argument, but only ",
+ std::to_string(args.size()), " arguments provided."));
+ }
+ }
+ return eval_(std::move(args));
+}
+
+} // namespace lib
+} // namespace icing \ No newline at end of file
diff --git a/icing/query/advanced_query_parser/function.h b/icing/query/advanced_query_parser/function.h
new file mode 100644
index 0000000..3514878
--- /dev/null
+++ b/icing/query/advanced_query_parser/function.h
@@ -0,0 +1,66 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef ICING_QUERY_ADVANCED_QUERY_PARSER_FUNCTION_H_
+#define ICING_QUERY_ADVANCED_QUERY_PARSER_FUNCTION_H_
+
+#include <functional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/query/advanced_query_parser/param.h"
+#include "icing/query/advanced_query_parser/pending-value.h"
+
+namespace icing {
+namespace lib {
+
+class Function {
+ public:
+ using EvalFunction = std::function<libtextclassifier3::StatusOr<PendingValue>(
+ std::vector<PendingValue>&&)>;
+
+ static libtextclassifier3::StatusOr<Function> Create(
+ DataType return_type, std::string name, std::vector<Param> params,
+ EvalFunction eval);
+
+ Function(const Function& rhs) = default;
+ Function(Function&& rhs) = default;
+
+ Function& operator=(const Function& rhs) = default;
+ Function& operator=(Function&& rhs) = default;
+
+ const std::string& name() const { return name_; }
+
+ libtextclassifier3::StatusOr<PendingValue> Eval(
+ std::vector<PendingValue>&& args) const;
+
+ private:
+ Function(DataType return_type, std::string name, std::vector<Param> params,
+ EvalFunction eval)
+ : name_(std::move(name)),
+ params_(std::move(params)),
+ eval_(std::move(eval)),
+ return_type_(return_type) {}
+
+ std::string name_;
+ std::vector<Param> params_;
+ EvalFunction eval_;
+ DataType return_type_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_QUERY_ADVANCED_QUERY_PARSER_FUNCTION_H_
diff --git a/icing/query/advanced_query_parser/function_test.cc b/icing/query/advanced_query_parser/function_test.cc
new file mode 100644
index 0000000..afd4e04
--- /dev/null
+++ b/icing/query/advanced_query_parser/function_test.cc
@@ -0,0 +1,332 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "icing/query/advanced_query_parser/function.h"
+
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "gtest/gtest.h"
+#include "icing/query/advanced_query_parser/param.h"
+#include "icing/query/advanced_query_parser/pending-value.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::IsTrue;
+
+struct TrivialEval {
+ libtextclassifier3::StatusOr<PendingValue> operator()(
+ const std::vector<PendingValue>&) const {
+ return PendingValue();
+ }
+};
+
+TEST(FunctionTest, NoParamCreateSucceeds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function, Function::Create(/*return_type=*/DataType::kString,
+ "foo", /*params=*/{}, TrivialEval()));
+ // foo()
+ std::vector<PendingValue> empty_args;
+ ICING_ASSERT_OK_AND_ASSIGN(PendingValue val,
+ function.Eval(std::move(empty_args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+}
+
+TEST(FunctionTest, NoParamNonEmptyArgsFails) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function, Function::Create(/*return_type=*/DataType::kString,
+ "foo", /*params=*/{}, TrivialEval()));
+
+ // foo()
+ std::vector<PendingValue> args;
+ args.push_back(PendingValue());
+ EXPECT_THAT(function.Eval(std::move(args)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(FunctionTest, ParamNotWrongTypeFails) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function,
+ Function::Create(/*return_type=*/DataType::kString, "foo",
+ /*params=*/{Param(DataType::kString)}, TrivialEval()));
+ std::string_view query = "foo(bar)";
+ std::vector<PendingValue> args;
+ args.push_back(PendingValue::CreateTextPendingValue(
+ QueryTerm{"bar", query.substr(4, 3), /*is_prefix_val=*/false}));
+ EXPECT_THAT(function.Eval(std::move(args)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(FunctionTest, ParamRequiredArgSucceeds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function,
+ Function::Create(/*return_type=*/DataType::kString, "foo",
+ /*params=*/{Param(DataType::kString)}, TrivialEval()));
+
+ std::string_view query = R"(foo("bar"))";
+ std::vector<PendingValue> args;
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
+ ICING_ASSERT_OK_AND_ASSIGN(PendingValue val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+}
+
+TEST(FunctionTest, ParamRequiredArgNotPresentFails) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function,
+ Function::Create(/*return_type=*/DataType::kString, "foo",
+ /*params=*/{Param(DataType::kString)}, TrivialEval()));
+
+ // foo()
+ std::vector<PendingValue> empty_args;
+ EXPECT_THAT(function.Eval(std::move(empty_args)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(FunctionTest, ParamOptionalArgNotPresentSucceeds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function,
+ Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/{Param(DataType::kString, Cardinality::kOptional)},
+ TrivialEval()));
+
+ // foo()
+ std::vector<PendingValue> empty_args;
+ ICING_ASSERT_OK_AND_ASSIGN(PendingValue val,
+ function.Eval(std::move(empty_args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+}
+
+TEST(FunctionTest, ParamVariableArgNotPresentSucceeds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function,
+ Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/{Param(DataType::kString, Cardinality::kVariable)},
+ TrivialEval()));
+
+ // foo()
+ std::vector<PendingValue> empty_args;
+ ICING_ASSERT_OK_AND_ASSIGN(PendingValue val,
+ function.Eval(std::move(empty_args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+}
+
+TEST(FunctionTest, MultipleArgsTrailingOptionalSucceeds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function, Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/
+ {Param(DataType::kString, Cardinality::kRequired),
+ Param(DataType::kString, Cardinality::kOptional)},
+ TrivialEval()));
+
+ std::string_view query = R"(foo("bar"))";
+ std::vector<PendingValue> args;
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
+ ICING_ASSERT_OK_AND_ASSIGN(PendingValue val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ query = R"(foo("bar", "baz"))";
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"baz", query.substr(12, 3), /*is_prefix_val=*/false}));
+ ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+}
+
+TEST(FunctionTest, MultipleArgsTrailingVariableSucceeds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function, Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/
+ {Param(DataType::kString, Cardinality::kRequired),
+ Param(DataType::kString, Cardinality::kVariable)},
+ TrivialEval()));
+
+ std::string_view query = R"(foo("bar"))";
+ std::vector<PendingValue> args;
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
+ ICING_ASSERT_OK_AND_ASSIGN(PendingValue val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ query = R"(foo("bar", "baz"))";
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"baz", query.substr(12, 3), /*is_prefix_val=*/false}));
+ ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ query = R"(foo("bar", "baz", "bat"))";
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"baz", query.substr(12, 3), /*is_prefix_val=*/false}));
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"bat", query.substr(19, 3), /*is_prefix_val=*/false}));
+ ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+}
+
+TEST(FunctionTest, MultipleArgsOptionalBeforeRequiredFails) {
+ EXPECT_THAT(Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/
+ {Param(DataType::kString, Cardinality::kOptional),
+ Param(DataType::kString, Cardinality::kRequired)},
+ TrivialEval()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(FunctionTest, MultipleArgsOptionalBeforeOptionalSucceeds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function, Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/
+ {Param(DataType::kString, Cardinality::kOptional),
+ Param(DataType::kText, Cardinality::kOptional)},
+ TrivialEval()));
+
+ // foo()
+ std::vector<PendingValue> args;
+ ICING_ASSERT_OK_AND_ASSIGN(PendingValue val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ std::string_view query = R"(foo("bar"))";
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
+ ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ query = R"(foo("bar", baz))";
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
+ args.push_back(PendingValue::CreateTextPendingValue(
+ QueryTerm{"baz", query.substr(11, 3), /*is_prefix_val=*/false}));
+ ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ query = R"(foo(baz))";
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateTextPendingValue(
+ QueryTerm{"baz", query.substr(4, 3), /*is_prefix_val=*/false}));
+ EXPECT_THAT(function.Eval(std::move(args)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(FunctionTest, MultipleArgsOptionalBeforeVariableSucceeds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function, Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/
+ {Param(DataType::kString, Cardinality::kOptional),
+ Param(DataType::kText, Cardinality::kVariable)},
+ TrivialEval()));
+
+ // foo()
+ std::vector<PendingValue> args;
+ ICING_ASSERT_OK_AND_ASSIGN(PendingValue val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ std::string_view query = R"(foo("bar"))";
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
+ ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ query = R"(foo("bar", baz))";
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
+ args.push_back(PendingValue::CreateTextPendingValue(
+ QueryTerm{"baz", query.substr(11, 3), /*is_prefix_val=*/false}));
+ ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ query = R"(foo("bar", baz, bat))";
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
+ args.push_back(PendingValue::CreateTextPendingValue(
+ QueryTerm{"baz", query.substr(11, 3), /*is_prefix_val=*/false}));
+ args.push_back(PendingValue::CreateTextPendingValue(
+ QueryTerm{"bat", query.substr(16, 3), /*is_prefix_val=*/false}));
+ ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ query = R"(foo(baz))";
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateTextPendingValue(
+ QueryTerm{"baz", query.substr(4, 3), /*is_prefix_val=*/false}));
+ EXPECT_THAT(function.Eval(std::move(args)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ query = R"(foo(baz, bat))";
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateTextPendingValue(
+ QueryTerm{"baz", query.substr(4, 3), /*is_prefix_val=*/false}));
+ args.push_back(PendingValue::CreateTextPendingValue(
+ QueryTerm{"bat", query.substr(9, 3), /*is_prefix_val=*/false}));
+ EXPECT_THAT(function.Eval(std::move(args)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(FunctionTest, MultipleArgsVariableBeforeRequiredFails) {
+ EXPECT_THAT(Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/
+ {Param(DataType::kString, Cardinality::kVariable),
+ Param(DataType::kString, Cardinality::kRequired)},
+ TrivialEval()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(FunctionTest, MultipleArgsVariableBeforeOptionalFails) {
+ EXPECT_THAT(Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/
+ {Param(DataType::kString, Cardinality::kVariable),
+ Param(DataType::kString, Cardinality::kOptional)},
+ TrivialEval()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(FunctionTest, MultipleArgsVariableBeforeVariableFails) {
+ EXPECT_THAT(Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/
+ {Param(DataType::kString, Cardinality::kVariable),
+ Param(DataType::kString, Cardinality::kVariable)},
+ TrivialEval()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing \ No newline at end of file
diff --git a/icing/query/advanced_query_parser/lexer.cc b/icing/query/advanced_query_parser/lexer.cc
index 18932f6..0dd0bb0 100644
--- a/icing/query/advanced_query_parser/lexer.cc
+++ b/icing/query/advanced_query_parser/lexer.cc
@@ -14,6 +14,8 @@
#include "icing/query/advanced_query_parser/lexer.h"
+#include <string>
+
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/util/i18n-utils.h"
@@ -36,24 +38,43 @@ bool Lexer::ConsumeWhitespace() {
}
bool Lexer::ConsumeQuerySingleChar() {
- if (current_char_ != ':') {
- return false;
+ std::string_view original_text = query_.substr(current_index_, 1);
+ switch (current_char_) {
+ case ':':
+ tokens_.push_back({":", original_text, TokenType::COMPARATOR});
+ break;
+ case '*':
+ tokens_.push_back({"", original_text, TokenType::STAR});
+ break;
+ case '-':
+ if (in_text_) {
+ // MINUS ('-') is considered to be a part of a text segment if it is
+ // in the middle of a TEXT segment (ex. `foo-bar`).
+ return false;
+ }
+ tokens_.push_back({"", original_text, TokenType::MINUS});
+ break;
+ default:
+ return false;
}
- tokens_.push_back({":", TokenType::COMPARATOR});
Advance();
return true;
}
bool Lexer::ConsumeScoringSingleChar() {
+ std::string_view original_text = query_.substr(current_index_, 1);
switch (current_char_) {
case '+':
- tokens_.push_back({"", TokenType::PLUS});
+ tokens_.push_back({"", original_text, TokenType::PLUS});
break;
case '*':
- tokens_.push_back({"", TokenType::TIMES});
+ tokens_.push_back({"", original_text, TokenType::TIMES});
break;
case '/':
- tokens_.push_back({"", TokenType::DIV});
+ tokens_.push_back({"", original_text, TokenType::DIV});
+ break;
+ case '-':
+ tokens_.push_back({"", original_text, TokenType::MINUS});
break;
default:
return false;
@@ -63,21 +84,19 @@ bool Lexer::ConsumeScoringSingleChar() {
}
bool Lexer::ConsumeGeneralSingleChar() {
+ std::string_view original_text = query_.substr(current_index_, 1);
switch (current_char_) {
case ',':
- tokens_.push_back({"", TokenType::COMMA});
+ tokens_.push_back({"", original_text, TokenType::COMMA});
break;
case '.':
- tokens_.push_back({"", TokenType::DOT});
- break;
- case '-':
- tokens_.push_back({"", TokenType::MINUS});
+ tokens_.push_back({"", original_text, TokenType::DOT});
break;
case '(':
- tokens_.push_back({"", TokenType::LPAREN});
+ tokens_.push_back({"", original_text, TokenType::LPAREN});
break;
case ')':
- tokens_.push_back({"", TokenType::RPAREN});
+ tokens_.push_back({"", original_text, TokenType::RPAREN});
break;
default:
return false;
@@ -108,13 +127,17 @@ bool Lexer::ConsumeComparator() {
// Matching for '<=', '>=', '!=', or '=='.
char next_char = PeekNext(1);
if (next_char == '=') {
- tokens_.push_back({{current_char_, next_char}, TokenType::COMPARATOR});
+ tokens_.push_back({{current_char_, next_char},
+ query_.substr(current_index_, 2),
+ TokenType::COMPARATOR});
Advance(2);
return true;
}
// Now, next_char must not be '='. Let's match for '<' and '>'.
if (current_char_ == '<' || current_char_ == '>') {
- tokens_.push_back({{current_char_}, TokenType::COMPARATOR});
+ tokens_.push_back({{current_char_},
+ query_.substr(current_index_, 1),
+ TokenType::COMPARATOR});
Advance();
return true;
}
@@ -129,10 +152,11 @@ bool Lexer::ConsumeAndOr() {
if (current_char_ != next_char) {
return false;
}
+ std::string_view original_text = query_.substr(current_index_, 2);
if (current_char_ == '&') {
- tokens_.push_back({"", TokenType::AND});
+ tokens_.push_back({"", original_text, TokenType::AND});
} else {
- tokens_.push_back({"", TokenType::OR});
+ tokens_.push_back({"", original_text, TokenType::OR});
}
Advance(2);
return true;
@@ -142,38 +166,44 @@ bool Lexer::ConsumeStringLiteral() {
if (current_char_ != '"') {
return false;
}
- std::string text;
Advance();
+ int32_t unnormalized_start_pos = current_index_;
while (current_char_ != '\0' && current_char_ != '"') {
// When getting a backslash, we will always match the next character, even
// if the next character is a quotation mark
if (current_char_ == '\\') {
- text.push_back(current_char_);
Advance();
if (current_char_ == '\0') {
// In this case, we are missing a terminating quotation mark.
break;
}
}
- text.push_back(current_char_);
Advance();
}
if (current_char_ == '\0') {
SyntaxError("missing terminating \" character");
return false;
}
- tokens_.push_back({text, TokenType::STRING});
+ int32_t unnormalized_length = current_index_ - unnormalized_start_pos;
+ std::string_view raw_token_text =
+ query_.substr(unnormalized_start_pos, unnormalized_length);
+ std::string token_text(raw_token_text);
+ tokens_.push_back({std::move(token_text), raw_token_text, TokenType::STRING});
Advance();
return true;
}
-bool Lexer::Text() {
+bool Lexer::ConsumeText() {
if (current_char_ == '\0') {
return false;
}
- tokens_.push_back({"", TokenType::TEXT});
+ tokens_.push_back({"", query_.substr(current_index_, 0), TokenType::TEXT});
int token_index = tokens_.size() - 1;
+
+ int32_t unnormalized_start_pos = current_index_;
+ int32_t unnormalized_end_pos = current_index_;
while (!ConsumeNonText() && current_char_ != '\0') {
+ in_text_ = true;
// When getting a backslash in TEXT, unescape it by accepting its following
// character no matter which character it is, including white spaces,
// operator symbols, parentheses, etc.
@@ -186,12 +216,18 @@ bool Lexer::Text() {
}
tokens_[token_index].text.push_back(current_char_);
Advance();
- if (current_char_ == '(') {
- // A TEXT followed by a LPAREN is a FUNCTION_NAME.
- tokens_.back().type = TokenType::FUNCTION_NAME;
- // No need to break, since NonText() must be true at this point.
- }
+ unnormalized_end_pos = current_index_;
}
+ in_text_ = false;
+
+ tokens_[token_index].original_text = query_.substr(
+ unnormalized_start_pos, unnormalized_end_pos - unnormalized_start_pos);
+ if (unnormalized_end_pos < query_.length() &&
+ query_[unnormalized_end_pos] == '(') {
+ // A TEXT followed by a LPAREN is a FUNCTION_NAME.
+ tokens_[token_index].type = TokenType::FUNCTION_NAME;
+ }
+
if (language_ == Lexer::Language::QUERY) {
std::string &text = tokens_[token_index].text;
TokenType &type = tokens_[token_index].type;
@@ -215,12 +251,18 @@ Lexer::ExtractTokens() {
// Clear out any non-text before matching a Text.
while (ConsumeNonText()) {
}
- Text();
+ ConsumeText();
}
if (!error_.empty()) {
return absl_ports::InvalidArgumentError(
absl_ports::StrCat("Syntax Error: ", error_));
}
+ if (tokens_.size() > kMaxNumTokens) {
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("The maximum number of tokens allowed is ",
+ std::to_string(kMaxNumTokens), ", but got ",
+ std::to_string(tokens_.size()), " tokens."));
+ }
return tokens_;
}
diff --git a/icing/query/advanced_query_parser/lexer.h b/icing/query/advanced_query_parser/lexer.h
index f72affb..b313fa7 100644
--- a/icing/query/advanced_query_parser/lexer.h
+++ b/icing/query/advanced_query_parser/lexer.h
@@ -29,11 +29,16 @@ class Lexer {
public:
enum class Language { QUERY, SCORING };
+ // The maximum number of tokens allowed, in order to prevent stack overflow
+ // issues in the parsers or visitors.
+ static constexpr uint32_t kMaxNumTokens = 2048;
+
enum class TokenType {
COMMA, // ','
DOT, // '.'
PLUS, // '+' Not allowed in QUERY language.
MINUS, // '-'
+ STAR, // '*' Not allowed in SCORING language.
TIMES, // '*' Not allowed in QUERY language.
DIV, // '/' Not allowed in QUERY language.
LPAREN, // '('
@@ -43,7 +48,9 @@ class Lexer {
AND, // 'AND' | '&&' Not allowed in SCORING language.
OR, // 'OR' | '||' Not allowed in SCORING language.
NOT, // 'NOT' Not allowed in SCORING language.
- STRING, // String literal surrounded by quotation marks
+ STRING, // String literal surrounded by quotation marks. The
+ // original_text of a STRING token will not include quotation
+ // marks.
TEXT, // A sequence of chars that are not any above-listed operator
FUNCTION_NAME, // A TEXT followed by LPAREN.
// Whitespaces not inside a string literal will be skipped.
@@ -64,6 +71,10 @@ class Lexer {
// For other types, this field will be empty.
std::string text;
+ // Lifecycle is dependent on the lifecycle of the string pointed to by
+ // query_.
+ std::string_view original_text;
+
// The type of the token.
TokenType type;
};
@@ -136,8 +147,9 @@ class Lexer {
}
// Try to match TEXT, FUNCTION_NAME, 'AND', 'OR' and 'NOT'.
- // Should make sure that NonText() is false before calling into this method.
- bool Text();
+ // REQUIRES: ConsumeNonText() must be called immediately before calling this
+ // function.
+ bool ConsumeText();
std::string_view query_;
std::string error_;
@@ -145,6 +157,10 @@ class Lexer {
int32_t current_index_ = -1;
char current_char_ = '\0';
std::vector<LexerToken> tokens_;
+
+ // Stores whether the lexer is currently inspecting a TEXT segment while
+ // handling current_char_.
+ bool in_text_ = false;
};
} // namespace lib
diff --git a/icing/query/advanced_query_parser/lexer_test.cc b/icing/query/advanced_query_parser/lexer_test.cc
index 41e78fe..ec0e663 100644
--- a/icing/query/advanced_query_parser/lexer_test.cc
+++ b/icing/query/advanced_query_parser/lexer_test.cc
@@ -73,22 +73,26 @@ TEST(LexerTest, PrefixQuery) {
ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> tokens,
lexer->ExtractTokens());
EXPECT_THAT(tokens,
- ElementsAre(EqualsLexerToken("foo*", Lexer::TokenType::TEXT)));
+ ElementsAre(EqualsLexerToken("foo", Lexer::TokenType::TEXT),
+ EqualsLexerToken("", Lexer::TokenType::STAR)));
lexer = std::make_unique<Lexer>("fooAND*", Lexer::Language::QUERY);
ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
EXPECT_THAT(tokens,
- ElementsAre(EqualsLexerToken("fooAND*", Lexer::TokenType::TEXT)));
+ ElementsAre(EqualsLexerToken("fooAND", Lexer::TokenType::TEXT),
+ EqualsLexerToken("", Lexer::TokenType::STAR)));
lexer = std::make_unique<Lexer>("*ORfoo", Lexer::Language::QUERY);
ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
EXPECT_THAT(tokens,
- ElementsAre(EqualsLexerToken("*ORfoo", Lexer::TokenType::TEXT)));
+ ElementsAre(EqualsLexerToken("", Lexer::TokenType::STAR),
+ EqualsLexerToken("ORfoo", Lexer::TokenType::TEXT)));
lexer = std::make_unique<Lexer>("fooANDbar*", Lexer::Language::QUERY);
ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
- EXPECT_THAT(tokens, ElementsAre(EqualsLexerToken("fooANDbar*",
- Lexer::TokenType::TEXT)));
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("fooANDbar", Lexer::TokenType::TEXT),
+ EqualsLexerToken("", Lexer::TokenType::STAR)));
}
TEST(LexerTest, SimpleStringQuery) {
@@ -296,7 +300,8 @@ TEST(LexerTest, ComplexQuery) {
EqualsLexerToken("sender", Lexer::TokenType::TEXT),
EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
EqualsLexerToken(Lexer::TokenType::LPAREN),
- EqualsLexerToken("foo*", Lexer::TokenType::TEXT),
+ EqualsLexerToken("foo", Lexer::TokenType::TEXT),
+ EqualsLexerToken("", Lexer::TokenType::STAR),
EqualsLexerToken(Lexer::TokenType::AND),
EqualsLexerToken("bar", Lexer::TokenType::TEXT),
EqualsLexerToken(Lexer::TokenType::OR),
@@ -376,14 +381,13 @@ TEST(LexerTest, CJKT) {
lexer = std::make_unique<Lexer>("ញុំ&&ដើរទៅ||ធ្វើការ-រាល់ថ្ងៃ",
Lexer::Language::QUERY);
ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
- EXPECT_THAT(tokens,
- ElementsAre(EqualsLexerToken("ញុំ", Lexer::TokenType::TEXT),
- EqualsLexerToken(Lexer::TokenType::AND),
- EqualsLexerToken("ដើរទៅ", Lexer::TokenType::TEXT),
- EqualsLexerToken(Lexer::TokenType::OR),
- EqualsLexerToken("ធ្វើការ", Lexer::TokenType::TEXT),
- EqualsLexerToken(Lexer::TokenType::MINUS),
- EqualsLexerToken("រាល់ថ្ងៃ", Lexer::TokenType::TEXT)));
+ EXPECT_THAT(
+ tokens,
+ ElementsAre(EqualsLexerToken("ញុំ", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::AND),
+ EqualsLexerToken("ដើរទៅ", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::OR),
+ EqualsLexerToken("ធ្វើការ-រាល់ថ្ងៃ", Lexer::TokenType::TEXT)));
lexer = std::make_unique<Lexer>(
"나는"
@@ -477,7 +481,9 @@ TEST(LexerTest, ScoringArithmetic) {
lexer = std::make_unique<Lexer>("1+2*3/4", Lexer::Language::QUERY);
ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
EXPECT_THAT(tokens,
- ElementsAre(EqualsLexerToken("1+2*3/4", Lexer::TokenType::TEXT)));
+ ElementsAre(EqualsLexerToken("1+2", Lexer::TokenType::TEXT),
+ EqualsLexerToken("", Lexer::TokenType::STAR),
+ EqualsLexerToken("3/4", Lexer::TokenType::TEXT)));
}
// Currently, in scoring language, the lexer will view these logic operators as
@@ -609,5 +615,84 @@ TEST(LexerTest, ComplexScoring) {
EqualsLexerToken(Lexer::TokenType::RPAREN)));
}
+// foo:bar:baz is considered an invalid query as proposed in
+// http://go/appsearch-advanced-query-impl-plan#bookmark=id.yoeyepokmbc5 ; this
+// ensures that the lexer consistently tokenizes colons independently.
+TEST(LexerTest, NoAmbiguousTokenizing) {
+ // This is an invalid query; the lexer doesn't treat `bar:baz` as one token.
+ std::unique_ptr<Lexer> lexer =
+ std::make_unique<Lexer>("foo:bar:baz", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> invalidQueryTokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(invalidQueryTokens,
+ ElementsAre(EqualsLexerToken("foo", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("bar", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("baz", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("foo:\"bar:baz\"", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> validQueryTokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(
+ validQueryTokens,
+ ElementsAre(EqualsLexerToken("foo", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("bar:baz", Lexer::TokenType::STRING)));
+}
+
+TEST(LexerTest, WhiteSpacesDoNotAffectColonTokenization) {
+ std::unique_ptr<Lexer> lexer =
+ std::make_unique<Lexer>("a:b c : d e: f g :h", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> tokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("a", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("b", Lexer::TokenType::TEXT),
+ EqualsLexerToken("c", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("d", Lexer::TokenType::TEXT),
+ EqualsLexerToken("e", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("f", Lexer::TokenType::TEXT),
+ EqualsLexerToken("g", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("h", Lexer::TokenType::TEXT)));
+}
+
+// For the "bar:baz" part to be treated as a TEXT token in a query like
+// foo:bar:baz, an explicit escape is required, so use foo:bar\:baz instead.
+TEST(LexerTest, ColonInTextRequiresExplicitEscaping) {
+ std::unique_ptr<Lexer> lexer =
+ std::make_unique<Lexer>("foo:bar\\:baz", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> tokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("foo", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("bar:baz", Lexer::TokenType::TEXT)));
+}
+
+TEST(LexerTest, QueryShouldRejectTokensBeyondLimit) {
+ std::string query;
+ for (int i = 0; i < Lexer::kMaxNumTokens + 1; ++i) {
+ query.push_back('(');
+ }
+ Lexer lexer(query, Lexer::Language::QUERY);
+ EXPECT_THAT(lexer.ExtractTokens(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(LexerTest, ScoringShouldRejectTokensBeyondLimit) {
+ std::string scoring;
+ for (int i = 0; i < Lexer::kMaxNumTokens + 1; ++i) {
+ scoring.push_back('(');
+ }
+ Lexer lexer(scoring, Lexer::Language::SCORING);
+ EXPECT_THAT(lexer.ExtractTokens(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/query/advanced_query_parser/param.h b/icing/query/advanced_query_parser/param.h
new file mode 100644
index 0000000..69c46be
--- /dev/null
+++ b/icing/query/advanced_query_parser/param.h
@@ -0,0 +1,57 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef ICING_QUERY_ADVANCED_QUERY_PARSER_PARAM_H_
+#define ICING_QUERY_ADVANCED_QUERY_PARSER_PARAM_H_
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/query/advanced_query_parser/pending-value.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+enum class Cardinality {
+ kRequired,
+ kOptional,
+ kVariable,
+};
+
+struct Param {
+ explicit Param(DataType data_type,
+ Cardinality cardinality = Cardinality::kRequired)
+ : data_type(data_type), cardinality(cardinality) {}
+
+ libtextclassifier3::Status Matches(PendingValue& arg) const {
+ bool matches = arg.data_type() == data_type;
+ // Values of type kText could also potentially be valid kLong values. If
+ // we're expecting a kLong and we have a kText, try to parse it as a kLong.
+ if (!matches && data_type == DataType::kLong &&
+ arg.data_type() == DataType::kText) {
+ ICING_RETURN_IF_ERROR(arg.ParseInt());
+ matches = true;
+ }
+ return matches ? libtextclassifier3::Status::OK
+ : absl_ports::InvalidArgumentError(
+ "Provided arg doesn't match required param type.");
+ }
+
+ DataType data_type;
+ Cardinality cardinality;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_QUERY_ADVANCED_QUERY_PARSER_PARAM_H_
diff --git a/icing/query/advanced_query_parser/parser.cc b/icing/query/advanced_query_parser/parser.cc
index 086f038..fd74561 100644
--- a/icing/query/advanced_query_parser/parser.cc
+++ b/icing/query/advanced_query_parser/parser.cc
@@ -55,7 +55,8 @@ libtextclassifier3::StatusOr<std::unique_ptr<TextNode>> Parser::ConsumeText() {
if (!Match(Lexer::TokenType::TEXT)) {
return absl_ports::InvalidArgumentError("Unable to consume token as TEXT.");
}
- auto text_node = std::make_unique<TextNode>(std::move(current_token_->text));
+ auto text_node = std::make_unique<TextNode>(std::move(current_token_->text),
+ current_token_->original_text);
++current_token_;
return text_node;
}
@@ -72,15 +73,25 @@ Parser::ConsumeFunctionName() {
return function_name_node;
}
+// stringElement
+// : STRING STAR?
libtextclassifier3::StatusOr<std::unique_ptr<StringNode>>
-Parser::ConsumeString() {
+Parser::ConsumeStringElement() {
if (!Match(Lexer::TokenType::STRING)) {
return absl_ports::InvalidArgumentError(
"Unable to consume token as STRING.");
}
- auto node = std::make_unique<StringNode>(std::move(current_token_->text));
+ std::string text = std::move(current_token_->text);
+ std::string_view raw_text = current_token_->original_text;
++current_token_;
- return node;
+
+ bool is_prefix = false;
+ if (Match(Lexer::TokenType::STAR)) {
+ is_prefix = true;
+ ++current_token_;
+ }
+
+ return std::make_unique<StringNode>(std::move(text), raw_text, is_prefix);
}
libtextclassifier3::StatusOr<std::string> Parser::ConsumeComparator() {
@@ -95,25 +106,37 @@ libtextclassifier3::StatusOr<std::string> Parser::ConsumeComparator() {
// member
// : TEXT (DOT TEXT)* (DOT function)?
+// | TEXT STAR
// ;
libtextclassifier3::StatusOr<std::unique_ptr<MemberNode>>
Parser::ConsumeMember() {
ICING_ASSIGN_OR_RETURN(std::unique_ptr<TextNode> text_node, ConsumeText());
std::vector<std::unique_ptr<TextNode>> children;
- children.push_back(std::move(text_node));
-
- while (Match(Lexer::TokenType::DOT)) {
- Consume(Lexer::TokenType::DOT);
- if (MatchFunction()) {
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<FunctionNode> function_node,
- ConsumeFunction());
- // Once a function is matched, we should exit the current rule based on
- // the grammar.
- return std::make_unique<MemberNode>(std::move(children),
- std::move(function_node));
- }
- ICING_ASSIGN_OR_RETURN(text_node, ConsumeText());
+
+ // Member could be either `TEXT (DOT TEXT)* (DOT function)?` or `TEXT STAR`
+ // at this point. So check for 'STAR' to differentiate the two cases.
+ if (Match(Lexer::TokenType::STAR)) {
+ Consume(Lexer::TokenType::STAR);
+ std::string_view raw_text = text_node->raw_value();
+ std::string text = std::move(*text_node).value();
+ text_node = std::make_unique<TextNode>(std::move(text), raw_text,
+ /*is_prefix=*/true);
+ children.push_back(std::move(text_node));
+ } else {
children.push_back(std::move(text_node));
+ while (Match(Lexer::TokenType::DOT)) {
+ Consume(Lexer::TokenType::DOT);
+ if (MatchFunction()) {
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<FunctionNode> function_node,
+ ConsumeFunction());
+ // Once a function is matched, we should exit the current rule based on
+ // the grammar.
+ return std::make_unique<MemberNode>(std::move(children),
+ std::move(function_node));
+ }
+ ICING_ASSIGN_OR_RETURN(text_node, ConsumeText());
+ children.push_back(std::move(text_node));
+ }
}
return std::make_unique<MemberNode>(std::move(children),
/*function=*/nullptr);
@@ -141,14 +164,14 @@ Parser::ConsumeFunction() {
}
// comparable
-// : STRING
+// : stringElement
// | member
// | function
// ;
libtextclassifier3::StatusOr<std::unique_ptr<Node>>
Parser::ConsumeComparable() {
if (Match(Lexer::TokenType::STRING)) {
- return ConsumeString();
+ return ConsumeStringElement();
} else if (MatchMember()) {
return ConsumeMember();
}
@@ -186,7 +209,7 @@ Parser::ConsumeArgs() {
}
// restriction
-// : comparable (COMPARATOR (comparable | composite))?
+// : comparable (COMPARATOR MINUS? (comparable | composite))?
// ;
// COMPARATOR will not be produced in Scoring Lexer.
libtextclassifier3::StatusOr<std::unique_ptr<Node>>
@@ -197,6 +220,12 @@ Parser::ConsumeRestriction() {
return comparable;
}
ICING_ASSIGN_OR_RETURN(std::string operator_text, ConsumeComparator());
+
+ bool has_minus = Match(Lexer::TokenType::MINUS);
+ if (has_minus) {
+ Consume(Lexer::TokenType::MINUS);
+ }
+
std::unique_ptr<Node> arg;
if (MatchComposite()) {
ICING_ASSIGN_OR_RETURN(arg, ConsumeComposite());
@@ -206,6 +235,11 @@ Parser::ConsumeRestriction() {
return absl_ports::InvalidArgumentError(
"ARG: must begin with LPAREN or FIRST(comparable)");
}
+
+ if (has_minus) {
+ arg = std::make_unique<UnaryOperatorNode>("MINUS", std::move(arg));
+ }
+
std::vector<std::unique_ptr<Node>> args;
args.push_back(std::move(comparable));
args.push_back(std::move(arg));
@@ -243,10 +277,11 @@ libtextclassifier3::StatusOr<std::unique_ptr<Node>> Parser::ConsumeTerm() {
} else {
if (Match(Lexer::TokenType::NOT)) {
Consume(Lexer::TokenType::NOT);
+ operator_text = "NOT";
} else {
Consume(Lexer::TokenType::MINUS);
+ operator_text = "MINUS";
}
- operator_text = "NOT";
}
ICING_ASSIGN_OR_RETURN(std::unique_ptr<Node> simple, ConsumeSimple());
return std::make_unique<UnaryOperatorNode>(operator_text, std::move(simple));
diff --git a/icing/query/advanced_query_parser/parser.h b/icing/query/advanced_query_parser/parser.h
index 330b8b9..a48c562 100644
--- a/icing/query/advanced_query_parser/parser.h
+++ b/icing/query/advanced_query_parser/parser.h
@@ -94,7 +94,8 @@ class Parser {
libtextclassifier3::StatusOr<std::unique_ptr<FunctionNameNode>>
ConsumeFunctionName();
- libtextclassifier3::StatusOr<std::unique_ptr<StringNode>> ConsumeString();
+ libtextclassifier3::StatusOr<std::unique_ptr<StringNode>>
+ ConsumeStringElement();
libtextclassifier3::StatusOr<std::string> ConsumeComparator();
diff --git a/icing/query/advanced_query_parser/parser_integration_test.cc b/icing/query/advanced_query_parser/parser_integration_test.cc
index 75be15b..fa1bd2e 100644
--- a/icing/query/advanced_query_parser/parser_integration_test.cc
+++ b/icing/query/advanced_query_parser/parser_integration_test.cc
@@ -28,6 +28,7 @@ namespace {
using ::testing::ElementsAre;
using ::testing::ElementsAreArray;
using ::testing::IsNull;
+using ::testing::SizeIs;
TEST(ParserIntegrationTest, EmptyQuery) {
std::string query = "";
@@ -188,7 +189,7 @@ TEST(ParserIntegrationTest, Minus) {
parser.ConsumeQuery());
// Expected AST:
- // NOT
+ // MINUS
// |
// member
// |
@@ -196,11 +197,11 @@ TEST(ParserIntegrationTest, Minus) {
SimpleVisitor visitor;
tree_root->Accept(&visitor);
// SimpleVisitor ordering
- // { text, member, NOT }
+ // { text, member, MINUS }
EXPECT_THAT(visitor.nodes(),
ElementsAre(EqualsNodeInfo("foo", NodeType::kText),
EqualsNodeInfo("", NodeType::kMember),
- EqualsNodeInfo("NOT", NodeType::kUnaryOperator)));
+ EqualsNodeInfo("MINUS", NodeType::kUnaryOperator)));
}
TEST(ParserIntegrationTest, Has) {
@@ -939,6 +940,72 @@ TEST(ParserTest, QueryComplexMemberFunction) {
EqualsNodeInfo("", NodeType::kMember)));
}
+TEST(ParserTest, QueryShouldNotStackOverflowAtMaxNumTokens) {
+ // query = "(( ... (foo bar) ... ))"
+ std::string query;
+ for (int i = 0; i < Lexer::kMaxNumTokens / 2 - 1; ++i) {
+ query.push_back('(');
+ }
+ query.append("foo bar");
+ for (int i = 0; i < Lexer::kMaxNumTokens / 2 - 1; ++i) {
+ query.push_back(')');
+ }
+
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ EXPECT_THAT(lexer_tokens, SizeIs(Lexer::kMaxNumTokens));
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeQuery(), IsOk());
+}
+
+TEST(ParserTest, ScoringShouldNotStackOverflowAtMaxNumTokens) {
+ // scoring = "(( ... (-1) ... ))"
+ std::string scoring;
+ for (int i = 0; i < Lexer::kMaxNumTokens / 2 - 1; ++i) {
+ scoring.push_back('(');
+ }
+ scoring.append("-1");
+ for (int i = 0; i < Lexer::kMaxNumTokens / 2 - 1; ++i) {
+ scoring.push_back(')');
+ }
+
+ Lexer lexer(scoring, Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ EXPECT_THAT(lexer_tokens, SizeIs(Lexer::kMaxNumTokens));
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeScoring(), IsOk());
+}
+
+TEST(ParserTest, InvalidQueryShouldNotStackOverflowAtMaxNumTokens) {
+ std::string query;
+ for (int i = 0; i < Lexer::kMaxNumTokens; ++i) {
+ query.push_back('(');
+ }
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ EXPECT_THAT(lexer_tokens, SizeIs(Lexer::kMaxNumTokens));
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeQuery(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ParserTest, InvalidScoringShouldNotStackOverflowAtMaxNumTokens) {
+ std::string scoring;
+ for (int i = 0; i < Lexer::kMaxNumTokens; ++i) {
+ scoring.push_back('(');
+ }
+ Lexer lexer(scoring, Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ EXPECT_THAT(lexer_tokens, SizeIs(Lexer::kMaxNumTokens));
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeScoring(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/query/advanced_query_parser/parser_test.cc b/icing/query/advanced_query_parser/parser_test.cc
index f997329..824c2ce 100644
--- a/icing/query/advanced_query_parser/parser_test.cc
+++ b/icing/query/advanced_query_parser/parser_test.cc
@@ -46,9 +46,9 @@ TEST(ParserTest, EmptyScoring) {
}
TEST(ParserTest, SingleTerm) {
- // Query: "foo"
+ std::string_view query = "foo";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::TEXT}};
+ {"foo", query, Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -67,9 +67,10 @@ TEST(ParserTest, SingleTerm) {
}
TEST(ParserTest, ImplicitAnd) {
- // Query: "foo bar"
+ std::string_view query = "foo bar";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::TEXT}, {"bar", Lexer::TokenType::TEXT}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {"bar", query.substr(4, 3), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -93,11 +94,11 @@ TEST(ParserTest, ImplicitAnd) {
}
TEST(ParserTest, Or) {
- // Query: "foo OR bar"
+ std::string_view query = "foo OR bar";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::OR},
- {"bar", Lexer::TokenType::TEXT}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(4, 2), Lexer::TokenType::OR},
+ {"bar", query.substr(7, 3), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -121,11 +122,11 @@ TEST(ParserTest, Or) {
}
TEST(ParserTest, And) {
- // Query: "foo AND bar"
+ std::string_view query = "foo AND bar";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::AND},
- {"bar", Lexer::TokenType::TEXT}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(4, 3), Lexer::TokenType::AND},
+ {"bar", query.substr(8, 4), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -149,9 +150,10 @@ TEST(ParserTest, And) {
}
TEST(ParserTest, Not) {
- // Query: "NOT foo"
+ std::string_view query = "NOT foo";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"", Lexer::TokenType::NOT}, {"foo", Lexer::TokenType::TEXT}};
+ {"", query.substr(0, 3), Lexer::TokenType::NOT},
+ {"foo", query.substr(4, 3), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -173,15 +175,16 @@ TEST(ParserTest, Not) {
}
TEST(ParserTest, Minus) {
- // Query: "-foo"
+ std::string_view query = "-foo";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"", Lexer::TokenType::MINUS}, {"foo", Lexer::TokenType::TEXT}};
+ {"", query.substr(0, 1), Lexer::TokenType::MINUS},
+ {"foo", query.substr(1, 3), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
// Expected AST:
- // NOT
+ // MINUS
// |
// member
// |
@@ -189,19 +192,19 @@ TEST(ParserTest, Minus) {
SimpleVisitor visitor;
tree_root->Accept(&visitor);
// SimpleVisitor ordering
- // { text, member, NOT }
+ // { text, member, MINUS }
EXPECT_THAT(visitor.nodes(),
ElementsAre(EqualsNodeInfo("foo", NodeType::kText),
EqualsNodeInfo("", NodeType::kMember),
- EqualsNodeInfo("NOT", NodeType::kUnaryOperator)));
+ EqualsNodeInfo("MINUS", NodeType::kUnaryOperator)));
}
TEST(ParserTest, Has) {
- // Query: "subject:foo"
+ std::string_view query = "subject:foo";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"subject", Lexer::TokenType::TEXT},
- {":", Lexer::TokenType::COMPARATOR},
- {"foo", Lexer::TokenType::TEXT}};
+ {"subject", query.substr(0, 7), Lexer::TokenType::TEXT},
+ {":", query.substr(7, 1), Lexer::TokenType::COMPARATOR},
+ {"foo", query.substr(8, 3), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -225,13 +228,13 @@ TEST(ParserTest, Has) {
}
TEST(ParserTest, HasNested) {
- // Query: "sender.name:foo"
+ std::string_view query = "sender.name:foo";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"sender", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::DOT},
- {"name", Lexer::TokenType::TEXT},
- {":", Lexer::TokenType::COMPARATOR},
- {"foo", Lexer::TokenType::TEXT}};
+ {"sender", query.substr(0, 6), Lexer::TokenType::TEXT},
+ {"", query.substr(6, 1), Lexer::TokenType::DOT},
+ {"name", query.substr(7, 4), Lexer::TokenType::TEXT},
+ {":", query.substr(11, 1), Lexer::TokenType::COMPARATOR},
+ {"foo", query.substr(12, 3), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -256,11 +259,11 @@ TEST(ParserTest, HasNested) {
}
TEST(ParserTest, EmptyFunction) {
- // Query: "foo()"
+ std::string_view query = "foo()";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::FUNCTION_NAME},
- {"", Lexer::TokenType::LPAREN},
- {"", Lexer::TokenType::RPAREN}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(3, 1), Lexer::TokenType::LPAREN},
+ {"", query.substr(4, 1), Lexer::TokenType::RPAREN}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -279,12 +282,12 @@ TEST(ParserTest, EmptyFunction) {
}
TEST(ParserTest, FunctionSingleArg) {
- // Query: "foo("bar")"
+ std::string_view query = "foo(\"bar\")";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::FUNCTION_NAME},
- {"", Lexer::TokenType::LPAREN},
- {"bar", Lexer::TokenType::STRING},
- {"", Lexer::TokenType::RPAREN}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(3, 1), Lexer::TokenType::LPAREN},
+ {"bar", query.substr(5, 3), Lexer::TokenType::STRING},
+ {"", query.substr(8, 1), Lexer::TokenType::RPAREN}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -304,11 +307,14 @@ TEST(ParserTest, FunctionSingleArg) {
}
TEST(ParserTest, FunctionMultiArg) {
- // Query: "foo("bar", "baz")"
+ std::string_view query = "foo(\"bar\", \"baz\")";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::FUNCTION_NAME}, {"", Lexer::TokenType::LPAREN},
- {"bar", Lexer::TokenType::STRING}, {"", Lexer::TokenType::COMMA},
- {"baz", Lexer::TokenType::STRING}, {"", Lexer::TokenType::RPAREN}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(3, 1), Lexer::TokenType::LPAREN},
+ {"bar", query.substr(5, 3), Lexer::TokenType::STRING},
+ {"", query.substr(9, 1), Lexer::TokenType::COMMA},
+ {"baz", query.substr(12, 3), Lexer::TokenType::STRING},
+ {"", query.substr(16, 1), Lexer::TokenType::RPAREN}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -329,11 +335,14 @@ TEST(ParserTest, FunctionMultiArg) {
}
TEST(ParserTest, FunctionNested) {
- // Query: "foo(bar())"
+ std::string_view query = "foo(bar())";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::FUNCTION_NAME}, {"", Lexer::TokenType::LPAREN},
- {"bar", Lexer::TokenType::FUNCTION_NAME}, {"", Lexer::TokenType::LPAREN},
- {"", Lexer::TokenType::RPAREN}, {"", Lexer::TokenType::RPAREN}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(3, 1), Lexer::TokenType::LPAREN},
+ {"bar", query.substr(4, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(7, 1), Lexer::TokenType::LPAREN},
+ {"", query.substr(8, 1), Lexer::TokenType::RPAREN},
+ {"", query.substr(9, 1), Lexer::TokenType::RPAREN}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -356,13 +365,13 @@ TEST(ParserTest, FunctionNested) {
}
TEST(ParserTest, FunctionWithTrailingSequence) {
- // Query: "foo() OR bar"
+ std::string_view query = "foo() OR bar";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::FUNCTION_NAME},
- {"", Lexer::TokenType::LPAREN},
- {"", Lexer::TokenType::RPAREN},
- {"", Lexer::TokenType::OR},
- {"bar", Lexer::TokenType::TEXT}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(3, 1), Lexer::TokenType::LPAREN},
+ {"", query.substr(4, 1), Lexer::TokenType::RPAREN},
+ {"", query.substr(6, 2), Lexer::TokenType::OR},
+ {"bar", query.substr(9, 3), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -386,11 +395,14 @@ TEST(ParserTest, FunctionWithTrailingSequence) {
}
TEST(ParserTest, Composite) {
- // Query: "foo OR (bar baz)"
+ std::string_view query = "foo OR (bar baz)";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::OR},
- {"", Lexer::TokenType::LPAREN}, {"bar", Lexer::TokenType::TEXT},
- {"baz", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::RPAREN}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(4, 2), Lexer::TokenType::OR},
+ {"", query.substr(7, 1), Lexer::TokenType::LPAREN},
+ {"bar", query.substr(8, 3), Lexer::TokenType::TEXT},
+ {"baz", query.substr(12, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(15, 1), Lexer::TokenType::RPAREN}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -419,11 +431,14 @@ TEST(ParserTest, Composite) {
}
TEST(ParserTest, CompositeWithTrailingSequence) {
- // Query: "(bar baz) OR foo"
+ std::string_view query = "(bar baz) OR foo";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"", Lexer::TokenType::LPAREN}, {"bar", Lexer::TokenType::TEXT},
- {"baz", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::RPAREN},
- {"", Lexer::TokenType::OR}, {"foo", Lexer::TokenType::TEXT}};
+ {"", query.substr(0, 1), Lexer::TokenType::LPAREN},
+ {"bar", query.substr(1, 3), Lexer::TokenType::TEXT},
+ {"baz", query.substr(5, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(8, 1), Lexer::TokenType::RPAREN},
+ {"", query.substr(10, 2), Lexer::TokenType::OR},
+ {"foo", query.substr(13, 3), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -452,17 +467,17 @@ TEST(ParserTest, CompositeWithTrailingSequence) {
}
TEST(ParserTest, Complex) {
- // Query: "foo bar:baz OR pal("bat")"
+ std::string_view query = R"(foo bar:baz OR pal("bat"))";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::TEXT},
- {"bar", Lexer::TokenType::TEXT},
- {":", Lexer::TokenType::COMPARATOR},
- {"baz", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::OR},
- {"pal", Lexer::TokenType::FUNCTION_NAME},
- {"", Lexer::TokenType::LPAREN},
- {"bat", Lexer::TokenType::STRING},
- {"", Lexer::TokenType::RPAREN}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {"bar", query.substr(4, 3), Lexer::TokenType::TEXT},
+ {":", query.substr(7, 1), Lexer::TokenType::COMPARATOR},
+ {"baz", query.substr(8, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(12, 2), Lexer::TokenType::OR},
+ {"pal", query.substr(15, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(18, 1), Lexer::TokenType::LPAREN},
+ {"bat", query.substr(20, 3), Lexer::TokenType::STRING},
+ {"", query.substr(24, 1), Lexer::TokenType::RPAREN}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -498,107 +513,116 @@ TEST(ParserTest, Complex) {
}
TEST(ParserTest, InvalidHas) {
- // Query: "foo:" No right hand operand to :
+ std::string_view query = "foo:"; // No right hand operand to :
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::TEXT}, {":", Lexer::TokenType::COMPARATOR}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {":", query.substr(3, 1), Lexer::TokenType::COMPARATOR}};
Parser parser = Parser::Create(std::move(lexer_tokens));
EXPECT_THAT(parser.ConsumeQuery(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST(ParserTest, InvalidComposite) {
- // Query: "(foo bar" No terminating RPAREN
+ std::string_view query = "(foo bar"; // No terminating RPAREN
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"", Lexer::TokenType::LPAREN},
- {"foo", Lexer::TokenType::TEXT},
- {"bar", Lexer::TokenType::TEXT}};
+ {"", query.substr(0, 1), Lexer::TokenType::LPAREN},
+ {"foo", query.substr(1, 3), Lexer::TokenType::TEXT},
+ {"bar", query.substr(5, 3), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
EXPECT_THAT(parser.ConsumeQuery(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST(ParserTest, InvalidMember) {
- // Query: "foo." DOT must have succeeding TEXT
+ std::string_view query = "foo."; // DOT must have succeeding TEXT
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::DOT}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(3, 1), Lexer::TokenType::DOT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
EXPECT_THAT(parser.ConsumeQuery(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST(ParserTest, InvalidOr) {
- // Query: "foo OR" No right hand operand to OR
+ std::string_view query = "foo OR"; // No right hand operand to OR
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::OR}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(3, 2), Lexer::TokenType::OR}};
Parser parser = Parser::Create(std::move(lexer_tokens));
EXPECT_THAT(parser.ConsumeQuery(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST(ParserTest, InvalidAnd) {
- // Query: "foo AND" No right hand operand to AND
+ std::string_view query = "foo AND"; // No right hand operand to AND
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::AND}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(4, 3), Lexer::TokenType::AND}};
Parser parser = Parser::Create(std::move(lexer_tokens));
EXPECT_THAT(parser.ConsumeQuery(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST(ParserTest, InvalidNot) {
- // Query: "NOT" No right hand operand to NOT
- std::vector<Lexer::LexerToken> lexer_tokens = {{"", Lexer::TokenType::NOT}};
+ std::string_view query = "NOT"; // No right hand operand to NOT
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"", query.substr(0, 3), Lexer::TokenType::NOT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
EXPECT_THAT(parser.ConsumeQuery(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST(ParserTest, InvalidMinus) {
- // Query: "-" No right hand operand to -
- std::vector<Lexer::LexerToken> lexer_tokens = {{"", Lexer::TokenType::MINUS}};
+ std::string_view query = "-"; // No right hand operand to -
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"", query.substr(0, 1), Lexer::TokenType::MINUS}};
Parser parser = Parser::Create(std::move(lexer_tokens));
EXPECT_THAT(parser.ConsumeQuery(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST(ParserTest, InvalidFunctionCallNoRparen) {
- // Query: "foo(" No terminating RPAREN
+ std::string_view query = "foo("; // No terminating RPAREN
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::FUNCTION_NAME}, {"", Lexer::TokenType::LPAREN}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(3, 0), Lexer::TokenType::LPAREN}};
Parser parser = Parser::Create(std::move(lexer_tokens));
EXPECT_THAT(parser.ConsumeQuery(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST(ParserTest, InvalidFunctionCallNoLparen) {
- // Query: "foo bar" foo labeled FUNCTION_NAME despite no LPAREN
+ std::string_view query =
+ "foo bar"; // foo labeled FUNCTION_NAME despite no LPAREN
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::FUNCTION_NAME},
- {"bar", Lexer::TokenType::FUNCTION_NAME}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"bar", query.substr(4, 3), Lexer::TokenType::FUNCTION_NAME}};
Parser parser = Parser::Create(std::move(lexer_tokens));
EXPECT_THAT(parser.ConsumeQuery(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST(ParserTest, InvalidFunctionArgsHangingComma) {
- // Query: "foo("bar",)" no valid arg following COMMA
+ std::string_view query = R"(foo("bar",))"; // no valid arg following COMMA
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::FUNCTION_NAME},
- {"", Lexer::TokenType::LPAREN},
- {"bar", Lexer::TokenType::STRING},
- {"", Lexer::TokenType::COMMA},
- {"", Lexer::TokenType::RPAREN}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(3, 1), Lexer::TokenType::LPAREN},
+ {"bar", query.substr(5, 3), Lexer::TokenType::STRING},
+ {"", query.substr(9, 1), Lexer::TokenType::COMMA},
+ {"", query.substr(10, 1), Lexer::TokenType::RPAREN}};
Parser parser = Parser::Create(std::move(lexer_tokens));
EXPECT_THAT(parser.ConsumeQuery(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST(ParserTest, ScoringPlus) {
- // Scoring: "1 + 1 + 1"
- std::vector<Lexer::LexerToken> lexer_tokens = {{"1", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::PLUS},
- {"1", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::PLUS},
- {"1", Lexer::TokenType::TEXT}};
+ std::string_view scoring_exp = "1 + 1 + 1";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"1", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(2, 1), Lexer::TokenType::PLUS},
+ {"1", scoring_exp.substr(4, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(6, 1), Lexer::TokenType::PLUS},
+ {"1", scoring_exp.substr(8, 1), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeScoring());
@@ -622,12 +646,13 @@ TEST(ParserTest, ScoringPlus) {
}
TEST(ParserTest, ScoringMinus) {
- // Scoring: "1 - 1 - 1"
- std::vector<Lexer::LexerToken> lexer_tokens = {{"1", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::MINUS},
- {"1", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::MINUS},
- {"1", Lexer::TokenType::TEXT}};
+ std::string_view scoring_exp = "1 - 1 - 1";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"1", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(2, 1), Lexer::TokenType::MINUS},
+ {"1", scoring_exp.substr(4, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(6, 1), Lexer::TokenType::MINUS},
+ {"1", scoring_exp.substr(8, 1), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeScoring());
@@ -651,11 +676,14 @@ TEST(ParserTest, ScoringMinus) {
}
TEST(ParserTest, ScoringUnaryMinus) {
- // Scoring: "1 + -1 + 1"
+ std::string_view scoring_exp = "1 + -1 + 1";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"1", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::PLUS},
- {"", Lexer::TokenType::MINUS}, {"1", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::PLUS}, {"1", Lexer::TokenType::TEXT}};
+ {"1", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(2, 1), Lexer::TokenType::PLUS},
+ {"", scoring_exp.substr(4, 1), Lexer::TokenType::MINUS},
+ {"1", scoring_exp.substr(5, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(7, 1), Lexer::TokenType::PLUS},
+ {"1", scoring_exp.substr(9, 1), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeScoring());
@@ -682,12 +710,15 @@ TEST(ParserTest, ScoringUnaryMinus) {
}
TEST(ParserTest, ScoringPlusMinus) {
- // Scoring: "11 + 12 - 13 + 14"
+ std::string_view scoring_exp = "11 + 12 - 13 + 14";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"11", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::PLUS},
- {"12", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::MINUS},
- {"13", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::PLUS},
- {"14", Lexer::TokenType::TEXT}};
+ {"11", scoring_exp.substr(0, 2), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(3, 1), Lexer::TokenType::PLUS},
+ {"12", scoring_exp.substr(5, 2), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(6, 1), Lexer::TokenType::MINUS},
+ {"13", scoring_exp.substr(8, 2), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(11, 1), Lexer::TokenType::PLUS},
+ {"14", scoring_exp.substr(13, 2), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeScoring());
@@ -719,12 +750,13 @@ TEST(ParserTest, ScoringPlusMinus) {
}
TEST(ParserTest, ScoringTimes) {
- // Scoring: "1 * 1 * 1"
- std::vector<Lexer::LexerToken> lexer_tokens = {{"1", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::TIMES},
- {"1", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::TIMES},
- {"1", Lexer::TokenType::TEXT}};
+ std::string_view scoring_exp = "1 * 1 * 1";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"1", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(2, 1), Lexer::TokenType::TIMES},
+ {"1", scoring_exp.substr(4, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(6, 1), Lexer::TokenType::TIMES},
+ {"1", scoring_exp.substr(8, 1), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeScoring());
@@ -748,12 +780,13 @@ TEST(ParserTest, ScoringTimes) {
}
TEST(ParserTest, ScoringDiv) {
- // Scoring: "1 / 1 / 1"
- std::vector<Lexer::LexerToken> lexer_tokens = {{"1", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::DIV},
- {"1", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::DIV},
- {"1", Lexer::TokenType::TEXT}};
+ std::string_view scoring_exp = "1 / 1 / 1";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"1", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(2, 1), Lexer::TokenType::DIV},
+ {"1", scoring_exp.substr(4, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(6, 1), Lexer::TokenType::DIV},
+ {"1", scoring_exp.substr(8, 1), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeScoring());
@@ -777,13 +810,17 @@ TEST(ParserTest, ScoringDiv) {
}
TEST(ParserTest, ScoringTimesDiv) {
- // Scoring: "11 / 12 * 13 / 14 / 15"
+ std::string_view scoring_exp = "11 / 12 * 13 / 14 / 15";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"11", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::DIV},
- {"12", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::TIMES},
- {"13", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::DIV},
- {"14", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::DIV},
- {"15", Lexer::TokenType::TEXT}};
+ {"11", scoring_exp.substr(0, 2), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(3, 1), Lexer::TokenType::DIV},
+ {"12", scoring_exp.substr(5, 2), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(8, 1), Lexer::TokenType::TIMES},
+ {"13", scoring_exp.substr(10, 2), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(13, 1), Lexer::TokenType::DIV},
+ {"14", scoring_exp.substr(15, 2), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(18, 1), Lexer::TokenType::DIV},
+ {"15", scoring_exp.substr(20, 2), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeScoring());
@@ -817,29 +854,29 @@ TEST(ParserTest, ScoringTimesDiv) {
}
TEST(ParserTest, ComplexScoring) {
- // Scoring: "1 + pow((2 * sin(3)), 4) + -5 / 6"
+ std::string_view scoring_exp = "1 + pow((2 * sin(3)), 4) + -5 / 6";
// With parentheses in function arguments.
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"1", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::PLUS},
- {"pow", Lexer::TokenType::FUNCTION_NAME},
- {"", Lexer::TokenType::LPAREN},
- {"", Lexer::TokenType::LPAREN},
- {"2", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::TIMES},
- {"sin", Lexer::TokenType::FUNCTION_NAME},
- {"", Lexer::TokenType::LPAREN},
- {"3", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::RPAREN},
- {"", Lexer::TokenType::RPAREN},
- {"", Lexer::TokenType::COMMA},
- {"4", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::RPAREN},
- {"", Lexer::TokenType::PLUS},
- {"", Lexer::TokenType::MINUS},
- {"5", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::DIV},
- {"6", Lexer::TokenType::TEXT},
+ {"1", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(2, 1), Lexer::TokenType::PLUS},
+ {"pow", scoring_exp.substr(4, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", scoring_exp.substr(7, 1), Lexer::TokenType::LPAREN},
+ {"", scoring_exp.substr(8, 1), Lexer::TokenType::LPAREN},
+ {"2", scoring_exp.substr(9, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(11, 1), Lexer::TokenType::TIMES},
+ {"sin", scoring_exp.substr(13, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", scoring_exp.substr(16, 1), Lexer::TokenType::LPAREN},
+ {"3", scoring_exp.substr(17, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(18, 1), Lexer::TokenType::RPAREN},
+ {"", scoring_exp.substr(19, 1), Lexer::TokenType::RPAREN},
+ {"", scoring_exp.substr(20, 1), Lexer::TokenType::COMMA},
+ {"4", scoring_exp.substr(22, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(23, 1), Lexer::TokenType::RPAREN},
+ {"", scoring_exp.substr(25, 1), Lexer::TokenType::PLUS},
+ {"", scoring_exp.substr(27, 1), Lexer::TokenType::MINUS},
+ {"5", scoring_exp.substr(28, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(30, 1), Lexer::TokenType::DIV},
+ {"6", scoring_exp.substr(32, 1), Lexer::TokenType::TEXT},
};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
@@ -869,27 +906,27 @@ TEST(ParserTest, ComplexScoring) {
EqualsNodeInfo("DIV", NodeType::kNaryOperator),
EqualsNodeInfo("PLUS", NodeType::kNaryOperator)));
- // Scoring: "1 + pow(2 * sin(3), 4) + -5 / 6"
+ scoring_exp = "1 + pow(2 * sin(3), 4) + -5 / 6";
// Without parentheses in function arguments.
lexer_tokens = {
- {"1", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::PLUS},
- {"pow", Lexer::TokenType::FUNCTION_NAME},
- {"", Lexer::TokenType::LPAREN},
- {"2", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::TIMES},
- {"sin", Lexer::TokenType::FUNCTION_NAME},
- {"", Lexer::TokenType::LPAREN},
- {"3", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::RPAREN},
- {"", Lexer::TokenType::COMMA},
- {"4", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::RPAREN},
- {"", Lexer::TokenType::PLUS},
- {"", Lexer::TokenType::MINUS},
- {"5", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::DIV},
- {"6", Lexer::TokenType::TEXT},
+ {"1", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(2, 1), Lexer::TokenType::PLUS},
+ {"pow", scoring_exp.substr(4, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", scoring_exp.substr(7, 1), Lexer::TokenType::LPAREN},
+ {"2", scoring_exp.substr(8, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(10, 1), Lexer::TokenType::TIMES},
+ {"sin", scoring_exp.substr(12, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", scoring_exp.substr(15, 1), Lexer::TokenType::LPAREN},
+ {"3", scoring_exp.substr(16, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(17, 1), Lexer::TokenType::RPAREN},
+ {"", scoring_exp.substr(18, 1), Lexer::TokenType::COMMA},
+ {"4", scoring_exp.substr(20, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(21, 1), Lexer::TokenType::RPAREN},
+ {"", scoring_exp.substr(23, 1), Lexer::TokenType::PLUS},
+ {"", scoring_exp.substr(25, 1), Lexer::TokenType::MINUS},
+ {"5", scoring_exp.substr(26, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(28, 1), Lexer::TokenType::DIV},
+ {"6", scoring_exp.substr(30, 1), Lexer::TokenType::TEXT},
};
parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(tree_root, parser.ConsumeScoring());
@@ -899,13 +936,14 @@ TEST(ParserTest, ComplexScoring) {
}
TEST(ParserTest, ScoringMemberFunction) {
- // Scoring: this.CreationTimestamp()
+ std::string_view scoring_exp = "this.CreationTimestamp()";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"this", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::DOT},
- {"CreationTimestamp", Lexer::TokenType::FUNCTION_NAME},
- {"", Lexer::TokenType::LPAREN},
- {"", Lexer::TokenType::RPAREN}};
+ {"this", scoring_exp.substr(0, 4), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(4, 1), Lexer::TokenType::DOT},
+ {"CreationTimestamp", scoring_exp.substr(5, 17),
+ Lexer::TokenType::FUNCTION_NAME},
+ {"", scoring_exp.substr(22, 1), Lexer::TokenType::LPAREN},
+ {"", scoring_exp.substr(23, 1), Lexer::TokenType::RPAREN}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeScoring());
@@ -927,13 +965,13 @@ TEST(ParserTest, ScoringMemberFunction) {
}
TEST(ParserTest, QueryMemberFunction) {
- // Query: this.foo()
+ std::string_view query = "this.foo()";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"this", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::DOT},
- {"foo", Lexer::TokenType::FUNCTION_NAME},
- {"", Lexer::TokenType::LPAREN},
- {"", Lexer::TokenType::RPAREN}};
+ {"this", query.substr(0, 4), Lexer::TokenType::TEXT},
+ {"", query.substr(4, 1), Lexer::TokenType::DOT},
+ {"foo", query.substr(5, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(8, 1), Lexer::TokenType::LPAREN},
+ {"", query.substr(9, 1), Lexer::TokenType::RPAREN}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -954,18 +992,18 @@ TEST(ParserTest, QueryMemberFunction) {
}
TEST(ParserTest, ScoringComplexMemberFunction) {
- // Scoring: a.b.fun(c, d)
+ std::string_view scoring_exp = "a.b.fun(c, d)";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"a", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::DOT},
- {"b", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::DOT},
- {"fun", Lexer::TokenType::FUNCTION_NAME},
- {"", Lexer::TokenType::LPAREN},
- {"c", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::COMMA},
- {"d", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::RPAREN}};
+ {"a", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(1, 1), Lexer::TokenType::DOT},
+ {"b", scoring_exp.substr(2, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(3, 1), Lexer::TokenType::DOT},
+ {"fun", scoring_exp.substr(4, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", scoring_exp.substr(7, 1), Lexer::TokenType::LPAREN},
+ {"c", scoring_exp.substr(8, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(9, 1), Lexer::TokenType::COMMA},
+ {"d", scoring_exp.substr(11, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(12, 1), Lexer::TokenType::RPAREN}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeScoring());
@@ -993,13 +1031,18 @@ TEST(ParserTest, ScoringComplexMemberFunction) {
}
TEST(ParserTest, QueryComplexMemberFunction) {
- // Query: this.abc.fun(def, ghi)
+ std::string_view query = "this.abc.fun(def, ghi)";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"this", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::DOT},
- {"abc", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::DOT},
- {"fun", Lexer::TokenType::FUNCTION_NAME}, {"", Lexer::TokenType::LPAREN},
- {"def", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::COMMA},
- {"ghi", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::RPAREN}};
+ {"this", query.substr(0, 4), Lexer::TokenType::TEXT},
+ {"", query.substr(4, 1), Lexer::TokenType::DOT},
+ {"abc", query.substr(5, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(8, 1), Lexer::TokenType::DOT},
+ {"fun", query.substr(9, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(12, 1), Lexer::TokenType::LPAREN},
+ {"def", query.substr(13, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(16, 1), Lexer::TokenType::COMMA},
+ {"ghi", query.substr(17, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(20, 1), Lexer::TokenType::RPAREN}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -1027,11 +1070,12 @@ TEST(ParserTest, QueryComplexMemberFunction) {
}
TEST(ParserTest, InvalidScoringToken) {
- // Scoring: "1 + NOT 1"
- std::vector<Lexer::LexerToken> lexer_tokens = {{"1", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::PLUS},
- {"", Lexer::TokenType::NOT},
- {"1", Lexer::TokenType::TEXT}};
+ std::string_view scoring_exp = "1 + NOT 1";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"1", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(2, 1), Lexer::TokenType::PLUS},
+ {"", scoring_exp.substr(4, 3), Lexer::TokenType::NOT},
+ {"1", scoring_exp.substr(8, 1), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
EXPECT_THAT(parser.ConsumeScoring(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
diff --git a/icing/query/advanced_query_parser/pending-value.cc b/icing/query/advanced_query_parser/pending-value.cc
new file mode 100644
index 0000000..67bdc3a
--- /dev/null
+++ b/icing/query/advanced_query_parser/pending-value.cc
@@ -0,0 +1,44 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "icing/query/advanced_query_parser/pending-value.h"
+
+#include "icing/absl_ports/canonical_errors.h"
+
+namespace icing {
+namespace lib {
+
+libtextclassifier3::Status PendingValue::ParseInt() {
+ if (data_type_ == DataType::kLong) {
+ return libtextclassifier3::Status::OK;
+ } else if (data_type_ != DataType::kText) {
+ return absl_ports::InvalidArgumentError("Cannot parse value as LONG");
+ }
+ if (query_term_.is_prefix_val) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Cannot use prefix operator '*' with numeric value: ",
+ query_term_.term));
+ }
+ char* value_end;
+ long_val_ = std::strtoll(query_term_.term.c_str(), &value_end, /*base=*/10);
+ if (value_end != query_term_.term.c_str() + query_term_.term.length()) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Unable to parse \"", query_term_.term, "\" as number."));
+ }
+ data_type_ = DataType::kLong;
+ query_term_ = {/*term=*/"", /*raw_term=*/"", /*is_prefix_val=*/false};
+ return libtextclassifier3::Status::OK;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/query/advanced_query_parser/pending-value.h b/icing/query/advanced_query_parser/pending-value.h
new file mode 100644
index 0000000..1a6717e
--- /dev/null
+++ b/icing/query/advanced_query_parser/pending-value.h
@@ -0,0 +1,160 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef ICING_QUERY_ADVANCED_QUERY_PARSER_PENDING_VALUE_H_
+#define ICING_QUERY_ADVANCED_QUERY_PARSER_PENDING_VALUE_H_
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+enum class DataType {
+ kNone,
+ kLong,
+ kText,
+ kString,
+ kStringList,
+ kDocumentIterator,
+};
+
+struct QueryTerm {
+ std::string term;
+ std::string_view raw_term;
+ bool is_prefix_val;
+};
+
+// A holder for intermediate results when processing child nodes.
+struct PendingValue {
+ static PendingValue CreateStringPendingValue(QueryTerm str) {
+ return PendingValue(std::move(str), DataType::kString);
+ }
+
+ static PendingValue CreateTextPendingValue(QueryTerm text) {
+ return PendingValue(std::move(text), DataType::kText);
+ }
+
+ PendingValue() : data_type_(DataType::kNone) {}
+
+ explicit PendingValue(std::unique_ptr<DocHitInfoIterator> iterator)
+ : iterator_(std::move(iterator)),
+ data_type_(DataType::kDocumentIterator) {}
+
+ explicit PendingValue(std::vector<std::string> string_lists)
+ : string_vals_(std::move(string_lists)),
+ data_type_(DataType::kStringList) {}
+
+ PendingValue(const PendingValue&) = delete;
+ PendingValue(PendingValue&&) = default;
+
+ PendingValue& operator=(const PendingValue&) = delete;
+ PendingValue& operator=(PendingValue&&) = default;
+
+ // Placeholder is used to indicate where the children of a particular node
+ // begin.
+ bool is_placeholder() const { return data_type_ == DataType::kNone; }
+
+ libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
+ iterator() && {
+ ICING_RETURN_IF_ERROR(CheckDataType(DataType::kDocumentIterator));
+ return std::move(iterator_);
+ }
+
+ libtextclassifier3::StatusOr<const std::vector<std::string>*> string_vals()
+ const& {
+ ICING_RETURN_IF_ERROR(CheckDataType(DataType::kStringList));
+ return &string_vals_;
+ }
+ libtextclassifier3::StatusOr<std::vector<std::string>> string_vals() && {
+ ICING_RETURN_IF_ERROR(CheckDataType(DataType::kStringList));
+ return std::move(string_vals_);
+ }
+
+ libtextclassifier3::StatusOr<const QueryTerm*> string_val() const& {
+ ICING_RETURN_IF_ERROR(CheckDataType(DataType::kString));
+ return &query_term_;
+ }
+ libtextclassifier3::StatusOr<QueryTerm> string_val() && {
+ ICING_RETURN_IF_ERROR(CheckDataType(DataType::kString));
+ return std::move(query_term_);
+ }
+
+ libtextclassifier3::StatusOr<const QueryTerm*> text_val() const& {
+ ICING_RETURN_IF_ERROR(CheckDataType(DataType::kText));
+ return &query_term_;
+ }
+ libtextclassifier3::StatusOr<QueryTerm> text_val() && {
+ ICING_RETURN_IF_ERROR(CheckDataType(DataType::kText));
+ return std::move(query_term_);
+ }
+
+ libtextclassifier3::StatusOr<int64_t> long_val() {
+ ICING_RETURN_IF_ERROR(ParseInt());
+ return long_val_;
+ }
+
+ // Attempts to interpret the value as an int. A pending value can be parsed as
+ // an int under two circumstances:
+ // 1. It holds a kText value which can be parsed to an int
+ // 2. It holds a kLong value
+ // If #1 is true, then the parsed value will be stored in long_value and
+ // data_type will be updated to kLong.
+ // RETURNS:
+ // - OK, if able to successfully parse the value into a long
+ // - INVALID_ARGUMENT if the value could not be parsed as a long
+ libtextclassifier3::Status ParseInt();
+
+ DataType data_type() const { return data_type_; }
+
+ private:
+ explicit PendingValue(QueryTerm query_term, DataType data_type)
+ : query_term_(std::move(query_term)), data_type_(data_type) {}
+
+ libtextclassifier3::Status CheckDataType(DataType required_data_type) const {
+ if (data_type_ == required_data_type) {
+ return libtextclassifier3::Status::OK;
+ }
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("Unable to retrieve value of type '",
+ std::to_string(static_cast<int>(required_data_type)),
+ "' from pending value of type '",
+ std::to_string(static_cast<int>(data_type_)), "'"));
+ }
+
+ // iterator_ will be populated when data_type_ is kDocumentIterator.
+ std::unique_ptr<DocHitInfoIterator> iterator_;
+
+ // string_vals_ will be populated when data_type_ kStringList.
+ std::vector<std::string> string_vals_;
+
+ // query_term_ will be populated when data_type_ is kString or kText
+ QueryTerm query_term_;
+
+ // long_val_ will be populated when data_type_ is kLong - after a successful
+ // call to ParseInt.
+ int64_t long_val_;
+ DataType data_type_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_QUERY_ADVANCED_QUERY_PARSER_PENDING_VALUE_H_
diff --git a/icing/query/advanced_query_parser/query-visitor.cc b/icing/query/advanced_query_parser/query-visitor.cc
index fbd4504..d75a550 100644
--- a/icing/query/advanced_query_parser/query-visitor.cc
+++ b/icing/query/advanced_query_parser/query-visitor.cc
@@ -14,21 +14,38 @@
#include "icing/query/advanced_query_parser/query-visitor.h"
+#include <algorithm>
#include <cstdint>
#include <cstdlib>
+#include <iterator>
#include <limits>
#include <memory>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
+#include "icing/index/iterator/doc-hit-info-iterator-all-document-id.h"
#include "icing/index/iterator/doc-hit-info-iterator-and.h"
+#include "icing/index/iterator/doc-hit-info-iterator-none.h"
#include "icing/index/iterator/doc-hit-info-iterator-not.h"
#include "icing/index/iterator/doc-hit-info-iterator-or.h"
+#include "icing/index/iterator/doc-hit-info-iterator-property-in-schema.h"
#include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/query/advanced_query_parser/lexer.h"
+#include "icing/query/advanced_query_parser/param.h"
+#include "icing/query/advanced_query_parser/parser.h"
+#include "icing/query/advanced_query_parser/pending-value.h"
+#include "icing/query/advanced_query_parser/util/string-util.h"
#include "icing/query/query-features.h"
-#include "icing/schema/section-manager.h"
+#include "icing/schema/property-util.h"
+#include "icing/schema/section.h"
+#include "icing/tokenization/token.h"
+#include "icing/tokenization/tokenizer.h"
#include "icing/util/status-macros.h"
namespace icing {
@@ -36,24 +53,18 @@ namespace lib {
namespace {
-libtextclassifier3::StatusOr<std::string> EscapeStringValue(
- std::string_view value) {
- std::string result;
- bool in_escape = false;
- for (char c : value) {
- if (in_escape) {
- in_escape = false;
- } else if (c == '\\') {
- in_escape = true;
- continue;
- } else if (c == '"') {
- return absl_ports::InvalidArgumentError(
- "Encountered an unescaped quotation mark!");
+struct CreateList {
+ libtextclassifier3::StatusOr<PendingValue> operator()(
+ std::vector<PendingValue>&& args) const {
+ std::vector<std::string> values;
+ values.reserve(args.size());
+ for (PendingValue& arg : args) {
+ QueryTerm string_val = std::move(arg).string_val().ValueOrDie();
+ values.push_back(std::move(string_val.term));
}
- result += c;
+ return PendingValue(std::move(values));
}
- return result;
-}
+};
bool IsNumericComparator(std::string_view operator_text) {
if (operator_text.length() < 1 || operator_text.length() > 2) {
@@ -104,114 +115,542 @@ libtextclassifier3::StatusOr<Int64Range> GetInt64Range(
} // namespace
-libtextclassifier3::StatusOr<int64_t> QueryVisitor::RetrieveIntValue() {
- if (pending_values_.empty() || !pending_values_.top().holds_text()) {
- return absl_ports::InvalidArgumentError("Unable to retrieve int value.");
+void QueryVisitor::PendingPropertyRestricts::AddValidRestricts(
+ std::set<std::string> new_restricts) {
+ if (!has_active_property_restricts()) {
+ pending_property_restricts_.push_back(std::move(new_restricts));
+ return;
}
- std::string& value = pending_values_.top().text;
- char* value_end;
- int64_t int_value = std::strtoll(value.c_str(), &value_end, /*base=*/10);
- if (value_end != value.c_str() + value.length()) {
- return absl_ports::InvalidArgumentError(
- absl_ports::StrCat("Unable to parse \"", value, "\" as number."));
+
+ // There is an active property restrict already in effect. To determine the
+ // updated active property restrict being applied at this level, we need to
+ // calculate the intersection of new_restricts and
+ // active_property_restricts.
+ const std::set<std::string>& active_restricts = active_property_restricts();
+ auto active_restricts_itr = active_restricts.begin();
+ for (auto new_restricts_itr = new_restricts.begin();
+ new_restricts_itr != new_restricts.end();) {
+ while (active_restricts_itr != active_restricts.end() &&
+ *active_restricts_itr < *new_restricts_itr) {
+ // new_restricts_itr is behind active_restricts_itr.
+ ++active_restricts_itr;
+ }
+ if (active_restricts_itr == active_restricts.end()) {
+ // There's nothing left in active restricts. Everything at
+ // new_restricts_itr and beyond should be removed
+ new_restricts_itr =
+ new_restricts.erase(new_restricts_itr, new_restricts.end());
+ } else if (*active_restricts_itr > *new_restricts_itr) {
+ // new_restricts_itr points to elements not present in
+ // active_restricts_itr
+ new_restricts_itr = new_restricts.erase(new_restricts_itr);
+ } else {
+ // the element that new_restricts_itr points to is present in
+ // active_restricts_itr.
+ ++new_restricts_itr;
+ }
+ }
+ pending_property_restricts_.push_back(std::move(new_restricts));
+}
+
+libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
+QueryVisitor::CreateTermIterator(const QueryTerm& query_term) {
+ if (query_term.is_prefix_val) {
+ // '*' prefix operator was added in list filters
+ features_.insert(kListFilterQueryLanguageFeature);
+ }
+ TermMatchType::Code match_type = GetTermMatchType(query_term.is_prefix_val);
+ int unnormalized_term_start =
+ query_term.raw_term.data() - raw_query_text_.data();
+ if (!processing_not_) {
+ // 1. Add term to property_query_terms_map
+ if (pending_property_restricts_.has_active_property_restricts()) {
+ for (const std::string& property_restrict :
+ pending_property_restricts_.active_property_restricts()) {
+ property_query_terms_map_[property_restrict].insert(query_term.term);
+ }
+ } else {
+ property_query_terms_map_[""].insert(query_term.term);
+ }
+
+ // 2. If needed add term iterator to query_term_iterators_ map.
+ if (needs_term_frequency_info_) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<DocHitInfoIterator> term_iterator,
+ index_.GetIterator(query_term.term, unnormalized_term_start,
+ query_term.raw_term.length(), kSectionIdMaskAll,
+ match_type_, needs_term_frequency_info_));
+ query_term_iterators_[query_term.term] =
+ std::make_unique<DocHitInfoIteratorFilter>(
+ std::move(term_iterator), &document_store_, &schema_store_,
+ filter_options_, current_time_ms_);
+ }
+ }
+
+ // 3. Add the term iterator.
+ return index_.GetIterator(query_term.term, unnormalized_term_start,
+ query_term.raw_term.length(), kSectionIdMaskAll,
+ match_type, needs_term_frequency_info_);
+}
+
+void QueryVisitor::RegisterFunctions() {
+ // std::vector<std::string> createList(std::string...);
+ Function create_list_function_ =
+ Function::Create(DataType::kStringList, "createList",
+ {Param(DataType::kString, Cardinality::kRequired),
+ Param(DataType::kString, Cardinality::kVariable)},
+ CreateList())
+ .ValueOrDie();
+ registered_functions_.insert(
+ {create_list_function_.name(), std::move(create_list_function_)});
+
+ // DocHitInfoIterator search(std::string);
+ // DocHitInfoIterator search(std::string, std::vector<std::string>);
+ auto search_eval = [this](std::vector<PendingValue>&& args) {
+ return this->SearchFunction(std::move(args));
+ };
+ Function search_function =
+ Function::Create(DataType::kDocumentIterator, "search",
+ {Param(DataType::kString),
+ Param(DataType::kStringList, Cardinality::kOptional)},
+ std::move(search_eval))
+ .ValueOrDie();
+ registered_functions_.insert(
+ {search_function.name(), std::move(search_function)});
+
+ // DocHitInfoIterator propertyDefined(std::string);
+ auto property_defined = [this](std::vector<PendingValue>&& args) {
+ return this->PropertyDefinedFunction(std::move(args));
+ };
+
+ Function property_defined_function =
+ Function::Create(DataType::kDocumentIterator, "propertyDefined",
+ {Param(DataType::kString)}, std::move(property_defined))
+ .ValueOrDie();
+ registered_functions_.insert(
+ {property_defined_function.name(), std::move(property_defined_function)});
+}
+
+libtextclassifier3::StatusOr<PendingValue> QueryVisitor::SearchFunction(
+ std::vector<PendingValue>&& args) {
+ // The second arg (if present) is a list of sections to restrict to.
+ if (args.size() == 2) {
+ std::set<std::string> new_restricts;
+ std::vector<std::string> property_restricts =
+ std::move(args.at(1)).string_vals().ValueOrDie();
+ for (std::string& property_restrict : property_restricts) {
+ new_restricts.insert(std::move(property_restrict));
+ }
+ pending_property_restricts_.AddValidRestricts(std::move(new_restricts));
+ if (pending_property_restricts_.active_property_restricts().empty()) {
+ pending_property_restricts_.PopRestricts();
+ return PendingValue(std::make_unique<DocHitInfoIteratorNone>());
+ }
+ }
+
+ // The first arg is guaranteed to be a STRING at this point. It should be safe
+ // to call ValueOrDie.
+ const QueryTerm* query = args.at(0).string_val().ValueOrDie();
+ Lexer lexer(query->term, Lexer::Language::QUERY);
+ ICING_ASSIGN_OR_RETURN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ std::unique_ptr<DocHitInfoIterator> iterator;
+ QueryResults query_result;
+ if (tree_root == nullptr) {
+ iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>(
+ document_store_.last_added_document_id());
+ } else {
+ QueryVisitor query_visitor(
+ &index_, &numeric_index_, &document_store_, &schema_store_,
+ &normalizer_, &tokenizer_, query->raw_term, filter_options_,
+ match_type_, needs_term_frequency_info_, pending_property_restricts_,
+ processing_not_, current_time_ms_);
+ tree_root->Accept(&query_visitor);
+ ICING_ASSIGN_OR_RETURN(query_result,
+ std::move(query_visitor).ConsumeResults());
+ iterator = std::move(query_result.root_iterator);
}
+
+ // Update members based on results of processing the query.
+ if (args.size() == 2 &&
+ pending_property_restricts_.has_active_property_restricts()) {
+ iterator = std::make_unique<DocHitInfoIteratorSectionRestrict>(
+ std::move(iterator), &document_store_, &schema_store_,
+ pending_property_restricts_.active_property_restricts(),
+ current_time_ms_);
+ pending_property_restricts_.PopRestricts();
+ }
+ if (!processing_not_) {
+ std::move(
+ query_result.query_term_iterators.begin(),
+ query_result.query_term_iterators.end(),
+ std::inserter(query_term_iterators_, query_term_iterators_.end()));
+
+ std::move(query_result.query_terms.begin(), query_result.query_terms.end(),
+ std::inserter(property_query_terms_map_,
+ property_query_terms_map_.end()));
+ }
+ std::move(query_result.features_in_use.begin(),
+ query_result.features_in_use.end(),
+ std::inserter(features_, features_.end()));
+ return PendingValue(std::move(iterator));
+}
+
+libtextclassifier3::StatusOr<PendingValue>
+QueryVisitor::PropertyDefinedFunction(std::vector<PendingValue>&& args) {
+ // The first arg is guaranteed to be a STRING at this point. It should be safe
+ // to call ValueOrDie.
+ const QueryTerm* member = args.at(0).string_val().ValueOrDie();
+
+ std::unique_ptr<DocHitInfoIterator> all_docs_iterator =
+ std::make_unique<DocHitInfoIteratorAllDocumentId>(
+ document_store_.last_added_document_id());
+
+ std::set<std::string> target_sections = {std::move(member->term)};
+ std::unique_ptr<DocHitInfoIterator> property_in_schema_iterator =
+ std::make_unique<DocHitInfoIteratorPropertyInSchema>(
+ std::move(all_docs_iterator), &document_store_, &schema_store_,
+ std::move(target_sections), current_time_ms_);
+
+ features_.insert(kListFilterQueryLanguageFeature);
+
+ return PendingValue(std::move(property_in_schema_iterator));
+}
+
+libtextclassifier3::StatusOr<int64_t> QueryVisitor::PopPendingIntValue() {
+ if (pending_values_.empty()) {
+ return absl_ports::InvalidArgumentError("Unable to retrieve int value.");
+ }
+ ICING_ASSIGN_OR_RETURN(int64_t int_value, pending_values_.top().long_val());
pending_values_.pop();
return int_value;
}
-libtextclassifier3::StatusOr<std::string> QueryVisitor::RetrieveStringValue() {
- if (pending_values_.empty() || !pending_values_.top().holds_text()) {
+libtextclassifier3::StatusOr<QueryTerm> QueryVisitor::PopPendingStringValue() {
+ if (pending_values_.empty()) {
return absl_ports::InvalidArgumentError("Unable to retrieve string value.");
}
- std::string string_value = std::move(pending_values_.top().text);
+ ICING_ASSIGN_OR_RETURN(QueryTerm string_value,
+ std::move(pending_values_.top()).string_val());
pending_values_.pop();
return string_value;
}
+libtextclassifier3::StatusOr<QueryTerm> QueryVisitor::PopPendingTextValue() {
+ if (pending_values_.empty()) {
+ return absl_ports::InvalidArgumentError("Unable to retrieve text value.");
+ }
+ ICING_ASSIGN_OR_RETURN(QueryTerm text_value,
+ std::move(pending_values_.top()).text_val());
+ pending_values_.pop();
+ return text_value;
+}
+
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
-QueryVisitor::RetrieveIterator() {
- if (pending_values_.top().holds_iterator()) {
+QueryVisitor::PopPendingIterator() {
+ if (pending_values_.empty() || pending_values_.top().is_placeholder()) {
+ return absl_ports::InvalidArgumentError("Unable to retrieve iterator.");
+ }
+ if (pending_values_.top().data_type() == DataType::kDocumentIterator) {
std::unique_ptr<DocHitInfoIterator> iterator =
- std::move(pending_values_.top().iterator);
+ std::move(pending_values_.top()).iterator().ValueOrDie();
pending_values_.pop();
return iterator;
+ } else if (pending_values_.top().data_type() == DataType::kString) {
+ features_.insert(kVerbatimSearchFeature);
+ ICING_ASSIGN_OR_RETURN(QueryTerm string_value, PopPendingStringValue());
+ return CreateTermIterator(std::move(string_value));
+ } else {
+ ICING_ASSIGN_OR_RETURN(QueryTerm text_value, PopPendingTextValue());
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> token_itr,
+ tokenizer_.Tokenize(text_value.term));
+ std::string normalized_term;
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+ // The tokenizer will produce 1+ tokens out of the text. The prefix operator
+ // only applies to the final token.
+ bool reached_final_token = !token_itr->Advance();
+ // raw_text is the portion of text_value.raw_term that hasn't yet been
+ // matched to any of the tokens that we've processed. escaped_token will
+ // hold the portion of raw_text that corresponds to the current token that
+ // is being processed.
+ std::string_view raw_text = text_value.raw_term;
+ std::string_view raw_token;
+ while (!reached_final_token) {
+ std::vector<Token> tokens = token_itr->GetTokens();
+ if (tokens.size() > 1) {
+ // The tokenizer iterator iterates between token groups. In practice,
+ // the tokenizer used with QueryVisitor (PlainTokenizer) will always
+ // only produce a single token per token group.
+ return absl_ports::InvalidArgumentError(
+ "Encountered unexpected token group with >1 tokens.");
+ }
+
+ reached_final_token = !token_itr->Advance();
+ const Token& token = tokens.at(0);
+ if (reached_final_token && token.text.length() == raw_text.length()) {
+ // Unescaped tokens are strictly smaller than their escaped counterparts
+ // This means that if we're at the final token and token.length equals
+ // raw_text, then all of raw_text must correspond to this token.
+ raw_token = raw_text;
+ } else {
+ ICING_ASSIGN_OR_RETURN(raw_token, string_util::FindEscapedToken(
+ raw_text, token.text));
+ }
+ normalized_term = normalizer_.NormalizeTerm(token.text);
+ QueryTerm term_value{std::move(normalized_term), raw_token,
+ reached_final_token && text_value.is_prefix_val};
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<DocHitInfoIterator> iterator,
+ CreateTermIterator(std::move(term_value)));
+ iterators.push_back(std::move(iterator));
+
+ // Remove escaped_token from raw_text now that we've processed
+ // raw_text.
+ const char* escaped_token_end = raw_token.data() + raw_token.length();
+ raw_text = raw_text.substr(escaped_token_end - raw_text.data());
+ }
+
+ // Finally, create an And Iterator. If there's only a single term here, then
+ // it will just return that term iterator. Otherwise, segmented text is
+ // treated as a group of terms AND'd together.
+ return CreateAndIterator(std::move(iterators));
}
- ICING_ASSIGN_OR_RETURN(std::string value, RetrieveStringValue());
- // Make it into a term iterator.
- return index_.GetIterator(value, kSectionIdMaskAll, match_type_,
- /*need_term_hit_frequency_=*/false);
}
libtextclassifier3::StatusOr<std::vector<std::unique_ptr<DocHitInfoIterator>>>
-QueryVisitor::RetrieveIterators() {
+QueryVisitor::PopAllPendingIterators() {
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
while (!pending_values_.empty() && !pending_values_.top().is_placeholder()) {
ICING_ASSIGN_OR_RETURN(std::unique_ptr<DocHitInfoIterator> itr,
- RetrieveIterator());
+ PopPendingIterator());
iterators.push_back(std::move(itr));
}
if (pending_values_.empty()) {
return absl_ports::InvalidArgumentError(
"Unable to retrieve expected iterators.");
}
+ // Iterators will be in reverse order because we retrieved them from the
+ // stack. Reverse them to get back to the original ordering.
+ std::reverse(iterators.begin(), iterators.end());
return iterators;
}
-libtextclassifier3::StatusOr<QueryVisitor::PendingValue>
-QueryVisitor::ProcessNumericComparator(const NaryOperatorNode* node) {
- // 1. The children should have been processed and added their outputs to
- // pending_values_. Time to process them.
- // The first two pending values should be the int value and the property.
- ICING_ASSIGN_OR_RETURN(int64_t int_value, RetrieveIntValue());
- ICING_ASSIGN_OR_RETURN(std::string property, RetrieveStringValue());
+libtextclassifier3::Status QueryVisitor::ProcessNumericComparator(
+ const NaryOperatorNode* node) {
+ if (node->children().size() != 2) {
+ return absl_ports::InvalidArgumentError("Expected 2 children.");
+ }
- // 2. Create the iterator.
+ // 1. Put in a placeholder PendingValue
+ pending_values_.push(PendingValue());
+
+ // 2. The first child is the property to restrict by.
+ node->children().at(0)->Accept(this);
+ if (has_pending_error()) {
+ return std::move(pending_error_);
+ }
+ ICING_ASSIGN_OR_RETURN(QueryTerm text_value, PopPendingTextValue());
+
+ if (text_value.is_prefix_val) {
+ return absl_ports::InvalidArgumentError(
+ "Cannot use prefix operator '*' with a property name!");
+ }
+
+ // If there is an active property restrict and this property is not present in
+ // in the active restrict set, then it's not satisfiable.
+ if (pending_property_restricts_.has_active_property_restricts() &&
+ pending_property_restricts_.active_property_restricts().find(
+ text_value.term) ==
+ pending_property_restricts_.active_property_restricts().end()) {
+ // The property restrict can't be satisfiable. Pop the placeholder that was
+ // just added and push a FALSE iterator.
+ pending_property_restricts_.PopRestricts();
+ pending_values_.pop();
+ pending_values_.push(
+ PendingValue(std::make_unique<DocHitInfoIteratorNone>()));
+ return libtextclassifier3::Status::OK;
+ }
+
+ // 3. The second child should be parseable as an integer value.
+ expecting_numeric_arg_ = true;
+ node->children().at(1)->Accept(this);
+ expecting_numeric_arg_ = false;
+ ICING_ASSIGN_OR_RETURN(int64_t int_value, PopPendingIntValue());
+
+ // 4. Check for the placeholder.
+ if (!pending_values_.top().is_placeholder()) {
+ return absl_ports::InvalidArgumentError(
+ "Error processing arguments for node.");
+ }
+ pending_values_.pop();
+
+ // 5. Create the iterator and push it onto pending_values_.
ICING_ASSIGN_OR_RETURN(Int64Range range,
GetInt64Range(node->operator_text(), int_value));
- auto iterator_or =
- numeric_index_.GetIterator(property, range.low, range.high);
- if (!iterator_or.ok()) {
- return std::move(iterator_or).status();
- }
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<DocHitInfoIterator> iterator,
+ numeric_index_.GetIterator(
+ text_value.term, range.low, range.high,
+ document_store_, schema_store_, current_time_ms_));
features_.insert(kNumericSearchFeature);
- std::unique_ptr<DocHitInfoIterator> iterator =
- std::move(iterator_or).ValueOrDie();
- return PendingValue(std::move(iterator));
+ pending_values_.push(PendingValue(std::move(iterator)));
+ return libtextclassifier3::Status::OK;
}
-libtextclassifier3::StatusOr<QueryVisitor::PendingValue>
-QueryVisitor::ProcessAndOperator(const NaryOperatorNode* node) {
+libtextclassifier3::StatusOr<PendingValue> QueryVisitor::ProcessAndOperator(
+ const NaryOperatorNode* node) {
ICING_ASSIGN_OR_RETURN(
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators,
- RetrieveIterators());
+ PopAllPendingIterators());
return PendingValue(CreateAndIterator(std::move(iterators)));
}
-libtextclassifier3::StatusOr<QueryVisitor::PendingValue>
-QueryVisitor::ProcessOrOperator(const NaryOperatorNode* node) {
+libtextclassifier3::StatusOr<PendingValue> QueryVisitor::ProcessOrOperator(
+ const NaryOperatorNode* node) {
ICING_ASSIGN_OR_RETURN(
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators,
- RetrieveIterators());
+ PopAllPendingIterators());
return PendingValue(CreateOrIterator(std::move(iterators)));
}
-libtextclassifier3::StatusOr<QueryVisitor::PendingValue>
-QueryVisitor::ProcessHasOperator(const NaryOperatorNode* node) {
- // 1. The children should have been processed and added their outputs to
- // pending_values_. Time to process them.
- // The first two pending values should be the delegate and the property.
+libtextclassifier3::Status QueryVisitor::ProcessNegationOperator(
+ const UnaryOperatorNode* node) {
+ // 1. Put in a placeholder PendingValue
+ pending_values_.push(PendingValue());
+
+ // 2. Visit child
+ node->child()->Accept(this);
+ if (has_pending_error()) {
+ return std::move(pending_error_);
+ }
+
+ if (pending_values_.size() < 2) {
+ return absl_ports::InvalidArgumentError(
+ "Visit unary operator child didn't correctly add pending values.");
+ }
+
+ // 3. We want to preserve the original text of the integer value, append our
+ // minus and *then* parse as an int.
+ ICING_ASSIGN_OR_RETURN(QueryTerm int_text_val, PopPendingTextValue());
+ int_text_val.term = absl_ports::StrCat("-", int_text_val.term);
+ PendingValue pending_value =
+ PendingValue::CreateTextPendingValue(std::move(int_text_val));
+ ICING_RETURN_IF_ERROR(pending_value.long_val());
+
+ // We've parsed our integer value successfully. Pop our placeholder, push it
+ // on to the stack and return successfully.
+ if (!pending_values_.top().is_placeholder()) {
+ return absl_ports::InvalidArgumentError(
+ "Error processing arguments for node.");
+ }
+ pending_values_.pop();
+ pending_values_.push(std::move(pending_value));
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status QueryVisitor::ProcessNotOperator(
+ const UnaryOperatorNode* node) {
+ // TODO(b/265312785) Consider implementing query optimization when we run into
+ // nested NOTs. This would allow us to simplify a query like "NOT (-foo)" to
+ // just "foo". This would also require more complicate rewrites as we would
+ // need to do things like rewrite "NOT (-a OR b)" as "a AND -b" and
+ // "NOT (price < 5)" as "price >= 5".
+ // 1. Put in a placeholder PendingValue
+ pending_values_.push(PendingValue());
+ // Toggle whatever the current value of 'processing_not_' is before visiting
+ // the children.
+ processing_not_ = !processing_not_;
+
+ // 2. Visit child
+ node->child()->Accept(this);
+ if (has_pending_error()) {
+ return std::move(pending_error_);
+ }
+
+ if (pending_values_.size() < 2) {
+ return absl_ports::InvalidArgumentError(
+ "Visit unary operator child didn't correctly add pending values.");
+ }
+
+ // 3. Retrieve the delegate iterator
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<DocHitInfoIterator> delegate,
+ PopPendingIterator());
+
+ // 4. Check for the placeholder.
+ if (!pending_values_.top().is_placeholder()) {
+ return absl_ports::InvalidArgumentError(
+ "Error processing arguments for node.");
+ }
+ pending_values_.pop();
+
+ pending_values_.push(PendingValue(std::make_unique<DocHitInfoIteratorNot>(
+ std::move(delegate), document_store_.last_added_document_id())));
+
+ // Untoggle whatever the current value of 'processing_not_' is now that we've
+ // finished processing this NOT.
+ processing_not_ = !processing_not_;
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status QueryVisitor::ProcessHasOperator(
+ const NaryOperatorNode* node) {
+ if (node->children().size() != 2) {
+ return absl_ports::InvalidArgumentError("Expected 2 children.");
+ }
+
+ // 1. Put in a placeholder PendingValue
+ pending_values_.push(PendingValue());
+
+ // 2. Visit the first child - the property.
+ node->children().at(0)->Accept(this);
+ if (has_pending_error()) {
+ return pending_error_;
+ }
+ ICING_ASSIGN_OR_RETURN(QueryTerm text_value, PopPendingTextValue());
+ if (text_value.is_prefix_val) {
+ return absl_ports::InvalidArgumentError(
+ "Cannot use prefix operator '*' with a property name!");
+ }
+ pending_property_restricts_.AddValidRestricts({text_value.term});
+
+ // Just added a restrict - if there are no active property restricts then that
+ // be because this restrict is unsatisfiable.
+ if (pending_property_restricts_.active_property_restricts().empty()) {
+ // The property restrict can't be satisfiable. Pop the placeholder that was
+ // just added and push a FALSE iterator.
+ pending_property_restricts_.PopRestricts();
+ pending_values_.pop();
+ pending_values_.push(
+ PendingValue(std::make_unique<DocHitInfoIteratorNone>()));
+ return libtextclassifier3::Status::OK;
+ }
+
+ // 3. Visit the second child - the argument.
+ node->children().at(1)->Accept(this);
+ if (has_pending_error()) {
+ return pending_error_;
+ }
ICING_ASSIGN_OR_RETURN(std::unique_ptr<DocHitInfoIterator> delegate,
- RetrieveIterator());
- // TODO(b/208654892): The HAS operator need to be able to differentiate
- // between values that came from STRING nodes and those that came from
- // members. members should be allowed as the left operator to HAS, but STRINGs
- // should not be. IOW, `"prop1":foo` should not be treated equivalently to
- // `prop1:foo`
- ICING_ASSIGN_OR_RETURN(std::string property, RetrieveStringValue());
- return PendingValue(std::make_unique<DocHitInfoIteratorSectionRestrict>(
- std::move(delegate), &document_store_, &schema_store_,
- std::move(property)));
+ PopPendingIterator());
+
+ // 4. Check for the placeholder.
+ if (!pending_values_.top().is_placeholder()) {
+ return absl_ports::InvalidArgumentError(
+ "Error processing arguments for node.");
+ }
+ pending_values_.pop();
+ pending_property_restricts_.PopRestricts();
+
+ std::set<std::string> property_restricts = {std::move(text_value.term)};
+ pending_values_.push(
+ PendingValue(std::make_unique<DocHitInfoIteratorSectionRestrict>(
+ std::move(delegate), &document_store_, &schema_store_,
+ std::move(property_restricts), current_time_ms_)));
+ return libtextclassifier3::Status::OK;
}
void QueryVisitor::VisitFunctionName(const FunctionNameNode* node) {
@@ -220,24 +659,33 @@ void QueryVisitor::VisitFunctionName(const FunctionNameNode* node) {
}
void QueryVisitor::VisitString(const StringNode* node) {
- auto escaped_string_or = EscapeStringValue(node->value());
- if (!escaped_string_or.ok()) {
- pending_error_ = std::move(escaped_string_or).status();
+ // A STRING node can only be a term. Create the iterator now.
+ auto unescaped_string_or = string_util::UnescapeStringValue(node->value());
+ if (!unescaped_string_or.ok()) {
+ pending_error_ = std::move(unescaped_string_or).status();
return;
}
- features_.insert(kVerbatimSearchFeature);
- std::string escaped_string = std::move(escaped_string_or).ValueOrDie();
- pending_values_.push(PendingValue(std::move(escaped_string)));
+ std::string unescaped_string = std::move(unescaped_string_or).ValueOrDie();
+ QueryTerm val{std::move(unescaped_string), node->raw_value(),
+ node->is_prefix()};
+ pending_values_.push(PendingValue::CreateStringPendingValue(std::move(val)));
}
void QueryVisitor::VisitText(const TextNode* node) {
- // TODO(b/208654892): Add support for 1. segmentation and 2. the prefix
- // prefix operator (*).
- std::string normalized_text = normalizer_.NormalizeTerm(node->value());
- pending_values_.push(PendingValue(std::move(normalized_text)));
+ // TEXT nodes could either be a term (and will become DocHitInfoIteratorTerm)
+ // or a property name. As such, we just push the TEXT value into pending
+ // values and determine which it is at a later point.
+ QueryTerm val{std::move(node->value()), node->raw_value(), node->is_prefix()};
+ pending_values_.push(PendingValue::CreateTextPendingValue(std::move(val)));
}
void QueryVisitor::VisitMember(const MemberNode* node) {
+ if (node->children().empty()) {
+ pending_error_ =
+ absl_ports::InvalidArgumentError("Encountered malformed member node.");
+ return;
+ }
+
// 1. Put in a placeholder PendingValue
pending_values_.push(PendingValue());
@@ -249,75 +697,145 @@ void QueryVisitor::VisitMember(const MemberNode* node) {
}
}
- // 3. The children should have been processed and added their outputs to
- // pending_values_. Time to process them.
- std::string member = std::move(pending_values_.top().text);
- pending_values_.pop();
- while (!pending_values_.empty() && !pending_values_.top().is_placeholder()) {
- member = absl_ports::StrCat(pending_values_.top().text, kPropertySeparator,
- member);
+ // 3. Now process the results of the children and produce a single pending
+ // value representing this member.
+ PendingValue pending_value;
+ if (node->children().size() == 1) {
+ // 3a. This member only has a single child, then the pending value produced
+ // by that child is the final value produced by this member.
+ pending_value = std::move(pending_values_.top());
pending_values_.pop();
+ } else {
+ // 3b. Retrieve the values of all children and concatenate them into a
+ // single value.
+ libtextclassifier3::StatusOr<QueryTerm> member_or;
+ std::vector<std::string> members;
+ QueryTerm text_val;
+ const char* start = nullptr;
+ const char* end = nullptr;
+ while (!pending_values_.empty() &&
+ !pending_values_.top().is_placeholder()) {
+ member_or = PopPendingTextValue();
+ if (!member_or.ok()) {
+ pending_error_ = std::move(member_or).status();
+ return;
+ }
+ text_val = std::move(member_or).ValueOrDie();
+ if (text_val.is_prefix_val) {
+ pending_error_ = absl_ports::InvalidArgumentError(
+ "Cannot use prefix operator '*' within a property name!");
+ return;
+ }
+ if (start == nullptr) {
+ start = text_val.raw_term.data();
+ end = text_val.raw_term.data() + text_val.raw_term.length();
+ } else {
+ start = std::min(start, text_val.raw_term.data());
+ end = std::max(end, text_val.raw_term.data() + text_val.raw_term.length());
+ }
+ members.push_back(std::move(text_val.term));
+ }
+ QueryTerm member;
+ member.term = absl_ports::StrJoin(members.rbegin(), members.rend(),
+ property_util::kPropertyPathSeparator);
+ member.raw_term = std::string_view(start, end - start);
+ member.is_prefix_val = false;
+ pending_value = PendingValue::CreateTextPendingValue(std::move(member));
}
// 4. If pending_values_ is empty somehow, then our placeholder disappeared
// somehow.
if (pending_values_.empty()) {
pending_error_ = absl_ports::InvalidArgumentError(
- "\"<\" operator must have two arguments.");
+ "Error processing arguments for member node.");
return;
}
pending_values_.pop();
- pending_values_.push(PendingValue(std::move(member)));
+ pending_values_.push(std::move(pending_value));
}
void QueryVisitor::VisitFunction(const FunctionNode* node) {
- pending_error_ = absl_ports::UnimplementedError(
- "Function node visiting not implemented yet.");
-}
-
-void QueryVisitor::VisitUnaryOperator(const UnaryOperatorNode* node) {
- if (node->operator_text() != "NOT") {
- pending_error_ = absl_ports::UnimplementedError(
- absl_ports::StrCat("Visiting for unary operator ",
- node->operator_text(), " not implemented yet."));
+ // 1. Get the associated function.
+ auto itr = registered_functions_.find(node->function_name()->value());
+ if (itr == registered_functions_.end()) {
+ pending_error_ = absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Function ", node->function_name()->value(), " is not supported."));
return;
}
- // 1. Put in a placeholder PendingValue
+ // 2. Put in a placeholder PendingValue
pending_values_.push(PendingValue());
- // 2. Visit child
- node->child()->Accept(this);
- if (has_pending_error()) {
- return;
+ // 3. Visit the children.
+ for (const std::unique_ptr<Node>& arg : node->args()) {
+ arg->Accept(this);
+ if (has_pending_error()) {
+ return;
+ }
}
- if (pending_values_.size() < 2) {
- pending_error_ = absl_ports::InvalidArgumentError(
- "Visit unary operator child didn't correctly add pending values.");
+ // 4. Collect the arguments and evaluate the function.
+ std::vector<PendingValue> args;
+ while (!pending_values_.empty() && !pending_values_.top().is_placeholder()) {
+ args.push_back(std::move(pending_values_.top()));
+ pending_values_.pop();
+ }
+ std::reverse(args.begin(), args.end());
+ const Function& function = itr->second;
+ auto eval_result = function.Eval(std::move(args));
+ if (!eval_result.ok()) {
+ pending_error_ = std::move(eval_result).status();
return;
}
- // 3. Retrieve the delegate iterator
- auto iterator_or = RetrieveIterator();
- if (!iterator_or.ok()) {
- pending_error_ = std::move(iterator_or).status();
+ // 5. Pop placeholder in pending_values and add the result of our function.
+ pending_values_.pop();
+ pending_values_.push(std::move(eval_result).ValueOrDie());
+
+ // Support for custom functions was added in list filters.
+ features_.insert(kListFilterQueryLanguageFeature);
+}
+
+// TODO(b/265312785) Clarify handling of the interaction between HAS and NOT.
+// Currently, `prop1:(NOT foo bar)` will not match any documents. Likewise,
+// `search("NOT foo bar", createList("prop1"))` will not match any documents.
+//
+// We should either confirm that this is the desired behavior or consider
+// rewriting these queries so that they're interpreted as
+// `NOT prop1:foo AND prop1:bar` and
+// `NOT search("foo", createList("prop1"))
+// AND search("bar", createList("prop1"))`
+void QueryVisitor::VisitUnaryOperator(const UnaryOperatorNode* node) {
+ bool is_minus = node->operator_text() == "MINUS";
+ if (node->operator_text() != "NOT" && !is_minus) {
+ pending_error_ = absl_ports::UnimplementedError(
+ absl_ports::StrCat("Visiting for unary operator ",
+ node->operator_text(), " not implemented yet."));
return;
}
- std::unique_ptr<DocHitInfoIterator> delegate =
- std::move(iterator_or).ValueOrDie();
- // 4. Check for the placeholder.
- if (!pending_values_.top().is_placeholder()) {
- pending_error_ = absl_ports::InvalidArgumentError(
- "Error processing arguments for node.");
- return;
+ libtextclassifier3::Status status;
+ if (expecting_numeric_arg_ && is_minus) {
+ // If the operator is a MINUS ('-') and we're at the child of a numeric
+ // comparator, then this must be a negation ('-3')
+ status = ProcessNegationOperator(node);
+ } else {
+ status = ProcessNotOperator(node);
}
- pending_values_.pop();
- pending_values_.push(PendingValue(std::make_unique<DocHitInfoIteratorNot>(
- std::move(delegate), document_store_.last_added_document_id())));
+ if (!status.ok()) {
+ pending_error_ = std::move(status);
+ }
+
+ if (!is_minus ||
+ pending_property_restricts_.has_active_property_restricts() ||
+ processing_not_) {
+ // 'NOT' operator was added in list filters.
+ // Likewise, mixing property restricts and NOTs were made valid in list
+ // filters.
+ features_.insert(kListFilterQueryLanguageFeature);
+ }
}
void QueryVisitor::VisitNaryOperator(const NaryOperatorNode* node) {
@@ -327,12 +845,33 @@ void QueryVisitor::VisitNaryOperator(const NaryOperatorNode* node) {
return;
}
+ if (pending_property_restricts_.has_active_property_restricts() ||
+ processing_not_) {
+ // Likewise, mixing property restricts and NOT with compound statements was
+ // added in list filters.
+ features_.insert(kListFilterQueryLanguageFeature);
+ }
+
+ if (node->operator_text() == ":") {
+ libtextclassifier3::Status status = ProcessHasOperator(node);
+ if (!status.ok()) {
+ pending_error_ = std::move(status);
+ }
+ return;
+ } else if (IsNumericComparator(node->operator_text())) {
+ libtextclassifier3::Status status = ProcessNumericComparator(node);
+ if (!status.ok()) {
+ pending_error_ = std::move(status);
+ }
+ return;
+ }
+
// 1. Put in a placeholder PendingValue
pending_values_.push(PendingValue());
// 2. Visit the children.
- for (const std::unique_ptr<Node>& child : node->children()) {
- child->Accept(this);
+ for (int i = 0; i < node->children().size(); ++i) {
+ node->children().at(i)->Accept(this);
if (has_pending_error()) {
return;
}
@@ -340,14 +879,10 @@ void QueryVisitor::VisitNaryOperator(const NaryOperatorNode* node) {
// 3. Retrieve the pending value for this node.
libtextclassifier3::StatusOr<PendingValue> pending_value_or;
- if (IsNumericComparator(node->operator_text())) {
- pending_value_or = ProcessNumericComparator(node);
- } else if (node->operator_text() == "AND") {
+ if (node->operator_text() == "AND") {
pending_value_or = ProcessAndOperator(node);
} else if (node->operator_text() == "OR") {
pending_value_or = ProcessOrOperator(node);
- } else if (node->operator_text() == ":") {
- pending_value_or = ProcessHasOperator(node);
}
if (!pending_value_or.ok()) {
pending_error_ = std::move(pending_value_or).status();
@@ -374,12 +909,15 @@ libtextclassifier3::StatusOr<QueryResults> QueryVisitor::ConsumeResults() && {
return absl_ports::InvalidArgumentError(
"Visitor does not contain a single root iterator.");
}
- auto iterator_or = RetrieveIterator();
+ auto iterator_or = PopPendingIterator();
if (!iterator_or.ok()) {
return std::move(iterator_or).status();
}
+
QueryResults results;
results.root_iterator = std::move(iterator_or).ValueOrDie();
+ results.query_term_iterators = std::move(query_term_iterators_);
+ results.query_terms = std::move(property_query_terms_map_);
results.features_in_use = std::move(features_);
return results;
}
diff --git a/icing/query/advanced_query_parser/query-visitor.h b/icing/query/advanced_query_parser/query-visitor.h
index c6b7d8e..38864f8 100644
--- a/icing/query/advanced_query_parser/query-visitor.h
+++ b/icing/query/advanced_query_parser/query-visitor.h
@@ -20,16 +20,22 @@
#include <stack>
#include <string>
#include <unordered_set>
+#include <vector>
-#include "icing/absl_ports/canonical_errors.h"
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/index/index.h"
+#include "icing/index/iterator/doc-hit-info-iterator-filter.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/numeric/numeric-index.h"
#include "icing/query/advanced_query_parser/abstract-syntax-tree.h"
+#include "icing/query/advanced_query_parser/function.h"
+#include "icing/query/advanced_query_parser/pending-value.h"
#include "icing/query/query-features.h"
#include "icing/query/query-results.h"
#include "icing/schema/schema-store.h"
#include "icing/store/document-store.h"
+#include "icing/tokenization/tokenizer.h"
#include "icing/transform/normalizer.h"
namespace icing {
@@ -44,13 +50,16 @@ class QueryVisitor : public AbstractSyntaxTreeVisitor {
const DocumentStore* document_store,
const SchemaStore* schema_store,
const Normalizer* normalizer,
- TermMatchType::Code match_type)
- : index_(*index),
- numeric_index_(*numeric_index),
- document_store_(*document_store),
- schema_store_(*schema_store),
- normalizer_(*normalizer),
- match_type_(match_type) {}
+ const Tokenizer* tokenizer,
+ std::string_view raw_query_text,
+ DocHitInfoIteratorFilter::Options filter_options,
+ TermMatchType::Code match_type,
+ bool needs_term_frequency_info, int64_t current_time_ms)
+ : QueryVisitor(index, numeric_index, document_store, schema_store,
+ normalizer, tokenizer, raw_query_text, filter_options,
+ match_type, needs_term_frequency_info,
+ PendingPropertyRestricts(),
+ /*processing_not=*/false, current_time_ms) {}
void VisitFunctionName(const FunctionNameNode* node) override;
void VisitString(const StringNode* node) override;
@@ -67,42 +76,92 @@ class QueryVisitor : public AbstractSyntaxTreeVisitor {
libtextclassifier3::StatusOr<QueryResults> ConsumeResults() &&;
private:
- // A holder for intermediate results when processing child nodes.
- struct PendingValue {
- PendingValue() = default;
+ // An internal class to help manage property restricts being applied at
+ // different levels.
+ class PendingPropertyRestricts {
+ public:
+ // Add another set of property restricts. Elements of new_restricts that are
+ // not present in active_property_rest
+ void AddValidRestricts(std::set<std::string> new_restricts);
- explicit PendingValue(std::unique_ptr<DocHitInfoIterator> iterator)
- : iterator(std::move(iterator)) {}
+ // Pops the most recently added set of property restricts.
+ void PopRestricts() {
+ if (has_active_property_restricts()) {
+ pending_property_restricts_.pop_back();
+ }
+ }
- explicit PendingValue(std::string text) : text(std::move(text)) {}
+ bool has_active_property_restricts() const {
+ return !pending_property_restricts_.empty();
+ }
- // Placeholder is used to indicate where the children of a particular node
- // begin.
- bool is_placeholder() const { return iterator == nullptr && text.empty(); }
+ // The set of all property restrictions that are currently being applied.
+ const std::set<std::string>& active_property_restricts() const {
+ return pending_property_restricts_.back();
+ }
- bool holds_text() const { return iterator == nullptr && !text.empty(); }
-
- bool holds_iterator() const { return iterator != nullptr && text.empty(); }
-
- std::unique_ptr<DocHitInfoIterator> iterator;
- std::string text;
+ private:
+ std::vector<std::set<std::string>> pending_property_restricts_;
};
+ explicit QueryVisitor(
+ Index* index, const NumericIndex<int64_t>* numeric_index,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ const Normalizer* normalizer, const Tokenizer* tokenizer,
+ std::string_view raw_query_text,
+ DocHitInfoIteratorFilter::Options filter_options,
+ TermMatchType::Code match_type, bool needs_term_frequency_info,
+ PendingPropertyRestricts pending_property_restricts, bool processing_not,
+ int64_t current_time_ms)
+ : index_(*index),
+ numeric_index_(*numeric_index),
+ document_store_(*document_store),
+ schema_store_(*schema_store),
+ normalizer_(*normalizer),
+ tokenizer_(*tokenizer),
+ raw_query_text_(raw_query_text),
+ filter_options_(std::move(filter_options)),
+ match_type_(match_type),
+ needs_term_frequency_info_(needs_term_frequency_info),
+ pending_property_restricts_(std::move(pending_property_restricts)),
+ processing_not_(processing_not),
+ expecting_numeric_arg_(false),
+ current_time_ms_(current_time_ms) {
+ RegisterFunctions();
+ }
+
bool has_pending_error() const { return !pending_error_.ok(); }
+ // Creates a DocHitInfoIterator reflecting the provided term and whether the
+ // prefix operator has been applied to this term. Also populates,
+ // property_query_terms_map_ and query_term_iterators_ as appropriate.
+ // Returns:
+ // - On success, a DocHitInfoIterator for the provided term
+ // - INVALID_ARGUMENT if unable to create an iterator for the term.
+ libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
+ CreateTermIterator(const QueryTerm& term);
+
// Processes the PendingValue at the top of pending_values_, parses it into a
// int64_t and pops the top.
// Returns:
// - On success, the int value stored in the text at the top
// - INVALID_ARGUMENT if pending_values_ is empty, doesn't hold a text or
// can't be parsed as an int.
- libtextclassifier3::StatusOr<int64_t> RetrieveIntValue();
+ libtextclassifier3::StatusOr<int64_t> PopPendingIntValue();
+
+ // Processes the PendingValue at the top of pending_values_ and pops the top.
+ // Returns:
+ // - On success, the string value stored in the text at the top and a bool
+ // indicating whether or not the string value has a prefix operator.
+ // - INVALID_ARGUMENT if pending_values_ is empty or doesn't hold a string.
+ libtextclassifier3::StatusOr<QueryTerm> PopPendingStringValue();
// Processes the PendingValue at the top of pending_values_ and pops the top.
// Returns:
// - On success, the string value stored in the text at the top
+ // indicating whether or not the string value has a prefix operator.
// - INVALID_ARGUMENT if pending_values_ is empty or doesn't hold a text.
- libtextclassifier3::StatusOr<std::string> RetrieveStringValue();
+ libtextclassifier3::StatusOr<QueryTerm> PopPendingTextValue();
// Processes the PendingValue at the top of pending_values_ and pops the top.
// Returns:
@@ -110,7 +169,7 @@ class QueryVisitor : public AbstractSyntaxTreeVisitor {
// - INVALID_ARGUMENT if pending_values_ is empty or if unable to create an
// iterator for the term.
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
- RetrieveIterator();
+ PopPendingIterator();
// Processes all PendingValues at the top of pending_values_ until the first
// placeholder is encounter.
@@ -120,17 +179,37 @@ class QueryVisitor : public AbstractSyntaxTreeVisitor {
// - INVALID_ARGUMENT if pending_values_is empty or if unable to create an
// iterator for any of the terms at the top of pending_values_
libtextclassifier3::StatusOr<std::vector<std::unique_ptr<DocHitInfoIterator>>>
- RetrieveIterators();
+ PopAllPendingIterators();
+
+ // Processes the unary operator node as a NOT operator. A NOT can have an
+ // operator type of "NOT" or "MINUS"
+ //
+ // RETURNS:
+ // - OK on success
+ // - INVALID_ARGUMENT if any errors are encountered while processing
+ // node->child
+ libtextclassifier3::Status ProcessNotOperator(const UnaryOperatorNode* node);
+
+ // Processes the unary operator node as a negation operator. A negation
+ // operator should have an operator of type "MINUS" and it's children must
+ // resolve to a numeric value.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INVALID_ARGUMENT if the node->child can't be resolved to a numeric
+ // value.
+ libtextclassifier3::Status ProcessNegationOperator(
+ const UnaryOperatorNode* node);
// Processes the NumericComparator represented by node. This must be called
// *after* this node's children have been visited. The PendingValues added by
// this node's children will be consumed by this function and the PendingValue
// for this node will be returned.
// Returns:
- // - On success, then PendingValue representing this node and it's children.
+ // - On success, OK
// - INVALID_ARGUMENT if unable to retrieve string value or int value
// - NOT_FOUND if there is no entry in the numeric index for the property
- libtextclassifier3::StatusOr<PendingValue> ProcessNumericComparator(
+ libtextclassifier3::Status ProcessNumericComparator(
const NaryOperatorNode* node);
// Processes the AND and OR operators represented by the node. This must be
@@ -155,20 +234,54 @@ class QueryVisitor : public AbstractSyntaxTreeVisitor {
libtextclassifier3::StatusOr<PendingValue> ProcessOrOperator(
const NaryOperatorNode* node);
- // Processes the HAS operator represented by the node. This must be called
- // *after* this node's children have been visited. The PendingValues added by
- // this node's children will be consumed by this function and the PendingValue
- // for this node will be returned.
+ // Populates registered_functions with the currently supported set of
+ // functions.
+ void RegisterFunctions();
+
+ // Implementation of `search` custom function in the query language.
// Returns:
- // - On success, then PendingValue representing this node and it's children.
- // - INVALID_ARGUMENT if unable to properly retrieve an iterator
- // representing the second child
- libtextclassifier3::StatusOr<PendingValue> ProcessHasOperator(
- const NaryOperatorNode* node);
+ // - a PendingValue holding the DocHitInfoIterator reflecting the query
+ // provided to SearchFunction
+ // - any errors returned by Lexer::ExtractTokens, Parser::ConsumeQuery or
+ // QueryVisitor::ConsumeResults.
+ libtextclassifier3::StatusOr<PendingValue> SearchFunction(
+ std::vector<PendingValue>&& args);
+
+ // Implementation of the propertyDefined(member) custom function.
+ // Returns:
+ // - a Pending Value holding a DocHitIterator to be implemented.
+ // - any errors returned by Lexer::ExtractTokens
+ libtextclassifier3::StatusOr<PendingValue> PropertyDefinedFunction(
+ std::vector<PendingValue>&& args);
+
+ // Handles a NaryOperatorNode where the operator is HAS (':') and pushes an
+ // iterator with the proper section filter applied. If the current property
+ // restriction represented by pending_property_restricts and the first child
+ // of this node is unsatisfiable (ex. `prop1:(prop2:foo)`), then a NONE
+ // iterator is returned immediately and subtree represented by the second
+ // child is not traversed.
+ //
+ // Returns:
+ // - OK on success
+ // - INVALID_ARGUMENT node does not have exactly two children or the two
+ // children cannot be resolved to a MEMBER or an iterator respectively.
+ libtextclassifier3::Status ProcessHasOperator(const NaryOperatorNode* node);
+
+ // Returns the correct match type to apply based on both the match type and
+ // whether the prefix operator is currently present.
+ TermMatchType::Code GetTermMatchType(bool is_prefix) const {
+ return (is_prefix) ? TermMatchType::PREFIX : match_type_;
+ }
std::stack<PendingValue> pending_values_;
libtextclassifier3::Status pending_error_;
+ // A map from function name to Function instance.
+ std::unordered_map<std::string, Function> registered_functions_;
+
+ SectionRestrictQueryTermsMap property_query_terms_map_;
+
+ QueryTermIteratorsMap query_term_iterators_;
// Set of features invoked in the query.
std::unordered_set<Feature> features_;
@@ -177,8 +290,25 @@ class QueryVisitor : public AbstractSyntaxTreeVisitor {
const DocumentStore& document_store_; // Does not own!
const SchemaStore& schema_store_; // Does not own!
const Normalizer& normalizer_; // Does not own!
+ const Tokenizer& tokenizer_; // Does not own!
+ std::string_view raw_query_text_;
+ DocHitInfoIteratorFilter::Options filter_options_;
TermMatchType::Code match_type_;
+ // Whether or not term_frequency information is needed. This affects:
+ // - how DocHitInfoIteratorTerms are constructed
+ // - whether the QueryTermIteratorsMap is populated in the QueryResults.
+ bool needs_term_frequency_info_;
+
+ // The stack of property restricts currently being processed by the visitor.
+ PendingPropertyRestricts pending_property_restricts_;
+ bool processing_not_;
+
+ // Whether we are in the midst of processing a subtree that is expected to
+ // resolve to a numeric argument.
+ bool expecting_numeric_arg_;
+
+ int64_t current_time_ms_;
};
} // namespace lib
diff --git a/icing/query/advanced_query_parser/query-visitor_test.cc b/icing/query/advanced_query_parser/query-visitor_test.cc
index 2b5117b..59e924d 100644
--- a/icing/query/advanced_query_parser/query-visitor_test.cc
+++ b/icing/query/advanced_query_parser/query-visitor_test.cc
@@ -17,27 +17,37 @@
#include <cstdint>
#include <limits>
#include <memory>
+#include <string_view>
+#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/document-builder.h"
#include "icing/index/index.h"
#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/numeric/dummy-numeric-index.h"
#include "icing/index/numeric/numeric-index.h"
+#include "icing/jni/jni-cache.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/portable/platform.h"
#include "icing/query/advanced_query_parser/abstract-syntax-tree.h"
#include "icing/query/advanced_query_parser/lexer.h"
#include "icing/query/advanced_query_parser/parser.h"
-#include "icing/schema-builder.h"
#include "icing/query/query-features.h"
+#include "icing/schema-builder.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/tokenization/tokenizer-factory.h"
+#include "icing/tokenization/tokenizer.h"
#include "icing/transform/normalizer-factory.h"
#include "icing/transform/normalizer.h"
+#include "unicode/uloc.h"
namespace icing {
namespace lib {
@@ -46,6 +56,7 @@ namespace {
using ::testing::ElementsAre;
using ::testing::IsEmpty;
+using ::testing::UnorderedElementsAre;
constexpr DocumentId kDocumentId0 = 0;
constexpr DocumentId kDocumentId1 = 1;
@@ -55,11 +66,27 @@ constexpr SectionId kSectionId0 = 0;
constexpr SectionId kSectionId1 = 1;
constexpr SectionId kSectionId2 = 2;
-class QueryVisitorTest : public ::testing::Test {
+template <typename T, typename U>
+std::vector<T> ExtractKeys(const std::unordered_map<T, U>& map) {
+ std::vector<T> keys;
+ keys.reserve(map.size());
+ for (const auto& [key, value] : map) {
+ keys.push_back(key);
+ }
+ return keys;
+}
+
+enum class QueryType {
+ kPlain,
+ kSearch,
+};
+
+class QueryVisitorTest : public ::testing::TestWithParam<QueryType> {
protected:
void SetUp() override {
test_dir_ = GetTestTempDir() + "/icing";
index_dir_ = test_dir_ + "/index";
+ numeric_index_dir_ = test_dir_ + "/numeric_index";
store_dir_ = test_dir_ + "/store";
schema_store_dir_ = test_dir_ + "/schema_store";
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
@@ -67,6 +94,8 @@ class QueryVisitorTest : public ::testing::Test {
filesystem_.CreateDirectoryRecursively(store_dir_.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+ jni_cache_ = GetTestJniCache();
+
if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
// If we've specified using the reverse-JNI method for segmentation (i.e.
// not ICU), then we won't have the ICU data file included to set up.
@@ -85,19 +114,40 @@ class QueryVisitorTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, store_dir_, &clock_,
- schema_store_.get()));
+ DocumentStore::Create(
+ &filesystem_, store_dir_, &clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
Index::Options options(index_dir_.c_str(),
- /*index_merge_size=*/1024 * 1024);
+ /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
ICING_ASSERT_OK_AND_ASSIGN(
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
- numeric_index_ = std::make_unique<DummyNumericIndex<int64_t>>();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ numeric_index_,
+ DummyNumericIndex<int64_t>::Create(filesystem_, numeric_index_dir_));
ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
/*max_term_byte_size=*/1000));
+
+ language_segmenter_factory::SegmenterOptions segmenter_options(
+ ULOC_US, jni_cache_.get());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ language_segmenter_,
+ language_segmenter_factory::Create(segmenter_options));
+
+ ICING_ASSERT_OK_AND_ASSIGN(tokenizer_,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ language_segmenter_.get()));
}
libtextclassifier3::StatusOr<std::unique_ptr<Node>> ParseQueryHelper(
@@ -109,10 +159,47 @@ class QueryVisitorTest : public ::testing::Test {
return parser.ConsumeQuery();
}
+ std::string EscapeString(std::string_view str) {
+ std::string result;
+ result.reserve(str.size());
+ for (char c : str) {
+ if (c == '\\' || c == '"') {
+ result.push_back('\\');
+ }
+ result.push_back(c);
+ }
+ return result;
+ }
+
+ std::string CreateQuery(std::string query,
+ std::string property_restrict = "") {
+ switch (GetParam()) {
+ case QueryType::kPlain:
+ if (property_restrict.empty()) {
+ // CreateQuery("foo bar") returns `foo bar`
+ return query;
+ }
+ // CreateQuery("foo", "subject") returns `subject:foo`
+ return absl_ports::StrCat(property_restrict, ":", query);
+ case QueryType::kSearch:
+ query = EscapeString(query);
+ property_restrict = EscapeString(property_restrict);
+ if (property_restrict.empty()) {
+ // CreateQuery("foo bar") returns `search("foo bar")`
+ return absl_ports::StrCat("search(\"", query, "\")");
+ }
+ // CreateQuery("foo", "subject") returns
+ // `search("foo bar", createList("subject"))`
+ return absl_ports::StrCat("search(\"", query, "\", createList(\"",
+ property_restrict, "\"))");
+ }
+ }
+
Filesystem filesystem_;
IcingFilesystem icing_filesystem_;
std::string test_dir_;
std::string index_dir_;
+ std::string numeric_index_dir_;
std::string schema_store_dir_;
std::string store_dir_;
Clock clock_;
@@ -121,333 +208,443 @@ class QueryVisitorTest : public ::testing::Test {
std::unique_ptr<Index> index_;
std::unique_ptr<DummyNumericIndex<int64_t>> numeric_index_;
std::unique_ptr<Normalizer> normalizer_;
+ std::unique_ptr<LanguageSegmenter> language_segmenter_;
+ std::unique_ptr<Tokenizer> tokenizer_;
+ std::unique_ptr<const JniCache> jni_cache_;
};
-TEST_F(QueryVisitorTest, SimpleLessThan) {
+TEST_P(QueryVisitorTest, SimpleLessThan) {
// Setup the numeric index with docs 0, 1 and 2 holding the values 0, 1 and 2
// respectively.
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
editor->BufferKey(0);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
editor->BufferKey(1);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
editor->BufferKey(2);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
- std::string query = "price < 2";
+ std::string query = CreateQuery("price < 2");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kNumericSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
+ // "price" is a property restrict here and "2" isn't a "term" - its a numeric
+ // value. So QueryTermIterators should be empty.
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+ EXPECT_THAT(query_results.query_terms, IsEmpty());
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId1, kDocumentId0));
}
-TEST_F(QueryVisitorTest, SimpleLessThanEq) {
+TEST_P(QueryVisitorTest, SimpleLessThanEq) {
// Setup the numeric index with docs 0, 1 and 2 holding the values 0, 1 and 2
// respectively.
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
editor->BufferKey(0);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
editor->BufferKey(1);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
editor->BufferKey(2);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
- std::string query = "price <= 1";
+ std::string query = CreateQuery("price <= 1");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kNumericSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
+ // "price" is a property restrict here and "1" isn't a "term" - its a numeric
+ // value. So QueryTermIterators should be empty.
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+ EXPECT_THAT(query_results.query_terms, IsEmpty());
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId1, kDocumentId0));
}
-TEST_F(QueryVisitorTest, SimpleEqual) {
+TEST_P(QueryVisitorTest, SimpleEqual) {
// Setup the numeric index with docs 0, 1 and 2 holding the values 0, 1 and 2
// respectively.
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
editor->BufferKey(0);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
editor->BufferKey(1);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
editor->BufferKey(2);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
- std::string query = "price == 2";
+ std::string query = CreateQuery("price == 2");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kNumericSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
+ // "price" is a property restrict here and "2" isn't a "term" - its a numeric
+ // value. So QueryTermIterators should be empty.
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+ EXPECT_THAT(query_results.query_terms, IsEmpty());
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId2));
}
-TEST_F(QueryVisitorTest, SimpleGreaterThanEq) {
+TEST_P(QueryVisitorTest, SimpleGreaterThanEq) {
// Setup the numeric index with docs 0, 1 and 2 holding the values 0, 1 and 2
// respectively.
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
editor->BufferKey(0);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
editor->BufferKey(1);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
editor->BufferKey(2);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
- std::string query = "price >= 1";
+ std::string query = CreateQuery("price >= 1");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kNumericSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
+ // "price" is a property restrict here and "1" isn't a "term" - its a numeric
+ // value. So QueryTermIterators should be empty.
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+ EXPECT_THAT(query_results.query_terms, IsEmpty());
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId2, kDocumentId1));
}
-TEST_F(QueryVisitorTest, SimpleGreaterThan) {
+TEST_P(QueryVisitorTest, SimpleGreaterThan) {
// Setup the numeric index with docs 0, 1 and 2 holding the values 0, 1 and 2
// respectively.
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
editor->BufferKey(0);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
editor->BufferKey(1);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
editor->BufferKey(2);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
- std::string query = "price > 1";
+ std::string query = CreateQuery("price > 1");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kNumericSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
+ // "price" is a property restrict here and "1" isn't a "term" - its a numeric
+ // value. So QueryTermIterators should be empty.
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+ EXPECT_THAT(query_results.query_terms, IsEmpty());
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId2));
}
-// TODO(b/208654892) Properly handle negative numbers in query expressions.
-TEST_F(QueryVisitorTest, DISABLED_IntMinLessThanEqual) {
+TEST_P(QueryVisitorTest, IntMinLessThanEqual) {
// Setup the numeric index with docs 0, 1 and 2 holding the values INT_MIN,
// INT_MAX and INT_MIN + 1 respectively.
int64_t int_min = std::numeric_limits<int64_t>::min();
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
editor->BufferKey(int_min);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
editor->BufferKey(std::numeric_limits<int64_t>::max());
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
editor->BufferKey(int_min + 1);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
- std::string query = "price <= " + std::to_string(int_min);
+ std::string query = CreateQuery("price <= " + std::to_string(int_min));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kNumericSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
+ // "price" is a property restrict here and int_min isn't a "term" - its a
+ // numeric value. So QueryTermIterators should be empty.
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+ EXPECT_THAT(query_results.query_terms, IsEmpty());
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId0));
}
-TEST_F(QueryVisitorTest, IntMaxGreaterThanEqual) {
+TEST_P(QueryVisitorTest, IntMaxGreaterThanEqual) {
// Setup the numeric index with docs 0, 1 and 2 holding the values INT_MIN,
// INT_MAX and INT_MAX - 1 respectively.
int64_t int_max = std::numeric_limits<int64_t>::max();
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
editor->BufferKey(std::numeric_limits<int64_t>::min());
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
editor->BufferKey(int_max);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
editor->BufferKey(int_max - 1);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
- std::string query = "price >= " + std::to_string(int_max);
+ std::string query = CreateQuery("price >= " + std::to_string(int_max));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kNumericSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
+ // "price" is a property restrict here and int_max isn't a "term" - its a
+ // numeric value. So QueryTermIterators should be empty.
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+ EXPECT_THAT(query_results.query_terms, IsEmpty());
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId1));
}
-TEST_F(QueryVisitorTest, NestedPropertyLessThan) {
+TEST_P(QueryVisitorTest, NestedPropertyLessThan) {
// Setup the numeric index with docs 0, 1 and 2 holding the values 0, 1 and 2
// respectively.
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("subscription.price", kDocumentId0, kSectionId0);
editor->BufferKey(0);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor =
numeric_index_->Edit("subscription.price", kDocumentId1, kSectionId1);
editor->BufferKey(1);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor =
numeric_index_->Edit("subscription.price", kDocumentId2, kSectionId2);
editor->BufferKey(2);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
- std::string query = "subscription.price < 2";
+ std::string query = CreateQuery("subscription.price < 2");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kNumericSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
+ // "subscription.price" is a property restrict here and int_max isn't a "term"
+ // - its a numeric value. So QueryTermIterators should be empty.
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+ EXPECT_THAT(query_results.query_terms, IsEmpty());
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId1, kDocumentId0));
}
-TEST_F(QueryVisitorTest, IntParsingError) {
- std::string query = "subscription.price < fruit";
+TEST_P(QueryVisitorTest, IntParsingError) {
+ std::string query = CreateQuery("subscription.price < fruit");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(QueryVisitorTest, NotEqualsUnsupported) {
- std::string query = "subscription.price != 3";
+TEST_P(QueryVisitorTest, NotEqualsUnsupported) {
+ std::string query = CreateQuery("subscription.price != 3");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
StatusIs(libtextclassifier3::StatusCode::UNIMPLEMENTED));
}
-TEST_F(QueryVisitorTest, LessThanTooManyOperandsInvalid) {
+TEST_P(QueryVisitorTest, LessThanTooManyOperandsInvalid) {
// Setup the numeric index with docs 0, 1 and 2 holding the values 0, 1 and 2
// respectively.
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("subscription.price", kDocumentId0, kSectionId0);
editor->BufferKey(0);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor =
numeric_index_->Edit("subscription.price", kDocumentId1, kSectionId1);
editor->BufferKey(1);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor =
numeric_index_->Edit("subscription.price", kDocumentId2, kSectionId2);
editor->BufferKey(2);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
// Create an invalid AST for the query '3 < subscription.price 25' where '<'
// has three operands
- auto property_node = std::make_unique<TextNode>("subscription");
- auto subproperty_node = std::make_unique<TextNode>("price");
+ std::string_view query = "3 < subscription.price 25";
+ auto property_node =
+ std::make_unique<TextNode>("subscription", query.substr(4, 12));
+ auto subproperty_node =
+ std::make_unique<TextNode>("price", query.substr(17, 5));
std::vector<std::unique_ptr<TextNode>> member_args;
member_args.push_back(std::move(property_node));
member_args.push_back(std::move(subproperty_node));
auto member_node = std::make_unique<MemberNode>(std::move(member_args),
/*function=*/nullptr);
- auto value_node = std::make_unique<TextNode>("3");
- auto extra_value_node = std::make_unique<TextNode>("25");
+ auto value_node = std::make_unique<TextNode>("3", query.substr(0, 1));
+ auto extra_value_node = std::make_unique<TextNode>("25", query.substr(23, 2));
std::vector<std::unique_ptr<Node>> args;
args.push_back(std::move(value_node));
args.push_back(std::move(member_node));
args.push_back(std::move(extra_value_node));
auto root_node = std::make_unique<NaryOperatorNode>("<", std::move(args));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(QueryVisitorTest, LessThanTooFewOperandsInvalid) {
+TEST_P(QueryVisitorTest, LessThanTooFewOperandsInvalid) {
// Create an invalid AST for the query 'subscription.price <' where '<'
// has a single operand
- auto property_node = std::make_unique<TextNode>("subscription");
- auto subproperty_node = std::make_unique<TextNode>("price");
+ std::string_view query = "subscription.price <";
+ auto property_node =
+ std::make_unique<TextNode>("subscription", query.substr(0, 12));
+ auto subproperty_node =
+ std::make_unique<TextNode>("price", query.substr(13, 5));
std::vector<std::unique_ptr<TextNode>> member_args;
member_args.push_back(std::move(property_node));
member_args.push_back(std::move(subproperty_node));
@@ -457,111 +654,323 @@ TEST_F(QueryVisitorTest, LessThanTooFewOperandsInvalid) {
std::vector<std::unique_ptr<Node>> args;
args.push_back(std::move(member_node));
auto root_node = std::make_unique<NaryOperatorNode>("<", std::move(args));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(QueryVisitorTest, LessThanNonExistentPropertyNotFound) {
+TEST_P(QueryVisitorTest, LessThanNonExistentPropertyNotFound) {
// Setup the numeric index with docs 0, 1 and 2 holding the values 0, 1 and 2
// respectively.
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("subscription.price", kDocumentId0, kSectionId0);
editor->BufferKey(0);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor =
numeric_index_->Edit("subscription.price", kDocumentId1, kSectionId1);
editor->BufferKey(1);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor =
numeric_index_->Edit("subscription.price", kDocumentId2, kSectionId2);
editor->BufferKey(2);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
- // Create an invalid AST for the query 'time < 25' where '<'
- // has three operands
- std::string query = "time < 25";
+ std::string query = CreateQuery("time < 25");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
- EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+ EXPECT_THAT(query_results.query_terms, IsEmpty());
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), IsEmpty());
}
-TEST_F(QueryVisitorTest, NeverVisitedReturnsInvalid) {
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+TEST_P(QueryVisitorTest, NeverVisitedReturnsInvalid) {
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), "",
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-// TODO(b/208654892) Properly handle negative numbers in query expressions.
-TEST_F(QueryVisitorTest, DISABLED_IntMinLessThanInvalid) {
+TEST_P(QueryVisitorTest, IntMinLessThanInvalid) {
// Setup the numeric index with docs 0, 1 and 2 holding the values INT_MIN,
// INT_MAX and INT_MIN + 1 respectively.
int64_t int_min = std::numeric_limits<int64_t>::min();
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
editor->BufferKey(int_min);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
editor->BufferKey(std::numeric_limits<int64_t>::max());
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
editor->BufferKey(int_min + 1);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
- std::string query = "price <" + std::to_string(int_min);
+ std::string query = CreateQuery("price <" + std::to_string(int_min));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(QueryVisitorTest, IntMaxGreaterThanInvalid) {
+TEST_P(QueryVisitorTest, IntMaxGreaterThanInvalid) {
// Setup the numeric index with docs 0, 1 and 2 holding the values INT_MIN,
// INT_MAX and INT_MAX - 1 respectively.
int64_t int_max = std::numeric_limits<int64_t>::max();
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
editor->BufferKey(std::numeric_limits<int64_t>::min());
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
editor->BufferKey(int_max);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
editor->BufferKey(int_max - 1);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
+
+ std::string query = CreateQuery("price >" + std::to_string(int_max));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
- std::string query = "price >" + std::to_string(int_max);
+TEST_P(QueryVisitorTest, NumericComparisonPropertyStringIsInvalid) {
+ // "price" is a STRING token, which cannot be a property name.
+ std::string query = CreateQuery(R"("price" > 7)");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(QueryVisitorTest, SingleTerm) {
+TEST_P(QueryVisitorTest, NumericComparatorDoesntAffectLaterTerms) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("type"))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Index three documents:
+ // - Doc0: ["-2", "-1", "1", "2"] and [-2, -1, 1, 2]
+ // - Doc1: [-1]
+ // - Doc2: ["2"] and [-1]
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
+ numeric_index_->Edit("price", kDocumentId0, kSectionId0);
+ editor->BufferKey(-2);
+ editor->BufferKey(-1);
+ editor->BufferKey(1);
+ editor->BufferKey(2);
+ std::move(*editor).IndexAllBufferedKeys();
+ Index::Editor term_editor = index_->Edit(
+ kDocumentId0, kSectionId1, TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ term_editor.BufferTerm("-2");
+ term_editor.BufferTerm("-1");
+ term_editor.BufferTerm("1");
+ term_editor.BufferTerm("2");
+ term_editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = numeric_index_->Edit("price", kDocumentId1, kSectionId0);
+ editor->BufferKey(-1);
+ std::move(*editor).IndexAllBufferedKeys();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = numeric_index_->Edit("price", kDocumentId2, kSectionId0);
+ editor->BufferKey(-1);
+ std::move(*editor).IndexAllBufferedKeys();
+ term_editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ term_editor.BufferTerm("2");
+ term_editor.IndexAllBufferedTerms();
+
+ // Translating MINUS chars that are interpreted as NOTs, this query would be
+ // `price == -1 AND NOT 2`
+ // All documents should match `price == -1`
+ // Both docs 0 and 2 should be excluded because of the `NOT 2` clause
+ // doc0 has both a text and number entry for `-2`, neither of which should
+ // match.
+ std::string query = CreateQuery("price == -1 -2");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+ EXPECT_THAT(query_results.query_terms, IsEmpty());
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1));
+}
+
+TEST_P(QueryVisitorTest, SingleTermTermFrequencyEnabled) {
+ // Setup the index with docs 0, 1 and 2 holding the values "foo", "foo" and
+ // "bar" respectively.
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("bar");
+ editor.IndexAllBufferedTerms();
+
+ std::string query = CreateQuery("foo");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+
+ ASSERT_THAT(query_results.root_iterator->Advance(), IsOk());
+ std::vector<TermMatchInfo> match_infos;
+ query_results.root_iterator->PopulateMatchedTermsStats(&match_infos);
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{kSectionId1, 1}};
+ EXPECT_THAT(match_infos, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map)));
+
+ ASSERT_THAT(query_results.root_iterator->Advance(), IsOk());
+ match_infos.clear();
+ query_results.root_iterator->PopulateMatchedTermsStats(&match_infos);
+ EXPECT_THAT(match_infos, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map)));
+
+ EXPECT_THAT(query_results.root_iterator->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+}
+
+TEST_P(QueryVisitorTest, SingleTermTermFrequencyDisabled) {
+ // Setup the index with docs 0, 1 and 2 holding the values "foo", "foo" and
+ // "bar" respectively.
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("bar");
+ editor.IndexAllBufferedTerms();
+
+ std::string query = CreateQuery("foo");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/false, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+
+ ASSERT_THAT(query_results.root_iterator->Advance(), IsOk());
+ std::vector<TermMatchInfo> match_infos;
+ query_results.root_iterator->PopulateMatchedTermsStats(&match_infos);
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{kSectionId1, 0}};
+ EXPECT_THAT(match_infos, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map)));
+
+ ASSERT_THAT(query_results.root_iterator->Advance(), IsOk());
+ match_infos.clear();
+ query_results.root_iterator->PopulateMatchedTermsStats(&match_infos);
+ EXPECT_THAT(match_infos, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map)));
+
+ EXPECT_THAT(query_results.root_iterator->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+}
+
+TEST_P(QueryVisitorTest, SingleTermPrefix) {
// Setup the index with docs 0, 1 and 2 holding the values "foo", "foo" and
// "bar" respectively.
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
@@ -579,20 +988,152 @@ TEST_F(QueryVisitorTest, SingleTerm) {
editor.BufferTerm("bar");
editor.IndexAllBufferedTerms();
- std::string query = "foo";
+ // An EXACT query for 'fo' won't match anything.
+ std::string query = CreateQuery("fo");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_EXACT,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""], UnorderedElementsAre("fo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("fo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), IsEmpty());
+
+ query = CreateQuery("fo*");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_EXACT,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_two);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_two).ConsumeResults());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""], UnorderedElementsAre("fo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("fo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1, kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest, PrefixOperatorAfterPropertyReturnsInvalid) {
+ std::string query = "price* < 2";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(QueryVisitorTest, PrefixOperatorAfterNumericValueReturnsInvalid) {
+ std::string query = "price < 2*";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(QueryVisitorTest, PrefixOperatorAfterPropertyRestrictReturnsInvalid) {
+ std::string query = "subject*:foo";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(QueryVisitorTest, SegmentationWithPrefix) {
+ // Setup the index with docs 0, 1 and 2 holding the values ["foo", "ba"],
+ // ["foo", "ba"] and ["bar", "fo"] respectively.
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.BufferTerm("ba");
+ editor.IndexAllBufferedTerms();
+
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.BufferTerm("ba");
+ editor.IndexAllBufferedTerms();
+
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("bar");
+ editor.BufferTerm("fo");
+ editor.IndexAllBufferedTerms();
+
+ // An EXACT query for `ba?fo` will be lexed into a single TEXT token.
+ // The visitor will tokenize it into `ba` and `fo` (`?` is dropped because it
+ // is punctuation). Each document will match one and only one of these exact
+ // tokens. Therefore, nothing will match this query.
+ std::string query = CreateQuery("ba?fo");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_EXACT,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""], UnorderedElementsAre("ba", "fo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("ba", "fo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), IsEmpty());
+
+ // An EXACT query for `ba?fo*` will be lexed into a TEXT token and a TIMES
+ // token.
+ // The visitor will tokenize the TEXT into `ba` and `fo` (`?` is dropped
+ // because it is punctuation). The prefix operator should only apply to the
+ // final token `fo`. This will cause matches with docs 0 and 1 which contain
+ // "ba" and "foo". doc2 will not match because "ba" does not exactly match
+ // either "bar" or "fo".
+ query = CreateQuery("ba?fo*");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_EXACT,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_two);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_two).ConsumeResults());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""], UnorderedElementsAre("ba", "fo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("ba", "fo"));
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId1, kDocumentId0));
}
-TEST_F(QueryVisitorTest, SingleVerbatimTerm) {
+TEST_P(QueryVisitorTest, SingleVerbatimTerm) {
// Setup the index with docs 0, 1 and 2 holding the values "foo:bar(baz)",
// "foo:bar(baz)" and "bar:baz(foo)" respectively.
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
@@ -610,17 +1151,71 @@ TEST_F(QueryVisitorTest, SingleVerbatimTerm) {
editor.BufferTerm("bar:baz(foo)");
editor.IndexAllBufferedTerms();
- std::string query = "\"foo:bar(baz)\"";
+ std::string query = CreateQuery("\"foo:bar(baz)\"");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature));
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("foo:bar(baz)"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo:bar(baz)"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1, kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest, SingleVerbatimTermPrefix) {
+ // Setup the index with docs 0, 1 and 2 holding the values "foo:bar(baz)",
+ // "foo:bar(abc)" and "bar:baz(foo)" respectively.
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ editor.BufferTerm("foo:bar(baz)");
+ editor.IndexAllBufferedTerms();
+
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo:bar(abc)");
+ editor.IndexAllBufferedTerms();
+
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("bar:baz(foo)");
+ editor.IndexAllBufferedTerms();
+
+ // Query for `"foo:bar("*`. This should match docs 0 and 1.
+ std::string query = CreateQuery("\"foo:bar(\"*");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_EXACT,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kVerbatimSearchFeature));
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""], UnorderedElementsAre("foo:bar("));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo:bar("));
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId1, kDocumentId0));
}
@@ -636,7 +1231,7 @@ TEST_F(QueryVisitorTest, SingleVerbatimTerm) {
// verbatim term?
// Example: verbatim_term = `foobar"`
// Answer: quote char must be escaped. verbatim_query = `foobar\"`
-TEST_F(QueryVisitorTest, VerbatimTermEscapingQuote) {
+TEST_P(QueryVisitorTest, VerbatimTermEscapingQuote) {
// Setup the index with docs 0, 1 and 2 holding the values "foobary",
// "foobar\" and "foobar"" respectively.
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
@@ -656,17 +1251,30 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingQuote) {
// From the comment above, verbatim_term = `foobar"` and verbatim_query =
// `foobar\"`
- std::string query = R"("foobar\"")";
+ std::string query = CreateQuery(R"(("foobar\""))");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kVerbatimSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature));
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre(R"(foobar")"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre(R"(foobar")"));
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId2));
}
@@ -675,7 +1283,7 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingQuote) {
// end of the verbatim term
// Example: verbatim_term = `foobar\`
// Answer: escape chars can be escaped. verbatim_query = `foobar\\`
-TEST_F(QueryVisitorTest, VerbatimTermEscapingEscape) {
+TEST_P(QueryVisitorTest, VerbatimTermEscapingEscape) {
// Setup the index with docs 0, 1 and 2 holding the values "foobary",
// "foobar\" and "foobar"" respectively.
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
@@ -695,17 +1303,30 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingEscape) {
editor.IndexAllBufferedTerms();
// Issue a query for the verbatim token `foobar\`.
- std::string query = R"("foobar\\")";
+ std::string query = CreateQuery(R"(("foobar\\"))");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kVerbatimSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature));
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre(R"(foobar\)"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre(R"(foobar\)"));
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId1));
}
@@ -716,7 +1337,7 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingEscape) {
// in, consume the escape char and add the char like we do for the
// quote char). So the above query would match the verbatim_term
// `foobary`.
-TEST_F(QueryVisitorTest, VerbatimTermEscapingNonSpecialChar) {
+TEST_P(QueryVisitorTest, VerbatimTermEscapingNonSpecialChar) {
// Setup the index with docs 0, 1 and 2 holding the values "foobary",
// "foobar\" and "foobar"" respectively.
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
@@ -736,31 +1357,57 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingNonSpecialChar) {
editor.IndexAllBufferedTerms();
// Issue a query for the verbatim token `foobary`.
- std::string query = R"("foobar\y")";
+ std::string query = CreateQuery(R"(("foobar\y"))");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kVerbatimSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature));
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre(R"(foobary)"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre(R"(foobary)"));
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId0));
// Issue a query for the verbatim token `foobar\y`.
- query = R"("foobar\\y")";
+ query = CreateQuery(R"(("foobar\\y"))");
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
- QueryVisitor query_visitor_two(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor_two);
ICING_ASSERT_OK_AND_ASSIGN(query_results,
std::move(query_visitor_two).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kVerbatimSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature));
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre(R"(foobar\y)"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre(R"(foobar\y)"));
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId2));
}
@@ -771,7 +1418,7 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingNonSpecialChar) {
// is its own separate ascii value. For a query `foobar\n`, the parser will see
// the character sequence [`f`, `o`, `o`, `b`, `a`, `r`, `\n`] - it *won't* ever
// see `\` and `n`.
-TEST_F(QueryVisitorTest, VerbatimTermNewLine) {
+TEST_P(QueryVisitorTest, VerbatimTermNewLine) {
// Setup the index with docs 0, 1 and 2 holding the values "foobar\n",
// `foobar\` and `foobar\n` respectively.
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
@@ -792,36 +1439,61 @@ TEST_F(QueryVisitorTest, VerbatimTermNewLine) {
editor.IndexAllBufferedTerms();
// Issue a query for the verbatim token `foobar` + '\n'.
- std::string query = "\"foobar\n\"";
+ std::string query = CreateQuery("\"foobar\n\"");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kVerbatimSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature));
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""], UnorderedElementsAre("foobar\n"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foobar\n"));
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId0));
// Now, issue a query for the verbatim token `foobar\n`.
- query = R"("foobar\\n")";
+ query = CreateQuery(R"(("foobar\\n"))");
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
- QueryVisitor query_visitor_two(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor_two);
ICING_ASSERT_OK_AND_ASSIGN(query_results,
std::move(query_visitor_two).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kVerbatimSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature));
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre(R"(foobar\n)"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre(R"(foobar\n)"));
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId2));
}
-TEST_F(QueryVisitorTest, VerbatimTermEscapingComplex) {
+TEST_P(QueryVisitorTest, VerbatimTermEscapingComplex) {
// Setup the index with docs 0, 1 and 2 holding the values `foo\"bar\nbaz"`,
// `foo\\\"bar\\nbaz\"` and `foo\\"bar\\nbaz"` respectively.
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
@@ -842,28 +1514,43 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingComplex) {
editor.IndexAllBufferedTerms();
// Issue a query for the verbatim token `foo\"bar\nbaz"`.
- std::string query = R"("foo\\\"bar\\nbaz\"")";
+ std::string query = CreateQuery(R"(("foo\\\"bar\\nbaz\""))");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kVerbatimSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature));
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre(R"(foo\"bar\nbaz")"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre(R"(foo\"bar\nbaz")"));
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId0));
}
-TEST_F(QueryVisitorTest, SingleMinusTerm) {
+TEST_P(QueryVisitorTest, SingleMinusTerm) {
// Setup the index with docs 0, 1 and 2 holding the values "foo", "foo" and
// "bar" respectively.
ICING_ASSERT_OK(schema_store_->SetSchema(
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("type"))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
@@ -886,27 +1573,38 @@ TEST_F(QueryVisitorTest, SingleMinusTerm) {
editor.BufferTerm("bar");
editor.IndexAllBufferedTerms();
- std::string query = "-foo";
+ std::string query = CreateQuery("-foo");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), IsEmpty());
+ EXPECT_THAT(query_results.query_term_iterators, IsEmpty());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId2));
}
-TEST_F(QueryVisitorTest, SingleNotTerm) {
+TEST_P(QueryVisitorTest, SingleNotTerm) {
// Setup the index with docs 0, 1 and 2 holding the values "foo", "foo" and
// "bar" respectively.
ICING_ASSERT_OK(schema_store_->SetSchema(
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("type"))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
@@ -929,20 +1627,153 @@ TEST_F(QueryVisitorTest, SingleNotTerm) {
editor.BufferTerm("bar");
editor.IndexAllBufferedTerms();
- std::string query = "NOT foo";
+ std::string query = CreateQuery("NOT foo");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.query_terms, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(query_results.query_term_iterators, IsEmpty());
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId2));
}
-TEST_F(QueryVisitorTest, ImplicitAndTerms) {
+
+TEST_P(QueryVisitorTest, NestedNotTerms) {
+ // Setup the index with docs 0, 1 and 2 holding the values
+ // ["foo", "bar", "baz"], ["foo", "baz"] and ["bar", "baz"] respectively.
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("type"))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.BufferTerm("bar");
+ editor.BufferTerm("baz");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.BufferTerm("baz");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("bar");
+ editor.BufferTerm("baz");
+ editor.IndexAllBufferedTerms();
+
+ // Double negative could be rewritten as `(foo AND NOT bar) baz`
+ std::string query = CreateQuery("NOT (-foo OR bar) baz");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("foo", "baz"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "baz"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1));
+}
+
+TEST_P(QueryVisitorTest, DeeplyNestedNotTerms) {
+ // Setup the index with docs 0, 1 and 2 holding the values
+ // ["foo", "bar", "baz"], ["foo", "baz"] and ["bar", "baz"] respectively.
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("type"))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.BufferTerm("bar");
+ editor.BufferTerm("baz");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.BufferTerm("baz");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("bar");
+ editor.BufferTerm("baz");
+ editor.IndexAllBufferedTerms();
+
+ // Simplifying:
+ // NOT (-(NOT (foo -bar) baz) -bat) NOT bass
+ // NOT (-((-foo OR bar) baz) -bat) NOT bass
+ // NOT (((foo -bar) OR -baz) -bat) NOT bass
+ // (((-foo OR bar) baz) OR bat) NOT bass
+ //
+ // Doc 0 : (((-TRUE OR TRUE) TRUE) OR FALSE) NOT FALSE ->
+ // ((FALSE OR TRUE) TRUE) TRUE -> ((TRUE) TRUE) TRUE -> TRUE
+ // Doc 1 : (((-TRUE OR FALSE) TRUE) OR FALSE) NOT FALSE
+ // ((FALSE OR FALSE) TRUE) TRUE -> ((FALSE) TRUE) TRUE -> FALSE
+ // Doc 2 : (((-FALSE OR TRUE) TRUE) OR FALSE) NOT FALSE
+ // ((TRUE OR TRUE) TRUE) TRUE -> ((TRUE) TRUE) TRUE -> TRUE
+ std::string query = CreateQuery("NOT (-(NOT (foo -bar) baz) -bat) NOT bass");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("bar", "baz", "bat"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("bar", "baz", "bat"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2, kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest, ImplicitAndTerms) {
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
editor.BufferTerm("foo");
@@ -959,21 +1790,33 @@ TEST_F(QueryVisitorTest, ImplicitAndTerms) {
editor.BufferTerm("bar");
editor.IndexAllBufferedTerms();
- std::string query = "foo bar";
+ std::string query = CreateQuery("foo bar");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "bar"));
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId1));
}
-TEST_F(QueryVisitorTest, ExplicitAndTerms) {
+TEST_P(QueryVisitorTest, ExplicitAndTerms) {
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
editor.BufferTerm("foo");
@@ -990,21 +1833,33 @@ TEST_F(QueryVisitorTest, ExplicitAndTerms) {
editor.BufferTerm("bar");
editor.IndexAllBufferedTerms();
- std::string query = "foo AND bar";
+ std::string query = CreateQuery("foo AND bar");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "bar"));
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId1));
}
-TEST_F(QueryVisitorTest, OrTerms) {
+TEST_P(QueryVisitorTest, OrTerms) {
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
editor.BufferTerm("foo");
@@ -1021,21 +1876,33 @@ TEST_F(QueryVisitorTest, OrTerms) {
editor.BufferTerm("bar");
editor.IndexAllBufferedTerms();
- std::string query = "foo OR bar";
+ std::string query = CreateQuery("foo OR bar");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "bar"));
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId2, kDocumentId0));
}
-TEST_F(QueryVisitorTest, AndOrTermPrecedence) {
+TEST_P(QueryVisitorTest, AndOrTermPrecedence) {
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
editor.BufferTerm("bar");
@@ -1054,46 +1921,82 @@ TEST_F(QueryVisitorTest, AndOrTermPrecedence) {
editor.IndexAllBufferedTerms();
// Should be interpreted like `foo (bar OR baz)`
- std::string query = "foo bar OR baz";
+ std::string query = CreateQuery("foo bar OR baz");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("foo", "bar", "baz"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "bar", "baz"));
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId2, kDocumentId1));
// Should be interpreted like `(bar OR baz) foo`
- query = "bar OR baz foo";
+ query = CreateQuery("bar OR baz foo");
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
- QueryVisitor query_visitor_two(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor_two);
ICING_ASSERT_OK_AND_ASSIGN(query_results,
std::move(query_visitor_two).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("foo", "bar", "baz"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "bar", "baz"));
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId2, kDocumentId1));
- query = "(bar OR baz) foo";
+ query = CreateQuery("(bar OR baz) foo");
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
- QueryVisitor query_visitor_three(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor_three(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor_three);
ICING_ASSERT_OK_AND_ASSIGN(query_results,
std::move(query_visitor_three).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("foo", "bar", "baz"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "bar", "baz"));
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId2, kDocumentId1));
}
-TEST_F(QueryVisitorTest, AndOrNotPrecedence) {
+TEST_P(QueryVisitorTest, AndOrNotPrecedence) {
ICING_ASSERT_OK(schema_store_->SetSchema(
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("type").AddProperty(
@@ -1101,7 +2004,9 @@ TEST_F(QueryVisitorTest, AndOrNotPrecedence) {
.SetName("prop1")
.SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
@@ -1127,33 +2032,191 @@ TEST_F(QueryVisitorTest, AndOrNotPrecedence) {
editor.IndexAllBufferedTerms();
// Should be interpreted like `foo ((NOT bar) OR baz)`
- std::string query = "foo NOT bar OR baz";
+ std::string query = CreateQuery("foo NOT bar OR baz");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("foo", "baz"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "baz"));
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId2, kDocumentId0));
- query = "foo NOT (bar OR baz)";
+ query = CreateQuery("foo NOT (bar OR baz)");
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
- QueryVisitor query_visitor_two(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor_two);
ICING_ASSERT_OK_AND_ASSIGN(query_results,
std::move(query_visitor_two).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId0));
}
-TEST_F(QueryVisitorTest, PropertyFilter) {
+TEST_P(QueryVisitorTest, PropertyFilter) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Section ids are assigned alphabetically.
+ SectionId prop1_section_id = 0;
+ SectionId prop2_section_id = 1;
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ std::string query = CreateQuery("foo", /*property_restrict=*/"prop1");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1"));
+ EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1, kDocumentId0));
+}
+
+TEST_F(QueryVisitorTest, MultiPropertyFilter) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop3")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Section ids are assigned alphabetically.
+ SectionId prop1_section_id = 0;
+ SectionId prop2_section_id = 1;
+ SectionId prop3_section_id = 2;
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, prop2_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, prop3_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ std::string query = R"(search("foo", createList("prop1", "prop2")))";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1", "prop2"));
+ EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop2"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1, kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest, PropertyFilterStringIsInvalid) {
ICING_ASSERT_OK(schema_store_->SetSchema(
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -1168,7 +2231,42 @@ TEST_F(QueryVisitorTest, PropertyFilter) {
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // "prop1" is a STRING token, which cannot be a property name.
+ std::string query = CreateQuery(R"(("prop1":foo))");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(QueryVisitorTest, PropertyFilterNonNormalized) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("PROP1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("PROP2")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Section ids are assigned alphabetically.
SectionId prop1_section_id = 0;
SectionId prop2_section_id = 1;
@@ -1194,21 +2292,33 @@ TEST_F(QueryVisitorTest, PropertyFilter) {
editor.BufferTerm("foo");
editor.IndexAllBufferedTerms();
- std::string query = "prop1:foo";
+ std::string query = CreateQuery("foo", /*property_restrict=*/"PROP1");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("PROP1"));
+ EXPECT_THAT(query_results.query_terms["PROP1"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId1, kDocumentId0));
}
-TEST_F(QueryVisitorTest, PropertyFilterWithGrouping) {
+TEST_P(QueryVisitorTest, PropertyFilterWithGrouping) {
ICING_ASSERT_OK(schema_store_->SetSchema(
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -1223,7 +2333,10 @@ TEST_F(QueryVisitorTest, PropertyFilterWithGrouping) {
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
// Section ids are assigned alphabetically.
SectionId prop1_section_id = 0;
SectionId prop2_section_id = 1;
@@ -1249,21 +2362,31 @@ TEST_F(QueryVisitorTest, PropertyFilterWithGrouping) {
editor.BufferTerm("foo");
editor.IndexAllBufferedTerms();
- std::string query = "prop1:(foo OR bar)";
+ std::string query =
+ CreateQuery("(foo OR bar)", /*property_restrict=*/"prop1");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1"));
+ EXPECT_THAT(query_results.query_terms["prop1"],
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "bar"));
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId1, kDocumentId0));
}
-TEST_F(QueryVisitorTest, PropertyFilterWithNot) {
+TEST_P(QueryVisitorTest, ValidNestedPropertyFilter) {
ICING_ASSERT_OK(schema_store_->SetSchema(
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -1278,7 +2401,10 @@ TEST_F(QueryVisitorTest, PropertyFilterWithNot) {
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
// Section ids are assigned alphabetically.
SectionId prop1_section_id = 0;
SectionId prop2_section_id = 1;
@@ -1304,32 +2430,1449 @@ TEST_F(QueryVisitorTest, PropertyFilterWithNot) {
editor.BufferTerm("foo");
editor.IndexAllBufferedTerms();
- std::string query = "-prop1:(foo OR bar)";
+ std::string query = CreateQuery("(prop1:foo)", /*property_restrict=*/"prop1");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
- QueryVisitor query_visitor(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1"));
+ EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1));
+
+ query = CreateQuery("(prop1:(prop1:(prop1:(prop1:foo))))",
+ /*property_restrict=*/"prop1");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_two);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_two).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1"));
+ EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1));
+}
+
+TEST_P(QueryVisitorTest, InvalidNestedPropertyFilter) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Section ids are assigned alphabetically.
+ SectionId prop1_section_id = 0;
+ SectionId prop2_section_id = 1;
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ editor.BufferTerm("bar");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ std::string query = CreateQuery("(prop2:foo)", /*property_restrict=*/"prop1");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), IsEmpty());
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), IsEmpty());
+
+ // Resulting queries:
+ // - kPlain: `prop1:(prop2:(prop1:(prop2:(prop1:foo))))`
+ // - kSearch: `-search("(prop2:(prop1:(prop2:(prop1:foo))))",
+ // createList("prop1"))`
+ query = CreateQuery("(prop2:(prop1:(prop2:(prop1:foo))))",
+ /*property_restrict=*/"prop1");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_two);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_two).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), IsEmpty());
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), IsEmpty());
+}
+
+TEST_P(QueryVisitorTest, NotWithPropertyFilter) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Section ids are assigned alphabetically.
+ SectionId prop1_section_id = 0;
+ SectionId prop2_section_id = 1;
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ editor.BufferTerm("bar");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ // Resulting queries:
+ // - kPlain: `-prop1:(foo OR bar)`
+ // - kSearch: `-search("foo OR bar", createList("prop1"))`
+ std::string query = absl_ports::StrCat(
+ "-", CreateQuery("(foo OR bar)", /*property_restrict=*/"prop1"));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), IsEmpty());
+ EXPECT_THAT(query_results.query_term_iterators, IsEmpty());
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId2));
- query = "NOT prop1:(foo OR bar)";
+ // Resulting queries:
+ // - kPlain: `NOT prop1:(foo OR bar)`
+ // - kSearch: `NOT search("foo OR bar", createList("prop1"))`
+ query = absl_ports::StrCat(
+ "NOT ", CreateQuery("(foo OR bar)", /*property_restrict=*/"prop1"));
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
- QueryVisitor query_visitor_two(index_.get(), numeric_index_.get(),
- document_store_.get(), schema_store_.get(),
- normalizer_.get(), TERM_MATCH_PREFIX);
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
root_node->Accept(&query_visitor_two);
ICING_ASSERT_OK_AND_ASSIGN(query_results,
std::move(query_visitor_two).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), IsEmpty());
+ EXPECT_THAT(query_results.query_term_iterators, IsEmpty());
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId2));
}
+TEST_P(QueryVisitorTest, PropertyFilterWithNot) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Section ids are assigned alphabetically.
+ SectionId prop1_section_id = 0;
+ SectionId prop2_section_id = 1;
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ editor.BufferTerm("bar");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ // Resulting queries:
+ // - kPlain: `prop1:(-foo OR bar)`
+ // - kSearch: `search("-foo OR bar", createList("prop1"))`
+ std::string query =
+ CreateQuery("(-foo OR bar)", /*property_restrict=*/"prop1");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1"));
+ EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("bar"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("bar"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId0));
+
+ // Resulting queries:
+ // - kPlain: `prop1:(foo OR bar)`
+ // - kSearch: `search("foo OR bar", createList("prop1"))`
+ query = CreateQuery("(NOT foo OR bar)", /*property_restrict=*/"prop1");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_two);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_two).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1"));
+ EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("bar"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("bar"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest, SegmentationTest) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Section ids are assigned alphabetically.
+ SectionId prop1_section_id = 0;
+ SectionId prop2_section_id = 1;
+
+ // ICU segmentation will break this into "每天" and "上班".
+ // CFStringTokenizer (ios) will break this into "每", "天" and "上班"
+ std::string query = CreateQuery("每天上班");
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ editor.BufferTerm("上班");
+ editor.IndexAllBufferedTerms();
+ editor = index_->Edit(kDocumentId0, prop2_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ if (IsCfStringTokenization()) {
+ editor.BufferTerm("每");
+ editor.BufferTerm("天");
+ } else {
+ editor.BufferTerm("每天");
+ }
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("上班");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ if (IsCfStringTokenization()) {
+ editor.BufferTerm("每");
+ editor.BufferTerm("天");
+ } else {
+ editor.BufferTerm("每天");
+ }
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ if (IsCfStringTokenization()) {
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("每", "天", "上班"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("每", "天", "上班"));
+ } else {
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("每天", "上班"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("每天", "上班"));
+ }
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest, PropertyRestrictsPopCorrectly) {
+ PropertyConfigProto prop =
+ PropertyConfigBuilder()
+ .SetName("prop0")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(prop)
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop1"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop2")))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ SectionId prop0_id = 0;
+ SectionId prop1_id = 1;
+ SectionId prop2_id = 2;
+ NamespaceId ns_id = 0;
+
+ // Create the following docs:
+ // - Doc 0: Contains 'val0', 'val1', 'val2' in 'prop0'. Shouldn't match.
+ DocumentProto doc =
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid0, document_store_->Put(doc));
+ Index::Editor editor =
+ index_->Edit(docid0, prop0_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val0");
+ editor.BufferTerm("val1");
+ editor.BufferTerm("val2");
+ editor.IndexAllBufferedTerms();
+
+ // - Doc 1: Contains 'val0', 'val1', 'val2' in 'prop1'. Should match.
+ doc = DocumentBuilder(doc).SetUri("uri1").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid1, document_store_->Put(doc));
+ editor = index_->Edit(docid1, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val0");
+ editor.BufferTerm("val1");
+ editor.BufferTerm("val2");
+ editor.IndexAllBufferedTerms();
+
+ // - Doc 2: Contains 'val0', 'val1', 'val2' in 'prop2'. Shouldn't match.
+ doc = DocumentBuilder(doc).SetUri("uri2").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid2, document_store_->Put(doc));
+ editor = index_->Edit(docid2, prop2_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val0");
+ editor.BufferTerm("val1");
+ editor.BufferTerm("val2");
+ editor.IndexAllBufferedTerms();
+
+ // - Doc 3: Contains 'val0' in 'prop0', 'val1' in 'prop1' etc. Should match.
+ doc = DocumentBuilder(doc).SetUri("uri3").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid3, document_store_->Put(doc));
+ editor = index_->Edit(docid3, prop0_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val0");
+ editor.IndexAllBufferedTerms();
+ editor = index_->Edit(docid3, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val1");
+ editor.IndexAllBufferedTerms();
+ editor = index_->Edit(docid3, prop2_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val2");
+ editor.IndexAllBufferedTerms();
+
+ // - Doc 4: Contains 'val1' in 'prop0', 'val2' in 'prop1', 'val0' in 'prop2'.
+ // Shouldn't match.
+ doc = DocumentBuilder(doc).SetUri("uri4").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid4, document_store_->Put(doc));
+ editor = index_->Edit(docid4, prop0_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val1");
+ editor.IndexAllBufferedTerms();
+ editor = index_->Edit(docid4, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val2");
+ editor.IndexAllBufferedTerms();
+ editor = index_->Edit(docid4, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val0");
+ editor.IndexAllBufferedTerms();
+
+ // Now issue a query with 'val1' restricted to 'prop1'. This should match only
+ // docs 1 and 3.
+ // Resulting queries:
+ // - kPlain: `val0 prop1:val1 val2`
+ // - kSearch: `val0 search("val1", createList("prop1")) val2`
+ std::string query = absl_ports::StrCat(
+ "val0 ", CreateQuery("val1", /*property_restrict=*/"prop1"), " val2");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("", "prop1"));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("val0", "val2"));
+ EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("val1"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("val0", "val1", "val2"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(docid3, docid1));
+}
+
+TEST_P(QueryVisitorTest, UnsatisfiablePropertyRestrictsPopCorrectly) {
+ PropertyConfigProto prop =
+ PropertyConfigBuilder()
+ .SetName("prop0")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(prop)
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop1"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop2")))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ SectionId prop0_id = 0;
+ SectionId prop1_id = 1;
+ SectionId prop2_id = 2;
+ NamespaceId ns_id = 0;
+
+ // Create the following docs:
+ // - Doc 0: Contains 'val0', 'val1', 'val2' in 'prop0'. Should match.
+ DocumentProto doc =
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid0, document_store_->Put(doc));
+ Index::Editor editor =
+ index_->Edit(docid0, prop0_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val0");
+ editor.BufferTerm("val1");
+ editor.BufferTerm("val2");
+ editor.IndexAllBufferedTerms();
+
+ // - Doc 1: Contains 'val0', 'val1', 'val2' in 'prop1'. Shouldn't match.
+ doc = DocumentBuilder(doc).SetUri("uri1").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid1, document_store_->Put(doc));
+ editor = index_->Edit(docid1, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val0");
+ editor.BufferTerm("val1");
+ editor.BufferTerm("val2");
+ editor.IndexAllBufferedTerms();
+
+ // - Doc 2: Contains 'val0', 'val1', 'val2' in 'prop2'. Should match.
+ doc = DocumentBuilder(doc).SetUri("uri2").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid2, document_store_->Put(doc));
+ editor = index_->Edit(docid2, prop2_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val0");
+ editor.BufferTerm("val1");
+ editor.BufferTerm("val2");
+ editor.IndexAllBufferedTerms();
+
+ // - Doc 3: Contains 'val0' in 'prop0', 'val1' in 'prop1' etc. Should match.
+ doc = DocumentBuilder(doc).SetUri("uri3").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid3, document_store_->Put(doc));
+ editor = index_->Edit(docid3, prop0_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val0");
+ editor.IndexAllBufferedTerms();
+ editor = index_->Edit(docid3, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val1");
+ editor.IndexAllBufferedTerms();
+ editor = index_->Edit(docid3, prop2_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val2");
+ editor.IndexAllBufferedTerms();
+
+ // - Doc 4: Contains 'val1' in 'prop0', 'val2' in 'prop1', 'val0' in 'prop2'.
+ // Shouldn't match.
+ doc = DocumentBuilder(doc).SetUri("uri4").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid4, document_store_->Put(doc));
+ editor = index_->Edit(docid4, prop0_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val1");
+ editor.IndexAllBufferedTerms();
+ editor = index_->Edit(docid4, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val2");
+ editor.IndexAllBufferedTerms();
+ editor = index_->Edit(docid4, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val0");
+ editor.IndexAllBufferedTerms();
+
+ // Now issue a query with 'val1' restricted to 'prop1'. This should match only
+ // docs 1 and 3.
+ // Resulting queries:
+ // - kPlain: `val0 OR prop1:(prop2:val1) OR val2`
+ // - kSearch: `prop0:val0 OR search("(prop2:val1)", createList("prop1")) OR
+ // prop2:val2`
+ std::string query = absl_ports::StrCat(
+ "prop0:val0 OR prop1:(",
+ CreateQuery("val1", /*property_restrict=*/"prop2"), ") OR prop2:val2");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop0", "prop2"));
+ EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("val0"));
+ EXPECT_THAT(query_results.query_terms["prop2"], UnorderedElementsAre("val2"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("val0", "val2"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(docid3, docid2, docid0));
+}
+
+TEST_F(QueryVisitorTest, UnsupportedFunctionReturnsInvalidArgument) {
+ std::string query = "unsupportedFunction()";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest, SearchFunctionTooFewArgumentsReturnsInvalidArgument) {
+ std::string query = "search()";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest, SearchFunctionTooManyArgumentsReturnsInvalidArgument) {
+ std::string query = R"(search("foo", createList("subject"), "bar"))";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest,
+ SearchFunctionWrongFirstArgumentTypeReturnsInvalidArgument) {
+ // First argument type=TEXT, expected STRING.
+ std::string query = "search(7)";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // First argument type=string list, expected STRING.
+ query = R"(search(createList("subject")))";
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_two);
+ EXPECT_THAT(std::move(query_visitor_two).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest,
+ SearchFunctionWrongSecondArgumentTypeReturnsInvalidArgument) {
+ // Second argument type=STRING, expected string list.
+ std::string query = R"(search("foo", "bar"))";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Second argument type=TEXT, expected string list.
+ query = R"(search("foo", 7))";
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_two);
+ EXPECT_THAT(std::move(query_visitor_two).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest,
+ SearchFunctionCreateListZeroPropertiesReturnsInvalidArgument) {
+ std::string query = R"(search("foo", createList()))";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest, SearchFunctionNestedFunctionCalls) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Section ids are assigned alphabetically.
+ SectionId prop1_section_id = 0;
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ editor.BufferTerm("bar");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, prop1_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.BufferTerm("bar");
+ editor.IndexAllBufferedTerms();
+
+ // *If* nested function calls were allowed, then this would simplify as:
+ // `search("search(\"foo\") bar")` -> `search("foo bar")` -> `foo bar`
+ // But nested function calls are disallowed. So this is rejected.
+ std::string level_one_query = R"(search("foo", createList("prop1")) bar)";
+ std::string level_two_query =
+ absl_ports::StrCat(R"(search(")", EscapeString(level_one_query),
+ R"(", createList("prop1")))");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(level_two_query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), level_two_query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1"));
+ EXPECT_THAT(query_results.query_terms["prop1"],
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2));
+
+ std::string level_three_query =
+ absl_ports::StrCat(R"(search(")", EscapeString(level_two_query),
+ R"(", createList("prop1")))");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(level_three_query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ level_three_query, DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_two);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_two).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1"));
+ EXPECT_THAT(query_results.query_terms["prop1"],
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2));
+
+ std::string level_four_query =
+ absl_ports::StrCat(R"(search(")", EscapeString(level_three_query),
+ R"(", createList("prop1")))");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(level_four_query));
+ QueryVisitor query_visitor_three(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ level_four_query, DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_three);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_three).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1"));
+ EXPECT_THAT(query_results.query_terms["prop1"],
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2));
+}
+
+// This test will nest `search` calls together with the set of restricts
+// narrowing at each level so that the set of docs matching the query shrinks.
+TEST_F(QueryVisitorTest, SearchFunctionNestedPropertyRestrictsNarrowing) {
+ PropertyConfigProto prop =
+ PropertyConfigBuilder()
+ .SetName("prop0")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(prop)
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop1"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop2"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop3"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop4"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop5"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop6"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop7")))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Section ids are assigned alphabetically.
+ SectionId prop0_id = 0;
+ SectionId prop1_id = 1;
+ SectionId prop2_id = 2;
+ SectionId prop3_id = 3;
+ SectionId prop4_id = 4;
+ SectionId prop5_id = 5;
+ SectionId prop6_id = 6;
+ SectionId prop7_id = 7;
+
+ NamespaceId ns_id = 0;
+ DocumentProto doc =
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid0, document_store_->Put(doc));
+ Index::Editor editor =
+ index_->Edit(kDocumentId0, prop0_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid1,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri1").Build()));
+ editor = index_->Edit(docid1, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid2,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri2").Build()));
+ editor = index_->Edit(docid2, prop2_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid3,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri3").Build()));
+ editor = index_->Edit(docid3, prop3_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid4,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri4").Build()));
+ editor = index_->Edit(docid4, prop4_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid5,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri5").Build()));
+ editor = index_->Edit(docid5, prop5_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid6,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri6").Build()));
+ editor = index_->Edit(docid6, prop6_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid7,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri7").Build()));
+ editor = index_->Edit(docid7, prop7_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ // *If* nested function calls were allowed, then this would simplify as:
+ // `search("search(\"foo\") bar")` -> `search("foo bar")` -> `foo bar`
+ // But nested function calls are disallowed. So this is rejected.
+ std::string level_one_query =
+ R"(search("foo", createList("prop2", "prop5", "prop1", "prop3", "prop0", "prop6", "prop4", "prop7")))";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(level_one_query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), level_one_query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop0", "prop1", "prop2", "prop3", "prop4",
+ "prop5", "prop6", "prop7"));
+ EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop2"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop3"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop4"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop5"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop6"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop7"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(docid7, docid6, docid5, docid4, docid3, docid2,
+ docid1, docid0));
+
+ std::string level_two_query = absl_ports::StrCat(
+ R"(search(")", EscapeString(level_one_query),
+ R"(", createList("prop6", "prop0", "prop4", "prop2")))");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(level_two_query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), level_two_query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_two);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_two).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop0", "prop2", "prop4", "prop6"));
+ EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop2"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop4"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop6"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(docid6, docid4, docid2, docid0));
+
+ std::string level_three_query =
+ absl_ports::StrCat(R"(search(")", EscapeString(level_two_query),
+ R"(", createList("prop0", "prop6")))");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(level_three_query));
+ QueryVisitor query_visitor_three(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ level_three_query, DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_three);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_three).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop0", "prop6"));
+ EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop6"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(docid6, docid0));
+}
+
+// This test will nest `search` calls together with the set of restricts
+// narrowing at each level so that the set of docs matching the query shrinks.
+TEST_F(QueryVisitorTest, SearchFunctionNestedPropertyRestrictsExpanding) {
+ PropertyConfigProto prop =
+ PropertyConfigBuilder()
+ .SetName("prop0")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(prop)
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop1"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop2"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop3"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop4"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop5"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop6"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop7")))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Section ids are assigned alphabetically.
+ SectionId prop0_id = 0;
+ SectionId prop1_id = 1;
+ SectionId prop2_id = 2;
+ SectionId prop3_id = 3;
+ SectionId prop4_id = 4;
+ SectionId prop5_id = 5;
+ SectionId prop6_id = 6;
+ SectionId prop7_id = 7;
+
+ NamespaceId ns_id = 0;
+ DocumentProto doc =
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid0, document_store_->Put(doc));
+ Index::Editor editor =
+ index_->Edit(kDocumentId0, prop0_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid1,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri1").Build()));
+ editor = index_->Edit(docid1, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid2,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri2").Build()));
+ editor = index_->Edit(docid2, prop2_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid3,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri3").Build()));
+ editor = index_->Edit(docid3, prop3_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid4,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri4").Build()));
+ editor = index_->Edit(docid4, prop4_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid5,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri5").Build()));
+ editor = index_->Edit(docid5, prop5_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid6,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri6").Build()));
+ editor = index_->Edit(docid6, prop6_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid7,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri7").Build()));
+ editor = index_->Edit(docid7, prop7_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ // *If* nested function calls were allowed, then this would simplify as:
+ // `search("search(\"foo\") bar")` -> `search("foo bar")` -> `foo bar`
+ // But nested function calls are disallowed. So this is rejected.
+ std::string level_one_query =
+ R"(search("foo", createList("prop0", "prop6")))";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(level_one_query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), level_one_query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop0", "prop6"));
+ EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop6"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(docid6, docid0));
+
+ std::string level_two_query = absl_ports::StrCat(
+ R"(search(")", EscapeString(level_one_query),
+ R"(", createList("prop6", "prop0", "prop4", "prop2")))");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(level_two_query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), level_two_query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_two);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_two).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop0", "prop6"));
+ EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop6"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(docid6, docid0));
+
+ std::string level_three_query =
+ absl_ports::StrCat(R"(search(")", EscapeString(level_two_query),
+ R"(", createList("prop2", "prop5", "prop1", "prop3",)",
+ R"( "prop0", "prop6", "prop4", "prop7")))");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(level_three_query));
+ QueryVisitor query_visitor_three(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ level_three_query, DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_three);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_three).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop0", "prop6"));
+ EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop6"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(docid6, docid0));
+}
+
+TEST_F(QueryVisitorTest,
+ PropertyDefinedFunctionWithNoArgumentReturnsInvalidArgument) {
+ std::string query = "propertyDefined()";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(
+ QueryVisitorTest,
+ PropertyDefinedFunctionWithMoreThanOneTextArgumentReturnsInvalidArgument) {
+ std::string query = "propertyDefined(\"foo\", \"bar\")";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest,
+ PropertyDefinedFunctionWithTextArgumentReturnsInvalidArgument) {
+ // The argument type is TEXT, not STRING here.
+ std::string query = "propertyDefined(foo)";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest,
+ PropertyDefinedFunctionWithNonTextArgumentReturnsInvalidArgument) {
+ std::string query = "propertyDefined(1 < 2)";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(QueryVisitorTest, PropertyDefinedFunctionReturnsMatchingDocuments) {
+ // Set up two schemas, one with a "url" field and one without.
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("typeWithUrl")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("url")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("typeWithoutUrl"))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Document 0 has the term "foo" and its schema has the url property.
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("typeWithUrl").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ // Document 1 has the term "foo" and its schema DOESN'T have the url property.
+ ICING_ASSERT_OK(document_store_->Put(DocumentBuilder()
+ .SetKey("ns", "uri1")
+ .SetSchema("typeWithoutUrl")
+ .Build()));
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ // Document 2 has the term "bar" and its schema has the url property.
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("typeWithUrl").Build()));
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("bar");
+ editor.IndexAllBufferedTerms();
+
+ std::string query = CreateQuery("foo propertyDefined(\"url\")");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ UnorderedElementsAre(kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest,
+ PropertyDefinedFunctionReturnsNothingIfNoMatchingProperties) {
+ // Set up two schemas, one with a "url" field and one without.
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("typeWithUrl")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("url")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("typeWithoutUrl"))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Document 0 has the term "foo" and its schema has the url property.
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("typeWithUrl").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ // Document 1 has the term "foo" and its schema DOESN'T have the url property.
+ ICING_ASSERT_OK(document_store_->Put(DocumentBuilder()
+ .SetKey("ns", "uri1")
+ .SetSchema("typeWithoutUrl")
+ .Build()));
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ // Attempt to query a non-existent property.
+ std::string query = CreateQuery("propertyDefined(\"nonexistentproperty\")");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), IsEmpty());
+}
+
+TEST_P(QueryVisitorTest,
+ PropertyDefinedFunctionWithNegationMatchesDocsWithNoSuchProperty) {
+ // Set up two schemas, one with a "url" field and one without.
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("typeWithUrl")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("url")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("typeWithoutUrl"))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Document 0 has the term "foo" and its schema has the url property.
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("typeWithUrl").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ // Document 1 has the term "foo" and its schema DOESN'T have the url property.
+ ICING_ASSERT_OK(document_store_->Put(DocumentBuilder()
+ .SetKey("ns", "uri1")
+ .SetSchema("typeWithoutUrl")
+ .Build()));
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ std::string query = CreateQuery("foo AND NOT propertyDefined(\"url\")");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ UnorderedElementsAre(kDocumentId1));
+}
+
+INSTANTIATE_TEST_SUITE_P(QueryVisitorTest, QueryVisitorTest,
+ testing::Values(QueryType::kPlain,
+ QueryType::kSearch));
+
} // namespace
} // namespace lib
diff --git a/icing/query/advanced_query_parser/util/string-util.cc b/icing/query/advanced_query_parser/util/string-util.cc
new file mode 100644
index 0000000..9af2ed6
--- /dev/null
+++ b/icing/query/advanced_query_parser/util/string-util.cc
@@ -0,0 +1,106 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/query/advanced_query_parser/util/string-util.h"
+
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+
+namespace icing {
+namespace lib {
+
+namespace string_util {
+
+libtextclassifier3::StatusOr<std::string> UnescapeStringValue(
+ std::string_view value) {
+ std::string result;
+ bool in_escape = false;
+ for (char c : value) {
+ if (in_escape) {
+ in_escape = false;
+ } else if (c == '\\') {
+ in_escape = true;
+ continue;
+ } else if (c == '"') {
+ return absl_ports::InvalidArgumentError(
+ "Encountered an unescaped quotation mark!");
+ }
+ result += c;
+ }
+ return result;
+}
+
+libtextclassifier3::StatusOr<std::string_view> FindEscapedToken(
+ std::string_view escaped_string, std::string_view unescaped_token) {
+ if (unescaped_token.empty()) {
+ return absl_ports::InvalidArgumentError(
+ "Cannot find escaped token in empty unescaped token.");
+ }
+
+ // Find the start of unescaped_token within the escaped_string
+ const char* esc_string_end = escaped_string.data() + escaped_string.length();
+ size_t pos = escaped_string.find(unescaped_token[0]);
+ const char* esc_token_start = (pos == std::string_view::npos)
+ ? esc_string_end
+ : escaped_string.data() + pos;
+ const char* esc_token_cur = esc_token_start;
+ const char* possible_next_start = nullptr;
+ bool is_escaped = false;
+ int i = 0;
+ for (; i < unescaped_token.length() && esc_token_cur < esc_string_end;
+ ++esc_token_cur) {
+ if (esc_token_cur != esc_token_start &&
+ *esc_token_cur == unescaped_token[0] &&
+ possible_next_start == nullptr) {
+ possible_next_start = esc_token_cur;
+ }
+
+ // Every char in unescaped_token should either be an escape or match the
+ // next char in unescaped_token.
+ if (!is_escaped && *esc_token_cur == '\\') {
+ is_escaped = true;
+ } else if (*esc_token_cur == unescaped_token[i]) {
+ is_escaped = false;
+ ++i;
+ } else {
+ // No match. If we don't have a possible_next_start, then try to find one.
+ if (possible_next_start == nullptr) {
+ pos = escaped_string.find(unescaped_token[0],
+ esc_token_cur - escaped_string.data());
+ if (pos == std::string_view::npos) {
+ break;
+ }
+ esc_token_start = escaped_string.data() + pos;
+ } else {
+ esc_token_start = possible_next_start;
+ possible_next_start = nullptr;
+ }
+ // esc_token_start has been reset to a char that equals unescaped_token[0]
+ // The for loop above will advance esc_token_cur so set i to 1.
+ i = 1;
+ esc_token_cur = esc_token_start;
+ }
+ }
+ if (i != unescaped_token.length()) {
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("Couldn't match chars at token=", unescaped_token,
+ ") and raw_text=", escaped_string));
+ }
+ return std::string_view(esc_token_start, esc_token_cur - esc_token_start);
+}
+
+} // namespace string_util
+
+} // namespace lib
+} // namespace icing \ No newline at end of file
diff --git a/icing/query/advanced_query_parser/util/string-util.h b/icing/query/advanced_query_parser/util/string-util.h
new file mode 100644
index 0000000..09fb451
--- /dev/null
+++ b/icing/query/advanced_query_parser/util/string-util.h
@@ -0,0 +1,49 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_QUERY_ADVANCED_QUERY_PARSER__STRING_UTIL_H_
+#define ICING_QUERY_ADVANCED_QUERY_PARSER__STRING_UTIL_H_
+
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+
+namespace icing {
+namespace lib {
+
+namespace string_util {
+
+// Returns:
+// - On success, value with the escapes removed.
+// - INVALID_ARGUMENT if an non-escaped quote is encountered.
+// Ex. "fo\\\\o" -> "fo\\o"
+libtextclassifier3::StatusOr<std::string> UnescapeStringValue(
+ std::string_view value);
+
+// Returns:
+// - On success, string_view pointing to the segment of escaped_string that,
+// if unescaped, would match unescaped_token.
+// - INVALID_ARGUMENT
+// Ex. escaped_string="foo b\\a\\\"r baz", unescaped_token="ba\"r"
+// returns "b\\a\\\"r"
+libtextclassifier3::StatusOr<std::string_view> FindEscapedToken(
+ std::string_view escaped_string, std::string_view unescaped_token);
+
+} // namespace string_util
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_QUERY_ADVANCED_QUERY_PARSER__STRING_UTIL_H_
diff --git a/icing/query/advanced_query_parser/util/string-util_test.cc b/icing/query/advanced_query_parser/util/string-util_test.cc
new file mode 100644
index 0000000..a7ccf3e
--- /dev/null
+++ b/icing/query/advanced_query_parser/util/string-util_test.cc
@@ -0,0 +1,125 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/query/advanced_query_parser/util/string-util.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::IsEmpty;
+
+TEST(StringUtilTest, UnescapeStringEmptyString) {
+ EXPECT_THAT(string_util::UnescapeStringValue(""), IsOkAndHolds(IsEmpty()));
+}
+
+TEST(StringUtilTest, UnescapeStringStringWithNoEscapes) {
+ EXPECT_THAT(string_util::UnescapeStringValue("foo"), IsOkAndHolds("foo"));
+ EXPECT_THAT(string_util::UnescapeStringValue("f o o"), IsOkAndHolds("f o o"));
+ EXPECT_THAT(string_util::UnescapeStringValue("f\to\to"),
+ IsOkAndHolds("f\to\to"));
+ EXPECT_THAT(string_util::UnescapeStringValue("f.o.o"), IsOkAndHolds("f.o.o"));
+}
+
+TEST(StringUtilTest, UnescapeStringStringWithEscapes) {
+ EXPECT_THAT(string_util::UnescapeStringValue("f\\oo"), IsOkAndHolds("foo"));
+ EXPECT_THAT(string_util::UnescapeStringValue("f\\\\oo"),
+ IsOkAndHolds("f\\oo"));
+ EXPECT_THAT(string_util::UnescapeStringValue("f\\\"oo"),
+ IsOkAndHolds("f\"oo"));
+ EXPECT_THAT(string_util::UnescapeStringValue("foo\\"), IsOkAndHolds("foo"));
+ EXPECT_THAT(string_util::UnescapeStringValue("foo b\\a\\\"r baz"),
+ IsOkAndHolds("foo ba\"r baz"));
+ EXPECT_THAT(string_util::UnescapeStringValue("bar b\\aar bar\\s bart"),
+ IsOkAndHolds("bar baar bars bart"));
+ EXPECT_THAT(string_util::UnescapeStringValue("\\\\\\\\a"),
+ IsOkAndHolds("\\\\a"));
+}
+
+TEST(StringUtilTest, UnescapeStringQuoteWithoutEscape) {
+ EXPECT_THAT(string_util::UnescapeStringValue("f\\o\"o"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(string_util::UnescapeStringValue("f\"oo"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(StringUtilTest, FindEscapedTokenEmptyUnescapedToken) {
+ EXPECT_THAT(string_util::FindEscapedToken("foo b\\a\\\"r baz", ""),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(StringUtilTest, FindEscapedTokenTokenNotPresent) {
+ EXPECT_THAT(string_util::FindEscapedToken("foo b\\a\\\"r baz", "elephant"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(string_util::FindEscapedToken("foo b\\a\\\"r baz", "bat"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(string_util::FindEscapedToken("foo b\\a\\\"r baz", "taz"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(string_util::FindEscapedToken("foo b\\a\\\"r baz", "bazz"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(StringUtilTest, FindEscapedTokenMatchInMiddleToken) {
+ EXPECT_THAT(string_util::FindEscapedToken("babar", "bar"),
+ IsOkAndHolds("bar"));
+}
+
+TEST(StringUtilTest, FindEscapedTokenMatches) {
+ EXPECT_THAT(string_util::FindEscapedToken("foo b\\a\\\"r baz", "ba\"r"),
+ IsOkAndHolds("b\\a\\\"r"));
+ EXPECT_THAT(string_util::FindEscapedToken("\\\\\\\\a", "\\\\a"),
+ IsOkAndHolds("\\\\\\\\a"));
+}
+
+TEST(StringUtilTest, FindEscapedTokenTraversesThroughEscapedText) {
+ std::string_view escaped_text = "bar b\\aar bar\\s bart";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::string_view result,
+ string_util::FindEscapedToken(escaped_text, "bar"));
+ // escaped_text = "bar b\\aar bar\\s bart";
+ // escaped_token ^ ^
+ EXPECT_THAT(result, Eq("bar"));
+
+ // escaped_text = "b\\aar bar\\s bart";
+ // escaped_token ^ ^
+ const char* result_end = result.data() + result.length();
+ escaped_text = escaped_text.substr(result_end - escaped_text.data());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ result, string_util::FindEscapedToken(escaped_text, "bar"));
+ EXPECT_THAT(result, Eq("bar"));
+
+ // escaped_text = "\\s bart";
+ // escaped_token ^ ^
+ result_end = result.data() + result.length();
+ escaped_text = escaped_text.substr(result_end - escaped_text.data());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ result, string_util::FindEscapedToken(escaped_text, "bar"));
+ EXPECT_THAT(result, Eq("bar"));
+
+ result_end = result.data() + result.length();
+ escaped_text = escaped_text.substr(result_end - escaped_text.data());
+ EXPECT_THAT(string_util::FindEscapedToken(escaped_text, "bar"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing \ No newline at end of file
diff --git a/icing/query/query-features.h b/icing/query/query-features.h
index 1471063..158e13e 100644
--- a/icing/query/query-features.h
+++ b/icing/query/query-features.h
@@ -36,8 +36,15 @@ constexpr Feature kNumericSearchFeature =
constexpr Feature kVerbatimSearchFeature =
"VERBATIM_SEARCH"; // Features#VERBATIM_SEARCH
-// TODO(b/208654892): Add this as an enabled feature in the query visitor when
-// it gets invoked.
+// This feature covers all additions (other than numeric search and verbatim
+// search) to the query language to bring it into better alignment with the list
+// filters spec.
+// This includes:
+// - support for function calls
+// - expanding support for negation and property restriction expressions
+// - prefix operator '*'
+// - 'NOT' operator
+// - propertyDefined("url")
constexpr Feature kListFilterQueryLanguageFeature =
"LIST_FILTER_QUERY_LANGUAGE"; // Features#LIST_FILTER_QUERY_LANGUAGE
diff --git a/icing/query/query-processor.cc b/icing/query/query-processor.cc
index 283d83d..3e43ad9 100644
--- a/icing/query/query-processor.cc
+++ b/icing/query/query-processor.cc
@@ -39,8 +39,8 @@
#include "icing/query/advanced_query_parser/lexer.h"
#include "icing/query/advanced_query_parser/parser.h"
#include "icing/query/advanced_query_parser/query-visitor.h"
-#include "icing/query/query-processor.h"
#include "icing/query/query-features.h"
+#include "icing/query/query-processor.h"
#include "icing/query/query-results.h"
#include "icing/query/query-terms.h"
#include "icing/query/query-utils.h"
@@ -140,7 +140,8 @@ QueryProcessor::QueryProcessor(Index* index,
libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseSearch(
const SearchSpecProto& search_spec,
- ScoringSpecProto::RankingStrategy::Code ranking_strategy) {
+ ScoringSpecProto::RankingStrategy::Code ranking_strategy,
+ int64_t current_time_ms) {
if (search_spec.search_type() == SearchSpecProto::SearchType::UNDEFINED) {
return absl_ports::InvalidArgumentError(absl_ports::StrCat(
"Search type ",
@@ -151,21 +152,12 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseSearch(
if (search_spec.search_type() ==
SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
ICING_VLOG(1) << "Using EXPERIMENTAL_ICING_ADVANCED_QUERY parser!";
- libtextclassifier3::StatusOr<QueryResults> results_or =
- ParseAdvancedQuery(search_spec);
- if (results_or.ok()) {
- results = std::move(results_or).ValueOrDie();
- } else {
- ICING_VLOG(1)
- << "Unable to parse query using advanced query parser. Error: "
- << results_or.status().error_message()
- << ". Falling back to old query parser.";
- ICING_ASSIGN_OR_RETURN(results,
- ParseRawQuery(search_spec, ranking_strategy));
- }
+ ICING_ASSIGN_OR_RETURN(
+ results,
+ ParseAdvancedQuery(search_spec, ranking_strategy, current_time_ms));
} else {
- ICING_ASSIGN_OR_RETURN(results,
- ParseRawQuery(search_spec, ranking_strategy));
+ ICING_ASSIGN_OR_RETURN(
+ results, ParseRawQuery(search_spec, ranking_strategy, current_time_ms));
}
// Check that all new features used in the search have been enabled in the
@@ -183,12 +175,21 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseSearch(
DocHitInfoIteratorFilter::Options options = GetFilterOptions(search_spec);
results.root_iterator = std::make_unique<DocHitInfoIteratorFilter>(
std::move(results.root_iterator), &document_store_, &schema_store_,
- options);
+ options, current_time_ms);
+ // TODO(b/294114230): Move this SectionRestrict filter from root level to
+ // lower levels if that would improve performance.
+ if (!search_spec.type_property_filters().empty()) {
+ results.root_iterator = std::make_unique<DocHitInfoIteratorSectionRestrict>(
+ std::move(results.root_iterator), &document_store_, &schema_store_,
+ search_spec, current_time_ms);
+ }
return results;
}
libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseAdvancedQuery(
- const SearchSpecProto& search_spec) const {
+ const SearchSpecProto& search_spec,
+ ScoringSpecProto::RankingStrategy::Code ranking_strategy,
+ int64_t current_time_ms) const {
QueryResults results;
Lexer lexer(search_spec.query(), Lexer::Language::QUERY);
ICING_ASSIGN_OR_RETURN(std::vector<Lexer::LexerToken> lexer_tokens,
@@ -203,9 +204,18 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseAdvancedQuery(
document_store_.last_added_document_id());
return results;
}
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<Tokenizer> plain_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN, &language_segmenter_));
+ DocHitInfoIteratorFilter::Options options = GetFilterOptions(search_spec);
+ bool needs_term_frequency_info =
+ ranking_strategy == ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE;
QueryVisitor query_visitor(&index_, &numeric_index_, &document_store_,
&schema_store_, &normalizer_,
- search_spec.term_match_type());
+ plain_tokenizer.get(), search_spec.query(),
+ std::move(options), search_spec.term_match_type(),
+ needs_term_frequency_info, current_time_ms);
tree_root->Accept(&query_visitor);
return std::move(query_visitor).ConsumeResults();
}
@@ -213,7 +223,8 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseAdvancedQuery(
// TODO(cassiewang): Collect query stats to populate the SearchResultsProto
libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseRawQuery(
const SearchSpecProto& search_spec,
- ScoringSpecProto::RankingStrategy::Code ranking_strategy) {
+ ScoringSpecProto::RankingStrategy::Code ranking_strategy,
+ int64_t current_time_ms) {
DocHitInfoIteratorFilter::Options options = GetFilterOptions(search_spec);
// Tokenize the incoming raw query
@@ -231,7 +242,6 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseRawQuery(
std::stack<ParserStateFrame> frames;
frames.emplace();
-
QueryResults results;
// Process all the tokens
for (int i = 0; i < tokens.size(); i++) {
@@ -310,11 +320,12 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseRawQuery(
// We do the same amount of disk reads, so it may be dependent on how
// big the schema is and/or how popular schema type filtering and
// section filtering is.
-
ICING_ASSIGN_OR_RETURN(
result_iterator,
index_.GetIterator(
- normalized_text, kSectionIdMaskAll,
+ normalized_text,
+ token.text.data() - search_spec.query().c_str(),
+ token.text.length(), kSectionIdMaskAll,
search_spec.term_match_type(),
/*need_hit_term_frequency=*/ranking_strategy ==
ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
@@ -330,14 +341,16 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseRawQuery(
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<DocHitInfoIterator> term_iterator,
index_.GetIterator(
- normalized_text, kSectionIdMaskAll,
+ normalized_text,
+ token.text.data() - search_spec.query().c_str(),
+ token.text.length(), kSectionIdMaskAll,
search_spec.term_match_type(),
/*need_hit_term_frequency=*/ranking_strategy ==
ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
results.query_term_iterators[normalized_text] =
std::make_unique<DocHitInfoIteratorFilter>(
std::move(term_iterator), &document_store_, &schema_store_,
- options);
+ options, current_time_ms);
}
results.query_terms[frames.top().section_restrict].insert(
std::move(normalized_text));
@@ -391,9 +404,11 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseRawQuery(
if (!frames.top().section_restrict.empty()) {
// We saw a section restrict earlier, wrap the result iterator in
// the section restrict
+ std::set<std::string> section_restricts;
+ section_restricts.insert(std::move(frames.top().section_restrict));
result_iterator = std::make_unique<DocHitInfoIteratorSectionRestrict>(
std::move(result_iterator), &document_store_, &schema_store_,
- std::move(frames.top().section_restrict));
+ std::move(section_restricts), current_time_ms);
frames.top().section_restrict = "";
}
diff --git a/icing/query/query-processor.h b/icing/query/query-processor.h
index a4f8973..d4c22dd 100644
--- a/icing/query/query-processor.h
+++ b/icing/query/query-processor.h
@@ -67,7 +67,8 @@ class QueryProcessor {
// INTERNAL_ERROR on all other errors
libtextclassifier3::StatusOr<QueryResults> ParseSearch(
const SearchSpecProto& search_spec,
- ScoringSpecProto::RankingStrategy::Code ranking_strategy);
+ ScoringSpecProto::RankingStrategy::Code ranking_strategy,
+ int64_t current_time_ms);
private:
explicit QueryProcessor(Index* index,
@@ -85,7 +86,9 @@ class QueryProcessor {
// - One iterator that represents the entire query
// INVALID_ARGUMENT if query syntax is incorrect and cannot be tokenized
libtextclassifier3::StatusOr<QueryResults> ParseAdvancedQuery(
- const SearchSpecProto& search_spec) const;
+ const SearchSpecProto& search_spec,
+ ScoringSpecProto::RankingStrategy::Code ranking_strategy,
+ int64_t current_time_ms) const;
// Parse the query into a one DocHitInfoIterator that represents the root of a
// query tree.
@@ -98,7 +101,8 @@ class QueryProcessor {
// INTERNAL_ERROR on all other errors
libtextclassifier3::StatusOr<QueryResults> ParseRawQuery(
const SearchSpecProto& search_spec,
- ScoringSpecProto::RankingStrategy::Code ranking_strategy);
+ ScoringSpecProto::RankingStrategy::Code ranking_strategy,
+ int64_t current_time_ms);
// Not const because we could modify/sort the hit buffer in the lite index at
// query time.
diff --git a/icing/query/query-processor_benchmark.cc b/icing/query/query-processor_benchmark.cc
index 6d776ce..025e8e6 100644
--- a/icing/query/query-processor_benchmark.cc
+++ b/icing/query/query-processor_benchmark.cc
@@ -57,8 +57,8 @@
// $ adb push blaze-bin/icing/query/query-processor_benchmark
// /data/local/tmp/
//
-// $ adb shell /data/local/tmp/query-processor_benchmark --benchmark_filter=all
-// --adb
+// $ adb shell /data/local/tmp/query-processor_benchmark
+// --benchmark_filter=all --adb
// Flag to tell the benchmark that it'll be run on an Android device via adb,
// the benchmark will set up data files accordingly.
@@ -81,7 +81,9 @@ void AddTokenToIndex(Index* index, DocumentId document_id, SectionId section_id,
std::unique_ptr<Index> CreateIndex(const IcingFilesystem& icing_filesystem,
const Filesystem& filesystem,
const std::string& index_dir) {
- Index::Options options(index_dir, /*index_merge_size=*/1024 * 1024 * 10);
+ Index::Options options(index_dir, /*index_merge_size=*/1024 * 1024 * 10,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
return Index::Create(options, &filesystem, &icing_filesystem).ValueOrDie();
}
@@ -92,6 +94,18 @@ std::unique_ptr<Normalizer> CreateNormalizer() {
.ValueOrDie();
}
+libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, const SchemaStore* schema_store) {
+ return DocumentStore::Create(
+ filesystem, base_dir, clock, schema_store,
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr);
+}
+
void BM_QueryOneTerm(benchmark::State& state) {
bool run_via_adb = absl::GetFlag(FLAGS_adb);
if (!run_via_adb) {
@@ -103,6 +117,7 @@ void BM_QueryOneTerm(benchmark::State& state) {
Filesystem filesystem;
const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark";
const std::string index_dir = base_dir + "/index";
+ const std::string numeric_index_dir = base_dir + "/numeric_index";
const std::string schema_dir = base_dir + "/schema";
const std::string doc_store_dir = base_dir + "/store";
@@ -116,7 +131,9 @@ void BM_QueryOneTerm(benchmark::State& state) {
std::unique_ptr<Index> index =
CreateIndex(icing_filesystem, filesystem, index_dir);
// TODO(b/249829533): switch to use persistent numeric index.
- auto numeric_index = std::make_unique<DummyNumericIndex<int64_t>>();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto numeric_index,
+ DummyNumericIndex<int64_t>::Create(filesystem, numeric_index_dir));
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
@@ -130,11 +147,13 @@ void BM_QueryOneTerm(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem, schema_dir, &clock));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
DocumentStore::CreateResult create_result =
- DocumentStore::Create(&filesystem, doc_store_dir, &clock,
- schema_store.get())
+ CreateDocumentStore(&filesystem, doc_store_dir, &clock,
+ schema_store.get())
.ValueOrDie();
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -164,7 +183,8 @@ void BM_QueryOneTerm(benchmark::State& state) {
QueryResults results =
query_processor
->ParseSearch(search_spec,
- ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)
+ ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ clock.GetSystemTimeMilliseconds())
.ValueOrDie();
while (results.root_iterator->Advance().ok()) {
results.root_iterator->doc_hit_info();
@@ -226,6 +246,7 @@ void BM_QueryFiveTerms(benchmark::State& state) {
Filesystem filesystem;
const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark";
const std::string index_dir = base_dir + "/index";
+ const std::string numeric_index_dir = base_dir + "/numeric_index";
const std::string schema_dir = base_dir + "/schema";
const std::string doc_store_dir = base_dir + "/store";
@@ -239,7 +260,9 @@ void BM_QueryFiveTerms(benchmark::State& state) {
std::unique_ptr<Index> index =
CreateIndex(icing_filesystem, filesystem, index_dir);
// TODO(b/249829533): switch to use persistent numeric index.
- auto numeric_index = std::make_unique<DummyNumericIndex<int64_t>>();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto numeric_index,
+ DummyNumericIndex<int64_t>::Create(filesystem, numeric_index_dir));
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
@@ -253,11 +276,13 @@ void BM_QueryFiveTerms(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem, schema_dir, &clock));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
DocumentStore::CreateResult create_result =
- DocumentStore::Create(&filesystem, doc_store_dir, &clock,
- schema_store.get())
+ CreateDocumentStore(&filesystem, doc_store_dir, &clock,
+ schema_store.get())
.ValueOrDie();
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -305,7 +330,8 @@ void BM_QueryFiveTerms(benchmark::State& state) {
QueryResults results =
query_processor
->ParseSearch(search_spec,
- ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)
+ ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ clock.GetSystemTimeMilliseconds())
.ValueOrDie();
while (results.root_iterator->Advance().ok()) {
results.root_iterator->doc_hit_info();
@@ -367,6 +393,7 @@ void BM_QueryDiacriticTerm(benchmark::State& state) {
Filesystem filesystem;
const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark";
const std::string index_dir = base_dir + "/index";
+ const std::string numeric_index_dir = base_dir + "/numeric_index";
const std::string schema_dir = base_dir + "/schema";
const std::string doc_store_dir = base_dir + "/store";
@@ -380,7 +407,9 @@ void BM_QueryDiacriticTerm(benchmark::State& state) {
std::unique_ptr<Index> index =
CreateIndex(icing_filesystem, filesystem, index_dir);
// TODO(b/249829533): switch to use persistent numeric index.
- auto numeric_index = std::make_unique<DummyNumericIndex<int64_t>>();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto numeric_index,
+ DummyNumericIndex<int64_t>::Create(filesystem, numeric_index_dir));
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
@@ -394,11 +423,13 @@ void BM_QueryDiacriticTerm(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem, schema_dir, &clock));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
DocumentStore::CreateResult create_result =
- DocumentStore::Create(&filesystem, doc_store_dir, &clock,
- schema_store.get())
+ CreateDocumentStore(&filesystem, doc_store_dir, &clock,
+ schema_store.get())
.ValueOrDie();
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -431,7 +462,8 @@ void BM_QueryDiacriticTerm(benchmark::State& state) {
QueryResults results =
query_processor
->ParseSearch(search_spec,
- ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)
+ ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ clock.GetSystemTimeMilliseconds())
.ValueOrDie();
while (results.root_iterator->Advance().ok()) {
results.root_iterator->doc_hit_info();
@@ -493,6 +525,7 @@ void BM_QueryHiragana(benchmark::State& state) {
Filesystem filesystem;
const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark";
const std::string index_dir = base_dir + "/index";
+ const std::string numeric_index_dir = base_dir + "/numeric_index";
const std::string schema_dir = base_dir + "/schema";
const std::string doc_store_dir = base_dir + "/store";
@@ -506,7 +539,9 @@ void BM_QueryHiragana(benchmark::State& state) {
std::unique_ptr<Index> index =
CreateIndex(icing_filesystem, filesystem, index_dir);
// TODO(b/249829533): switch to use persistent numeric index.
- auto numeric_index = std::make_unique<DummyNumericIndex<int64_t>>();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto numeric_index,
+ DummyNumericIndex<int64_t>::Create(filesystem, numeric_index_dir));
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
@@ -520,11 +555,13 @@ void BM_QueryHiragana(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem, schema_dir, &clock));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
DocumentStore::CreateResult create_result =
- DocumentStore::Create(&filesystem, doc_store_dir, &clock,
- schema_store.get())
+ CreateDocumentStore(&filesystem, doc_store_dir, &clock,
+ schema_store.get())
.ValueOrDie();
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -557,7 +594,8 @@ void BM_QueryHiragana(benchmark::State& state) {
QueryResults results =
query_processor
->ParseSearch(search_spec,
- ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)
+ ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ clock.GetSystemTimeMilliseconds())
.ValueOrDie();
while (results.root_iterator->Advance().ok()) {
results.root_iterator->doc_hit_info();
diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc
index c22f6aa..e64de32 100644
--- a/icing/query/query-processor_test.cc
+++ b/icing/query/query-processor_test.cc
@@ -63,6 +63,18 @@ using ::testing::IsEmpty;
using ::testing::SizeIs;
using ::testing::UnorderedElementsAre;
+libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, const SchemaStore* schema_store) {
+ return DocumentStore::Create(
+ filesystem, base_dir, clock, schema_store,
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr);
+}
+
class QueryProcessorTest
: public ::testing::TestWithParam<SearchSpecProto::SearchType::Code> {
protected:
@@ -70,7 +82,8 @@ class QueryProcessorTest
: test_dir_(GetTestTempDir() + "/icing"),
store_dir_(test_dir_ + "/store"),
schema_store_dir_(test_dir_ + "/schema_store"),
- index_dir_(test_dir_ + "/index") {}
+ index_dir_(test_dir_ + "/index"),
+ numeric_index_dir_(test_dir_ + "/numeric_index") {}
void SetUp() override {
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
@@ -95,16 +108,20 @@ class QueryProcessorTest
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, store_dir_, &fake_clock_,
+ schema_store_.get()));
document_store_ = std::move(create_result.document_store);
Index::Options options(index_dir_,
- /*index_merge_size=*/1024 * 1024);
+ /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
ICING_ASSERT_OK_AND_ASSIGN(
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
// TODO(b/249829533): switch to use persistent numeric index.
- numeric_index_ = std::make_unique<DummyNumericIndex<int64_t>>();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ numeric_index_,
+ DummyNumericIndex<int64_t>::Create(filesystem_, numeric_index_dir_));
language_segmenter_factory::SegmenterOptions segmenter_options(
ULOC_US, jni_cache_.get());
@@ -138,7 +155,7 @@ class QueryProcessorTest
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit(property, document_id, section_id);
ICING_RETURN_IF_ERROR(editor->BufferKey(value));
- return editor->IndexAllBufferedKeys();
+ return std::move(*editor).IndexAllBufferedKeys();
}
void TearDown() override {
@@ -154,6 +171,7 @@ class QueryProcessorTest
private:
IcingFilesystem icing_filesystem_;
const std::string index_dir_;
+ const std::string numeric_index_dir_;
protected:
std::unique_ptr<Index> index_;
@@ -205,7 +223,10 @@ TEST_P(QueryProcessorTest, EmptyGroupMatchAllDocuments) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
@@ -223,17 +244,27 @@ TEST_P(QueryProcessorTest, EmptyGroupMatchAllDocuments) {
SearchSpecProto search_spec;
search_spec.set_query("()");
search_spec.set_search_type(GetParam());
+ if (GetParam() !=
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(search_spec,
+ ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
- ICING_ASSERT_OK_AND_ASSIGN(
- QueryResults results,
- query_processor_->ParseSearch(search_spec,
- ScoringSpecProto::RankingStrategy::NONE));
-
- // Descending order of valid DocumentIds
- EXPECT_THAT(GetDocumentIds(results.root_iterator.get()),
- ElementsAre(document_id2, document_id1));
- EXPECT_THAT(results.query_terms, IsEmpty());
- EXPECT_THAT(results.query_term_iterators, IsEmpty());
+ // Descending order of valid DocumentIds
+ EXPECT_THAT(GetDocumentIds(results.root_iterator.get()),
+ ElementsAre(document_id2, document_id1));
+ EXPECT_THAT(results.query_terms, IsEmpty());
+ EXPECT_THAT(results.query_term_iterators, IsEmpty());
+ } else {
+ // TODO(b/208654892): Resolve the difference between RAW_QUERY and ADVANCED
+ // regarding empty composite expressions.
+ EXPECT_THAT(query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ }
}
TEST_P(QueryProcessorTest, EmptyQueryMatchAllDocuments) {
@@ -241,7 +272,10 @@ TEST_P(QueryProcessorTest, EmptyQueryMatchAllDocuments) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
@@ -263,7 +297,8 @@ TEST_P(QueryProcessorTest, EmptyQueryMatchAllDocuments) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(search_spec,
- ScoringSpecProto::RankingStrategy::NONE));
+ ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
EXPECT_THAT(GetDocumentIds(results.root_iterator.get()),
@@ -277,7 +312,10 @@ TEST_P(QueryProcessorTest, QueryTermNormalized) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -308,29 +346,26 @@ TEST_P(QueryProcessorTest, QueryTermNormalized) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
ASSERT_THAT(results.root_iterator->Advance(), IsOk());
EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
section_id_mask);
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- std::unordered_map<SectionId, Hit::TermFrequency>
- expected_section_ids_tf_map = {{section_id, 1}};
- std::vector<TermMatchInfo> matched_terms_stats;
- results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
- EXPECT_THAT(
- matched_terms_stats,
- ElementsAre(EqualsTermMatchInfo("hello", expected_section_ids_tf_map),
- EqualsTermMatchInfo("world", expected_section_ids_tf_map)));
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms[""],
- UnorderedElementsAre("hello", "world"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(2));
- }
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(EqualsTermMatchInfo("hello", expected_section_ids_tf_map),
+ EqualsTermMatchInfo("world", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(2));
+
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "world"));
}
TEST_P(QueryProcessorTest, OneTermPrefixMatch) {
@@ -338,7 +373,10 @@ TEST_P(QueryProcessorTest, OneTermPrefixMatch) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -366,26 +404,24 @@ TEST_P(QueryProcessorTest, OneTermPrefixMatch) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
ASSERT_THAT(results.root_iterator->Advance(), IsOk());
EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
section_id_mask);
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- std::unordered_map<SectionId, Hit::TermFrequency>
- expected_section_ids_tf_map = {{section_id, 1}};
- std::vector<TermMatchInfo> matched_terms_stats;
- results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
- EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
- "he", expected_section_ids_tf_map)));
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- }
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "he", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he"));
}
TEST_P(QueryProcessorTest, OneTermPrefixMatchWithMaxSectionID) {
@@ -393,7 +429,10 @@ TEST_P(QueryProcessorTest, OneTermPrefixMatchWithMaxSectionID) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -423,26 +462,24 @@ TEST_P(QueryProcessorTest, OneTermPrefixMatchWithMaxSectionID) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
ASSERT_THAT(results.root_iterator->Advance(), IsOk());
EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
section_id_mask);
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- std::unordered_map<SectionId, Hit::TermFrequency>
- expected_section_ids_tf_map = {{section_id, 1}};
- std::vector<TermMatchInfo> matched_terms_stats;
- results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
- EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
- "he", expected_section_ids_tf_map)));
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- }
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "he", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he"));
}
TEST_P(QueryProcessorTest, OneTermExactMatch) {
@@ -450,7 +487,10 @@ TEST_P(QueryProcessorTest, OneTermExactMatch) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -478,27 +518,24 @@ TEST_P(QueryProcessorTest, OneTermExactMatch) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
ASSERT_THAT(results.root_iterator->Advance(), IsOk());
EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
section_id_mask);
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- std::unordered_map<SectionId, Hit::TermFrequency>
- expected_section_ids_tf_map = {{section_id, 1}};
- std::vector<TermMatchInfo> matched_terms_stats;
- results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
- EXPECT_THAT(
- matched_terms_stats,
- ElementsAre(EqualsTermMatchInfo("hello", expected_section_ids_tf_map)));
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- }
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "hello", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello"));
}
TEST_P(QueryProcessorTest, AndSameTermExactMatch) {
@@ -506,7 +543,10 @@ TEST_P(QueryProcessorTest, AndSameTermExactMatch) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -534,33 +574,26 @@ TEST_P(QueryProcessorTest, AndSameTermExactMatch) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
ASSERT_THAT(results.root_iterator->Advance(), IsOk());
EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
section_id_mask);
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- std::unordered_map<SectionId, Hit::TermFrequency>
- expected_section_ids_tf_map = {{section_id, 1}};
- std::vector<TermMatchInfo> matched_terms_stats;
- results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
- EXPECT_THAT(
- matched_terms_stats,
- ElementsAre(EqualsTermMatchInfo("hello", expected_section_ids_tf_map)));
- }
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "hello", expected_section_ids_tf_map)));
+
ASSERT_FALSE(results.root_iterator->Advance().ok());
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- }
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello"));
}
TEST_P(QueryProcessorTest, AndTwoTermExactMatch) {
@@ -568,7 +601,10 @@ TEST_P(QueryProcessorTest, AndTwoTermExactMatch) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -599,29 +635,26 @@ TEST_P(QueryProcessorTest, AndTwoTermExactMatch) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
ASSERT_THAT(results.root_iterator->Advance(), IsOk());
EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
section_id_mask);
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- std::unordered_map<SectionId, Hit::TermFrequency>
- expected_section_ids_tf_map = {{section_id, 1}};
- std::vector<TermMatchInfo> matched_terms_stats;
- results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
- EXPECT_THAT(
- matched_terms_stats,
- ElementsAre(EqualsTermMatchInfo("hello", expected_section_ids_tf_map),
- EqualsTermMatchInfo("world", expected_section_ids_tf_map)));
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms[""],
- UnorderedElementsAre("hello", "world"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(2));
- }
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(EqualsTermMatchInfo("hello", expected_section_ids_tf_map),
+ EqualsTermMatchInfo("world", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(2));
+
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "world"));
}
TEST_P(QueryProcessorTest, AndSameTermPrefixMatch) {
@@ -629,7 +662,10 @@ TEST_P(QueryProcessorTest, AndSameTermPrefixMatch) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -657,33 +693,26 @@ TEST_P(QueryProcessorTest, AndSameTermPrefixMatch) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
ASSERT_THAT(results.root_iterator->Advance(), IsOk());
EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
section_id_mask);
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- std::unordered_map<SectionId, Hit::TermFrequency>
- expected_section_ids_tf_map = {{section_id, 1}};
- std::vector<TermMatchInfo> matched_terms_stats;
- results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
- EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
- "he", expected_section_ids_tf_map)));
- }
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "he", expected_section_ids_tf_map)));
ASSERT_FALSE(results.root_iterator->Advance().ok());
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- }
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he"));
}
TEST_P(QueryProcessorTest, AndTwoTermPrefixMatch) {
@@ -691,7 +720,10 @@ TEST_P(QueryProcessorTest, AndTwoTermPrefixMatch) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -722,7 +754,8 @@ TEST_P(QueryProcessorTest, AndTwoTermPrefixMatch) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
ASSERT_THAT(results.root_iterator->Advance(), IsOk());
@@ -730,21 +763,18 @@ TEST_P(QueryProcessorTest, AndTwoTermPrefixMatch) {
EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
section_id_mask);
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- std::unordered_map<SectionId, Hit::TermFrequency>
- expected_section_ids_tf_map = {{section_id, 1}};
- std::vector<TermMatchInfo> matched_terms_stats;
- results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
- EXPECT_THAT(
- matched_terms_stats,
- ElementsAre(EqualsTermMatchInfo("he", expected_section_ids_tf_map),
- EqualsTermMatchInfo("wo", expected_section_ids_tf_map)));
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he", "wo"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(2));
- }
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(EqualsTermMatchInfo("he", expected_section_ids_tf_map),
+ EqualsTermMatchInfo("wo", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(2));
+
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he", "wo"));
}
TEST_P(QueryProcessorTest, AndTwoTermPrefixAndExactMatch) {
@@ -752,7 +782,10 @@ TEST_P(QueryProcessorTest, AndTwoTermPrefixAndExactMatch) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -783,7 +816,8 @@ TEST_P(QueryProcessorTest, AndTwoTermPrefixAndExactMatch) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
ASSERT_THAT(results.root_iterator->Advance(), IsOk());
@@ -791,21 +825,18 @@ TEST_P(QueryProcessorTest, AndTwoTermPrefixAndExactMatch) {
EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
section_id_mask);
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- std::unordered_map<SectionId, Hit::TermFrequency>
- expected_section_ids_tf_map = {{section_id, 1}};
- std::vector<TermMatchInfo> matched_terms_stats;
- results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
- EXPECT_THAT(
- matched_terms_stats,
- ElementsAre(EqualsTermMatchInfo("hello", expected_section_ids_tf_map),
- EqualsTermMatchInfo("wo", expected_section_ids_tf_map)));
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "wo"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(2));
- }
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(EqualsTermMatchInfo("hello", expected_section_ids_tf_map),
+ EqualsTermMatchInfo("wo", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(2));
+
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "wo"));
}
TEST_P(QueryProcessorTest, OrTwoTermExactMatch) {
@@ -813,7 +844,10 @@ TEST_P(QueryProcessorTest, OrTwoTermExactMatch) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -849,7 +883,8 @@ TEST_P(QueryProcessorTest, OrTwoTermExactMatch) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
ASSERT_THAT(results.root_iterator->Advance(), IsOk());
@@ -857,38 +892,26 @@ TEST_P(QueryProcessorTest, OrTwoTermExactMatch) {
EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
section_id_mask);
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- std::unordered_map<SectionId, Hit::TermFrequency>
- expected_section_ids_tf_map = {{section_id, 1}};
- std::vector<TermMatchInfo> matched_terms_stats;
- results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
- EXPECT_THAT(
- matched_terms_stats,
- ElementsAre(EqualsTermMatchInfo("world", expected_section_ids_tf_map)));
- }
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "world", expected_section_ids_tf_map)));
ASSERT_THAT(results.root_iterator->Advance(), IsOk());
EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id1);
EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
section_id_mask);
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- std::unordered_map<SectionId, Hit::TermFrequency>
- expected_section_ids_tf_map = {{section_id, 1}};
- std::vector<TermMatchInfo> matched_terms_stats;
- results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
- EXPECT_THAT(
- matched_terms_stats,
- ElementsAre(EqualsTermMatchInfo("hello", expected_section_ids_tf_map)));
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms[""],
- UnorderedElementsAre("hello", "world"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(2));
- }
+ matched_terms_stats.clear();
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "hello", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(2));
+
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "world"));
}
TEST_P(QueryProcessorTest, OrTwoTermPrefixMatch) {
@@ -896,7 +919,10 @@ TEST_P(QueryProcessorTest, OrTwoTermPrefixMatch) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -932,7 +958,8 @@ TEST_P(QueryProcessorTest, OrTwoTermPrefixMatch) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
ASSERT_THAT(results.root_iterator->Advance(), IsOk());
@@ -940,35 +967,26 @@ TEST_P(QueryProcessorTest, OrTwoTermPrefixMatch) {
EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
section_id_mask);
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- std::unordered_map<SectionId, Hit::TermFrequency>
- expected_section_ids_tf_map = {{section_id, 1}};
- std::vector<TermMatchInfo> matched_terms_stats;
- results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
- EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
- "wo", expected_section_ids_tf_map)));
- }
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "wo", expected_section_ids_tf_map)));
ASSERT_THAT(results.root_iterator->Advance(), IsOk());
EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id1);
EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
section_id_mask);
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- std::unordered_map<SectionId, Hit::TermFrequency>
- expected_section_ids_tf_map = {{section_id, 1}};
- std::vector<TermMatchInfo> matched_terms_stats;
- results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
- EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
- "he", expected_section_ids_tf_map)));
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he", "wo"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(2));
- }
+ matched_terms_stats.clear();
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "he", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(2));
+
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he", "wo"));
}
TEST_P(QueryProcessorTest, OrTwoTermPrefixAndExactMatch) {
@@ -976,7 +994,10 @@ TEST_P(QueryProcessorTest, OrTwoTermPrefixAndExactMatch) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -1011,7 +1032,8 @@ TEST_P(QueryProcessorTest, OrTwoTermPrefixAndExactMatch) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
ASSERT_THAT(results.root_iterator->Advance(), IsOk());
@@ -1019,36 +1041,25 @@ TEST_P(QueryProcessorTest, OrTwoTermPrefixAndExactMatch) {
EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
section_id_mask);
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- std::unordered_map<SectionId, Hit::TermFrequency>
- expected_section_ids_tf_map = {{section_id, 1}};
- std::vector<TermMatchInfo> matched_terms_stats;
- results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
- EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
- "wo", expected_section_ids_tf_map)));
- }
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "wo", expected_section_ids_tf_map)));
ASSERT_THAT(results.root_iterator->Advance(), IsOk());
EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id1);
EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
section_id_mask);
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- std::unordered_map<SectionId, Hit::TermFrequency>
- expected_section_ids_tf_map = {{section_id, 1}};
- std::vector<TermMatchInfo> matched_terms_stats;
- results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
- EXPECT_THAT(
- matched_terms_stats,
- ElementsAre(EqualsTermMatchInfo("hello", expected_section_ids_tf_map)));
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "wo"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(2));
- }
+ matched_terms_stats.clear();
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "hello", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(2));
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "wo"));
}
TEST_P(QueryProcessorTest, CombinedAndOrTerms) {
@@ -1056,7 +1067,10 @@ TEST_P(QueryProcessorTest, CombinedAndOrTerms) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -1108,7 +1122,8 @@ TEST_P(QueryProcessorTest, CombinedAndOrTerms) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Only Document 1 matches since it has puppy AND dog
ASSERT_THAT(results.root_iterator->Advance(), IsOk());
@@ -1117,23 +1132,19 @@ TEST_P(QueryProcessorTest, CombinedAndOrTerms) {
EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
section_id_mask);
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- std::unordered_map<SectionId, Hit::TermFrequency>
- expected_section_ids_tf_map = {{section_id, 1}};
- std::vector<TermMatchInfo> matched_terms_stats;
- results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
- EXPECT_THAT(
- matched_terms_stats,
- ElementsAre(EqualsTermMatchInfo("puppy", expected_section_ids_tf_map),
- EqualsTermMatchInfo("dog", expected_section_ids_tf_map)));
-
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms[""],
- UnorderedElementsAre("puppy", "kitten", "dog"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(3));
- }
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(EqualsTermMatchInfo("puppy", expected_section_ids_tf_map),
+ EqualsTermMatchInfo("dog", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(3));
+
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""],
+ UnorderedElementsAre("puppy", "kitten", "dog"));
}
{
@@ -1147,7 +1158,8 @@ TEST_P(QueryProcessorTest, CombinedAndOrTerms) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Both Document 1 and 2 match since Document 1 has animal AND puppy, and
// Document 2 has animal AND kitten
@@ -1158,19 +1170,15 @@ TEST_P(QueryProcessorTest, CombinedAndOrTerms) {
EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
section_id_mask);
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- std::unordered_map<SectionId, Hit::TermFrequency>
- expected_section_ids_tf_map = {{section_id, 1}};
- std::vector<TermMatchInfo> matched_terms_stats;
- results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
- EXPECT_THAT(
- matched_terms_stats,
- ElementsAre(
- EqualsTermMatchInfo("animal", expected_section_ids_tf_map),
- EqualsTermMatchInfo("kitten", expected_section_ids_tf_map)));
- }
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(
+ EqualsTermMatchInfo("animal", expected_section_ids_tf_map),
+ EqualsTermMatchInfo("kitten", expected_section_ids_tf_map)));
ASSERT_THAT(results.root_iterator->Advance(), IsOk());
EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(),
@@ -1178,23 +1186,17 @@ TEST_P(QueryProcessorTest, CombinedAndOrTerms) {
EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
section_id_mask);
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- std::unordered_map<SectionId, Hit::TermFrequency>
- expected_section_ids_tf_map = {{section_id, 1}};
- std::vector<TermMatchInfo> matched_terms_stats;
- results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
- EXPECT_THAT(
- matched_terms_stats,
- ElementsAre(
- EqualsTermMatchInfo("animal", expected_section_ids_tf_map),
- EqualsTermMatchInfo("puppy", expected_section_ids_tf_map)));
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms[""],
- UnorderedElementsAre("animal", "puppy", "kitten"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(3));
- }
+ matched_terms_stats.clear();
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(EqualsTermMatchInfo("animal", expected_section_ids_tf_map),
+ EqualsTermMatchInfo("puppy", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(3));
+
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""],
+ UnorderedElementsAre("animal", "puppy", "kitten"));
}
{
@@ -1208,7 +1210,8 @@ TEST_P(QueryProcessorTest, CombinedAndOrTerms) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Only Document 2 matches since it has both kitten and cat
ASSERT_THAT(results.root_iterator->Advance(), IsOk());
@@ -1217,24 +1220,19 @@ TEST_P(QueryProcessorTest, CombinedAndOrTerms) {
EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
section_id_mask);
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- std::unordered_map<SectionId, Hit::TermFrequency>
- expected_section_ids_tf_map = {{section_id, 1}};
- std::vector<TermMatchInfo> matched_terms_stats;
- results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
- EXPECT_THAT(
- matched_terms_stats,
- ElementsAre(
- EqualsTermMatchInfo("kitten", expected_section_ids_tf_map),
- EqualsTermMatchInfo("cat", expected_section_ids_tf_map)));
-
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms[""],
- UnorderedElementsAre("kitten", "foo", "bar", "cat"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(4));
- }
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(EqualsTermMatchInfo("kitten", expected_section_ids_tf_map),
+ EqualsTermMatchInfo("cat", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(4));
+
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""],
+ UnorderedElementsAre("kitten", "foo", "bar", "cat"));
}
}
@@ -1243,7 +1241,10 @@ TEST_P(QueryProcessorTest, OneGroup) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -1287,22 +1288,19 @@ TEST_P(QueryProcessorTest, OneGroup) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
DocHitInfo expectedDocHitInfo(document_id1);
expectedDocHitInfo.UpdateSection(/*section_id=*/0);
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
ElementsAre(expectedDocHitInfo));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(3));
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms[""],
- UnorderedElementsAre("puppy", "kitten", "foo"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(3));
- }
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""],
+ UnorderedElementsAre("puppy", "kitten", "foo"));
}
TEST_P(QueryProcessorTest, TwoGroups) {
@@ -1310,7 +1308,10 @@ TEST_P(QueryProcessorTest, TwoGroups) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -1355,7 +1356,8 @@ TEST_P(QueryProcessorTest, TwoGroups) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
DocHitInfo expectedDocHitInfo1(document_id1);
@@ -1364,15 +1366,11 @@ TEST_P(QueryProcessorTest, TwoGroups) {
expectedDocHitInfo2.UpdateSection(/*section_id=*/0);
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
ElementsAre(expectedDocHitInfo2, expectedDocHitInfo1));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(4));
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms[""],
- UnorderedElementsAre("puppy", "dog", "kitten", "cat"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(4));
- }
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""],
+ UnorderedElementsAre("puppy", "dog", "kitten", "cat"));
}
TEST_P(QueryProcessorTest, ManyLevelNestedGrouping) {
@@ -1380,7 +1378,10 @@ TEST_P(QueryProcessorTest, ManyLevelNestedGrouping) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -1424,22 +1425,19 @@ TEST_P(QueryProcessorTest, ManyLevelNestedGrouping) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
DocHitInfo expectedDocHitInfo(document_id1);
expectedDocHitInfo.UpdateSection(/*section_id=*/0);
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
ElementsAre(expectedDocHitInfo));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(3));
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms[""],
- UnorderedElementsAre("puppy", "kitten", "foo"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(3));
- }
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""],
+ UnorderedElementsAre("puppy", "kitten", "foo"));
}
TEST_P(QueryProcessorTest, OneLevelNestedGrouping) {
@@ -1447,7 +1445,10 @@ TEST_P(QueryProcessorTest, OneLevelNestedGrouping) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -1491,7 +1492,8 @@ TEST_P(QueryProcessorTest, OneLevelNestedGrouping) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
DocHitInfo expectedDocHitInfo1(document_id1);
@@ -1500,15 +1502,11 @@ TEST_P(QueryProcessorTest, OneLevelNestedGrouping) {
expectedDocHitInfo2.UpdateSection(/*section_id=*/0);
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
ElementsAre(expectedDocHitInfo2, expectedDocHitInfo1));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(3));
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms[""],
- UnorderedElementsAre("puppy", "kitten", "cat"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(3));
- }
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""],
+ UnorderedElementsAre("puppy", "kitten", "cat"));
}
TEST_P(QueryProcessorTest, ExcludeTerm) {
@@ -1516,7 +1514,10 @@ TEST_P(QueryProcessorTest, ExcludeTerm) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that they'll bump the
@@ -1551,7 +1552,8 @@ TEST_P(QueryProcessorTest, ExcludeTerm) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(search_spec,
- ScoringSpecProto::RankingStrategy::NONE));
+ ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// We don't know have the section mask to indicate what section "world"
// came. It doesn't matter which section it was in since the query doesn't
@@ -1567,7 +1569,10 @@ TEST_P(QueryProcessorTest, ExcludeNonexistentTerm) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that they'll bump the
@@ -1601,7 +1606,8 @@ TEST_P(QueryProcessorTest, ExcludeNonexistentTerm) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(search_spec,
- ScoringSpecProto::RankingStrategy::NONE));
+ ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
@@ -1616,7 +1622,10 @@ TEST_P(QueryProcessorTest, ExcludeAnd) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that they'll bump the
@@ -1659,19 +1668,16 @@ TEST_P(QueryProcessorTest, ExcludeAnd) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// The query is interpreted as "exclude all documents that have animal,
// and exclude all documents that have cat". Since both documents contain
// animal, there are no results.
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
+ EXPECT_THAT(results.query_term_iterators, IsEmpty());
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- EXPECT_THAT(results.query_terms, IsEmpty());
- EXPECT_THAT(results.query_term_iterators, IsEmpty());
- }
+ EXPECT_THAT(results.query_terms, IsEmpty());
}
{
@@ -1683,20 +1689,17 @@ TEST_P(QueryProcessorTest, ExcludeAnd) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// The query is interpreted as "exclude all documents that have animal,
// and include all documents that have cat". Since both documents contain
// animal, there are no results.
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("cat"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- }
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("cat"));
}
}
@@ -1705,7 +1708,10 @@ TEST_P(QueryProcessorTest, ExcludeOr) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that they'll bump the
@@ -1748,20 +1754,17 @@ TEST_P(QueryProcessorTest, ExcludeOr) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// We don't have a section mask indicating which sections in this document
// matched the query since it's not based on section-term matching. It's
// more based on the fact that the query excluded all the other documents.
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
ElementsAre(DocHitInfo(document_id1, kSectionIdMaskNone)));
+ EXPECT_THAT(results.query_term_iterators, IsEmpty());
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- EXPECT_THAT(results.query_terms, IsEmpty());
- EXPECT_THAT(results.query_term_iterators, IsEmpty());
- }
+ EXPECT_THAT(results.query_terms, IsEmpty());
}
{
@@ -1773,7 +1776,8 @@ TEST_P(QueryProcessorTest, ExcludeOr) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
DocHitInfo expectedDocHitInfo1(document_id1);
@@ -1783,12 +1787,8 @@ TEST_P(QueryProcessorTest, ExcludeOr) {
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
ElementsAre(expectedDocHitInfo2, expectedDocHitInfo1));
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
- }
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
}
}
@@ -1797,7 +1797,10 @@ TEST_P(QueryProcessorTest, WithoutTermFrequency) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -1851,7 +1854,8 @@ TEST_P(QueryProcessorTest, WithoutTermFrequency) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(search_spec,
- ScoringSpecProto::RankingStrategy::NONE));
+ ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
// The first Document to match (Document 2) matches on 'animal' AND 'kitten'
@@ -1860,21 +1864,16 @@ TEST_P(QueryProcessorTest, WithoutTermFrequency) {
EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
section_id_mask);
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- // Since need_hit_term_frequency is false, the expected term frequency for
- // the section with the hit should be 0.
- std::unordered_map<SectionId, Hit::TermFrequency>
- expected_section_ids_tf_map = {{section_id, 0}};
- std::vector<TermMatchInfo> matched_terms_stats;
- results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
- EXPECT_THAT(
- matched_terms_stats,
- ElementsAre(
- EqualsTermMatchInfo("animal", expected_section_ids_tf_map),
- EqualsTermMatchInfo("kitten", expected_section_ids_tf_map)));
- }
+ // Since need_hit_term_frequency is false, the expected term frequency for
+ // the section with the hit should be 0.
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 0}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(EqualsTermMatchInfo("animal", expected_section_ids_tf_map),
+ EqualsTermMatchInfo("kitten", expected_section_ids_tf_map)));
// The second Document to match (Document 1) matches on 'animal' AND 'puppy'
ASSERT_THAT(results.root_iterator->Advance(), IsOk());
@@ -1882,21 +1881,15 @@ TEST_P(QueryProcessorTest, WithoutTermFrequency) {
EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
section_id_mask);
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- std::unordered_map<SectionId, Hit::TermFrequency>
- expected_section_ids_tf_map = {{section_id, 0}};
- std::vector<TermMatchInfo> matched_terms_stats;
- results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
- EXPECT_THAT(
- matched_terms_stats,
- ElementsAre(EqualsTermMatchInfo("animal", expected_section_ids_tf_map),
- EqualsTermMatchInfo("puppy", expected_section_ids_tf_map)));
+ matched_terms_stats.clear();
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(EqualsTermMatchInfo("animal", expected_section_ids_tf_map),
+ EqualsTermMatchInfo("puppy", expected_section_ids_tf_map)));
- // This should be empty because ranking_strategy != RELEVANCE_SCORE
- EXPECT_THAT(results.query_term_iterators, IsEmpty());
- }
+ // This should be empty because ranking_strategy != RELEVANCE_SCORE
+ EXPECT_THAT(results.query_term_iterators, IsEmpty());
}
TEST_P(QueryProcessorTest, DeletedFilter) {
@@ -1904,7 +1897,10 @@ TEST_P(QueryProcessorTest, DeletedFilter) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -1919,7 +1915,9 @@ TEST_P(QueryProcessorTest, DeletedFilter) {
.SetKey("namespace", "2")
.SetSchema("email")
.Build()));
- EXPECT_THAT(document_store_->Delete("namespace", "1"), IsOk());
+ EXPECT_THAT(document_store_->Delete("namespace", "1",
+ fake_clock_.GetSystemTimeMilliseconds()),
+ IsOk());
// Populate the index
SectionId section_id = 0;
@@ -1947,21 +1945,18 @@ TEST_P(QueryProcessorTest, DeletedFilter) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
DocHitInfo expectedDocHitInfo(document_id2);
expectedDocHitInfo.UpdateSection(/*section_id=*/0);
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
ElementsAre(expectedDocHitInfo));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- }
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
}
TEST_P(QueryProcessorTest, NamespaceFilter) {
@@ -1969,7 +1964,10 @@ TEST_P(QueryProcessorTest, NamespaceFilter) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -2012,21 +2010,18 @@ TEST_P(QueryProcessorTest, NamespaceFilter) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
DocHitInfo expectedDocHitInfo(document_id1);
expectedDocHitInfo.UpdateSection(/*section_id=*/0);
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
ElementsAre(expectedDocHitInfo));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- }
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
}
TEST_P(QueryProcessorTest, SchemaTypeFilter) {
@@ -2036,7 +2031,10 @@ TEST_P(QueryProcessorTest, SchemaTypeFilter) {
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.AddType(SchemaTypeConfigBuilder().SetType("message"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -2075,21 +2073,18 @@ TEST_P(QueryProcessorTest, SchemaTypeFilter) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
DocHitInfo expectedDocHitInfo(document_id1);
expectedDocHitInfo.UpdateSection(/*section_id=*/0);
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
ElementsAre(expectedDocHitInfo));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- }
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
}
TEST_P(QueryProcessorTest, PropertyFilterForOneDocument) {
@@ -2104,7 +2099,10 @@ TEST_P(QueryProcessorTest, PropertyFilterForOneDocument) {
.Build();
// First and only indexed property, so it gets a section_id of 0
int subject_section_id = 0;
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -2132,21 +2130,18 @@ TEST_P(QueryProcessorTest, PropertyFilterForOneDocument) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
DocHitInfo expectedDocHitInfo(document_id);
expectedDocHitInfo.UpdateSection(/*section_id=*/0);
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
ElementsAre(expectedDocHitInfo));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms["subject"], UnorderedElementsAre("animal"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- }
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms["subject"], UnorderedElementsAre("animal"));
}
TEST_P(QueryProcessorTest, PropertyFilterAcrossSchemaTypes) {
@@ -2177,7 +2172,10 @@ TEST_P(QueryProcessorTest, PropertyFilterAcrossSchemaTypes) {
// alphabetically.
int email_foo_section_id = 1;
int message_foo_section_id = 0;
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -2215,7 +2213,8 @@ TEST_P(QueryProcessorTest, PropertyFilterAcrossSchemaTypes) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Ordered by descending DocumentId, so message comes first since it was
// inserted last
@@ -2225,14 +2224,10 @@ TEST_P(QueryProcessorTest, PropertyFilterAcrossSchemaTypes) {
expectedDocHitInfo2.UpdateSection(/*section_id=*/1);
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
ElementsAre(expectedDocHitInfo1, expectedDocHitInfo2));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- }
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
}
TEST_P(QueryProcessorTest, PropertyFilterWithinSchemaType) {
@@ -2251,7 +2246,10 @@ TEST_P(QueryProcessorTest, PropertyFilterWithinSchemaType) {
.Build();
int email_foo_section_id = 0;
int message_foo_section_id = 0;
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -2291,7 +2289,8 @@ TEST_P(QueryProcessorTest, PropertyFilterWithinSchemaType) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Shouldn't include the message document since we're only looking at email
// types
@@ -2299,13 +2298,10 @@ TEST_P(QueryProcessorTest, PropertyFilterWithinSchemaType) {
expectedDocHitInfo.UpdateSection(/*section_id=*/0);
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
ElementsAre(expectedDocHitInfo));
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- }
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
}
TEST_P(QueryProcessorTest, NestedPropertyFilter) {
@@ -2342,7 +2338,10 @@ TEST_P(QueryProcessorTest, NestedPropertyFilter) {
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -2371,7 +2370,8 @@ TEST_P(QueryProcessorTest, NestedPropertyFilter) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Even though the section id is the same, we should be able to tell that it
// doesn't match to the name of the section filter
@@ -2379,15 +2379,11 @@ TEST_P(QueryProcessorTest, NestedPropertyFilter) {
expectedDocHitInfo1.UpdateSection(/*section_id=*/0);
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
ElementsAre(expectedDocHitInfo1));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms["foo.bar.baz"],
- UnorderedElementsAre("animal"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- }
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms["foo.bar.baz"],
+ UnorderedElementsAre("animal"));
}
TEST_P(QueryProcessorTest, PropertyFilterRespectsDifferentSectionIds) {
@@ -2407,7 +2403,10 @@ TEST_P(QueryProcessorTest, PropertyFilterRespectsDifferentSectionIds) {
.Build();
int email_foo_section_id = 0;
int message_foo_section_id = 0;
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -2448,7 +2447,8 @@ TEST_P(QueryProcessorTest, PropertyFilterRespectsDifferentSectionIds) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Even though the section id is the same, we should be able to tell that it
// doesn't match to the name of the section filter
@@ -2456,14 +2456,10 @@ TEST_P(QueryProcessorTest, PropertyFilterRespectsDifferentSectionIds) {
expectedDocHitInfo.UpdateSection(/*section_id=*/0);
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
ElementsAre(expectedDocHitInfo));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- }
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
}
TEST_P(QueryProcessorTest, NonexistentPropertyFilterReturnsEmptyResults) {
@@ -2471,7 +2467,10 @@ TEST_P(QueryProcessorTest, NonexistentPropertyFilterReturnsEmptyResults) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -2500,20 +2499,17 @@ TEST_P(QueryProcessorTest, NonexistentPropertyFilterReturnsEmptyResults) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Even though the section id is the same, we should be able to tell that it
// doesn't match to the name of the section filter
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms["nonexistent"],
- UnorderedElementsAre("animal"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- }
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms["nonexistent"],
+ UnorderedElementsAre("animal"));
}
TEST_P(QueryProcessorTest, UnindexedPropertyFilterReturnsEmptyResults) {
@@ -2529,7 +2525,10 @@ TEST_P(QueryProcessorTest, UnindexedPropertyFilterReturnsEmptyResults) {
.SetDataType(TYPE_STRING)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -2558,19 +2557,16 @@ TEST_P(QueryProcessorTest, UnindexedPropertyFilterReturnsEmptyResults) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Even though the section id is the same, we should be able to tell that it
// doesn't match to the name of the section filter
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- EXPECT_THAT(results.query_terms, SizeIs(1));
- EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- }
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
}
TEST_P(QueryProcessorTest, PropertyFilterTermAndUnrestrictedTerm) {
@@ -2590,7 +2586,10 @@ TEST_P(QueryProcessorTest, PropertyFilterTermAndUnrestrictedTerm) {
.Build();
int email_foo_section_id = 0;
int message_foo_section_id = 0;
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -2631,7 +2630,8 @@ TEST_P(QueryProcessorTest, PropertyFilterTermAndUnrestrictedTerm) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Ordered by descending DocumentId, so message comes first since it was
// inserted last
@@ -2641,15 +2641,266 @@ TEST_P(QueryProcessorTest, PropertyFilterTermAndUnrestrictedTerm) {
expectedDocHitInfo2.UpdateSection(/*section_id=*/0);
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
ElementsAre(expectedDocHitInfo1, expectedDocHitInfo2));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(2));
- // TODO(b/208654892) Support Query Terms with advanced query
- if (GetParam() !=
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
- EXPECT_THAT(results.query_terms, SizeIs(2));
- EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("cat"));
- EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
- EXPECT_THAT(results.query_term_iterators, SizeIs(2));
- }
+ EXPECT_THAT(results.query_terms, SizeIs(2));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("cat"));
+ EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
+}
+
+TEST_P(QueryProcessorTest, TypePropertyFilter) {
+ // Create the schema and document store
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("bar")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("baz")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("bar")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("baz")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ // SectionIds are assigned in ascending order per schema type,
+ // alphabetically.
+ int email_bar_section_id = 0;
+ int email_baz_section_id = 1;
+ int email_foo_section_id = 2;
+ int message_bar_section_id = 0;
+ int message_baz_section_id = 1;
+ int message_foo_section_id = 2;
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // schema types populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("message")
+ .Build()));
+
+ // Poplate the index
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+ // Email document has content "animal" in all sections
+ ASSERT_THAT(AddTokenToIndex(email_document_id, email_foo_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(email_document_id, email_bar_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(email_document_id, email_baz_section_id,
+ term_match_type, "animal"),
+ IsOk());
+
+ // Message document has content "animal" in all sections
+ ASSERT_THAT(AddTokenToIndex(message_document_id, message_foo_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(message_document_id, message_bar_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(message_document_id, message_baz_section_id,
+ term_match_type, "animal"),
+ IsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("animal");
+ search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
+
+ // email has property filters for foo and baz properties
+ TypePropertyMask *email_mask = search_spec.add_type_property_filters();
+ email_mask->set_schema_type("email");
+ email_mask->add_paths("foo");
+ email_mask->add_paths("baz");
+
+ // message has property filters for bar and baz properties
+ TypePropertyMask *message_mask = search_spec.add_type_property_filters();
+ message_mask->set_schema_type("message");
+ message_mask->add_paths("bar");
+ message_mask->add_paths("baz");
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+
+ // Ordered by descending DocumentId, so message comes first since it was
+ // inserted last
+ DocHitInfo expected_doc_hit_info1(message_document_id);
+ expected_doc_hit_info1.UpdateSection(message_bar_section_id);
+ expected_doc_hit_info1.UpdateSection(message_baz_section_id);
+ DocHitInfo expected_doc_hit_info2(email_document_id);
+ expected_doc_hit_info2.UpdateSection(email_foo_section_id);
+ expected_doc_hit_info2.UpdateSection(email_baz_section_id);
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(expected_doc_hit_info1, expected_doc_hit_info2));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
+}
+
+TEST_P(QueryProcessorTest, TypePropertyFilterWithSectionRestrict) {
+ // Create the schema and document store
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("bar")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("baz")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("bar")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("baz")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ // SectionIds are assigned in ascending order per schema type,
+ // alphabetically.
+ int email_bar_section_id = 0;
+ int email_baz_section_id = 1;
+ int email_foo_section_id = 2;
+ int message_bar_section_id = 0;
+ int message_baz_section_id = 1;
+ int message_foo_section_id = 2;
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // schema types populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("message")
+ .Build()));
+
+ // Poplate the index
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+ // Email document has content "animal" in all sections
+ ASSERT_THAT(AddTokenToIndex(email_document_id, email_foo_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(email_document_id, email_bar_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(email_document_id, email_baz_section_id,
+ term_match_type, "animal"),
+ IsOk());
+
+ // Message document has content "animal" in all sections
+ ASSERT_THAT(AddTokenToIndex(message_document_id, message_foo_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(message_document_id, message_bar_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(message_document_id, message_baz_section_id,
+ term_match_type, "animal"),
+ IsOk());
+
+ SearchSpecProto search_spec;
+ // Create a section filter '<section name>:<query term>'
+ search_spec.set_query("foo:animal");
+ search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
+
+ // email has property filters for foo and baz properties
+ TypePropertyMask *email_mask = search_spec.add_type_property_filters();
+ email_mask->set_schema_type("email");
+ email_mask->add_paths("foo");
+ email_mask->add_paths("baz");
+
+ // message has property filters for bar and baz properties
+ TypePropertyMask *message_mask = search_spec.add_type_property_filters();
+ message_mask->set_schema_type("message");
+ message_mask->add_paths("bar");
+ message_mask->add_paths("baz");
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+
+ // Only hits in sections allowed by both the property filters and section
+ // restricts should be returned. Message document should not be returned since
+ // section foo specified in the section restrict is not allowed by the
+ // property filters.
+ DocHitInfo expected_doc_hit_info(email_document_id);
+ expected_doc_hit_info.UpdateSection(email_foo_section_id);
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(expected_doc_hit_info));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
}
TEST_P(QueryProcessorTest, DocumentBeforeTtlNotFilteredOut) {
@@ -2657,7 +2908,10 @@ TEST_P(QueryProcessorTest, DocumentBeforeTtlNotFilteredOut) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// Arbitrary value, just has to be less than the document's creation
// timestamp + ttl
@@ -2666,8 +2920,8 @@ TEST_P(QueryProcessorTest, DocumentBeforeTtlNotFilteredOut) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, store_dir_, &fake_clock,
+ schema_store_.get()));
document_store_ = std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -2702,7 +2956,8 @@ TEST_P(QueryProcessorTest, DocumentBeforeTtlNotFilteredOut) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
local_query_processor->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::NONE));
+ search_spec, ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
DocHitInfo expectedDocHitInfo(document_id);
expectedDocHitInfo.UpdateSection(/*section_id=*/0);
@@ -2715,17 +2970,20 @@ TEST_P(QueryProcessorTest, DocumentPastTtlFilteredOut) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// Arbitrary value, just has to be greater than the document's creation
// timestamp + ttl
- FakeClock fake_clock;
- fake_clock.SetSystemTimeMilliseconds(200);
+ FakeClock fake_clock_local;
+ fake_clock_local.SetSystemTimeMilliseconds(200);
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, store_dir_, &fake_clock_local,
+ schema_store_.get()));
document_store_ = std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -2760,7 +3018,8 @@ TEST_P(QueryProcessorTest, DocumentPastTtlFilteredOut) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
local_query_processor->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::NONE));
+ search_spec, ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_local.GetSystemTimeMilliseconds()));
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
}
@@ -2788,7 +3047,10 @@ TEST_P(QueryProcessorTest, NumericFilter) {
// SectionIds are assigned alphabetically
SectionId cost_section_id = 0;
SectionId price_section_id = 1;
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_one_id,
@@ -2827,7 +3089,8 @@ TEST_P(QueryProcessorTest, NumericFilter) {
ICING_ASSERT_OK_AND_ASSIGN(
QueryResults results,
query_processor_->ParseSearch(search_spec,
- ScoringSpecProto::RankingStrategy::NONE));
+ ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
ElementsAre(EqualsDocHitInfo(
document_one_id, std::vector<SectionId>{price_section_id})));
@@ -2835,7 +3098,8 @@ TEST_P(QueryProcessorTest, NumericFilter) {
search_spec.set_query("price == 25");
ICING_ASSERT_OK_AND_ASSIGN(
results, query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::NONE));
+ search_spec, ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
ElementsAre(EqualsDocHitInfo(
document_two_id, std::vector<SectionId>{price_section_id})));
@@ -2843,13 +3107,15 @@ TEST_P(QueryProcessorTest, NumericFilter) {
search_spec.set_query("cost > 2");
ICING_ASSERT_OK_AND_ASSIGN(
results, query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::NONE));
+ search_spec, ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
search_spec.set_query("cost >= 2");
ICING_ASSERT_OK_AND_ASSIGN(
results, query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::NONE));
+ search_spec, ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
ElementsAre(EqualsDocHitInfo(
document_three_id, std::vector<SectionId>{cost_section_id})));
@@ -2857,7 +3123,8 @@ TEST_P(QueryProcessorTest, NumericFilter) {
search_spec.set_query("price <= 25");
ICING_ASSERT_OK_AND_ASSIGN(
results, query_processor_->ParseSearch(
- search_spec, ScoringSpecProto::RankingStrategy::NONE));
+ search_spec, ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(
GetDocHitInfos(results.root_iterator.get()),
ElementsAre(EqualsDocHitInfo(document_two_id,
@@ -2883,7 +3150,10 @@ TEST_P(QueryProcessorTest, NumericFilterWithoutEnablingFeatureFails) {
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
SectionId price_section_id = 0;
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_one_id,
@@ -2901,7 +3171,8 @@ TEST_P(QueryProcessorTest, NumericFilterWithoutEnablingFeatureFails) {
libtextclassifier3::StatusOr<QueryResults> result_or =
query_processor_->ParseSearch(search_spec,
- ScoringSpecProto::RankingStrategy::NONE);
+ ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(result_or,
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
diff --git a/icing/query/suggestion-processor.cc b/icing/query/suggestion-processor.cc
index af84d1c..eb86e3b 100644
--- a/icing/query/suggestion-processor.cc
+++ b/icing/query/suggestion-processor.cc
@@ -16,6 +16,9 @@
#include "icing/proto/schema.pb.h"
#include "icing/proto/search.pb.h"
+#include "icing/query/query-processor.h"
+#include "icing/store/document-id.h"
+#include "icing/store/suggestion-result-checker-impl.h"
#include "icing/tokenization/tokenizer-factory.h"
#include "icing/tokenization/tokenizer.h"
#include "icing/transform/normalizer.h"
@@ -25,63 +28,268 @@ namespace lib {
libtextclassifier3::StatusOr<std::unique_ptr<SuggestionProcessor>>
SuggestionProcessor::Create(Index* index,
+ const NumericIndex<int64_t>* numeric_index,
const LanguageSegmenter* language_segmenter,
- const Normalizer* normalizer) {
+ const Normalizer* normalizer,
+ const DocumentStore* document_store,
+ const SchemaStore* schema_store) {
ICING_RETURN_ERROR_IF_NULL(index);
+ ICING_RETURN_ERROR_IF_NULL(numeric_index);
ICING_RETURN_ERROR_IF_NULL(language_segmenter);
+ ICING_RETURN_ERROR_IF_NULL(normalizer);
+ ICING_RETURN_ERROR_IF_NULL(document_store);
+ ICING_RETURN_ERROR_IF_NULL(schema_store);
return std::unique_ptr<SuggestionProcessor>(
- new SuggestionProcessor(index, language_segmenter, normalizer));
+ new SuggestionProcessor(index, numeric_index, language_segmenter,
+ normalizer, document_store, schema_store));
+}
+
+libtextclassifier3::StatusOr<
+ std::unordered_map<NamespaceId, std::unordered_set<DocumentId>>>
+PopulateDocumentIdFilters(
+ const DocumentStore* document_store,
+ const icing::lib::SuggestionSpecProto& suggestion_spec,
+ const std::unordered_set<NamespaceId>& namespace_ids) {
+ std::unordered_map<NamespaceId, std::unordered_set<DocumentId>>
+ document_id_filter_map;
+ document_id_filter_map.reserve(suggestion_spec.document_uri_filters_size());
+ for (const NamespaceDocumentUriGroup& namespace_document_uri_group :
+ suggestion_spec.document_uri_filters()) {
+ auto namespace_id_or = document_store->GetNamespaceId(
+ namespace_document_uri_group.namespace_());
+ if (!namespace_id_or.ok()) {
+ // The current namespace doesn't exist.
+ continue;
+ }
+ NamespaceId namespace_id = namespace_id_or.ValueOrDie();
+ if (!namespace_ids.empty() &&
+ namespace_ids.find(namespace_id) == namespace_ids.end()) {
+ // The current namespace doesn't appear in the namespace filter.
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "The namespace : ", namespace_document_uri_group.namespace_(),
+ " appears in the document uri filter, but doesn't appear in the "
+ "namespace filter."));
+ }
+
+ if (namespace_document_uri_group.document_uris().empty()) {
+ // Client should use namespace filter to filter out all document under
+ // a namespace.
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "The namespace : ", namespace_document_uri_group.namespace_(),
+ " has empty document uri in the document uri filter. Please use the "
+ "namespace filter to exclude a namespace instead of the document uri "
+ "filter."));
+ }
+
+ // Translate namespace document Uris into document_ids
+ std::unordered_set<DocumentId> target_document_ids;
+ target_document_ids.reserve(
+ namespace_document_uri_group.document_uris_size());
+ for (std::string_view document_uri :
+ namespace_document_uri_group.document_uris()) {
+ auto document_id_or = document_store->GetDocumentId(
+ namespace_document_uri_group.namespace_(), document_uri);
+ if (!document_id_or.ok()) {
+ continue;
+ }
+ target_document_ids.insert(document_id_or.ValueOrDie());
+ }
+ document_id_filter_map.insert({namespace_id, target_document_ids});
+ }
+ return document_id_filter_map;
+}
+
+libtextclassifier3::StatusOr<std::unordered_map<SchemaTypeId, SectionIdMask>>
+PopulatePropertyFilters(
+ const SchemaStore* schema_store,
+ const icing::lib::SuggestionSpecProto& suggestion_spec,
+ const std::unordered_set<SchemaTypeId>& schema_type_ids) {
+ std::unordered_map<SchemaTypeId, SectionIdMask> property_filter_map;
+ property_filter_map.reserve(suggestion_spec.type_property_filters_size());
+ for (const TypePropertyMask& type_field_mask :
+ suggestion_spec.type_property_filters()) {
+ auto schema_type_id_or =
+ schema_store->GetSchemaTypeId(type_field_mask.schema_type());
+ if (!schema_type_id_or.ok()) {
+ // The current schema doesn't exist
+ continue;
+ }
+ SchemaTypeId schema_type_id = schema_type_id_or.ValueOrDie();
+
+ if (!schema_type_ids.empty() &&
+ schema_type_ids.find(schema_type_id) == schema_type_ids.end()) {
+ // The current schema type doesn't appear in the schema type filter.
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "The schema : ", type_field_mask.schema_type(),
+ " appears in the property filter, but doesn't appear in the schema"
+ " type filter."));
+ }
+
+ if (type_field_mask.paths().empty()) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "The schema type : ", type_field_mask.schema_type(),
+ " has empty path in the property filter. Please use the schema type"
+ " filter to exclude a schema type instead of the property filter."));
+ }
+
+ // Translate property paths into section id mask
+ SectionIdMask section_mask = kSectionIdMaskNone;
+ auto section_metadata_list_or =
+ schema_store->GetSectionMetadata(type_field_mask.schema_type());
+ if (!section_metadata_list_or.ok()) {
+ // The current schema doesn't has section metadata.
+ continue;
+ }
+ std::unordered_set<std::string> target_property_paths;
+ target_property_paths.reserve(type_field_mask.paths_size());
+ for (const std::string& target_property_path : type_field_mask.paths()) {
+ target_property_paths.insert(target_property_path);
+ }
+ const std::vector<SectionMetadata>* section_metadata_list =
+ section_metadata_list_or.ValueOrDie();
+ for (const SectionMetadata& section_metadata : *section_metadata_list) {
+ if (target_property_paths.find(section_metadata.path) !=
+ target_property_paths.end()) {
+ section_mask |= UINT64_C(1) << section_metadata.id;
+ }
+ }
+ property_filter_map.insert({schema_type_id, section_mask});
+ }
+ return property_filter_map;
}
libtextclassifier3::StatusOr<std::vector<TermMetadata>>
SuggestionProcessor::QuerySuggestions(
const icing::lib::SuggestionSpecProto& suggestion_spec,
- const SuggestionResultChecker* suggestion_result_checker) {
+ int64_t current_time_ms) {
// We use query tokenizer to tokenize the give prefix, and we only use the
// last token to be the suggestion prefix.
- ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<Tokenizer> tokenizer,
- tokenizer_factory::CreateIndexingTokenizer(
- StringIndexingConfig::TokenizerType::PLAIN, &language_segmenter_));
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> iterator,
- tokenizer->Tokenize(suggestion_spec.prefix()));
-
- // If there are previous tokens, they are prepended to the suggestion,
- // separated by spaces.
- std::string last_token;
- int token_start_pos;
- while (iterator->Advance()) {
- for (const Token& token : iterator->GetTokens()) {
- last_token = token.text;
- token_start_pos = token.text.data() - suggestion_spec.prefix().c_str();
+
+ // Populate target namespace filter.
+ std::unordered_set<NamespaceId> namespace_ids;
+ namespace_ids.reserve(suggestion_spec.namespace_filters_size());
+ for (std::string_view name_space : suggestion_spec.namespace_filters()) {
+ auto namespace_id_or = document_store_.GetNamespaceId(name_space);
+ if (!namespace_id_or.ok()) {
+ // The current namespace doesn't exist.
+ continue;
}
+ namespace_ids.insert(namespace_id_or.ValueOrDie());
+ }
+ if (namespace_ids.empty() && !suggestion_spec.namespace_filters().empty()) {
+ // None of desired namespace exists, we should return directly.
+ return std::vector<TermMetadata>();
+ }
+
+ // Populate target document id filter.
+ auto document_id_filter_map_or = PopulateDocumentIdFilters(
+ &document_store_, suggestion_spec, namespace_ids);
+ if (!document_id_filter_map_or.ok()) {
+ return std::move(document_id_filter_map_or).status();
+ }
+
+ std::unordered_map<NamespaceId, std::unordered_set<DocumentId>>
+ document_id_filter_map = document_id_filter_map_or.ValueOrDie();
+ if (document_id_filter_map.empty() &&
+ !suggestion_spec.document_uri_filters().empty()) {
+ // None of desired DocumentId exists, we should return directly.
+ return std::vector<TermMetadata>();
}
+ // Populate target schema type filter.
+ std::unordered_set<SchemaTypeId> schema_type_ids;
+ schema_type_ids.reserve(suggestion_spec.schema_type_filters_size());
+ for (std::string_view schema_type : suggestion_spec.schema_type_filters()) {
+ auto schema_type_id_or = schema_store_.GetSchemaTypeId(schema_type);
+ if (!schema_type_id_or.ok()) {
+ continue;
+ }
+ schema_type_ids.insert(schema_type_id_or.ValueOrDie());
+ }
+ if (schema_type_ids.empty() &&
+ !suggestion_spec.schema_type_filters().empty()) {
+ // None of desired schema type exists, we should return directly.
+ return std::vector<TermMetadata>();
+ }
+
+ // Populate target properties filter.
+ auto property_filter_map_or =
+ PopulatePropertyFilters(&schema_store_, suggestion_spec, schema_type_ids);
+ if (!property_filter_map_or.ok()) {
+ return std::move(property_filter_map_or).status();
+ }
+ std::unordered_map<SchemaTypeId, SectionIdMask> property_filter_map =
+ property_filter_map_or.ValueOrDie();
+
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<QueryProcessor> query_processor,
+ QueryProcessor::Create(&index_, &numeric_index_, &language_segmenter_,
+ &normalizer_, &document_store_, &schema_store_));
+
+ SearchSpecProto search_spec;
+ search_spec.set_query(suggestion_spec.prefix());
+ search_spec.set_term_match_type(
+ suggestion_spec.scoring_spec().scoring_match_type());
+ ICING_ASSIGN_OR_RETURN(
+ QueryResults query_results,
+ query_processor->ParseSearch(search_spec,
+ ScoringSpecProto::RankingStrategy::NONE,
+ current_time_ms));
+
+ ICING_ASSIGN_OR_RETURN(
+ DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(*query_results.root_iterator).TrimRightMostNode());
+
// If the position of the last token is not the end of the prefix, it means
// there should be some operator tokens after it and are ignored by the
// tokenizer.
- bool is_last_token = token_start_pos + last_token.length() >=
- suggestion_spec.prefix().length();
+ bool is_last_token =
+ trimmed_node.term_start_index_ + trimmed_node.unnormalized_term_length_ >=
+ suggestion_spec.prefix().length();
- if (!is_last_token || last_token.empty()) {
+ if (!is_last_token || trimmed_node.term_.empty()) {
// We don't have a valid last token, return early.
return std::vector<TermMetadata>();
}
+ // Populate the search base in document ids.
+ // Suggestions are only generated for the very last term,
+ // trimmed_node.iterator_ tracks search results for all previous terms. If it
+ // is null means there is no pervious term and we are generating suggetion for
+ // a single term.
+ std::unordered_set<DocumentId> search_base;
+ if (trimmed_node.iterator_ != nullptr) {
+ while (trimmed_node.iterator_->Advance().ok()) {
+ search_base.insert(trimmed_node.iterator_->doc_hit_info().document_id());
+ }
+ if (search_base.empty()) {
+ // Nothing matches the previous terms in the query. There are no valid
+ // suggestions to make, we should return directly.
+ return std::vector<TermMetadata>();
+ }
+ }
+
+ // Create result checker based on given filters.
+ SuggestionResultCheckerImpl suggestion_result_checker_impl(
+ &document_store_, &schema_store_, std::move(namespace_ids),
+ std::move(document_id_filter_map), std::move(schema_type_ids),
+ std::move(property_filter_map), std::move(trimmed_node.target_section_),
+ std::move(search_base), current_time_ms);
+ // TODO(b/228240987) support generate suggestion and append suffix for advance
+ // query and function call.
std::string query_prefix =
- suggestion_spec.prefix().substr(0, token_start_pos);
+ suggestion_spec.prefix().substr(0, trimmed_node.term_start_index_);
// Run suggestion based on given SuggestionSpec.
// Normalize token text to lowercase since all tokens in the lexicon are
// lowercase.
ICING_ASSIGN_OR_RETURN(
std::vector<TermMetadata> terms,
index_.FindTermsByPrefix(
- normalizer_.NormalizeTerm(last_token),
- suggestion_spec.num_to_return(),
+ trimmed_node.term_, suggestion_spec.num_to_return(),
suggestion_spec.scoring_spec().scoring_match_type(),
- suggestion_spec.scoring_spec().rank_by(), suggestion_result_checker));
-
+ suggestion_spec.scoring_spec().rank_by(),
+ &suggestion_result_checker_impl));
for (TermMetadata& term : terms) {
term.content = query_prefix + term.content;
}
@@ -89,11 +297,15 @@ SuggestionProcessor::QuerySuggestions(
}
SuggestionProcessor::SuggestionProcessor(
- Index* index, const LanguageSegmenter* language_segmenter,
- const Normalizer* normalizer)
+ Index* index, const NumericIndex<int64_t>* numeric_index,
+ const LanguageSegmenter* language_segmenter, const Normalizer* normalizer,
+ const DocumentStore* document_store, const SchemaStore* schema_store)
: index_(*index),
+ numeric_index_(*numeric_index),
language_segmenter_(*language_segmenter),
- normalizer_(*normalizer) {}
+ normalizer_(*normalizer),
+ document_store_(*document_store),
+ schema_store_(*schema_store) {}
} // namespace lib
} // namespace icing
diff --git a/icing/query/suggestion-processor.h b/icing/query/suggestion-processor.h
index 97ced90..e100031 100644
--- a/icing/query/suggestion-processor.h
+++ b/icing/query/suggestion-processor.h
@@ -17,7 +17,10 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/index/index.h"
+#include "icing/index/numeric/numeric-index.h"
#include "icing/proto/search.pb.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-store.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer.h"
@@ -37,8 +40,10 @@ class SuggestionProcessor {
// An SuggestionProcessor on success
// FAILED_PRECONDITION if any of the pointers is null.
static libtextclassifier3::StatusOr<std::unique_ptr<SuggestionProcessor>>
- Create(Index* index, const LanguageSegmenter* language_segmenter,
- const Normalizer* normalizer);
+ Create(Index* index, const NumericIndex<int64_t>* numeric_index,
+ const LanguageSegmenter* language_segmenter,
+ const Normalizer* normalizer, const DocumentStore* document_store,
+ const SchemaStore* schema_store);
// Query suggestions based on the given SuggestionSpecProto.
//
@@ -47,19 +52,24 @@ class SuggestionProcessor {
// - One vector that represents the entire TermMetadata
// INTERNAL_ERROR on all other errors
libtextclassifier3::StatusOr<std::vector<TermMetadata>> QuerySuggestions(
- const SuggestionSpecProto& suggestion_spec,
- const SuggestionResultChecker* suggestion_result_checker);
+ const SuggestionSpecProto& suggestion_spec, int64_t current_time_ms);
private:
explicit SuggestionProcessor(Index* index,
+ const NumericIndex<int64_t>* numeric_index,
const LanguageSegmenter* language_segmenter,
- const Normalizer* normalizer);
+ const Normalizer* normalizer,
+ const DocumentStore* document_store,
+ const SchemaStore* schema_store);
// Not const because we could modify/sort the TermMetaData buffer in the lite
// index.
Index& index_;
+ const NumericIndex<int64_t>& numeric_index_;
const LanguageSegmenter& language_segmenter_;
const Normalizer& normalizer_;
+ const DocumentStore& document_store_;
+ const SchemaStore& schema_store_;
};
} // namespace lib
diff --git a/icing/query/suggestion-processor_test.cc b/icing/query/suggestion-processor_test.cc
index d541620..9f9094d 100644
--- a/icing/query/suggestion-processor_test.cc
+++ b/icing/query/suggestion-processor_test.cc
@@ -14,9 +14,15 @@
#include "icing/query/suggestion-processor.h"
+#include <string>
+#include <vector>
+
#include "gmock/gmock.h"
+#include "icing/document-builder.h"
+#include "icing/index/numeric/dummy-numeric-index.h"
+#include "icing/index/term-metadata.h"
+#include "icing/schema-builder.h"
#include "icing/store/document-store.h"
-#include "icing/testing/always-true-suggestion-result-checker-impl.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/icu-data-file-helper.h"
@@ -34,18 +40,32 @@ namespace {
using ::testing::IsEmpty;
using ::testing::Test;
+using ::testing::UnorderedElementsAre;
+
+std::vector<std::string> RetrieveSuggestionsText(
+ const std::vector<TermMetadata>& terms) {
+ std::vector<std::string> suggestions;
+ suggestions.reserve(terms.size());
+ for (const TermMetadata& term : terms) {
+ suggestions.push_back(term.content);
+ }
+ return suggestions;
+}
class SuggestionProcessorTest : public Test {
protected:
SuggestionProcessorTest()
: test_dir_(GetTestTempDir() + "/icing"),
store_dir_(test_dir_ + "/store"),
- index_dir_(test_dir_ + "/index") {}
+ schema_store_dir_(test_dir_ + "/schema_store"),
+ index_dir_(test_dir_ + "/index"),
+ numeric_index_dir_(test_dir_ + "/numeric_index") {}
void SetUp() override {
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
filesystem_.CreateDirectoryRecursively(index_dir_.c_str());
filesystem_.CreateDirectoryRecursively(store_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
// If we've specified using the reverse-JNI method for segmentation (i.e.
@@ -59,10 +79,32 @@ class SuggestionProcessorTest : public Test {
GetTestFilePath("icing/icu.dat")));
}
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, store_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ document_store_ = std::move(create_result.document_store);
+
Index::Options options(index_dir_,
- /*index_merge_size=*/1024 * 1024);
+ /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
ICING_ASSERT_OK_AND_ASSIGN(
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
+ // TODO(b/249829533): switch to use persistent numeric index.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ numeric_index_,
+ DummyNumericIndex<int64_t>::Create(filesystem_, numeric_index_dir_));
language_segmenter_factory::SegmenterOptions segmenter_options(
ULOC_US, jni_cache_.get());
@@ -74,13 +116,10 @@ class SuggestionProcessorTest : public Test {
/*max_term_byte_size=*/1000));
ICING_ASSERT_OK_AND_ASSIGN(
- schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
-
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ suggestion_processor_,
+ SuggestionProcessor::Create(
+ index_.get(), numeric_index_.get(), language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(), schema_store_.get()));
}
libtextclassifier3::Status AddTokenToIndex(
@@ -93,233 +132,588 @@ class SuggestionProcessorTest : public Test {
}
void TearDown() override {
+ document_store_.reset();
+ schema_store_.reset();
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
}
Filesystem filesystem_;
const std::string test_dir_;
const std::string store_dir_;
+ const std::string schema_store_dir_;
private:
IcingFilesystem icing_filesystem_;
const std::string index_dir_;
+ const std::string numeric_index_dir_;
protected:
std::unique_ptr<Index> index_;
+ std::unique_ptr<NumericIndex<int64_t>> numeric_index_;
std::unique_ptr<LanguageSegmenter> language_segmenter_;
std::unique_ptr<Normalizer> normalizer_;
FakeClock fake_clock_;
std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> document_store_;
std::unique_ptr<const JniCache> jni_cache_ = GetTestJniCache();
+ std::unique_ptr<SuggestionProcessor> suggestion_processor_;
};
-constexpr DocumentId kDocumentId0 = 0;
constexpr SectionId kSectionId2 = 2;
-TEST_F(SuggestionProcessorTest, PrependedPrefixTokenTest) {
- ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
+TEST_F(SuggestionProcessorTest, MultipleTermsTest_And) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId1,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "2")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "foo"),
IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "bar"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
+ TermMatchType::EXACT_ONLY, "fool"),
+ IsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("bar f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SuggestionProcessor> suggestion_processor,
- SuggestionProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get()));
+ std::vector<TermMetadata> terms,
+ suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(RetrieveSuggestionsText(terms), UnorderedElementsAre("bar foo"));
+}
+
+TEST_F(SuggestionProcessorTest, MultipleTermsTest_AndNary) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId1,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "2")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "bar"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "cat"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
+ TermMatchType::EXACT_ONLY, "fool"),
+ IsOk());
SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix(
- "prefix token should be prepended to the suggestion f");
+ suggestion_spec.set_prefix("bar cat f");
suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
- AlwaysTrueSuggestionResultCheckerImpl impl;
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
- suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
- EXPECT_THAT(terms.at(0).content,
- "prefix token should be prepended to the suggestion foo");
+ suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(RetrieveSuggestionsText(terms),
+ UnorderedElementsAre("bar cat foo"));
}
-TEST_F(SuggestionProcessorTest, NonExistentPrefixTest) {
- ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
+TEST_F(SuggestionProcessorTest, MultipleTermsTest_Or) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId1,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "2")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "fo"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "bar"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
TermMatchType::EXACT_ONLY, "foo"),
IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
+ TermMatchType::EXACT_ONLY, "cat"),
+ IsOk());
+
+ // Search for "(bar OR cat) AND f" both document1 "bar fo" and document2 "cat
+ // foo" could match.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("bar OR cat f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SuggestionProcessor> suggestion_processor,
- SuggestionProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get()));
+ std::vector<TermMetadata> terms,
+ suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(RetrieveSuggestionsText(terms),
+ UnorderedElementsAre("bar OR cat fo", "bar OR cat foo"));
+}
+
+TEST_F(SuggestionProcessorTest, MultipleTermsTest_OrNary) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId1,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "2")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId2,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "3")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "fo"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "bar"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
+ TermMatchType::EXACT_ONLY, "cat"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId2, kSectionId2,
+ TermMatchType::EXACT_ONLY, "fool"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId2, kSectionId2,
+ TermMatchType::EXACT_ONLY, "lot"),
+ IsOk());
SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("nonExistTerm");
+ // Search for "((bar OR cat) OR lot) AND f"
+ suggestion_spec.set_prefix("bar OR cat OR lot f");
suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
- AlwaysTrueSuggestionResultCheckerImpl impl;
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
- suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
+ suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ // "fo" in document1, "foo" in document2 and "fool" in document3 could match.
+ EXPECT_THAT(
+ RetrieveSuggestionsText(terms),
+ UnorderedElementsAre("bar OR cat OR lot fo", "bar OR cat OR lot foo",
+ "bar OR cat OR lot fool"));
+}
- EXPECT_THAT(terms, IsEmpty());
+TEST_F(SuggestionProcessorTest, MultipleTermsTest_NormalizedTerm) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId1,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "2")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "bar"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
+ TermMatchType::EXACT_ONLY, "fool"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
+ TermMatchType::EXACT_ONLY, "bar"),
+ IsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ // Search for "bar AND FO"
+ suggestion_spec.set_prefix("bar FO");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<TermMetadata> terms,
+ suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ // The term is normalized.
+ EXPECT_THAT(RetrieveSuggestionsText(terms),
+ UnorderedElementsAre("bar foo", "bar fool"));
+
+ // Search for "bar AND ḞÖ"
+ suggestion_spec.set_prefix("bar ḞÖ");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ terms, suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ // The term is normalized.
+ EXPECT_THAT(RetrieveSuggestionsText(terms),
+ UnorderedElementsAre("bar foo", "bar fool"));
}
-TEST_F(SuggestionProcessorTest, PrefixTrailingSpaceTest) {
- ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
+TEST_F(SuggestionProcessorTest, NonExistentPrefixTest) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "foo"),
IsOk());
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("nonExistTerm");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SuggestionProcessor> suggestion_processor,
- SuggestionProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get()));
+ std::vector<TermMetadata> terms,
+ suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(terms, IsEmpty());
+}
+
+TEST_F(SuggestionProcessorTest, PrefixTrailingSpaceTest) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
SuggestionSpecProto suggestion_spec;
suggestion_spec.set_prefix("f ");
suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
- AlwaysTrueSuggestionResultCheckerImpl impl;
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
- suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
-
+ suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(terms, IsEmpty());
}
TEST_F(SuggestionProcessorTest, NormalizePrefixTest) {
- ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
- TermMatchType::EXACT_ONLY, "foo"),
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOk());
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SuggestionProcessor> suggestion_processor,
- SuggestionProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get()));
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
SuggestionSpecProto suggestion_spec;
suggestion_spec.set_prefix("F");
suggestion_spec.set_num_to_return(10);
-
- AlwaysTrueSuggestionResultCheckerImpl impl;
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
- suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
- EXPECT_THAT(terms.at(0).content, "foo");
+ suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(RetrieveSuggestionsText(terms), UnorderedElementsAre("foo"));
suggestion_spec.set_prefix("fO");
ICING_ASSERT_OK_AND_ASSIGN(
- terms, suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
- EXPECT_THAT(terms.at(0).content, "foo");
+ terms, suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(RetrieveSuggestionsText(terms), UnorderedElementsAre("foo"));
suggestion_spec.set_prefix("Fo");
ICING_ASSERT_OK_AND_ASSIGN(
- terms, suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
- EXPECT_THAT(terms.at(0).content, "foo");
+ terms, suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(RetrieveSuggestionsText(terms), UnorderedElementsAre("foo"));
suggestion_spec.set_prefix("FO");
ICING_ASSERT_OK_AND_ASSIGN(
- terms, suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
- EXPECT_THAT(terms.at(0).content, "foo");
+ terms, suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(RetrieveSuggestionsText(terms), UnorderedElementsAre("foo"));
}
-TEST_F(SuggestionProcessorTest, OrOperatorPrefixTest) {
- ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
- TermMatchType::EXACT_ONLY, "foo"),
- IsOk());
- ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
- TermMatchType::EXACT_ONLY, "original"),
+TEST_F(SuggestionProcessorTest, ParenthesesOperatorPrefixTest) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOk());
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SuggestionProcessor> suggestion_processor,
- SuggestionProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get()));
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f OR");
- suggestion_spec.set_num_to_return(10);
-
- AlwaysTrueSuggestionResultCheckerImpl impl;
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<TermMetadata> terms,
- suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
-
- // Last Operator token will be used to query suggestion
- EXPECT_THAT(terms.at(0).content, "f original");
-}
-
-TEST_F(SuggestionProcessorTest, ParenthesesOperatorPrefixTest) {
- ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "foo"),
IsOk());
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SuggestionProcessor> suggestion_processor,
- SuggestionProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get()));
SuggestionSpecProto suggestion_spec;
suggestion_spec.set_prefix("{f}");
suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
- AlwaysTrueSuggestionResultCheckerImpl impl;
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
- suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
+ suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(terms, IsEmpty());
suggestion_spec.set_prefix("[f]");
ICING_ASSERT_OK_AND_ASSIGN(
- terms, suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
+ terms, suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(terms, IsEmpty());
suggestion_spec.set_prefix("(f)");
ICING_ASSERT_OK_AND_ASSIGN(
- terms, suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
+ terms, suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(terms, IsEmpty());
}
TEST_F(SuggestionProcessorTest, OtherSpecialPrefixTest) {
- ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
- TermMatchType::EXACT_ONLY, "foo"),
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOk());
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SuggestionProcessor> suggestion_processor,
- SuggestionProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get()));
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
SuggestionSpecProto suggestion_spec;
suggestion_spec.set_prefix("f:");
suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+
+ auto terms_or = suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds());
+ if (SearchSpecProto::default_instance().search_type() ==
+ SearchSpecProto::SearchType::ICING_RAW_QUERY) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, terms_or);
+ EXPECT_THAT(terms, IsEmpty());
+ } else {
+ EXPECT_THAT(terms_or,
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ }
- AlwaysTrueSuggestionResultCheckerImpl impl;
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<TermMetadata> terms,
- suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
- EXPECT_THAT(terms, IsEmpty());
-
+ // TODO(b/208654892): Update handling for hyphens to only consider it a hyphen
+ // within a TEXT token (rather than a MINUS token) when surrounded on both
+ // sides by TEXT rather than just preceded by TEXT.
suggestion_spec.set_prefix("f-");
- ICING_ASSERT_OK_AND_ASSIGN(
- terms, suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
+ terms_or = suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds());
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, terms_or);
EXPECT_THAT(terms, IsEmpty());
+
+ suggestion_spec.set_prefix("f OR");
+ terms_or = suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds());
+ if (SearchSpecProto::default_instance().search_type() ==
+ SearchSpecProto::SearchType::ICING_RAW_QUERY) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, terms_or);
+ EXPECT_THAT(terms, IsEmpty());
+ } else {
+ EXPECT_THAT(terms_or,
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ }
}
TEST_F(SuggestionProcessorTest, InvalidPrefixTest) {
- ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
- TermMatchType::EXACT_ONLY, "original"),
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOk());
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SuggestionProcessor> suggestion_processor,
- SuggestionProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get()));
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "original"),
+ IsOk());
SuggestionSpecProto suggestion_spec;
suggestion_spec.set_prefix("OR OR - :");
suggestion_spec.set_num_to_return(10);
-
- AlwaysTrueSuggestionResultCheckerImpl impl;
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<TermMetadata> terms,
- suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
- EXPECT_THAT(terms, IsEmpty());
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+
+ auto terms_or = suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds());
+ if (SearchSpecProto::default_instance().search_type() ==
+ SearchSpecProto::SearchType::ICING_RAW_QUERY) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, terms_or);
+ EXPECT_THAT(terms, IsEmpty());
+ } else {
+ EXPECT_THAT(terms_or,
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ }
}
} // namespace
diff --git a/icing/result/projection-tree.cc b/icing/result/projection-tree.cc
index 03bb849..9896491 100644
--- a/icing/result/projection-tree.cc
+++ b/icing/result/projection-tree.cc
@@ -16,18 +16,18 @@
#include <algorithm>
-#include "icing/absl_ports/str_join.h"
#include "icing/proto/search.pb.h"
-#include "icing/schema/section-manager.h"
+#include "icing/schema/property-util.h"
namespace icing {
namespace lib {
-ProjectionTree::ProjectionTree(const TypePropertyMask& type_field_mask) {
- for (const std::string& field_mask : type_field_mask.paths()) {
+ProjectionTree::ProjectionTree(
+ const SchemaStore::ExpandedTypePropertyMask& type_field_mask) {
+ for (const std::string& field_mask : type_field_mask.paths) {
Node* current_node = &root_;
for (std::string_view sub_field_mask :
- absl_ports::StrSplit(field_mask, kPropertySeparator)) {
+ property_util::SplitPropertyPathExpr(field_mask)) {
current_node = AddChildNode(sub_field_mask, &current_node->children);
}
}
diff --git a/icing/result/projection-tree.h b/icing/result/projection-tree.h
index 5916fe6..cdf268a 100644
--- a/icing/result/projection-tree.h
+++ b/icing/result/projection-tree.h
@@ -19,14 +19,13 @@
#include <vector>
#include "icing/proto/search.pb.h"
+#include "icing/schema/schema-store.h"
namespace icing {
namespace lib {
class ProjectionTree {
public:
- static constexpr std::string_view kSchemaTypeWildcard = "*";
-
struct Node {
explicit Node(std::string name = "") : name(std::move(name)) {}
@@ -38,7 +37,8 @@ class ProjectionTree {
}
};
- explicit ProjectionTree(const TypePropertyMask& type_field_mask);
+ explicit ProjectionTree(
+ const SchemaStore::ExpandedTypePropertyMask& type_field_mask);
const Node& root() const { return root_; }
diff --git a/icing/result/projection-tree_test.cc b/icing/result/projection-tree_test.cc
index 2b0f966..46d0c12 100644
--- a/icing/result/projection-tree_test.cc
+++ b/icing/result/projection-tree_test.cc
@@ -17,6 +17,7 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/proto/search.pb.h"
+#include "icing/schema/schema-store.h"
namespace icing {
namespace lib {
@@ -28,72 +29,87 @@ using ::testing::IsEmpty;
using ::testing::SizeIs;
TEST(ProjectionTreeTest, CreateEmptyFieldMasks) {
- TypePropertyMask type_field_mask;
- ProjectionTree tree(type_field_mask);
+ ProjectionTree tree({});
EXPECT_THAT(tree.root().name, IsEmpty());
EXPECT_THAT(tree.root().children, IsEmpty());
}
TEST(ProjectionTreeTest, CreateTreeTopLevel) {
- TypePropertyMask type_field_mask;
- type_field_mask.add_paths("subject");
+ SchemaStore::ExpandedTypePropertyMask type_field_mask{"", {"subject"}};
ProjectionTree tree(type_field_mask);
EXPECT_THAT(tree.root().name, IsEmpty());
ASSERT_THAT(tree.root().children, SizeIs(1));
- ASSERT_THAT(tree.root().children.at(0).name, Eq("subject"));
- ASSERT_THAT(tree.root().children.at(0).children, IsEmpty());
+ EXPECT_THAT(tree.root().children.at(0).name, Eq("subject"));
+ EXPECT_THAT(tree.root().children.at(0).children, IsEmpty());
}
TEST(ProjectionTreeTest, CreateTreeMultipleTopLevel) {
- TypePropertyMask type_field_mask;
- type_field_mask.add_paths("subject");
- type_field_mask.add_paths("body");
+ SchemaStore::ExpandedTypePropertyMask type_field_mask{"",
+ {"subject", "body"}};
ProjectionTree tree(type_field_mask);
EXPECT_THAT(tree.root().name, IsEmpty());
ASSERT_THAT(tree.root().children, SizeIs(2));
- ASSERT_THAT(tree.root().children.at(0).name, Eq("subject"));
- ASSERT_THAT(tree.root().children.at(0).children, IsEmpty());
- ASSERT_THAT(tree.root().children.at(1).name, Eq("body"));
- ASSERT_THAT(tree.root().children.at(1).children, IsEmpty());
+
+ const ProjectionTree::Node* child0 = &tree.root().children.at(0);
+ const ProjectionTree::Node* child1 = &tree.root().children.at(1);
+ if (child0->name != "subject") {
+ std::swap(child0, child1);
+ }
+
+ EXPECT_THAT(child0->name, Eq("subject"));
+ EXPECT_THAT(child0->children, IsEmpty());
+ EXPECT_THAT(child1->name, Eq("body"));
+ EXPECT_THAT(child1->children, IsEmpty());
}
TEST(ProjectionTreeTest, CreateTreeNested) {
- TypePropertyMask type_field_mask;
- type_field_mask.add_paths("subject.body");
- type_field_mask.add_paths("body");
+ SchemaStore::ExpandedTypePropertyMask type_field_mask{
+ "", {"subject.body", "body"}};
ProjectionTree tree(type_field_mask);
EXPECT_THAT(tree.root().name, IsEmpty());
ASSERT_THAT(tree.root().children, SizeIs(2));
- ASSERT_THAT(tree.root().children.at(0).name, Eq("subject"));
- ASSERT_THAT(tree.root().children.at(0).children, SizeIs(1));
- ASSERT_THAT(tree.root().children.at(0).children.at(0).name, Eq("body"));
- ASSERT_THAT(tree.root().children.at(0).children.at(0).children, IsEmpty());
- ASSERT_THAT(tree.root().children.at(1).name, Eq("body"));
- ASSERT_THAT(tree.root().children.at(1).children, IsEmpty());
+
+ const ProjectionTree::Node* child0 = &tree.root().children.at(0);
+ const ProjectionTree::Node* child1 = &tree.root().children.at(1);
+ if (child0->name != "subject.body") {
+ std::swap(child0, child1);
+ }
+
+ EXPECT_THAT(child0->name, Eq("subject"));
+ ASSERT_THAT(child0->children, SizeIs(1));
+ EXPECT_THAT(child0->children.at(0).name, Eq("body"));
+ EXPECT_THAT(child0->children.at(0).children, IsEmpty());
+ EXPECT_THAT(child1->name, Eq("body"));
+ EXPECT_THAT(child1->children, IsEmpty());
}
TEST(ProjectionTreeTest, CreateTreeNestedSharedNode) {
- TypePropertyMask type_field_mask;
- type_field_mask.add_paths("sender.name.first");
- type_field_mask.add_paths("sender.emailAddress");
+ SchemaStore::ExpandedTypePropertyMask type_field_mask{
+ "", {"sender.name.first", "sender.emailAddress"}};
ProjectionTree tree(type_field_mask);
EXPECT_THAT(tree.root().name, IsEmpty());
ASSERT_THAT(tree.root().children, SizeIs(1));
- ASSERT_THAT(tree.root().children.at(0).name, Eq("sender"));
+ EXPECT_THAT(tree.root().children.at(0).name, Eq("sender"));
ASSERT_THAT(tree.root().children.at(0).children, SizeIs(2));
- ASSERT_THAT(tree.root().children.at(0).children.at(0).name, Eq("name"));
- ASSERT_THAT(tree.root().children.at(0).children.at(0).children, SizeIs(1));
- ASSERT_THAT(tree.root().children.at(0).children.at(0).children.at(0).name,
- Eq("first"));
- ASSERT_THAT(tree.root().children.at(0).children.at(0).children.at(0).children,
- IsEmpty());
- ASSERT_THAT(tree.root().children.at(0).children.at(1).name,
- Eq("emailAddress"));
- ASSERT_THAT(tree.root().children.at(0).children.at(1).children, IsEmpty());
+
+ const ProjectionTree::Node* child0_child0 =
+ &tree.root().children.at(0).children.at(0);
+ const ProjectionTree::Node* child0_child1 =
+ &tree.root().children.at(0).children.at(1);
+ if (child0_child0->name != "name") {
+ std::swap(child0_child0, child0_child1);
+ }
+
+ EXPECT_THAT(child0_child0->name, Eq("name"));
+ ASSERT_THAT(child0_child0->children, SizeIs(1));
+ EXPECT_THAT(child0_child0->children.at(0).name, Eq("first"));
+ EXPECT_THAT(child0_child0->children.at(0).children, IsEmpty());
+ EXPECT_THAT(child0_child1->name, Eq("emailAddress"));
+ EXPECT_THAT(child0_child1->children, IsEmpty());
}
} // namespace
diff --git a/icing/result/result-adjustment-info.cc b/icing/result/result-adjustment-info.cc
new file mode 100644
index 0000000..00ac379
--- /dev/null
+++ b/icing/result/result-adjustment-info.cc
@@ -0,0 +1,64 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/result/result-adjustment-info.h"
+
+#include <string>
+#include <unordered_map>
+
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/result/projection-tree.h"
+#include "icing/result/snippet-context.h"
+#include "icing/schema/schema-store.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+SnippetContext CreateSnippetContext(const SearchSpecProto& search_spec,
+ const ResultSpecProto& result_spec,
+ SectionRestrictQueryTermsMap query_terms) {
+ if (result_spec.snippet_spec().num_to_snippet() > 0 &&
+ result_spec.snippet_spec().num_matches_per_property() > 0) {
+ // Needs snippeting
+ return SnippetContext(std::move(query_terms), result_spec.snippet_spec(),
+ search_spec.term_match_type());
+ }
+ return SnippetContext(/*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::UNKNOWN);
+}
+
+} // namespace
+
+ResultAdjustmentInfo::ResultAdjustmentInfo(
+ const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
+ const ResultSpecProto& result_spec, const SchemaStore* schema_store,
+ SectionRestrictQueryTermsMap query_terms)
+ : snippet_context(CreateSnippetContext(search_spec, result_spec,
+ std::move(query_terms))),
+ remaining_num_to_snippet(snippet_context.snippet_spec.num_to_snippet()) {
+ for (const SchemaStore::ExpandedTypePropertyMask& type_field_mask :
+ schema_store->ExpandTypePropertyMasks(
+ result_spec.type_property_masks())) {
+ projection_tree_map.insert(
+ {type_field_mask.schema_type, ProjectionTree(type_field_mask)});
+ }
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/result-adjustment-info.h b/icing/result/result-adjustment-info.h
new file mode 100644
index 0000000..e859492
--- /dev/null
+++ b/icing/result/result-adjustment-info.h
@@ -0,0 +1,53 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_RESULT_RESULT_ADJUSTMENT_INFO_H_
+#define ICING_RESULT_RESULT_ADJUSTMENT_INFO_H_
+
+#include <string>
+#include <unordered_map>
+
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/result/projection-tree.h"
+#include "icing/result/snippet-context.h"
+#include "icing/schema/schema-store.h"
+
+namespace icing {
+namespace lib {
+
+// A wrapper struct for information used in result retrieval.
+// - Snippet
+// - Projection
+struct ResultAdjustmentInfo {
+ // Information needed for snippeting.
+ SnippetContext snippet_context;
+
+ // Remaining # of docs to snippet.
+ int remaining_num_to_snippet;
+
+ // Information needed for projection.
+ std::unordered_map<std::string, ProjectionTree> projection_tree_map;
+
+ explicit ResultAdjustmentInfo(const SearchSpecProto& search_spec,
+ const ScoringSpecProto& scoring_spec,
+ const ResultSpecProto& result_spec,
+ const SchemaStore* schema_store,
+ SectionRestrictQueryTermsMap query_terms);
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_RESULT_RESULT_ADJUSTMENT_INFO_H_
diff --git a/icing/result/result-adjustment-info_test.cc b/icing/result/result-adjustment-info_test.cc
new file mode 100644
index 0000000..cbce557
--- /dev/null
+++ b/icing/result/result-adjustment-info_test.cc
@@ -0,0 +1,198 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/result/result-adjustment-info.h"
+
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/result/projection-tree.h"
+#include "icing/result/snippet-context.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::AnyOf;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::Pair;
+using ::testing::UnorderedElementsAre;
+
+class ResultAdjustmentInfoTest : public testing::Test {
+ protected:
+ ResultAdjustmentInfoTest() : test_dir_(GetTestTempDir() + "/icing") {
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ }
+
+ void SetUp() override {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Email"))
+ .AddType(SchemaTypeConfigBuilder().SetType("Phone"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ const Filesystem filesystem_;
+ const std::string test_dir_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ FakeClock fake_clock_;
+};
+
+SearchSpecProto CreateSearchSpec(TermMatchType::Code match_type) {
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(match_type);
+ return search_spec;
+}
+
+ScoringSpecProto CreateScoringSpec(bool is_descending_order) {
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_order_by(is_descending_order ? ScoringSpecProto::Order::DESC
+ : ScoringSpecProto::Order::ASC);
+ return scoring_spec;
+}
+
+ResultSpecProto CreateResultSpec(
+ int num_per_page, ResultSpecProto::ResultGroupingType result_group_type) {
+ ResultSpecProto result_spec;
+ result_spec.set_result_group_type(result_group_type);
+ result_spec.set_num_per_page(num_per_page);
+ return result_spec;
+}
+
+TEST_F(ResultAdjustmentInfoTest,
+ ShouldConstructSnippetContextAccordingToSpecs) {
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(5);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(5);
+ result_spec.mutable_snippet_spec()->set_max_window_utf32_length(5);
+
+ SectionRestrictQueryTermsMap query_terms_map;
+ query_terms_map.emplace("term1", std::unordered_set<std::string>());
+
+ ResultAdjustmentInfo result_adjustment_info(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true), result_spec,
+ schema_store_.get(), query_terms_map);
+ const SnippetContext snippet_context = result_adjustment_info.snippet_context;
+
+ // Snippet context should be derived from the specs above.
+ EXPECT_TRUE(
+ result_adjustment_info.snippet_context.query_terms.find("term1") !=
+ result_adjustment_info.snippet_context.query_terms.end());
+ EXPECT_THAT(result_adjustment_info.snippet_context.snippet_spec,
+ EqualsProto(result_spec.snippet_spec()));
+ EXPECT_THAT(result_adjustment_info.snippet_context.match_type,
+ Eq(TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(result_adjustment_info.remaining_num_to_snippet, Eq(5));
+}
+
+TEST_F(ResultAdjustmentInfoTest, NoSnippetingShouldReturnNull) {
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
+ // Setting num_to_snippet to 0 so that snippeting info won't be
+ // stored.
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(0);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(5);
+ result_spec.mutable_snippet_spec()->set_max_window_utf32_length(5);
+
+ SectionRestrictQueryTermsMap query_terms_map;
+ query_terms_map.emplace("term1", std::unordered_set<std::string>());
+
+ ResultAdjustmentInfo result_adjustment_info(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true), result_spec,
+ schema_store_.get(), query_terms_map);
+
+ EXPECT_THAT(result_adjustment_info.snippet_context.query_terms, IsEmpty());
+ EXPECT_THAT(
+ result_adjustment_info.snippet_context.snippet_spec,
+ EqualsProto(ResultSpecProto::SnippetSpecProto::default_instance()));
+ EXPECT_THAT(result_adjustment_info.snippet_context.match_type,
+ TermMatchType::UNKNOWN);
+ EXPECT_THAT(result_adjustment_info.remaining_num_to_snippet, Eq(0));
+}
+
+TEST_F(ResultAdjustmentInfoTest,
+ ShouldConstructProjectionTreeMapAccordingToSpecs) {
+ // Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
+ TypePropertyMask* email_type_property_mask =
+ result_spec.add_type_property_masks();
+ email_type_property_mask->set_schema_type("Email");
+ email_type_property_mask->add_paths("sender.name");
+ email_type_property_mask->add_paths("sender.emailAddress");
+ TypePropertyMask* phone_type_property_mask =
+ result_spec.add_type_property_masks();
+ phone_type_property_mask->set_schema_type("Phone");
+ phone_type_property_mask->add_paths("caller");
+ TypePropertyMask* wildcard_type_property_mask =
+ result_spec.add_type_property_masks();
+ wildcard_type_property_mask->set_schema_type(
+ std::string(SchemaStore::kSchemaTypeWildcard));
+ wildcard_type_property_mask->add_paths("wild.card");
+
+ ResultAdjustmentInfo result_adjustment_info(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true), result_spec,
+ schema_store_.get(),
+ /*query_terms=*/{});
+
+ ProjectionTree email_projection_tree =
+ ProjectionTree({"Email", {"sender.name", "sender.emailAddress"}});
+ ProjectionTree alternative_email_projection_tree =
+ ProjectionTree({"Email", {"sender.emailAddress", "sender.name"}});
+ ProjectionTree phone_projection_tree = ProjectionTree({"Phone", {"caller"}});
+ ProjectionTree wildcard_projection_tree = ProjectionTree(
+ {std::string(SchemaStore::kSchemaTypeWildcard), {"wild.card"}});
+
+ EXPECT_THAT(result_adjustment_info.projection_tree_map,
+ UnorderedElementsAre(
+ Pair("Email", AnyOf(email_projection_tree,
+ alternative_email_projection_tree)),
+ Pair("Phone", phone_projection_tree),
+ Pair(std::string(SchemaStore::kSchemaTypeWildcard),
+ wildcard_projection_tree)));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/result-retriever-v2.cc b/icing/result/result-retriever-v2.cc
index 53ce10a..44fa602 100644
--- a/icing/result/result-retriever-v2.cc
+++ b/icing/result/result-retriever-v2.cc
@@ -14,38 +14,97 @@
#include "icing/result/result-retriever-v2.h"
+#include <cstddef>
+#include <cstdint>
#include <memory>
-#include <string_view>
+#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/mutex.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/search.pb.h"
#include "icing/result/page-result.h"
#include "icing/result/projection-tree.h"
#include "icing/result/projector.h"
+#include "icing/result/result-adjustment-info.h"
+#include "icing/result/result-state-v2.h"
#include "icing/result/snippet-context.h"
#include "icing/result/snippet-retriever.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-filter-data.h"
#include "icing/store/document-store.h"
#include "icing/store/namespace-id.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer.h"
+#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
+namespace {
+
+void ApplyProjection(const ResultAdjustmentInfo* adjustment_info,
+ DocumentProto* document) {
+ if (adjustment_info == nullptr) {
+ return;
+ }
+
+ auto itr = adjustment_info->projection_tree_map.find(document->schema());
+ if (itr != adjustment_info->projection_tree_map.end()) {
+ projector::Project(itr->second.root().children, document);
+ } else {
+ auto wildcard_projection_tree_itr =
+ adjustment_info->projection_tree_map.find(
+ std::string(SchemaStore::kSchemaTypeWildcard));
+ if (wildcard_projection_tree_itr !=
+ adjustment_info->projection_tree_map.end()) {
+ projector::Project(wildcard_projection_tree_itr->second.root().children,
+ document);
+ }
+ }
+}
+
+bool ApplySnippet(ResultAdjustmentInfo* adjustment_info,
+ const SnippetRetriever& snippet_retriever,
+ const DocumentProto& document, SectionIdMask section_id_mask,
+ SearchResultProto::ResultProto* result) {
+ if (adjustment_info == nullptr) {
+ return false;
+ }
+
+ const SnippetContext& snippet_context = adjustment_info->snippet_context;
+ int& remaining_num_to_snippet = adjustment_info->remaining_num_to_snippet;
+
+ if (snippet_context.snippet_spec.num_matches_per_property() > 0 &&
+ remaining_num_to_snippet > 0) {
+ SnippetProto snippet_proto = snippet_retriever.RetrieveSnippet(
+ snippet_context.query_terms, snippet_context.match_type,
+ snippet_context.snippet_spec, document, section_id_mask);
+ *result->mutable_snippet() = std::move(snippet_proto);
+ --remaining_num_to_snippet;
+ return true;
+ }
+
+ return false;
+}
+
+} // namespace
+
bool GroupResultLimiterV2::ShouldBeRemoved(
const ScoredDocumentHit& scored_document_hit,
const std::unordered_map<int32_t, int>& entry_id_group_id_map,
const DocumentStore& document_store, std::vector<int>& group_result_limits,
- ResultSpecProto::ResultGroupingType result_group_type) const {
+ ResultSpecProto::ResultGroupingType result_group_type,
+ int64_t current_time_ms) const {
auto document_filter_data_optional =
document_store.GetAliveDocumentFilterData(
- scored_document_hit.document_id());
+ scored_document_hit.document_id(), current_time_ms);
if (!document_filter_data_optional) {
// The document doesn't exist.
return true;
@@ -95,7 +154,7 @@ ResultRetrieverV2::Create(
}
std::pair<PageResult, bool> ResultRetrieverV2::RetrieveNextPage(
- ResultStateV2& result_state) const {
+ ResultStateV2& result_state, int64_t current_time_ms) const {
absl_ports::unique_lock l(&result_state.mutex);
// For calculating page
@@ -103,19 +162,6 @@ std::pair<PageResult, bool> ResultRetrieverV2::RetrieveNextPage(
result_state.scored_document_hits_ranker->size();
int num_results_with_snippets = 0;
- const SnippetContext& snippet_context = result_state.snippet_context();
- const std::unordered_map<std::string, ProjectionTree>& projection_tree_map =
- result_state.projection_tree_map();
- auto wildcard_projection_tree_itr = projection_tree_map.find(
- std::string(ProjectionTree::kSchemaTypeWildcard));
-
- // Calculates how many snippets to return for this page.
- int remaining_num_to_snippet =
- snippet_context.snippet_spec.num_to_snippet() - result_state.num_returned;
- if (remaining_num_to_snippet < 0) {
- remaining_num_to_snippet = 0;
- }
-
// Retrieve info
std::vector<SearchResultProto::ResultProto> results;
int32_t num_total_bytes = 0;
@@ -126,8 +172,8 @@ std::pair<PageResult, bool> ResultRetrieverV2::RetrieveNextPage(
if (group_result_limiter_->ShouldBeRemoved(
next_best_document_hit.parent_scored_document_hit(),
result_state.entry_id_group_id_map(), doc_store_,
- result_state.group_result_limits,
- result_state.result_group_type())) {
+ result_state.group_result_limits, result_state.result_group_type(),
+ current_time_ms)) {
continue;
}
@@ -141,25 +187,16 @@ std::pair<PageResult, bool> ResultRetrieverV2::RetrieveNextPage(
}
DocumentProto document = std::move(document_or).ValueOrDie();
- // Apply projection
- auto itr = projection_tree_map.find(document.schema());
- if (itr != projection_tree_map.end()) {
- projector::Project(itr->second.root().children, &document);
- } else if (wildcard_projection_tree_itr != projection_tree_map.end()) {
- projector::Project(wildcard_projection_tree_itr->second.root().children,
- &document);
- }
+ // Apply parent projection
+ ApplyProjection(result_state.parent_adjustment_info(), &document);
SearchResultProto::ResultProto result;
- // Add the snippet if requested.
- if (snippet_context.snippet_spec.num_matches_per_property() > 0 &&
- remaining_num_to_snippet > results.size()) {
- SnippetProto snippet_proto = snippet_retriever_->RetrieveSnippet(
- snippet_context.query_terms, snippet_context.match_type,
- snippet_context.snippet_spec, document,
- next_best_document_hit.parent_scored_document_hit()
- .hit_section_id_mask());
- *result.mutable_snippet() = std::move(snippet_proto);
+ // Add parent snippet if requested.
+ if (ApplySnippet(result_state.parent_adjustment_info(), *snippet_retriever_,
+ document,
+ next_best_document_hit.parent_scored_document_hit()
+ .hit_section_id_mask(),
+ &result)) {
++num_results_with_snippets;
}
@@ -170,6 +207,11 @@ std::pair<PageResult, bool> ResultRetrieverV2::RetrieveNextPage(
// Retrieve child documents
for (const ScoredDocumentHit& child_scored_document_hit :
next_best_document_hit.child_scored_document_hits()) {
+ if (result.joined_results_size() >=
+ result_state.max_joined_children_per_parent_to_return()) {
+ break;
+ }
+
libtextclassifier3::StatusOr<DocumentProto> child_document_or =
doc_store_.Get(child_scored_document_hit.document_id());
if (!child_document_or.ok()) {
@@ -181,10 +223,16 @@ std::pair<PageResult, bool> ResultRetrieverV2::RetrieveNextPage(
}
DocumentProto child_document = std::move(child_document_or).ValueOrDie();
- // TODO(b/256022027): apply projection and add snippet for child doc
+ ApplyProjection(result_state.child_adjustment_info(), &child_document);
SearchResultProto::ResultProto* child_result =
result.add_joined_results();
+ // Add child snippet if requested.
+ ApplySnippet(result_state.child_adjustment_info(), *snippet_retriever_,
+ child_document,
+ child_scored_document_hit.hit_section_id_mask(),
+ child_result);
+
*child_result->mutable_document() = std::move(child_document);
child_result->set_score(child_scored_document_hit.score());
}
diff --git a/icing/result/result-retriever-v2.h b/icing/result/result-retriever-v2.h
index 48fb88d..7b1a364 100644
--- a/icing/result/result-retriever-v2.h
+++ b/icing/result/result-retriever-v2.h
@@ -15,19 +15,20 @@
#ifndef ICING_RESULT_RETRIEVER_V2_H_
#define ICING_RESULT_RETRIEVER_V2_H_
+#include <cstdint>
#include <memory>
#include <unordered_map>
#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/proto/search.pb.h"
#include "icing/result/page-result.h"
#include "icing/result/result-state-v2.h"
#include "icing/result/snippet-retriever.h"
#include "icing/schema/schema-store.h"
#include "icing/scoring/scored-document-hit.h"
#include "icing/store/document-store.h"
-#include "icing/store/namespace-id.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer.h"
@@ -46,7 +47,8 @@ class GroupResultLimiterV2 {
const std::unordered_map<int32_t, int>& entry_id_group_id_map,
const DocumentStore& document_store,
std::vector<int>& group_result_limits,
- ResultSpecProto::ResultGroupingType result_group_type) const;
+ ResultSpecProto::ResultGroupingType result_group_type,
+ int64_t current_time_ms) const;
};
class ResultRetrieverV2 {
@@ -86,8 +88,8 @@ class ResultRetrieverV2 {
//
// Returns:
// std::pair<PageResult, bool>
- std::pair<PageResult, bool> RetrieveNextPage(
- ResultStateV2& result_state) const;
+ std::pair<PageResult, bool> RetrieveNextPage(ResultStateV2& result_state,
+ int64_t current_time_ms) const;
private:
explicit ResultRetrieverV2(
diff --git a/icing/result/result-retriever-v2_group-result-limiter_test.cc b/icing/result/result-retriever-v2_group-result-limiter_test.cc
index f59864b..2914a8d 100644
--- a/icing/result/result-retriever-v2_group-result-limiter_test.cc
+++ b/icing/result/result-retriever-v2_group-result-limiter_test.cc
@@ -22,7 +22,6 @@
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/search.pb.h"
-#include "icing/proto/term.pb.h"
#include "icing/result/page-result.h"
#include "icing/result/result-retriever-v2.h"
#include "icing/result/result-state-v2.h"
@@ -31,7 +30,6 @@
#include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
#include "icing/scoring/scored-document-hit.h"
#include "icing/store/document-id.h"
-#include "icing/store/namespace-id.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/icu-data-file-helper.h"
@@ -85,12 +83,20 @@ class ResultRetrieverV2GroupResultLimiterTest : public testing::Test {
schema.add_types()->set_schema_type("Document");
schema.add_types()->set_schema_type("Message");
schema.add_types()->set_schema_type("Person");
- ICING_ASSERT_OK(schema_store_->SetSchema(std::move(schema)));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ std::move(schema), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::Create(
+ &filesystem_, test_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
}
@@ -107,22 +113,6 @@ class ResultRetrieverV2GroupResultLimiterTest : public testing::Test {
FakeClock fake_clock_;
};
-// TODO(sungyc): Refactor helper functions below (builder classes or common test
-// utility).
-
-SearchSpecProto CreateSearchSpec(TermMatchType::Code match_type) {
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(match_type);
- return search_spec;
-}
-
-ScoringSpecProto CreateScoringSpec(bool is_descending_order) {
- ScoringSpecProto scoring_spec;
- scoring_spec.set_order_by(is_descending_order ? ScoringSpecProto::Order::DESC
- : ScoringSpecProto::Order::ASC);
- return scoring_spec;
-}
-
ResultSpecProto CreateResultSpec(
int num_per_page, ResultSpecProto::ResultGroupingType result_group_type) {
ResultSpecProto result_spec;
@@ -172,9 +162,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetrieverV2> result_retriever,
@@ -183,8 +172,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
// Only the top ranked document in "namespace" (document2), should be
// returned.
- auto [page_result, has_more_results] =
- result_retriever->RetrieveNextPage(result_state);
+ auto [page_result, has_more_results] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
ASSERT_THAT(page_result.results, SizeIs(1));
EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document2));
// Document1 has not been returned due to GroupResultLimiter, but since it was
@@ -233,9 +222,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetrieverV2> result_retriever,
@@ -243,8 +231,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
language_segmenter_.get(), normalizer_.get()));
// First page: empty page
- auto [page_result, has_more_results] =
- result_retriever->RetrieveNextPage(result_state);
+ auto [page_result, has_more_results] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
ASSERT_THAT(page_result.results, IsEmpty());
EXPECT_FALSE(has_more_results);
}
@@ -310,9 +298,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetrieverV2> result_retriever,
@@ -320,8 +307,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
language_segmenter_.get(), normalizer_.get()));
// First page: document4 and document3 should be returned.
- auto [page_result1, has_more_results1] =
- result_retriever->RetrieveNextPage(result_state);
+ auto [page_result1, has_more_results1] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
ASSERT_THAT(page_result1.results, SizeIs(2));
EXPECT_THAT(page_result1.results.at(0).document(), EqualsProto(document4));
EXPECT_THAT(page_result1.results.at(1).document(), EqualsProto(document3));
@@ -330,8 +317,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
// Second page: although there are valid document hits in result state, all of
// them will be filtered out by group result limiter, so we should get an
// empty page.
- auto [page_result2, has_more_results2] =
- result_retriever->RetrieveNextPage(result_state);
+ auto [page_result2, has_more_results2] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(page_result2.results, SizeIs(0));
EXPECT_FALSE(has_more_results2);
}
@@ -398,9 +385,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetrieverV2> result_retriever,
@@ -409,7 +395,10 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
// All documents in "namespace2" should be returned.
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result.results, SizeIs(3));
EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document4));
EXPECT_THAT(page_result.results.at(1).document(), EqualsProto(document3));
@@ -460,9 +449,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetrieverV2> result_retriever,
@@ -473,7 +461,10 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
// returned. The presence of "nonexistentNamespace" in the same result
// grouping should have no effect.
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result.results, SizeIs(1));
EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document2));
}
@@ -522,9 +513,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetrieverV2> result_retriever,
@@ -535,7 +525,10 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
// returned. The presence of "nonexistentNamespace" in the same result
// grouping should have no effect.
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result.results, SizeIs(1));
EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document2));
}
@@ -629,9 +622,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetrieverV2> result_retriever,
@@ -642,7 +634,10 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
// Only the top-ranked results across "namespace2" and "namespace3"
// (document6, document5) should be returned.
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result.results, SizeIs(3));
EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document6));
EXPECT_THAT(page_result.results.at(1).document(), EqualsProto(document5));
@@ -738,9 +733,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetrieverV2> result_retriever,
@@ -751,7 +745,10 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
// Only the top-ranked results across "Message" and "Person"
// (document5, document3) should be returned.
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result.results, SizeIs(3));
EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document6));
EXPECT_THAT(page_result.results.at(1).document(), EqualsProto(document4));
@@ -850,9 +847,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetrieverV2> result_retriever,
@@ -865,7 +861,10 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
// "namespace3xMessage" (document6, document5) should be returned.
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result.results, SizeIs(3));
EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document6));
EXPECT_THAT(page_result.results.at(1).document(), EqualsProto(document5));
@@ -914,9 +913,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetrieverV2> result_retriever,
@@ -926,7 +924,10 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
// All documents in "namespace" should be returned. The presence of
// "nonexistentNamespace" should have no effect.
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result.results, SizeIs(2));
EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document2));
EXPECT_THAT(page_result.results.at(1).document(), EqualsProto(document1));
@@ -974,9 +975,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetrieverV2> result_retriever,
@@ -986,7 +986,10 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
// All documents in "Document" should be returned. The presence of
// "nonexistentDocument" should have no effect.
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result.results, SizeIs(2));
EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document2));
EXPECT_THAT(page_result.results.at(1).document(), EqualsProto(document1));
@@ -1078,9 +1081,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *document_store_);
{
absl_ports::shared_lock l(&result_state.mutex);
@@ -1099,8 +1101,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
// docuemnt3, document2 belong to namespace 1 (with max_results = 3).
// Since num_per_page is 2, we expect to get document5 and document3 in the
// first page.
- auto [page_result1, has_more_results1] =
- result_retriever->RetrieveNextPage(result_state);
+ auto [page_result1, has_more_results1] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
ASSERT_THAT(page_result1.results, SizeIs(2));
ASSERT_THAT(page_result1.results.at(0).document(), EqualsProto(document5));
ASSERT_THAT(page_result1.results.at(1).document(), EqualsProto(document3));
@@ -1132,8 +1134,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest,
// Although there are document2 and document1 left, since namespace2 has
// reached its max results, document1 should be excluded from the second page.
- auto [page_result2, has_more_results2] =
- result_retriever->RetrieveNextPage(result_state);
+ auto [page_result2, has_more_results2] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
ASSERT_THAT(page_result2.results, SizeIs(1));
ASSERT_THAT(page_result2.results.at(0).document(), EqualsProto(document2));
ASSERT_FALSE(has_more_results2);
diff --git a/icing/result/result-retriever-v2_projection_test.cc b/icing/result/result-retriever-v2_projection_test.cc
index d093d1f..1a75631 100644
--- a/icing/result/result-retriever-v2_projection_test.cc
+++ b/icing/result/result-retriever-v2_projection_test.cc
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include <limits>
#include <memory>
#include <vector>
@@ -25,6 +26,7 @@
#include "icing/proto/term.pb.h"
#include "icing/result/page-result.h"
#include "icing/result/projection-tree.h"
+#include "icing/result/result-adjustment-info.h"
#include "icing/result/result-retriever-v2.h"
#include "icing/result/result-state-v2.h"
#include "icing/schema-builder.h"
@@ -108,13 +110,88 @@ class ResultRetrieverV2ProjectionTest : public testing::Test {
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Artist")
+ .AddParentType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Musician")
+ .AddParentType("Artist")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("WithPhone")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("phoneNumber")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("phoneModel")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("PersonWithPhone")
+ .AddParentType("Person")
+ .AddParentType("WithPhone")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("phoneNumber")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("phoneModel")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build())
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::Create(
+ &filesystem_, test_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
}
@@ -150,9 +227,6 @@ class ResultRetrieverV2ProjectionTest : public testing::Test {
FakeClock fake_clock_;
};
-// TODO(sungyc): Refactor helper functions below (builder classes or common test
-// utility).
-
SectionIdMask CreateSectionIdMask(const std::vector<SectionId>& section_ids) {
SectionIdMask mask = 0;
for (SectionId section_id : section_ids) {
@@ -225,10 +299,12 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionTopLevelLeadNodeFieldPath) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/false),
- /*query_terms=*/SectionRestrictQueryTermsMap{},
- CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetrieverV2> result_retriever,
@@ -237,7 +313,10 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionTopLevelLeadNodeFieldPath) {
// 5. Verify that the returned results only contain the 'name' property.
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result.results, SizeIs(2));
DocumentProto projected_document_one =
@@ -321,10 +400,12 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionNestedLeafNodeFieldPath) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/false),
- /*query_terms=*/SectionRestrictQueryTermsMap{},
- CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetrieverV2> result_retriever,
@@ -334,7 +415,10 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionNestedLeafNodeFieldPath) {
// 5. Verify that the returned results only contain the 'sender.name'
// property.
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result.results, SizeIs(2));
DocumentProto projected_document_one =
@@ -428,10 +512,12 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionIntermediateNodeFieldPath) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/false),
- /*query_terms=*/SectionRestrictQueryTermsMap{},
- CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetrieverV2> result_retriever,
@@ -441,7 +527,10 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionIntermediateNodeFieldPath) {
// 5. Verify that the returned results only contain the 'sender'
// property and all of the subproperties of 'sender'.
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result.results, SizeIs(2));
DocumentProto projected_document_one =
@@ -539,10 +628,12 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleNestedFieldPaths) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/false),
- /*query_terms=*/SectionRestrictQueryTermsMap{},
- CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetrieverV2> result_retriever,
@@ -552,7 +643,10 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleNestedFieldPaths) {
// 5. Verify that the returned results only contain the 'sender.name' and
// 'sender.address' properties.
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result.results, SizeIs(2));
DocumentProto projected_document_one =
@@ -633,10 +727,12 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionEmptyFieldPath) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/false),
- /*query_terms=*/SectionRestrictQueryTermsMap{},
- CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetrieverV2> result_retriever,
@@ -645,7 +741,10 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionEmptyFieldPath) {
// 5. Verify that the returned results contain *no* properties.
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result.results, SizeIs(2));
DocumentProto projected_document_one = DocumentBuilder()
@@ -710,10 +809,12 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionInvalidFieldPath) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/false),
- /*query_terms=*/SectionRestrictQueryTermsMap{},
- CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetrieverV2> result_retriever,
@@ -722,7 +823,10 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionInvalidFieldPath) {
// 5. Verify that the returned results contain *no* properties.
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result.results, SizeIs(2));
DocumentProto projected_document_one = DocumentBuilder()
@@ -788,10 +892,12 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionValidAndInvalidFieldPath) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/false),
- /*query_terms=*/SectionRestrictQueryTermsMap{},
- CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetrieverV2> result_retriever,
@@ -800,7 +906,10 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionValidAndInvalidFieldPath) {
// 5. Verify that the returned results only contain the 'name' property.
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result.results, SizeIs(2));
DocumentProto projected_document_one =
@@ -868,10 +977,12 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleTypesNoWildcards) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/false),
- /*query_terms=*/SectionRestrictQueryTermsMap{},
- CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- *document_store_);
+ //*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetrieverV2> result_retriever,
@@ -881,7 +992,10 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleTypesNoWildcards) {
// 5. Verify that the returned Email results only contain the 'name'
// property and the returned Person results have all of their properties.
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result.results, SizeIs(2));
DocumentProto projected_document_one =
@@ -944,7 +1058,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleTypesWildcard) {
TypePropertyMask* wildcard_type_property_mask =
result_spec.add_type_property_masks();
wildcard_type_property_mask->set_schema_type(
- std::string(ProjectionTree::kSchemaTypeWildcard));
+ std::string(SchemaStore::kSchemaTypeWildcard));
wildcard_type_property_mask->add_paths("name");
// 4. Create ResultState with custom ResultSpec.
@@ -952,10 +1066,12 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleTypesWildcard) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/false),
- /*query_terms=*/SectionRestrictQueryTermsMap{},
- CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetrieverV2> result_retriever,
@@ -965,7 +1081,10 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleTypesWildcard) {
// 5. Verify that the returned Email results only contain the 'name'
// property and the returned Person results only contain the 'name' property.
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result.results, SizeIs(2));
DocumentProto projected_document_one =
@@ -1032,7 +1151,7 @@ TEST_F(ResultRetrieverV2ProjectionTest,
TypePropertyMask* wildcard_type_property_mask =
result_spec.add_type_property_masks();
wildcard_type_property_mask->set_schema_type(
- std::string(ProjectionTree::kSchemaTypeWildcard));
+ std::string(SchemaStore::kSchemaTypeWildcard));
wildcard_type_property_mask->add_paths("name");
// 4. Create ResultState with custom ResultSpec.
@@ -1040,10 +1159,12 @@ TEST_F(ResultRetrieverV2ProjectionTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/false),
- /*query_terms=*/SectionRestrictQueryTermsMap{},
- CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetrieverV2> result_retriever,
@@ -1053,7 +1174,10 @@ TEST_F(ResultRetrieverV2ProjectionTest,
// 5. Verify that the returned Email results only contain the 'body'
// property and the returned Person results only contain the 'name' property.
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result.results, SizeIs(2));
DocumentProto projected_document_one =
@@ -1129,7 +1253,7 @@ TEST_F(ResultRetrieverV2ProjectionTest,
TypePropertyMask* wildcard_type_property_mask =
result_spec.add_type_property_masks();
wildcard_type_property_mask->set_schema_type(
- std::string(ProjectionTree::kSchemaTypeWildcard));
+ std::string(SchemaStore::kSchemaTypeWildcard));
wildcard_type_property_mask->add_paths("name");
// 4. Create ResultState with custom ResultSpec.
@@ -1137,10 +1261,12 @@ TEST_F(ResultRetrieverV2ProjectionTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/false),
- /*query_terms=*/SectionRestrictQueryTermsMap{},
- CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetrieverV2> result_retriever,
@@ -1150,7 +1276,10 @@ TEST_F(ResultRetrieverV2ProjectionTest,
// 5. Verify that the returned Email results only contain the 'sender.name'
// property and the returned Person results only contain the 'name' property.
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result.results, SizeIs(2));
DocumentProto projected_document_one =
@@ -1230,7 +1359,7 @@ TEST_F(ResultRetrieverV2ProjectionTest,
TypePropertyMask* wildcard_type_property_mask =
result_spec.add_type_property_masks();
wildcard_type_property_mask->set_schema_type(
- std::string(ProjectionTree::kSchemaTypeWildcard));
+ std::string(SchemaStore::kSchemaTypeWildcard));
wildcard_type_property_mask->add_paths("sender");
// 4. Create ResultState with custom ResultSpec.
@@ -1238,10 +1367,12 @@ TEST_F(ResultRetrieverV2ProjectionTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/false),
- /*query_terms=*/SectionRestrictQueryTermsMap{},
- CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetrieverV2> result_retriever,
@@ -1251,7 +1382,10 @@ TEST_F(ResultRetrieverV2ProjectionTest,
// 5. Verify that the returned Email results only contain the 'sender.name'
// property and the returned Person results contain no properties.
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result.results, SizeIs(2));
DocumentProto projected_document_one =
@@ -1278,6 +1412,545 @@ TEST_F(ResultRetrieverV2ProjectionTest,
EqualsProto(projected_document_two));
}
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionJoinDocuments) {
+ // 1. Add one Person document
+ DocumentProto person_document =
+ DocumentBuilder()
+ .SetKey("namespace", "Person/1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId person_document_id,
+ document_store_->Put(person_document));
+
+ // 2. Add two Email documents
+ DocumentProto email_document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "Email/1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id1,
+ document_store_->Put(email_document1));
+
+ DocumentProto email_document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "Email/2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id2,
+ document_store_->Put(email_document2));
+
+ // 3. Setup the joined scored results.
+ std::vector<SectionId> person_hit_section_ids = {
+ GetSectionId("Person", "name")};
+ std::vector<SectionId> email_hit_section_ids = {
+ GetSectionId("Email", "name"), GetSectionId("Email", "body")};
+ SectionIdMask person_hit_section_id_mask =
+ CreateSectionIdMask(person_hit_section_ids);
+ SectionIdMask email_hit_section_id_mask =
+ CreateSectionIdMask(email_hit_section_ids);
+
+ ScoredDocumentHit person_scored_doc_hit(
+ person_document_id, person_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit email1_scored_doc_hit(
+ email_document_id1, email_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit email2_scored_doc_hit(
+ email_document_id2, email_hit_section_id_mask, /*score=*/0);
+ // Create JoinedScoredDocumentHits mapping Person to Email1 and Email2
+ std::vector<JoinedScoredDocumentHit> joined_scored_document_hits = {
+ JoinedScoredDocumentHit(
+ /*final_score=*/0,
+ /*parent_scored_document_hit=*/person_scored_doc_hit,
+ /*child_scored_document_hits=*/
+ {email1_scored_doc_hit, email2_scored_doc_hit})};
+
+ // 4. Create parent ResultSpec with type property mask.
+ ResultSpecProto parent_result_spec = CreateResultSpec(/*num_per_page=*/2);
+ parent_result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int>::max());
+ TypePropertyMask* type_property_mask =
+ parent_result_spec.add_type_property_masks();
+ type_property_mask->set_schema_type("Person");
+ type_property_mask->add_paths("name");
+
+ // 5. Create child ResultSpec with type property mask.
+ ResultSpecProto child_result_spec;
+ type_property_mask = child_result_spec.add_type_property_masks();
+ type_property_mask->set_schema_type("Email");
+ type_property_mask->add_paths("body");
+
+ // 6. Create ResultState with custom ResultSpecs.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<JoinedScoredDocumentHit>>(
+ std::move(joined_scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), parent_result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), child_result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ parent_result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 7. Verify that the returned results:
+ // - Person docs only contain the "name" property.
+ // - Email docs only contain the "body" property.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(1));
+
+ // Check parent doc.
+ DocumentProto projected_person_document =
+ DocumentBuilder()
+ .SetKey("namespace", "Person/1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_person_document));
+
+ // Check child docs.
+ ASSERT_THAT(page_result.results.at(0).joined_results(), SizeIs(2));
+ // Check Email1
+ DocumentProto projected_email_document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "Email/1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).joined_results(0).document(),
+ EqualsProto(projected_email_document1));
+ // Check Email2
+ DocumentProto projected_email_document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "Email/2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).joined_results(1).document(),
+ EqualsProto(projected_email_document2));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionPolymorphism) {
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .AddStringProperty("name", "Joe Artist")
+ .AddStringProperty("emailAddress", "artist@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, kSectionIdMaskAll, /*score=*/0},
+ {document_id2, kSectionIdMaskAll, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ // Since Artist is a child type of Person, the TypePropertyMask for Person
+ // also applies to Artist.
+ TypePropertyMask* person_type_property_mask =
+ result_spec.add_type_property_masks();
+ person_type_property_mask->set_schema_type("Person");
+ person_type_property_mask->add_paths("name");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned Person and Artist results only contain the
+ // 'name' property.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .AddStringProperty("name", "Joe Artist")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionTransitivePolymorphism) {
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Musician")
+ .AddStringProperty("name", "Joe Musician")
+ .AddStringProperty("emailAddress", "Musician@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, kSectionIdMaskAll, /*score=*/0},
+ {document_id2, kSectionIdMaskAll, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ // Since Musician is a transitive child type of Person, the TypePropertyMask
+ // for Person also applies to Musician.
+ TypePropertyMask* person_type_property_mask =
+ result_spec.add_type_property_masks();
+ person_type_property_mask->set_schema_type("Person");
+ person_type_property_mask->add_paths("name");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned Person and Musician results only contain the
+ // 'name' property.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Musician")
+ .AddStringProperty("name", "Joe Musician")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest,
+ ProjectionPolymorphismChildMissingProperty) {
+ // 1. Add an artist document with missing 'emailAddress', which is allowed
+ // since 'emailAddress' in the parent type 'Person' is defined as optional.
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .AddStringProperty("name", "Joe Artist")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document));
+
+ // 2. Setup the scored results.
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id, kSectionIdMaskAll, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask for the missing property
+ // 'emailAddress' in the Person type. Since Artist is a child type of Person,
+ // the TypePropertyMask for Person also applies to Artist.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* person_type_property_mask =
+ result_spec.add_type_property_masks();
+ person_type_property_mask->set_schema_type("Person");
+ person_type_property_mask->add_paths("emailAddress");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned person document does not contain any property,
+ // since 'emailAddress' is missing.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(1));
+ DocumentProto projected_document = DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionPolymorphismMerge) {
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .AddStringProperty("name", "Joe Artist")
+ .AddStringProperty("emailAddress", "artist@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, kSectionIdMaskAll, /*score=*/0},
+ {document_id2, kSectionIdMaskAll, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* person_type_property_mask =
+ result_spec.add_type_property_masks();
+ person_type_property_mask->set_schema_type("Person");
+ person_type_property_mask->add_paths("name");
+ // Since Artist is a child type of Person, the TypePropertyMask for Person
+ // will be merged to Artist's TypePropertyMask by polymorphism, so that 'name'
+ // will also show in Artist's projection results.
+ TypePropertyMask* artist_type_property_mask =
+ result_spec.add_type_property_masks();
+ artist_type_property_mask->set_schema_type("Artist");
+ artist_type_property_mask->add_paths("emailAddress");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned Person results only contain the 'name'
+ // property and the returned Artist results contain both the 'name' and
+ // 'emailAddress' properties.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .AddStringProperty("name", "Joe Artist")
+ .AddStringProperty("emailAddress", "artist@aol.com")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleParentPolymorphism) {
+ // 1. Add a document
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("PersonWithPhone")
+ .AddStringProperty("name", "name")
+ .AddStringProperty("emailAddress", "email")
+ .AddStringProperty("phoneNumber", "12345")
+ .AddStringProperty("phoneModel", "pixel")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document));
+
+ // 2. Setup the scored results.
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id, kSectionIdMaskAll, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/1);
+ // Since PersonWithPhone is a child type of Person, the TypePropertyMask
+ // also applies to PersonWithPhone.
+ TypePropertyMask* person_type_property_mask =
+ result_spec.add_type_property_masks();
+ person_type_property_mask->set_schema_type("Person");
+ person_type_property_mask->add_paths("name");
+ // Since PersonWithPhone is a child type of WithPhone, the
+ // TypePropertyMask also applies to PersonWithPhone.
+ TypePropertyMask* with_phone_type_property_mask =
+ result_spec.add_type_property_masks();
+ with_phone_type_property_mask->set_schema_type("WithPhone");
+ with_phone_type_property_mask->add_paths("phoneNumber");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned document only contains the 'name' and the
+ // 'phoneNumber' property.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(1));
+
+ DocumentProto projected_document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("PersonWithPhone")
+ .AddStringProperty("name", "name")
+ .AddStringProperty("phoneNumber", "12345")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/result/result-retriever-v2_snippet_test.cc b/icing/result/result-retriever-v2_snippet_test.cc
index 6123bf4..440d31c 100644
--- a/icing/result/result-retriever-v2_snippet_test.cc
+++ b/icing/result/result-retriever-v2_snippet_test.cc
@@ -26,6 +26,7 @@
#include "icing/proto/search.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/result/page-result.h"
+#include "icing/result/result-adjustment-info.h"
#include "icing/result/result-retriever-v2.h"
#include "icing/result/result-state-v2.h"
#include "icing/schema-builder.h"
@@ -82,44 +83,40 @@ class ResultRetrieverV2SnippetTest : public testing::Test {
SchemaProto schema =
SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("name")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_EXACT,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(
- PropertyConfigBuilder()
- .SetName("sender")
- .SetDataTypeDocument(
- "Person", /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL)))
.AddType(
SchemaTypeConfigBuilder()
- .SetType("Person")
+ .SetType("Email")
.AddProperty(PropertyConfigBuilder()
- .SetName("name")
+ .SetName("subject")
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL))
.AddProperty(PropertyConfigBuilder()
- .SetName("emailAddress")
- .SetDataTypeString(TERM_MATCH_PREFIX,
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT,
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::Create(
+ &filesystem_, test_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
}
@@ -155,9 +152,6 @@ class ResultRetrieverV2SnippetTest : public testing::Test {
FakeClock fake_clock_;
};
-// TODO(sungyc): Refactor helper functions below (builder classes or common test
-// utility).
-
ResultSpecProto::SnippetSpecProto CreateSnippetSpec() {
ResultSpecProto::SnippetSpecProto snippet_spec;
snippet_spec.set_num_to_snippet(std::numeric_limits<int>::max());
@@ -166,16 +160,25 @@ ResultSpecProto::SnippetSpecProto CreateSnippetSpec() {
return snippet_spec;
}
-DocumentProto CreateDocument(int id) {
+DocumentProto CreateEmailDocument(int id) {
return DocumentBuilder()
.SetKey("icing", "Email/" + std::to_string(id))
.SetSchema("Email")
- .AddStringProperty("name", "subject foo " + std::to_string(id))
+ .AddStringProperty("subject", "subject foo " + std::to_string(id))
.AddStringProperty("body", "body bar " + std::to_string(id))
.SetCreationTimestampMs(1574365086666 + id)
.Build();
}
+DocumentProto CreatePersonDocument(int id) {
+ return DocumentBuilder()
+ .SetKey("icing", "Person/" + std::to_string(id))
+ .SetSchema("Person")
+ .AddStringProperty("name", "person " + std::to_string(id))
+ .SetCreationTimestampMs(1574365086666 + id)
+ .Build();
+}
+
SectionIdMask CreateSectionIdMask(const std::vector<SectionId>& section_ids) {
SectionIdMask mask = 0;
for (SectionId section_id : section_ids) {
@@ -205,14 +208,17 @@ ResultSpecProto CreateResultSpec(int num_per_page) {
TEST_F(ResultRetrieverV2SnippetTest,
DefaultSnippetSpecShouldDisableSnippeting) {
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- document_store_->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- document_store_->Put(CreateDocument(/*id=*/2)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- document_store_->Put(CreateDocument(/*id=*/3)));
-
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(CreateEmailDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(CreateEmailDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id3,
+ document_store_->Put(CreateEmailDocument(/*id=*/3)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "subject"),
GetSectionId("Email", "body")};
SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
@@ -224,15 +230,23 @@ TEST_F(ResultRetrieverV2SnippetTest,
ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
language_segmenter_.get(), normalizer_.get()));
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/3);
+
ResultStateV2 result_state(
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true),
- CreateResultSpec(/*num_per_page=*/3), *document_store_);
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result.results, SizeIs(3));
EXPECT_THAT(page_result.results.at(0).snippet(),
EqualsProto(SnippetProto::default_instance()));
@@ -244,14 +258,17 @@ TEST_F(ResultRetrieverV2SnippetTest,
}
TEST_F(ResultRetrieverV2SnippetTest, SimpleSnippeted) {
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- document_store_->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- document_store_->Put(CreateDocument(/*id=*/2)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- document_store_->Put(CreateDocument(/*id=*/3)));
-
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(CreateEmailDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(CreateEmailDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id3,
+ document_store_->Put(CreateEmailDocument(/*id=*/3)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "subject"),
GetSectionId("Email", "body")};
SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
@@ -271,20 +288,26 @@ TEST_F(ResultRetrieverV2SnippetTest, SimpleSnippeted) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/false),
- /*query_terms=*/{{"", {"foo", "bar"}}},
- CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(),
+ SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result.results, SizeIs(3));
EXPECT_THAT(page_result.num_results_with_snippets, Eq(3));
const DocumentProto& result_document_one =
page_result.results.at(0).document();
const SnippetProto& result_snippet_one = page_result.results.at(0).snippet();
- EXPECT_THAT(result_document_one, EqualsProto(CreateDocument(/*id=*/1)));
+ EXPECT_THAT(result_document_one, EqualsProto(CreateEmailDocument(/*id=*/1)));
EXPECT_THAT(result_snippet_one.entries(), SizeIs(2));
EXPECT_THAT(result_snippet_one.entries(0).property_name(), Eq("body"));
std::string_view content = GetString(
@@ -293,7 +316,7 @@ TEST_F(ResultRetrieverV2SnippetTest, SimpleSnippeted) {
ElementsAre("body bar 1"));
EXPECT_THAT(GetMatches(content, result_snippet_one.entries(0)),
ElementsAre("bar"));
- EXPECT_THAT(result_snippet_one.entries(1).property_name(), Eq("name"));
+ EXPECT_THAT(result_snippet_one.entries(1).property_name(), Eq("subject"));
content = GetString(&result_document_one,
result_snippet_one.entries(1).property_name());
EXPECT_THAT(GetWindows(content, result_snippet_one.entries(1)),
@@ -304,7 +327,7 @@ TEST_F(ResultRetrieverV2SnippetTest, SimpleSnippeted) {
const DocumentProto& result_document_two =
page_result.results.at(1).document();
const SnippetProto& result_snippet_two = page_result.results.at(1).snippet();
- EXPECT_THAT(result_document_two, EqualsProto(CreateDocument(/*id=*/2)));
+ EXPECT_THAT(result_document_two, EqualsProto(CreateEmailDocument(/*id=*/2)));
EXPECT_THAT(result_snippet_two.entries(), SizeIs(2));
EXPECT_THAT(result_snippet_two.entries(0).property_name(), Eq("body"));
content = GetString(&result_document_two,
@@ -313,7 +336,7 @@ TEST_F(ResultRetrieverV2SnippetTest, SimpleSnippeted) {
ElementsAre("body bar 2"));
EXPECT_THAT(GetMatches(content, result_snippet_two.entries(0)),
ElementsAre("bar"));
- EXPECT_THAT(result_snippet_two.entries(1).property_name(), Eq("name"));
+ EXPECT_THAT(result_snippet_two.entries(1).property_name(), Eq("subject"));
content = GetString(&result_document_two,
result_snippet_two.entries(1).property_name());
EXPECT_THAT(GetWindows(content, result_snippet_two.entries(1)),
@@ -325,7 +348,8 @@ TEST_F(ResultRetrieverV2SnippetTest, SimpleSnippeted) {
page_result.results.at(2).document();
const SnippetProto& result_snippet_three =
page_result.results.at(2).snippet();
- EXPECT_THAT(result_document_three, EqualsProto(CreateDocument(/*id=*/3)));
+ EXPECT_THAT(result_document_three,
+ EqualsProto(CreateEmailDocument(/*id=*/3)));
EXPECT_THAT(result_snippet_three.entries(), SizeIs(2));
EXPECT_THAT(result_snippet_three.entries(0).property_name(), Eq("body"));
content = GetString(&result_document_three,
@@ -334,7 +358,7 @@ TEST_F(ResultRetrieverV2SnippetTest, SimpleSnippeted) {
ElementsAre("body bar 3"));
EXPECT_THAT(GetMatches(content, result_snippet_three.entries(0)),
ElementsAre("bar"));
- EXPECT_THAT(result_snippet_three.entries(1).property_name(), Eq("name"));
+ EXPECT_THAT(result_snippet_three.entries(1).property_name(), Eq("subject"));
content = GetString(&result_document_three,
result_snippet_three.entries(1).property_name());
EXPECT_THAT(GetWindows(content, result_snippet_three.entries(1)),
@@ -344,14 +368,17 @@ TEST_F(ResultRetrieverV2SnippetTest, SimpleSnippeted) {
}
TEST_F(ResultRetrieverV2SnippetTest, OnlyOneDocumentSnippeted) {
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- document_store_->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- document_store_->Put(CreateDocument(/*id=*/2)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- document_store_->Put(CreateDocument(/*id=*/3)));
-
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(CreateEmailDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(CreateEmailDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id3,
+ document_store_->Put(CreateEmailDocument(/*id=*/3)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "subject"),
GetSectionId("Email", "body")};
SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
@@ -373,19 +400,25 @@ TEST_F(ResultRetrieverV2SnippetTest, OnlyOneDocumentSnippeted) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/false),
- /*query_terms=*/{{"", {"foo", "bar"}}},
- CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(),
+ SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result.results, SizeIs(3));
EXPECT_THAT(page_result.num_results_with_snippets, Eq(1));
const DocumentProto& result_document = page_result.results.at(0).document();
const SnippetProto& result_snippet = page_result.results.at(0).snippet();
- EXPECT_THAT(result_document, EqualsProto(CreateDocument(/*id=*/1)));
+ EXPECT_THAT(result_document, EqualsProto(CreateEmailDocument(/*id=*/1)));
EXPECT_THAT(result_snippet.entries(), SizeIs(2));
EXPECT_THAT(result_snippet.entries(0).property_name(), Eq("body"));
std::string_view content =
@@ -394,7 +427,7 @@ TEST_F(ResultRetrieverV2SnippetTest, OnlyOneDocumentSnippeted) {
ElementsAre("body bar 1"));
EXPECT_THAT(GetMatches(content, result_snippet.entries(0)),
ElementsAre("bar"));
- EXPECT_THAT(result_snippet.entries(1).property_name(), Eq("name"));
+ EXPECT_THAT(result_snippet.entries(1).property_name(), Eq("subject"));
content =
GetString(&result_document, result_snippet.entries(1).property_name());
EXPECT_THAT(GetWindows(content, result_snippet.entries(1)),
@@ -403,25 +436,28 @@ TEST_F(ResultRetrieverV2SnippetTest, OnlyOneDocumentSnippeted) {
ElementsAre("foo"));
EXPECT_THAT(page_result.results.at(1).document(),
- EqualsProto(CreateDocument(/*id=*/2)));
+ EqualsProto(CreateEmailDocument(/*id=*/2)));
EXPECT_THAT(page_result.results.at(1).snippet(),
EqualsProto(SnippetProto::default_instance()));
EXPECT_THAT(page_result.results.at(2).document(),
- EqualsProto(CreateDocument(/*id=*/3)));
+ EqualsProto(CreateEmailDocument(/*id=*/3)));
EXPECT_THAT(page_result.results.at(2).snippet(),
EqualsProto(SnippetProto::default_instance()));
}
TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetAllResults) {
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- document_store_->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- document_store_->Put(CreateDocument(/*id=*/2)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- document_store_->Put(CreateDocument(/*id=*/3)));
-
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(CreateEmailDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(CreateEmailDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id3,
+ document_store_->Put(CreateEmailDocument(/*id=*/3)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "subject"),
GetSectionId("Email", "body")};
SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
@@ -443,13 +479,19 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetAllResults) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/false),
- /*query_terms=*/{{"", {"foo", "bar"}}},
- CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(),
+ SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
// num_to_snippet = 5, num_previously_returned_in = 0,
// We can return 5 - 0 = 5 snippets at most. We're able to return all 3
// snippets here.
@@ -461,14 +503,17 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetAllResults) {
}
TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetSomeResults) {
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- document_store_->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- document_store_->Put(CreateDocument(/*id=*/2)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- document_store_->Put(CreateDocument(/*id=*/3)));
-
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(CreateEmailDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(CreateEmailDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id3,
+ document_store_->Put(CreateEmailDocument(/*id=*/3)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "subject"),
GetSectionId("Email", "body")};
SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
@@ -490,21 +535,25 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetSomeResults) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/false),
- /*query_terms=*/{{"", {"foo", "bar"}}},
- CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(),
+ SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
{
absl_ports::unique_lock l(&result_state.mutex);
- // Set (previously) num_returned = 3 docs
- result_state.num_returned = 3;
+ // Set remaining_num_to_snippet = 2
+ result_state.parent_adjustment_info()->remaining_num_to_snippet = 2;
}
- // num_to_snippet = 5, (previously) num_returned = 3,
- // We can return 5 - 3 = 2 snippets.
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result.results, SizeIs(3));
EXPECT_THAT(page_result.results.at(0).snippet().entries(), Not(IsEmpty()));
EXPECT_THAT(page_result.results.at(1).snippet().entries(), Not(IsEmpty()));
@@ -513,14 +562,17 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetSomeResults) {
}
TEST_F(ResultRetrieverV2SnippetTest, ShouldNotSnippetAnyResults) {
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- document_store_->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- document_store_->Put(CreateDocument(/*id=*/2)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- document_store_->Put(CreateDocument(/*id=*/3)));
-
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(CreateEmailDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(CreateEmailDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id3,
+ document_store_->Put(CreateEmailDocument(/*id=*/3)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "subject"),
GetSectionId("Email", "body")};
SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
@@ -542,21 +594,89 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldNotSnippetAnyResults) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/false),
- /*query_terms=*/{{"", {"foo", "bar"}}},
- CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- *document_store_);
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(),
+ SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
{
absl_ports::unique_lock l(&result_state.mutex);
- // Set (previously) num_returned = 6 docs
- result_state.num_returned = 6;
+ // Set remaining_num_to_snippet = 0
+ result_state.parent_adjustment_info()->remaining_num_to_snippet = 0;
}
- // num_to_snippet = 5, (previously) num_returned = 6,
// We can't return any snippets for this page.
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(3));
+ EXPECT_THAT(page_result.results.at(0).snippet().entries(), IsEmpty());
+ EXPECT_THAT(page_result.results.at(1).snippet().entries(), IsEmpty());
+ EXPECT_THAT(page_result.results.at(2).snippet().entries(), IsEmpty());
+ EXPECT_THAT(page_result.num_results_with_snippets, Eq(0));
+}
+
+TEST_F(ResultRetrieverV2SnippetTest,
+ ShouldNotSnippetAnyResultsForNonPositiveNumMatchesPerProperty) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(CreateEmailDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(CreateEmailDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id3,
+ document_store_->Put(CreateEmailDocument(/*id=*/3)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "subject"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Create ResultSpec with custom snippet spec.
+ ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec();
+ snippet_spec.set_num_to_snippet(5);
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/3);
+ *result_spec.mutable_snippet_spec() = std::move(snippet_spec);
+
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(),
+ SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ {
+ absl_ports::unique_lock l(&result_state.mutex);
+
+ // Set num_matchers_per_property = 0
+ result_state.parent_adjustment_info()
+ ->snippet_context.snippet_spec.set_num_matches_per_property(0);
+ }
+
+ // We can't return any snippets for this page even though num_to_snippet > 0.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result.results, SizeIs(3));
EXPECT_THAT(page_result.results.at(0).snippet().entries(), IsEmpty());
EXPECT_THAT(page_result.results.at(1).snippet().entries(), IsEmpty());
@@ -564,6 +684,478 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldNotSnippetAnyResults) {
EXPECT_THAT(page_result.num_results_with_snippets, Eq(0));
}
+TEST_F(ResultRetrieverV2SnippetTest, JoinSnippeted) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId person_document_id1,
+ document_store_->Put(CreatePersonDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId person_document_id2,
+ document_store_->Put(CreatePersonDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId person_document_id3,
+ document_store_->Put(CreatePersonDocument(/*id=*/3)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_document_id1,
+ document_store_->Put(CreateEmailDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_document_id2,
+ document_store_->Put(CreateEmailDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_document_id3,
+ document_store_->Put(CreateEmailDocument(/*id=*/3)));
+
+ std::vector<SectionId> person_hit_section_ids = {
+ GetSectionId("Person", "name")};
+ std::vector<SectionId> email_hit_section_ids = {
+ GetSectionId("Email", "subject"), GetSectionId("Email", "body")};
+ SectionIdMask person_hit_section_id_mask =
+ CreateSectionIdMask(person_hit_section_ids);
+ SectionIdMask email_hit_section_id_mask =
+ CreateSectionIdMask(email_hit_section_ids);
+
+ ScoredDocumentHit person1_scored_doc_hit(
+ person_document_id1, person_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit person2_scored_doc_hit(
+ person_document_id2, person_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit person3_scored_doc_hit(
+ person_document_id3, person_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit email1_scored_doc_hit(
+ email_document_id1, email_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit email2_scored_doc_hit(
+ email_document_id2, email_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit email3_scored_doc_hit(
+ email_document_id3, email_hit_section_id_mask, /*score=*/0);
+
+ // Create JoinedScoredDocumentHits mapping:
+ // - Person1 to Email1 and Email2
+ // - Person2 to empty
+ // - Person3 to Email3
+ JoinedScoredDocumentHit joined_scored_document_hit1(
+ /*final_score=*/0, /*parent_scored_document_hit=*/person1_scored_doc_hit,
+ /*child_scored_document_hits=*/
+ {email1_scored_doc_hit, email2_scored_doc_hit});
+ JoinedScoredDocumentHit joined_scored_document_hit2(
+ /*final_score=*/0, /*parent_scored_document_hit=*/person2_scored_doc_hit,
+ /*child_scored_document_hits=*/{});
+ JoinedScoredDocumentHit joined_scored_document_hit3(
+ /*final_score=*/0, /*parent_scored_document_hit=*/person3_scored_doc_hit,
+ /*child_scored_document_hits=*/{email3_scored_doc_hit});
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Create parent ResultSpec with custom snippet spec.
+ ResultSpecProto parent_result_spec = CreateResultSpec(/*num_per_page=*/3);
+ parent_result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+ *parent_result_spec.mutable_snippet_spec() = CreateSnippetSpec();
+
+ // Create child ResultSpec with custom snippet spec.
+ ResultSpecProto child_result_spec;
+ *child_result_spec.mutable_snippet_spec() = CreateSnippetSpec();
+
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<JoinedScoredDocumentHit>>(
+ std::vector<JoinedScoredDocumentHit>{joined_scored_document_hit1,
+ joined_scored_document_hit2,
+ joined_scored_document_hit3},
+ /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), parent_result_spec,
+ schema_store_.get(),
+ SectionRestrictQueryTermsMap({{"", {"person"}}})),
+ /*child_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), child_result_spec,
+ schema_store_.get(),
+ SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
+ parent_result_spec, *document_store_);
+
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(3));
+ EXPECT_THAT(page_result.num_results_with_snippets, Eq(3));
+
+ // Result1: Person1 for parent and [Email1, Email2] for children.
+ // Check parent doc (Person1).
+ const DocumentProto& result_parent_document_one =
+ page_result.results.at(0).document();
+ const SnippetProto& result_parent_snippet_one =
+ page_result.results.at(0).snippet();
+ EXPECT_THAT(result_parent_document_one,
+ EqualsProto(CreatePersonDocument(/*id=*/1)));
+ ASSERT_THAT(result_parent_snippet_one.entries(), SizeIs(1));
+ EXPECT_THAT(result_parent_snippet_one.entries(0).property_name(), Eq("name"));
+ std::string_view content =
+ GetString(&result_parent_document_one,
+ result_parent_snippet_one.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_parent_snippet_one.entries(0)),
+ ElementsAre("person 1"));
+ EXPECT_THAT(GetMatches(content, result_parent_snippet_one.entries(0)),
+ ElementsAre("person"));
+
+ // Check child docs.
+ ASSERT_THAT(page_result.results.at(0).joined_results(), SizeIs(2));
+ // Check Email1.
+ const DocumentProto& result_child_document_one =
+ page_result.results.at(0).joined_results(0).document();
+ const SnippetProto& result_child_snippet_one =
+ page_result.results.at(0).joined_results(0).snippet();
+ EXPECT_THAT(result_child_document_one,
+ EqualsProto(CreateEmailDocument(/*id=*/1)));
+ ASSERT_THAT(result_child_snippet_one.entries(), SizeIs(2));
+ EXPECT_THAT(result_child_snippet_one.entries(0).property_name(), Eq("body"));
+ content = GetString(&result_child_document_one,
+ result_child_snippet_one.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_child_snippet_one.entries(0)),
+ ElementsAre("body bar 1"));
+ EXPECT_THAT(GetMatches(content, result_child_snippet_one.entries(0)),
+ ElementsAre("bar"));
+ EXPECT_THAT(result_child_snippet_one.entries(1).property_name(),
+ Eq("subject"));
+ content = GetString(&result_child_document_one,
+ result_child_snippet_one.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, result_child_snippet_one.entries(1)),
+ ElementsAre("subject foo 1"));
+ EXPECT_THAT(GetMatches(content, result_child_snippet_one.entries(1)),
+ ElementsAre("foo"));
+ // Check Email2.
+ const DocumentProto& result_child_document_two =
+ page_result.results.at(0).joined_results(1).document();
+ const SnippetProto& result_child_snippet_two =
+ page_result.results.at(0).joined_results(1).snippet();
+ EXPECT_THAT(result_child_document_two,
+ EqualsProto(CreateEmailDocument(/*id=*/2)));
+ ASSERT_THAT(result_child_snippet_two.entries(), SizeIs(2));
+ EXPECT_THAT(result_child_snippet_two.entries(0).property_name(), Eq("body"));
+ content = GetString(&result_child_document_two,
+ result_child_snippet_two.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_child_snippet_two.entries(0)),
+ ElementsAre("body bar 2"));
+ EXPECT_THAT(GetMatches(content, result_child_snippet_two.entries(0)),
+ ElementsAre("bar"));
+ EXPECT_THAT(result_child_snippet_two.entries(1).property_name(),
+ Eq("subject"));
+ content = GetString(&result_child_document_two,
+ result_child_snippet_two.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, result_child_snippet_two.entries(1)),
+ ElementsAre("subject foo 2"));
+ EXPECT_THAT(GetMatches(content, result_child_snippet_two.entries(1)),
+ ElementsAre("foo"));
+
+ // Result2: Person2 for parent and [] for children.
+ // Check parent doc (Person1).
+ const DocumentProto& result_parent_document_two =
+ page_result.results.at(1).document();
+ const SnippetProto& result_parent_snippet_two =
+ page_result.results.at(1).snippet();
+ EXPECT_THAT(result_parent_document_two,
+ EqualsProto(CreatePersonDocument(/*id=*/2)));
+ ASSERT_THAT(result_parent_snippet_two.entries(), SizeIs(1));
+ EXPECT_THAT(result_parent_snippet_two.entries(0).property_name(), Eq("name"));
+ content = GetString(&result_parent_document_two,
+ result_parent_snippet_two.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_parent_snippet_two.entries(0)),
+ ElementsAre("person 2"));
+ EXPECT_THAT(GetMatches(content, result_parent_snippet_two.entries(0)),
+ ElementsAre("person"));
+ // Check child docs.
+ ASSERT_THAT(page_result.results.at(1).joined_results(), IsEmpty());
+
+ // Result3: Person3 for parent and [Email3] for children.
+ // Check parent doc (Person3).
+ const DocumentProto& result_parent_document_three =
+ page_result.results.at(2).document();
+ const SnippetProto& result_parent_snippet_three =
+ page_result.results.at(2).snippet();
+ EXPECT_THAT(result_parent_document_three,
+ EqualsProto(CreatePersonDocument(/*id=*/3)));
+ ASSERT_THAT(result_parent_snippet_three.entries(), SizeIs(1));
+ EXPECT_THAT(result_parent_snippet_three.entries(0).property_name(),
+ Eq("name"));
+ content = GetString(&result_parent_document_three,
+ result_parent_snippet_three.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_parent_snippet_three.entries(0)),
+ ElementsAre("person 3"));
+ EXPECT_THAT(GetMatches(content, result_parent_snippet_three.entries(0)),
+ ElementsAre("person"));
+
+ // Check child docs.
+ ASSERT_THAT(page_result.results.at(2).joined_results(), SizeIs(1));
+ // Check Email3.
+ const DocumentProto& result_child_document_three =
+ page_result.results.at(2).joined_results(0).document();
+ const SnippetProto& result_child_snippet_three =
+ page_result.results.at(2).joined_results(0).snippet();
+ EXPECT_THAT(result_child_document_three,
+ EqualsProto(CreateEmailDocument(/*id=*/3)));
+ ASSERT_THAT(result_child_snippet_three.entries(), SizeIs(2));
+ EXPECT_THAT(result_child_snippet_three.entries(0).property_name(),
+ Eq("body"));
+ content = GetString(&result_child_document_three,
+ result_child_snippet_three.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_child_snippet_three.entries(0)),
+ ElementsAre("body bar 3"));
+ EXPECT_THAT(GetMatches(content, result_child_snippet_three.entries(0)),
+ ElementsAre("bar"));
+ EXPECT_THAT(result_child_snippet_three.entries(1).property_name(),
+ Eq("subject"));
+ content = GetString(&result_child_document_three,
+ result_child_snippet_three.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, result_child_snippet_three.entries(1)),
+ ElementsAre("subject foo 3"));
+ EXPECT_THAT(GetMatches(content, result_child_snippet_three.entries(1)),
+ ElementsAre("foo"));
+}
+
+TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetAllJoinedResults) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId person_document_id1,
+ document_store_->Put(CreatePersonDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId person_document_id2,
+ document_store_->Put(CreatePersonDocument(/*id=*/2)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_document_id1,
+ document_store_->Put(CreateEmailDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_document_id2,
+ document_store_->Put(CreateEmailDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_document_id3,
+ document_store_->Put(CreateEmailDocument(/*id=*/3)));
+
+ std::vector<SectionId> person_hit_section_ids = {
+ GetSectionId("Person", "name")};
+ std::vector<SectionId> email_hit_section_ids = {
+ GetSectionId("Email", "subject"), GetSectionId("Email", "body")};
+ SectionIdMask person_hit_section_id_mask =
+ CreateSectionIdMask(person_hit_section_ids);
+ SectionIdMask email_hit_section_id_mask =
+ CreateSectionIdMask(email_hit_section_ids);
+
+ ScoredDocumentHit person1_scored_doc_hit(
+ person_document_id1, person_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit person2_scored_doc_hit(
+ person_document_id2, person_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit email1_scored_doc_hit(
+ email_document_id1, email_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit email2_scored_doc_hit(
+ email_document_id2, email_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit email3_scored_doc_hit(
+ email_document_id3, email_hit_section_id_mask, /*score=*/0);
+
+ // Create JoinedScoredDocumentHits mapping:
+ // - Person1 to Email1
+ // - Person2 to Email2, Email3
+ JoinedScoredDocumentHit joined_scored_document_hit1(
+ /*final_score=*/0, /*parent_scored_document_hit=*/person1_scored_doc_hit,
+ /*child_scored_document_hits=*/
+ {email1_scored_doc_hit});
+ JoinedScoredDocumentHit joined_scored_document_hit2(
+ /*final_score=*/0, /*parent_scored_document_hit=*/person2_scored_doc_hit,
+ /*child_scored_document_hits=*/
+ {email2_scored_doc_hit, email3_scored_doc_hit});
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Create parent ResultSpec with custom snippet spec.
+ ResultSpecProto::SnippetSpecProto parent_snippet_spec = CreateSnippetSpec();
+ parent_snippet_spec.set_num_to_snippet(1);
+ ResultSpecProto parent_result_spec = CreateResultSpec(/*num_per_page=*/3);
+ parent_result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+ *parent_result_spec.mutable_snippet_spec() = std::move(parent_snippet_spec);
+
+ // Create child ResultSpec with custom snippet spec.
+ ResultSpecProto::SnippetSpecProto child_snippet_spec = CreateSnippetSpec();
+ child_snippet_spec.set_num_to_snippet(3);
+ ResultSpecProto child_result_spec;
+ *child_result_spec.mutable_snippet_spec() = std::move(child_snippet_spec);
+
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<JoinedScoredDocumentHit>>(
+ std::vector<JoinedScoredDocumentHit>{joined_scored_document_hit1,
+ joined_scored_document_hit2},
+ /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), parent_result_spec,
+ schema_store_.get(),
+ SectionRestrictQueryTermsMap({{"", {"person"}}})),
+ /*child_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), child_result_spec,
+ schema_store_.get(),
+ SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
+ parent_result_spec, *document_store_);
+
+ // Only 1 parent document should be snippeted, but all of the child documents
+ // should be snippeted.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ // Result1: Person1 for parent and [Email1] for children.
+ // Check parent doc (Person1).
+ EXPECT_THAT(page_result.results.at(0).snippet().entries(), Not(IsEmpty()));
+ // Check child docs.
+ ASSERT_THAT(page_result.results.at(0).joined_results(), SizeIs(1));
+ EXPECT_THAT(page_result.results.at(0).joined_results(0).snippet().entries(),
+ Not(IsEmpty()));
+
+ // Result2: Person2 for parent and [Email2, Email3] for children.
+ // Check parent doc (Person2).
+ EXPECT_THAT(page_result.results.at(1).snippet().entries(), IsEmpty());
+ // Check child docs.
+ ASSERT_THAT(page_result.results.at(1).joined_results(), SizeIs(2));
+ EXPECT_THAT(page_result.results.at(1).joined_results(0).snippet().entries(),
+ Not(IsEmpty()));
+ EXPECT_THAT(page_result.results.at(1).joined_results(1).snippet().entries(),
+ Not(IsEmpty()));
+
+ EXPECT_THAT(page_result.num_results_with_snippets, Eq(1));
+}
+
+TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetSomeJoinedResults) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId person_document_id1,
+ document_store_->Put(CreatePersonDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId person_document_id2,
+ document_store_->Put(CreatePersonDocument(/*id=*/2)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_document_id1,
+ document_store_->Put(CreateEmailDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_document_id2,
+ document_store_->Put(CreateEmailDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_document_id3,
+ document_store_->Put(CreateEmailDocument(/*id=*/3)));
+
+ std::vector<SectionId> person_hit_section_ids = {
+ GetSectionId("Person", "name")};
+ std::vector<SectionId> email_hit_section_ids = {
+ GetSectionId("Email", "subject"), GetSectionId("Email", "body")};
+ SectionIdMask person_hit_section_id_mask =
+ CreateSectionIdMask(person_hit_section_ids);
+ SectionIdMask email_hit_section_id_mask =
+ CreateSectionIdMask(email_hit_section_ids);
+
+ ScoredDocumentHit person1_scored_doc_hit(
+ person_document_id1, person_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit person2_scored_doc_hit(
+ person_document_id2, person_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit email1_scored_doc_hit(
+ email_document_id1, email_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit email2_scored_doc_hit(
+ email_document_id2, email_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit email3_scored_doc_hit(
+ email_document_id3, email_hit_section_id_mask, /*score=*/0);
+
+ // Create JoinedScoredDocumentHits mapping:
+ // - Person1 to Email1
+ // - Person2 to Email2, Email3
+ JoinedScoredDocumentHit joined_scored_document_hit1(
+ /*final_score=*/0, /*parent_scored_document_hit=*/person1_scored_doc_hit,
+ /*child_scored_document_hits=*/
+ {email1_scored_doc_hit});
+ JoinedScoredDocumentHit joined_scored_document_hit2(
+ /*final_score=*/0, /*parent_scored_document_hit=*/person2_scored_doc_hit,
+ /*child_scored_document_hits=*/
+ {email2_scored_doc_hit, email3_scored_doc_hit});
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Create parent ResultSpec with custom snippet spec.
+ ResultSpecProto::SnippetSpecProto parent_snippet_spec = CreateSnippetSpec();
+ parent_snippet_spec.set_num_to_snippet(3);
+ ResultSpecProto parent_result_spec = CreateResultSpec(/*num_per_page=*/3);
+ parent_result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+ *parent_result_spec.mutable_snippet_spec() = std::move(parent_snippet_spec);
+
+ // Create child ResultSpec with custom snippet spec.
+ ResultSpecProto::SnippetSpecProto child_snippet_spec = CreateSnippetSpec();
+ child_snippet_spec.set_num_to_snippet(2);
+ ResultSpecProto child_result_spec;
+ *child_result_spec.mutable_snippet_spec() = std::move(child_snippet_spec);
+
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<JoinedScoredDocumentHit>>(
+ std::vector<JoinedScoredDocumentHit>{joined_scored_document_hit1,
+ joined_scored_document_hit2},
+ /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), parent_result_spec,
+ schema_store_.get(),
+ SectionRestrictQueryTermsMap({{"", {"person"}}})),
+ /*child_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), child_result_spec,
+ schema_store_.get(),
+ SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
+ parent_result_spec, *document_store_);
+
+ // All parents document should be snippeted. Only 2 child documents should be
+ // snippeted.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ // Result1: Person1 for parent and [Email1] for children.
+ // Check parent doc (Person1).
+ EXPECT_THAT(page_result.results.at(0).snippet().entries(), Not(IsEmpty()));
+ // Check child docs.
+ ASSERT_THAT(page_result.results.at(0).joined_results(), SizeIs(1));
+ EXPECT_THAT(page_result.results.at(0).joined_results(0).snippet().entries(),
+ Not(IsEmpty()));
+
+ // Result2: Person2 for parent and [Email2, Email3] for children.
+ // Check parent doc (Person2).
+ EXPECT_THAT(page_result.results.at(1).snippet().entries(), Not(IsEmpty()));
+ // Check child docs.
+ ASSERT_THAT(page_result.results.at(1).joined_results(), SizeIs(2));
+ EXPECT_THAT(page_result.results.at(1).joined_results(0).snippet().entries(),
+ Not(IsEmpty()));
+ EXPECT_THAT(page_result.results.at(1).joined_results(1).snippet().entries(),
+ IsEmpty());
+
+ EXPECT_THAT(page_result.num_results_with_snippets, Eq(2));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/result/result-retriever-v2_test.cc b/icing/result/result-retriever-v2_test.cc
index 874a8f1..0bd40cc 100644
--- a/icing/result/result-retriever-v2_test.cc
+++ b/icing/result/result-retriever-v2_test.cc
@@ -15,21 +15,29 @@
#include "icing/result/result-retriever-v2.h"
#include <atomic>
+#include <cstddef>
+#include <cstdint>
#include <memory>
+#include <string>
#include <unordered_map>
+#include <utility>
#include <vector>
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "icing/absl_ports/mutex.h"
#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
#include "icing/file/mock-filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
#include "icing/portable/equals-proto.h"
#include "icing/portable/platform.h"
#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
#include "icing/proto/schema.pb.h"
-#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
-#include "icing/proto/term.pb.h"
#include "icing/result/page-result.h"
#include "icing/result/result-state-v2.h"
#include "icing/schema-builder.h"
@@ -37,15 +45,19 @@
#include "icing/schema/section.h"
#include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/icu-data-file-helper.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer-factory.h"
#include "icing/transform/normalizer.h"
+#include "icing/util/clock.h"
#include "unicode/uloc.h"
namespace icing {
@@ -74,7 +86,7 @@ class MockGroupResultLimiter : public GroupResultLimiterV2 {
MOCK_METHOD(bool, ShouldBeRemoved,
(const ScoredDocumentHit&, const EntryIdMap&,
const DocumentStore&, std::vector<int>&,
- ResultSpecProto::ResultGroupingType),
+ ResultSpecProto::ResultGroupingType, int64_t),
(const, override));
};
@@ -136,7 +148,10 @@ class ResultRetrieverV2Test : public ::testing::Test {
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
num_total_hits_ = 0;
}
@@ -173,9 +188,6 @@ class ResultRetrieverV2Test : public ::testing::Test {
FakeClock fake_clock_;
};
-// TODO(sungyc): Refactor helper functions below (builder classes or common test
-// utility).
-
DocumentProto CreateDocument(int id) {
return DocumentBuilder()
.SetKey("icing", "Email/" + std::to_string(id))
@@ -194,19 +206,6 @@ SectionIdMask CreateSectionIdMask(const std::vector<SectionId>& section_ids) {
return mask;
}
-SearchSpecProto CreateSearchSpec(TermMatchType::Code match_type) {
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(match_type);
- return search_spec;
-}
-
-ScoringSpecProto CreateScoringSpec(bool is_descending_order) {
- ScoringSpecProto scoring_spec;
- scoring_spec.set_order_by(is_descending_order ? ScoringSpecProto::Order::DESC
- : ScoringSpecProto::Order::ASC);
- return scoring_spec;
-}
-
ResultSpecProto CreateResultSpec(
int num_per_page, ResultSpecProto::ResultGroupingType result_group_type) {
ResultSpecProto result_spec;
@@ -215,6 +214,18 @@ ResultSpecProto CreateResultSpec(
return result_spec;
}
+libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, const SchemaStore* schema_store) {
+ return DocumentStore::Create(
+ filesystem, base_dir, clock, schema_store,
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr);
+}
+
TEST_F(ResultRetrieverV2Test, CreationWithNullPointerShouldFail) {
EXPECT_THAT(
ResultRetrieverV2::Create(/*doc_store=*/nullptr, schema_store_.get(),
@@ -223,8 +234,8 @@ TEST_F(ResultRetrieverV2Test, CreationWithNullPointerShouldFail) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -245,8 +256,8 @@ TEST_F(ResultRetrieverV2Test, CreationWithNullPointerShouldFail) {
TEST_F(ResultRetrieverV2Test, ShouldRetrieveSimpleResults) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -295,14 +306,13 @@ TEST_F(ResultRetrieverV2Test, ShouldRetrieveSimpleResults) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE),
*doc_store);
// First page, 2 results
- auto [page_result1, has_more_results1] =
- result_retriever->RetrieveNextPage(result_state);
+ auto [page_result1, has_more_results1] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(page_result1.results,
ElementsAre(EqualsProto(result1), EqualsProto(result2)));
// num_results_with_snippets is 0 when there is no snippet.
@@ -313,8 +323,8 @@ TEST_F(ResultRetrieverV2Test, ShouldRetrieveSimpleResults) {
EXPECT_TRUE(has_more_results1);
// Second page, 2 results
- auto [page_result2, has_more_results2] =
- result_retriever->RetrieveNextPage(result_state);
+ auto [page_result2, has_more_results2] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(page_result2.results,
ElementsAre(EqualsProto(result3), EqualsProto(result4)));
// num_results_with_snippets is 0 when there is no snippet.
@@ -325,8 +335,8 @@ TEST_F(ResultRetrieverV2Test, ShouldRetrieveSimpleResults) {
EXPECT_TRUE(has_more_results2);
// Third page, 1 result
- auto [page_result3, has_more_results3] =
- result_retriever->RetrieveNextPage(result_state);
+ auto [page_result3, has_more_results3] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(page_result3.results, ElementsAre(EqualsProto(result5)));
// num_results_with_snippets is 0 when there is no snippet.
EXPECT_THAT(page_result3.num_results_with_snippets, Eq(0));
@@ -339,8 +349,8 @@ TEST_F(ResultRetrieverV2Test, ShouldRetrieveSimpleResults) {
TEST_F(ResultRetrieverV2Test, ShouldIgnoreNonInternalErrors) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -375,12 +385,14 @@ TEST_F(ResultRetrieverV2Test, ShouldIgnoreNonInternalErrors) {
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits),
/*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/3, ResultSpecProto::NAMESPACE),
*doc_store);
PageResult page_result1 =
- result_retriever->RetrieveNextPage(result_state1).first;
+ result_retriever
+ ->RetrieveNextPage(result_state1,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
EXPECT_THAT(page_result1.results,
ElementsAre(EqualsProto(result1), EqualsProto(result2)));
@@ -394,16 +406,171 @@ TEST_F(ResultRetrieverV2Test, ShouldIgnoreNonInternalErrors) {
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits),
/*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/3, ResultSpecProto::NAMESPACE),
*doc_store);
PageResult page_result2 =
- result_retriever->RetrieveNextPage(result_state2).first;
+ result_retriever
+ ->RetrieveNextPage(result_state2,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
EXPECT_THAT(page_result2.results,
ElementsAre(EqualsProto(result1), EqualsProto(result2)));
}
+TEST_F(ResultRetrieverV2Test,
+ ShouldLimitNumChildDocumentsByMaxJoinedChildPerParent) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add 2 Person document
+ DocumentProto person_document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "Person/1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId person_document_id1,
+ doc_store->Put(person_document1));
+
+ DocumentProto person_document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "Person/2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId person_document_id2,
+ doc_store->Put(person_document2));
+
+ // 2. Add 4 Email documents
+ DocumentProto email_document1 = DocumentBuilder()
+ .SetKey("namespace", "Email/1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Test 1")
+ .AddStringProperty("body", "Test 1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id1,
+ doc_store->Put(email_document1));
+
+ DocumentProto email_document2 = DocumentBuilder()
+ .SetKey("namespace", "Email/2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Test 2")
+ .AddStringProperty("body", "Test 2")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id2,
+ doc_store->Put(email_document2));
+
+ DocumentProto email_document3 = DocumentBuilder()
+ .SetKey("namespace", "Email/3")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Test 3")
+ .AddStringProperty("body", "Test 3")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id3,
+ doc_store->Put(email_document3));
+
+ DocumentProto email_document4 = DocumentBuilder()
+ .SetKey("namespace", "Email/4")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Test 4")
+ .AddStringProperty("body", "Test 4")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id4,
+ doc_store->Put(email_document4));
+
+ // 3. Setup the joined scored results.
+ std::vector<SectionId> person_hit_section_ids = {
+ GetSectionId("Person", "name")};
+ std::vector<SectionId> email_hit_section_ids = {
+ GetSectionId("Email", "name"), GetSectionId("Email", "body")};
+ SectionIdMask person_hit_section_id_mask =
+ CreateSectionIdMask(person_hit_section_ids);
+ SectionIdMask email_hit_section_id_mask =
+ CreateSectionIdMask(email_hit_section_ids);
+
+ ScoredDocumentHit person1_scored_doc_hit(
+ person_document_id1, person_hit_section_id_mask, /*score=*/1);
+ ScoredDocumentHit person2_scored_doc_hit(
+ person_document_id2, person_hit_section_id_mask, /*score=*/2);
+ ScoredDocumentHit email1_scored_doc_hit(
+ email_document_id1, email_hit_section_id_mask, /*score=*/3);
+ ScoredDocumentHit email2_scored_doc_hit(
+ email_document_id2, email_hit_section_id_mask, /*score=*/4);
+ ScoredDocumentHit email3_scored_doc_hit(
+ email_document_id3, email_hit_section_id_mask, /*score=*/5);
+ ScoredDocumentHit email4_scored_doc_hit(
+ email_document_id4, email_hit_section_id_mask, /*score=*/6);
+ // Create JoinedScoredDocumentHits mapping:
+ // - Person1 to Email1
+ // - Person2 to Email2, Email3, Email4
+ std::vector<JoinedScoredDocumentHit> joined_scored_document_hits = {
+ JoinedScoredDocumentHit(
+ /*final_score=*/1,
+ /*parent_scored_document_hit=*/person1_scored_doc_hit,
+ /*child_scored_document_hits=*/{email1_scored_doc_hit}),
+ JoinedScoredDocumentHit(
+ /*final_score=*/3,
+ /*parent_scored_document_hit=*/person2_scored_doc_hit,
+ /*child_scored_document_hits=*/
+ {email4_scored_doc_hit, email3_scored_doc_hit,
+ email2_scored_doc_hit})};
+
+ // 4. Retrieve result with max_joined_children_per_parent_to_return = 2.
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
+ result_spec.set_max_joined_children_per_parent_to_return(2);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<JoinedScoredDocumentHit>>(
+ std::move(joined_scored_document_hits), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *doc_store);
+
+ // Result1: person2 with child docs = [email4, email3]
+ SearchResultProto::ResultProto result1;
+ *result1.mutable_document() = person_document2;
+ result1.set_score(3);
+ SearchResultProto::ResultProto* child1 = result1.add_joined_results();
+ *child1->mutable_document() = email_document4;
+ child1->set_score(6);
+ SearchResultProto::ResultProto* child2 = result1.add_joined_results();
+ *child2->mutable_document() = email_document3;
+ child2->set_score(5);
+
+ // Result2: person1 with child docs = [email1]
+ SearchResultProto::ResultProto result2;
+ *result2.mutable_document() = person_document1;
+ result2.set_score(1);
+ SearchResultProto::ResultProto* child3 = result2.add_joined_results();
+ *child3->mutable_document() = email_document1;
+ child3->set_score(3);
+
+ auto [page_result, has_more_results] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
+ EXPECT_THAT(page_result.results,
+ ElementsAre(EqualsProto(result1), EqualsProto(result2)));
+ // No more results.
+ EXPECT_FALSE(has_more_results);
+}
+
TEST_F(ResultRetrieverV2Test, ShouldIgnoreInternalErrors) {
MockFilesystem mock_filesystem;
EXPECT_CALL(mock_filesystem,
@@ -413,8 +580,8 @@ TEST_F(ResultRetrieverV2Test, ShouldIgnoreInternalErrors) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&mock_filesystem, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&mock_filesystem, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -445,12 +612,14 @@ TEST_F(ResultRetrieverV2Test, ShouldIgnoreInternalErrors) {
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits),
/*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE),
*doc_store);
PageResult page_result =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
// We mocked mock_filesystem to return an internal error when retrieving doc2,
// so doc2 should be skipped and doc1 should still be returned.
EXPECT_THAT(page_result.results, ElementsAre(EqualsProto(result1)));
@@ -459,8 +628,8 @@ TEST_F(ResultRetrieverV2Test, ShouldIgnoreInternalErrors) {
TEST_F(ResultRetrieverV2Test, ShouldUpdateResultState) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -494,14 +663,16 @@ TEST_F(ResultRetrieverV2Test, ShouldUpdateResultState) {
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits),
/*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE),
*doc_store);
// First page, 2 results
PageResult page_result1 =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result1.results, SizeIs(2));
{
absl_ports::shared_lock l(&result_state.mutex);
@@ -515,7 +686,10 @@ TEST_F(ResultRetrieverV2Test, ShouldUpdateResultState) {
// Second page, 2 results
PageResult page_result2 =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result2.results, SizeIs(2));
{
absl_ports::shared_lock l(&result_state.mutex);
@@ -529,7 +703,10 @@ TEST_F(ResultRetrieverV2Test, ShouldUpdateResultState) {
// Third page, 1 result
PageResult page_result3 =
- result_retriever->RetrieveNextPage(result_state).first;
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result3.results, SizeIs(1));
{
absl_ports::shared_lock l(&result_state.mutex);
@@ -545,8 +722,8 @@ TEST_F(ResultRetrieverV2Test, ShouldUpdateResultState) {
TEST_F(ResultRetrieverV2Test, ShouldUpdateNumTotalHits) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -567,9 +744,7 @@ TEST_F(ResultRetrieverV2Test, ShouldUpdateNumTotalHits) {
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits1),
/*is_descending=*/true),
- /*query_terms=*/SectionRestrictQueryTermsMap{},
- CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
*doc_store);
{
@@ -595,9 +770,7 @@ TEST_F(ResultRetrieverV2Test, ShouldUpdateNumTotalHits) {
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits2),
/*is_descending=*/true),
- /*query_terms=*/SectionRestrictQueryTermsMap{},
- CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE),
*doc_store);
{
@@ -615,14 +788,20 @@ TEST_F(ResultRetrieverV2Test, ShouldUpdateNumTotalHits) {
// Should get 1 doc in the first page of result_state1, and num_total_hits
// should be decremented by 1.
PageResult page_result1 =
- result_retriever->RetrieveNextPage(*result_state1).first;
+ result_retriever
+ ->RetrieveNextPage(*result_state1,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result1.results, SizeIs(1));
EXPECT_THAT(num_total_hits_, Eq(4));
// Should get 2 docs in the first page of result_state2, and num_total_hits
// should be decremented by 2.
PageResult page_result2 =
- result_retriever->RetrieveNextPage(*result_state2).first;
+ result_retriever
+ ->RetrieveNextPage(*result_state2,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result2.results, SizeIs(2));
EXPECT_THAT(num_total_hits_, Eq(2));
@@ -630,7 +809,10 @@ TEST_F(ResultRetrieverV2Test, ShouldUpdateNumTotalHits) {
// is 2, there is only 1 doc left), and num_total_hits should be decremented
// by 1.
PageResult page_result3 =
- result_retriever->RetrieveNextPage(*result_state2).first;
+ result_retriever
+ ->RetrieveNextPage(*result_state2,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
ASSERT_THAT(page_result3.results, SizeIs(1));
EXPECT_THAT(num_total_hits_, Eq(1));
@@ -648,8 +830,8 @@ TEST_F(ResultRetrieverV2Test, ShouldUpdateNumTotalHits) {
TEST_F(ResultRetrieverV2Test, ShouldLimitNumTotalBytesPerPage) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -684,21 +866,21 @@ TEST_F(ResultRetrieverV2Test, ShouldLimitNumTotalBytesPerPage) {
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits),
/*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true), result_spec, *doc_store);
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *doc_store);
// First page. Only result1 should be returned, since its byte size meets
// num_total_bytes_per_page_threshold and ResultRetriever should terminate
// early even though # of results is still below num_per_page.
- auto [page_result1, has_more_results1] =
- result_retriever->RetrieveNextPage(result_state);
+ auto [page_result1, has_more_results1] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(page_result1.results, ElementsAre(EqualsProto(result1)));
// Has more results.
EXPECT_TRUE(has_more_results1);
// Second page, result2.
- auto [page_result2, has_more_results2] =
- result_retriever->RetrieveNextPage(result_state);
+ auto [page_result2, has_more_results2] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(page_result2.results, ElementsAre(EqualsProto(result2)));
// No more results.
EXPECT_FALSE(has_more_results2);
@@ -708,8 +890,8 @@ TEST_F(ResultRetrieverV2Test,
ShouldReturnSingleLargeResultAboveNumTotalBytesPerPageThreshold) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -747,20 +929,20 @@ TEST_F(ResultRetrieverV2Test,
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits),
/*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true), result_spec, *doc_store);
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *doc_store);
// First page. Should return single result1 even though its byte size exceeds
// num_total_bytes_per_page_threshold.
- auto [page_result1, has_more_results1] =
- result_retriever->RetrieveNextPage(result_state);
+ auto [page_result1, has_more_results1] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(page_result1.results, ElementsAre(EqualsProto(result1)));
// Has more results.
EXPECT_TRUE(has_more_results1);
// Second page, result2.
- auto [page_result2, has_more_results2] =
- result_retriever->RetrieveNextPage(result_state);
+ auto [page_result2, has_more_results2] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(page_result2.results, ElementsAre(EqualsProto(result2)));
// No more results.
EXPECT_FALSE(has_more_results2);
@@ -770,8 +952,8 @@ TEST_F(ResultRetrieverV2Test,
ShouldRetrieveNextResultWhenBelowNumTotalBytesPerPageThreshold) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -809,15 +991,15 @@ TEST_F(ResultRetrieverV2Test,
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits),
/*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true), result_spec, *doc_store);
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *doc_store);
// After retrieving result1, total bytes are still below the threshold and #
// of results is still below num_per_page, so ResultRetriever should continue
// the retrieval process and thus include result2 into this page, even though
// finally total bytes of result1 + result2 exceed the threshold.
- auto [page_result, has_more_results] =
- result_retriever->RetrieveNextPage(result_state);
+ auto [page_result, has_more_results] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(page_result.results,
ElementsAre(EqualsProto(result1), EqualsProto(result2)));
// No more results.
diff --git a/icing/result/result-retriever.cc b/icing/result/result-retriever.cc
deleted file mode 100644
index 37b212a..0000000
--- a/icing/result/result-retriever.cc
+++ /dev/null
@@ -1,117 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "icing/result/result-retriever.h"
-
-#include <string_view>
-#include <utility>
-
-#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "icing/proto/document.pb.h"
-#include "icing/proto/search.pb.h"
-#include "icing/result/page-result-state.h"
-#include "icing/result/projection-tree.h"
-#include "icing/result/projector.h"
-#include "icing/result/snippet-context.h"
-#include "icing/util/status-macros.h"
-
-namespace icing {
-namespace lib {
-
-libtextclassifier3::StatusOr<std::unique_ptr<ResultRetriever>>
-ResultRetriever::Create(const DocumentStore* doc_store,
- const SchemaStore* schema_store,
- const LanguageSegmenter* language_segmenter,
- const Normalizer* normalizer,
- bool ignore_bad_document_ids) {
- ICING_RETURN_ERROR_IF_NULL(doc_store);
- ICING_RETURN_ERROR_IF_NULL(schema_store);
- ICING_RETURN_ERROR_IF_NULL(language_segmenter);
-
- ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<SnippetRetriever> snippet_retriever,
- SnippetRetriever::Create(schema_store, language_segmenter, normalizer));
-
- return std::unique_ptr<ResultRetriever>(new ResultRetriever(
- doc_store, std::move(snippet_retriever), ignore_bad_document_ids));
-}
-
-libtextclassifier3::StatusOr<std::vector<SearchResultProto::ResultProto>>
-ResultRetriever::RetrieveResults(
- const PageResultState& page_result_state) const {
- std::vector<SearchResultProto::ResultProto> search_results;
- search_results.reserve(page_result_state.scored_document_hits.size());
-
- const SnippetContext& snippet_context = page_result_state.snippet_context;
- // Calculates how many snippets to return for this page.
- int remaining_num_to_snippet = snippet_context.snippet_spec.num_to_snippet() -
- page_result_state.num_previously_returned;
-
- if (remaining_num_to_snippet < 0) {
- remaining_num_to_snippet = 0;
- }
-
- auto wildcard_projection_tree_itr =
- page_result_state.projection_tree_map.find(
- std::string(ProjectionTree::kSchemaTypeWildcard));
- for (const auto& scored_document_hit :
- page_result_state.scored_document_hits) {
- libtextclassifier3::StatusOr<DocumentProto> document_or =
- doc_store_.Get(scored_document_hit.document_id());
-
- if (!document_or.ok()) {
- // Internal errors from document store are IO errors, return directly.
- if (absl_ports::IsInternal(document_or.status())) {
- return document_or.status();
- }
-
- if (ignore_bad_document_ids_) {
- continue;
- } else {
- return document_or.status();
- }
- }
-
- DocumentProto document = std::move(document_or).ValueOrDie();
- // Apply projection
- auto itr = page_result_state.projection_tree_map.find(document.schema());
- if (itr != page_result_state.projection_tree_map.end()) {
- projector::Project(itr->second.root().children, &document);
- } else if (wildcard_projection_tree_itr !=
- page_result_state.projection_tree_map.end()) {
- projector::Project(wildcard_projection_tree_itr->second.root().children,
- &document);
- }
-
- SearchResultProto::ResultProto result;
- // Add the snippet if requested.
- if (snippet_context.snippet_spec.num_matches_per_property() > 0 &&
- remaining_num_to_snippet > search_results.size()) {
- SnippetProto snippet_proto = snippet_retriever_->RetrieveSnippet(
- snippet_context.query_terms, snippet_context.match_type,
- snippet_context.snippet_spec, document,
- scored_document_hit.hit_section_id_mask());
- *result.mutable_snippet() = std::move(snippet_proto);
- }
-
- // Add the document, itself.
- *result.mutable_document() = std::move(document);
- result.set_score(scored_document_hit.score());
- search_results.push_back(std::move(result));
- }
- return search_results;
-}
-
-} // namespace lib
-} // namespace icing
diff --git a/icing/result/result-retriever.h b/icing/result/result-retriever.h
deleted file mode 100644
index ade8441..0000000
--- a/icing/result/result-retriever.h
+++ /dev/null
@@ -1,94 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef ICING_RESULT_RETRIEVER_H_
-#define ICING_RESULT_RETRIEVER_H_
-
-#include <utility>
-#include <vector>
-
-#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "icing/proto/search.pb.h"
-#include "icing/query/query-terms.h"
-#include "icing/result/page-result-state.h"
-#include "icing/result/snippet-context.h"
-#include "icing/result/snippet-retriever.h"
-#include "icing/schema/schema-store.h"
-#include "icing/schema/section.h"
-#include "icing/scoring/scored-document-hit.h"
-#include "icing/store/document-id.h"
-#include "icing/store/document-store.h"
-#include "icing/tokenization/language-segmenter.h"
-#include "icing/transform/normalizer.h"
-
-namespace icing {
-namespace lib {
-
-class ResultRetriever {
- public:
- // Factory function to create a ResultRetriever which does not take ownership
- // of any input components, and all pointers must refer to valid objects that
- // outlive the created ResultRetriever instance.
- //
- // Returns:
- // A ResultRetriever on success
- // FAILED_PRECONDITION on any null pointer input
- static libtextclassifier3::StatusOr<std::unique_ptr<ResultRetriever>> Create(
- const DocumentStore* doc_store, const SchemaStore* schema_store,
- const LanguageSegmenter* language_segmenter, const Normalizer* normalizer,
- bool ignore_bad_document_ids = true);
-
- // Retrieves results (pairs of DocumentProtos and SnippetProtos) with the
- // given document and snippet information. The expected number of documents to
- // return is the number of all scored document hits inside PageResultState.
- // The number of snippets to return is based on the total number of snippets
- // needed and number of snippets that have already been returned previously
- // for the same query. The order of results returned is the same as the order
- // of scored document hits inside PageResultState.
- //
- // "ignore_bad_document_ids" from constructor indicates whether to ignore
- // invalid and non-existing document ids. If it's true, errors on some
- // document ids will be ignored and valid documents will be returned,
- // otherwise any error will be returned immediately. Note that IO errors will
- // always be returned.
- //
- // Returns when ignore_bad_document_ids is true:
- // A list of ResultProto on success
- // INTERNAL_ERROR on IO error
- //
- // Returns when ignore_bad_document_ids is false:
- // A list of ResultProto on success
- // INVALID_ARGUMENT if any document_id < 0
- // NOT_FOUND if any doc doesn't exist or has been deleted
- // INTERNAL_ERROR on IO error
- libtextclassifier3::StatusOr<std::vector<SearchResultProto::ResultProto>>
- RetrieveResults(const PageResultState& page_result_state) const;
-
- private:
- explicit ResultRetriever(const DocumentStore* doc_store,
- std::unique_ptr<SnippetRetriever> snippet_retriever,
- bool ignore_bad_document_ids)
- : doc_store_(*doc_store),
- snippet_retriever_(std::move(snippet_retriever)),
- ignore_bad_document_ids_(ignore_bad_document_ids) {}
-
- const DocumentStore& doc_store_;
- std::unique_ptr<SnippetRetriever> snippet_retriever_;
- const bool ignore_bad_document_ids_;
-};
-
-} // namespace lib
-} // namespace icing
-
-#endif // ICING_RESULT_RETRIEVER_H_
diff --git a/icing/result/result-retriever_test.cc b/icing/result/result-retriever_test.cc
deleted file mode 100644
index 044e0f2..0000000
--- a/icing/result/result-retriever_test.cc
+++ /dev/null
@@ -1,1951 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "icing/result/result-retriever.h"
-
-#include <limits>
-#include <memory>
-#include <string_view>
-#include <unordered_map>
-
-#include "gtest/gtest.h"
-#include "icing/document-builder.h"
-#include "icing/file/mock-filesystem.h"
-#include "icing/portable/equals-proto.h"
-#include "icing/portable/platform.h"
-#include "icing/proto/document.pb.h"
-#include "icing/proto/schema.pb.h"
-#include "icing/proto/search.pb.h"
-#include "icing/proto/term.pb.h"
-#include "icing/result/projection-tree.h"
-#include "icing/schema-builder.h"
-#include "icing/schema/schema-store.h"
-#include "icing/schema/section.h"
-#include "icing/store/document-id.h"
-#include "icing/testing/common-matchers.h"
-#include "icing/testing/fake-clock.h"
-#include "icing/testing/icu-data-file-helper.h"
-#include "icing/testing/test-data.h"
-#include "icing/testing/tmp-directory.h"
-#include "icing/tokenization/language-segmenter-factory.h"
-#include "icing/transform/normalizer-factory.h"
-#include "icing/transform/normalizer.h"
-#include "icing/util/snippet-helpers.h"
-#include "unicode/uloc.h"
-
-namespace icing {
-namespace lib {
-
-namespace {
-using ::icing::lib::portable_equals_proto::EqualsProto;
-using ::testing::ElementsAre;
-using ::testing::Eq;
-using ::testing::IsEmpty;
-using ::testing::Return;
-using ::testing::SizeIs;
-
-class ResultRetrieverTest : public testing::Test {
- protected:
- ResultRetrieverTest() : test_dir_(GetTestTempDir() + "/icing") {
- filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
- }
-
- void SetUp() override {
- if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
- ICING_ASSERT_OK(
- // File generated via icu_data_file rule in //icing/BUILD.
- icu_data_file_helper::SetUpICUDataFile(
- GetTestFilePath("icing/icu.dat")));
- }
- language_segmenter_factory::SegmenterOptions options(ULOC_US);
- ICING_ASSERT_OK_AND_ASSIGN(
- language_segmenter_,
- language_segmenter_factory::Create(std::move(options)));
-
- ICING_ASSERT_OK_AND_ASSIGN(
- schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
- /*max_term_byte_size=*/10000));
-
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("name")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_EXACT,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(
- PropertyConfigBuilder()
- .SetName("sender")
- .SetDataTypeDocument(
- "Person", /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .AddType(
- SchemaTypeConfigBuilder()
- .SetType("Person")
- .AddProperty(PropertyConfigBuilder()
- .SetName("name")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("emailAddress")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
- }
-
- void TearDown() override {
- filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
- }
-
- SectionId GetSectionId(const std::string& type, const std::string& property) {
- auto type_id_or = schema_store_->GetSchemaTypeId(type);
- if (!type_id_or.ok()) {
- return kInvalidSectionId;
- }
- SchemaTypeId type_id = type_id_or.ValueOrDie();
- for (SectionId section_id = 0; section_id <= kMaxSectionId; ++section_id) {
- auto metadata_or = schema_store_->GetSectionMetadata(type_id, section_id);
- if (!metadata_or.ok()) {
- break;
- }
- const SectionMetadata* metadata = metadata_or.ValueOrDie();
- if (metadata->path == property) {
- return metadata->id;
- }
- }
- return kInvalidSectionId;
- }
-
- const Filesystem filesystem_;
- const std::string test_dir_;
- std::unique_ptr<LanguageSegmenter> language_segmenter_;
- std::unique_ptr<SchemaStore> schema_store_;
- std::unique_ptr<Normalizer> normalizer_;
- FakeClock fake_clock_;
-};
-
-ResultSpecProto::SnippetSpecProto CreateSnippetSpec() {
- ResultSpecProto::SnippetSpecProto snippet_spec;
- snippet_spec.set_num_to_snippet(std::numeric_limits<int>::max());
- snippet_spec.set_num_matches_per_property(std::numeric_limits<int>::max());
- snippet_spec.set_max_window_utf32_length(1024);
- return snippet_spec;
-}
-
-DocumentProto CreateDocument(int id) {
- return DocumentBuilder()
- .SetKey("icing", "Email/" + std::to_string(id))
- .SetSchema("Email")
- .AddStringProperty("name", "subject foo " + std::to_string(id))
- .AddStringProperty("body", "body bar " + std::to_string(id))
- .SetCreationTimestampMs(1574365086666 + id)
- .Build();
-}
-
-SectionIdMask CreateSectionIdMask(const std::vector<SectionId>& section_ids) {
- SectionIdMask mask = 0;
- for (SectionId section_id : section_ids) {
- mask |= (UINT64_C(1) << section_id);
- }
- return mask;
-}
-
-TEST_F(ResultRetrieverTest, CreationWithNullPointerShouldFail) {
- EXPECT_THAT(
- ResultRetriever::Create(/*doc_store=*/nullptr, schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
-
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- EXPECT_THAT(
- ResultRetriever::Create(doc_store.get(), /*schema_store=*/nullptr,
- language_segmenter_.get(), normalizer_.get()),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- /*language_segmenter=*/nullptr,
- normalizer_.get()),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(),
- /*normalizer=*/nullptr),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
-}
-
-TEST_F(ResultRetrieverTest, ShouldRetrieveSimpleResults) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(CreateDocument(/*id=*/2)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- doc_store->Put(CreateDocument(/*id=*/3)));
-
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
- GetSectionId("Email", "body")};
- SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, hit_section_id_mask, /*score=*/19},
- {document_id2, hit_section_id_mask, /*score=*/5},
- {document_id3, hit_section_id_mask, /*score=*/1}};
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- SearchResultProto::ResultProto result1;
- *result1.mutable_document() = CreateDocument(/*id=*/1);
- result1.set_score(19);
- SearchResultProto::ResultProto result2;
- *result2.mutable_document() = CreateDocument(/*id=*/2);
- result2.set_score(5);
- SearchResultProto::ResultProto result3;
- *result3.mutable_document() = CreateDocument(/*id=*/3);
- result3.set_score(1);
-
- SnippetContext snippet_context(
- /*query_terms_in=*/{},
- ResultSpecProto::SnippetSpecProto::default_instance(),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context),
- std::unordered_map<std::string, ProjectionTree>(),
- /*num_previously_returned_in=*/0,
- /*num_per_page_in=*/3);
- EXPECT_THAT(
- result_retriever->RetrieveResults(page_result_state),
- IsOkAndHolds(ElementsAre(EqualsProto(result1), EqualsProto(result2),
- EqualsProto(result3))));
-}
-
-TEST_F(ResultRetrieverTest, IgnoreErrors) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(CreateDocument(/*id=*/2)));
-
- DocumentId invalid_document_id = -1;
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
- GetSectionId("Email", "body")};
- SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, hit_section_id_mask, /*score=*/12},
- {document_id2, hit_section_id_mask, /*score=*/4},
- {invalid_document_id, hit_section_id_mask, /*score=*/0}};
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get(),
- /*ignore_bad_document_ids=*/true));
-
- SearchResultProto::ResultProto result1;
- *result1.mutable_document() = CreateDocument(/*id=*/1);
- result1.set_score(12);
- SearchResultProto::ResultProto result2;
- *result2.mutable_document() = CreateDocument(/*id=*/2);
- result2.set_score(4);
-
- SnippetContext snippet_context(
- /*query_terms_in=*/{},
- ResultSpecProto::SnippetSpecProto::default_instance(),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context),
- std::unordered_map<std::string, ProjectionTree>(),
- /*num_previously_returned_in=*/0,
- /*num_per_page_in=*/3);
- EXPECT_THAT(
- result_retriever->RetrieveResults(page_result_state),
- IsOkAndHolds(ElementsAre(EqualsProto(result1), EqualsProto(result2))));
-}
-
-TEST_F(ResultRetrieverTest, NotIgnoreErrors) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(CreateDocument(/*id=*/2)));
-
- DocumentId invalid_document_id = -1;
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
- GetSectionId("Email", "body")};
- SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, hit_section_id_mask, /*score=*/0},
- {document_id2, hit_section_id_mask, /*score=*/0},
- {invalid_document_id, hit_section_id_mask, /*score=*/0}};
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get(),
- /*ignore_bad_document_ids=*/false));
-
- SnippetContext snippet_context(
- /*query_terms_in=*/{},
- ResultSpecProto::SnippetSpecProto::default_instance(),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context),
- std::unordered_map<std::string, ProjectionTree>(),
- /*num_previously_returned_in=*/0,
- /*num_per_page_in=*/3);
- EXPECT_THAT(result_retriever->RetrieveResults(page_result_state),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-
- DocumentId non_existing_document_id = 4;
- page_result_state.scored_document_hits = {
- {document_id1, hit_section_id_mask, /*score=*/0},
- {document_id2, hit_section_id_mask, /*score=*/0},
- {non_existing_document_id, hit_section_id_mask, /*score=*/0}};
- EXPECT_THAT(result_retriever->RetrieveResults(page_result_state),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
-}
-
-TEST_F(ResultRetrieverTest, IOErrorShouldReturnInternalError) {
- MockFilesystem mock_filesystem;
- ON_CALL(mock_filesystem, PRead(A<int>(), A<void*>(), A<size_t>(), A<off_t>()))
- .WillByDefault(Return(false));
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&mock_filesystem, test_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(CreateDocument(/*id=*/2)));
-
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
- GetSectionId("Email", "body")};
- SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, hit_section_id_mask, /*score=*/0},
- {document_id2, hit_section_id_mask, /*score=*/0}};
-
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get(),
- /*ignore_bad_document_ids=*/true));
-
- SnippetContext snippet_context(
- /*query_terms_in=*/{},
- ResultSpecProto::SnippetSpecProto::default_instance(),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context),
- std::unordered_map<std::string, ProjectionTree>(),
- /*num_previously_returned_in=*/0,
- /*num_per_page_in=*/2);
- EXPECT_THAT(result_retriever->RetrieveResults(page_result_state),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
-}
-
-TEST_F(ResultRetrieverTest, DefaultSnippetSpecShouldDisableSnippeting) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(CreateDocument(/*id=*/2)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- doc_store->Put(CreateDocument(/*id=*/3)));
-
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
- GetSectionId("Email", "body")};
- SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, hit_section_id_mask, /*score=*/0},
- {document_id2, hit_section_id_mask, /*score=*/0},
- {document_id3, hit_section_id_mask, /*score=*/0}};
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- SnippetContext snippet_context(
- /*query_terms_in=*/{},
- ResultSpecProto::SnippetSpecProto::default_instance(),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context),
- std::unordered_map<std::string, ProjectionTree>(),
- /*num_previously_returned_in=*/0,
- /*num_per_page_in=*/3);
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<SearchResultProto::ResultProto> results,
- result_retriever->RetrieveResults(page_result_state));
- ASSERT_THAT(results, SizeIs(3));
- EXPECT_THAT(results.at(0).snippet(),
- EqualsProto(SnippetProto::default_instance()));
- EXPECT_THAT(results.at(1).snippet(),
- EqualsProto(SnippetProto::default_instance()));
- EXPECT_THAT(results.at(2).snippet(),
- EqualsProto(SnippetProto::default_instance()));
-}
-
-TEST_F(ResultRetrieverTest, SimpleSnippeted) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(CreateDocument(/*id=*/2)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- doc_store->Put(CreateDocument(/*id=*/3)));
-
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
- GetSectionId("Email", "body")};
- SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, hit_section_id_mask, /*score=*/0},
- {document_id2, hit_section_id_mask, /*score=*/0},
- {document_id3, hit_section_id_mask, /*score=*/0}};
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- SnippetContext snippet_context(
- /*query_terms_in=*/{{"", {"foo", "bar"}}}, CreateSnippetSpec(),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context),
- std::unordered_map<std::string, ProjectionTree>(),
- /*num_previously_returned_in=*/0,
- /*num_per_page_in=*/3);
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<SearchResultProto::ResultProto> result,
- result_retriever->RetrieveResults(page_result_state));
- EXPECT_THAT(result, SizeIs(3));
-
- const DocumentProto& result_document_one = result.at(0).document();
- const SnippetProto& result_snippet_one = result.at(0).snippet();
- EXPECT_THAT(result_document_one, EqualsProto(CreateDocument(/*id=*/1)));
- EXPECT_THAT(result_snippet_one.entries(), SizeIs(2));
- EXPECT_THAT(result_snippet_one.entries(0).property_name(), Eq("body"));
- std::string_view content = GetString(
- &result_document_one, result_snippet_one.entries(0).property_name());
- EXPECT_THAT(GetWindows(content, result_snippet_one.entries(0)),
- ElementsAre("body bar 1"));
- EXPECT_THAT(GetMatches(content, result_snippet_one.entries(0)),
- ElementsAre("bar"));
- EXPECT_THAT(result_snippet_one.entries(1).property_name(), Eq("name"));
- content = GetString(&result_document_one,
- result_snippet_one.entries(1).property_name());
- EXPECT_THAT(GetWindows(content, result_snippet_one.entries(1)),
- ElementsAre("subject foo 1"));
- EXPECT_THAT(GetMatches(content, result_snippet_one.entries(1)),
- ElementsAre("foo"));
-
- const DocumentProto& result_document_two = result.at(1).document();
- const SnippetProto& result_snippet_two = result.at(1).snippet();
- EXPECT_THAT(result_document_two, EqualsProto(CreateDocument(/*id=*/2)));
- EXPECT_THAT(result_snippet_two.entries(), SizeIs(2));
- EXPECT_THAT(result_snippet_two.entries(0).property_name(), Eq("body"));
- content = GetString(&result_document_two,
- result_snippet_two.entries(0).property_name());
- EXPECT_THAT(GetWindows(content, result_snippet_two.entries(0)),
- ElementsAre("body bar 2"));
- EXPECT_THAT(GetMatches(content, result_snippet_two.entries(0)),
- ElementsAre("bar"));
- EXPECT_THAT(result_snippet_two.entries(1).property_name(), Eq("name"));
- content = GetString(&result_document_two,
- result_snippet_two.entries(1).property_name());
- EXPECT_THAT(GetWindows(content, result_snippet_two.entries(1)),
- ElementsAre("subject foo 2"));
- EXPECT_THAT(GetMatches(content, result_snippet_two.entries(1)),
- ElementsAre("foo"));
-
- const DocumentProto& result_document_three = result.at(2).document();
- const SnippetProto& result_snippet_three = result.at(2).snippet();
- EXPECT_THAT(result_document_three, EqualsProto(CreateDocument(/*id=*/3)));
- EXPECT_THAT(result_snippet_three.entries(), SizeIs(2));
- EXPECT_THAT(result_snippet_three.entries(0).property_name(), Eq("body"));
- content = GetString(&result_document_three,
- result_snippet_three.entries(0).property_name());
- EXPECT_THAT(GetWindows(content, result_snippet_three.entries(0)),
- ElementsAre("body bar 3"));
- EXPECT_THAT(GetMatches(content, result_snippet_three.entries(0)),
- ElementsAre("bar"));
- EXPECT_THAT(result_snippet_three.entries(1).property_name(), Eq("name"));
- content = GetString(&result_document_three,
- result_snippet_three.entries(1).property_name());
- EXPECT_THAT(GetWindows(content, result_snippet_three.entries(1)),
- ElementsAre("subject foo 3"));
- EXPECT_THAT(GetMatches(content, result_snippet_three.entries(1)),
- ElementsAre("foo"));
-}
-
-TEST_F(ResultRetrieverTest, OnlyOneDocumentSnippeted) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(CreateDocument(/*id=*/2)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- doc_store->Put(CreateDocument(/*id=*/3)));
-
- ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec();
- snippet_spec.set_num_to_snippet(1);
-
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
- GetSectionId("Email", "body")};
- SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, hit_section_id_mask, /*score=*/0},
- {document_id2, hit_section_id_mask, /*score=*/0},
- {document_id3, hit_section_id_mask, /*score=*/0}};
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- SnippetContext snippet_context(/*query_terms_in=*/{{"", {"foo", "bar"}}},
- snippet_spec, TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context),
- std::unordered_map<std::string, ProjectionTree>(),
- /*num_previously_returned_in=*/0,
- /*num_per_page_in=*/3);
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<SearchResultProto::ResultProto> result,
- result_retriever->RetrieveResults(page_result_state));
- EXPECT_THAT(result, SizeIs(3));
-
- const DocumentProto& result_document = result.at(0).document();
- const SnippetProto& result_snippet = result.at(0).snippet();
- EXPECT_THAT(result_document, EqualsProto(CreateDocument(/*id=*/1)));
- EXPECT_THAT(result_snippet.entries(), SizeIs(2));
- EXPECT_THAT(result_snippet.entries(0).property_name(), Eq("body"));
- std::string_view content =
- GetString(&result_document, result_snippet.entries(0).property_name());
- EXPECT_THAT(GetWindows(content, result_snippet.entries(0)),
- ElementsAre("body bar 1"));
- EXPECT_THAT(GetMatches(content, result_snippet.entries(0)),
- ElementsAre("bar"));
- EXPECT_THAT(result_snippet.entries(1).property_name(), Eq("name"));
- content =
- GetString(&result_document, result_snippet.entries(1).property_name());
- EXPECT_THAT(GetWindows(content, result_snippet.entries(1)),
- ElementsAre("subject foo 1"));
- EXPECT_THAT(GetMatches(content, result_snippet.entries(1)),
- ElementsAre("foo"));
-
- EXPECT_THAT(result[1].document(), EqualsProto(CreateDocument(/*id=*/2)));
- EXPECT_THAT(result[1].snippet(),
- EqualsProto(SnippetProto::default_instance()));
-
- EXPECT_THAT(result[2].document(), EqualsProto(CreateDocument(/*id=*/3)));
- EXPECT_THAT(result[2].snippet(),
- EqualsProto(SnippetProto::default_instance()));
-}
-
-TEST_F(ResultRetrieverTest, ShouldSnippetAllResults) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(CreateDocument(/*id=*/2)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- doc_store->Put(CreateDocument(/*id=*/3)));
-
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
- GetSectionId("Email", "body")};
- SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, hit_section_id_mask, /*score=*/0},
- {document_id2, hit_section_id_mask, /*score=*/0},
- {document_id3, hit_section_id_mask, /*score=*/0}};
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec();
- snippet_spec.set_num_to_snippet(5);
- SnippetContext snippet_context(
- /*query_terms_in=*/{{"", {"foo", "bar"}}}, std::move(snippet_spec),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context),
- std::unordered_map<std::string, ProjectionTree>(),
- /*num_previously_returned_in=*/0,
- /*num_per_page_in=*/3);
-
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<SearchResultProto::ResultProto> result,
- result_retriever->RetrieveResults(page_result_state));
- // num_to_snippet = 5, num_previously_returned_in = 0,
- // We can return 5 - 0 = 5 snippets at most. We're able to return all 3
- // snippets here.
- ASSERT_THAT(result, SizeIs(3));
- EXPECT_THAT(result[0].snippet().entries(), Not(IsEmpty()));
- EXPECT_THAT(result[1].snippet().entries(), Not(IsEmpty()));
- EXPECT_THAT(result[2].snippet().entries(), Not(IsEmpty()));
-}
-
-TEST_F(ResultRetrieverTest, ShouldSnippetSomeResults) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(CreateDocument(/*id=*/2)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- doc_store->Put(CreateDocument(/*id=*/3)));
-
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
- GetSectionId("Email", "body")};
- SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, hit_section_id_mask, /*score=*/0},
- {document_id2, hit_section_id_mask, /*score=*/0},
- {document_id3, hit_section_id_mask, /*score=*/0}};
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec();
- snippet_spec.set_num_to_snippet(5);
- SnippetContext snippet_context(
- /*query_terms_in=*/{{"", {"foo", "bar"}}}, std::move(snippet_spec),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context),
- std::unordered_map<std::string, ProjectionTree>(),
- /*num_previously_returned_in=*/3,
- /*num_per_page_in=*/3);
-
- // num_to_snippet = 5, num_previously_returned_in = 3,
- // We can return 5 - 3 = 2 snippets.
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<SearchResultProto::ResultProto> result,
- result_retriever->RetrieveResults(page_result_state));
- ASSERT_THAT(result, SizeIs(3));
- EXPECT_THAT(result[0].snippet().entries(), Not(IsEmpty()));
- EXPECT_THAT(result[1].snippet().entries(), Not(IsEmpty()));
- EXPECT_THAT(result[2].snippet().entries(), IsEmpty());
-}
-
-TEST_F(ResultRetrieverTest, ShouldNotSnippetAnyResults) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(CreateDocument(/*id=*/2)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- doc_store->Put(CreateDocument(/*id=*/3)));
-
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
- GetSectionId("Email", "body")};
- SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, hit_section_id_mask, /*score=*/0},
- {document_id2, hit_section_id_mask, /*score=*/0},
- {document_id3, hit_section_id_mask, /*score=*/0}};
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec();
- snippet_spec.set_num_to_snippet(5);
- SnippetContext snippet_context(
- /*query_terms_in=*/{{"", {"foo", "bar"}}}, std::move(snippet_spec),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context),
- std::unordered_map<std::string, ProjectionTree>(),
- /*num_previously_returned_in=*/6,
- /*num_per_page_in=*/3);
-
- // num_to_snippet = 5, num_previously_returned_in = 6,
- // We can't return any snippets for this page.
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<SearchResultProto::ResultProto> result,
- result_retriever->RetrieveResults(page_result_state));
- ASSERT_THAT(result, SizeIs(3));
- EXPECT_THAT(result[0].snippet().entries(), IsEmpty());
- EXPECT_THAT(result[1].snippet().entries(), IsEmpty());
- EXPECT_THAT(result[2].snippet().entries(), IsEmpty());
-}
-
-TEST_F(ResultRetrieverTest, ProjectionTopLevelLeadNodeFieldPath) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- // 1. Add two Email documents
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddStringProperty("name", "Hello World!")
- .AddStringProperty(
- "body", "Oh what a beautiful morning! Oh what a beautiful day!")
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(document_one));
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddStringProperty("name", "Goodnight Moon!")
- .AddStringProperty("body",
- "Count all the sheep and tell them 'Hello'.")
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(document_two));
-
- // 2. Setup the scored results.
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
- GetSectionId("Email", "body")};
- SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, hit_section_id_mask, /*score=*/0},
- {document_id2, hit_section_id_mask, /*score=*/0}};
-
- TypePropertyMask type_property_mask;
- type_property_mask.set_schema_type("Email");
- type_property_mask.add_paths("name");
- std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
- type_projection_tree_map.insert(
- {"Email", ProjectionTree(type_property_mask)});
-
- SnippetContext snippet_context(
- /*query_terms_in=*/{},
- ResultSpecProto::SnippetSpecProto::default_instance(),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), std::move(type_projection_tree_map),
- /*num_previously_returned_in=*/0,
- /*num_per_page_in=*/2);
-
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- // 3. Verify that the returned results only contain the 'name' property.
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<SearchResultProto::ResultProto> result,
- result_retriever->RetrieveResults(page_result_state));
- ASSERT_THAT(result, SizeIs(2));
-
- DocumentProto projected_document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddStringProperty("name", "Hello World!")
- .Build();
- EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
-
- DocumentProto projected_document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddStringProperty("name", "Goodnight Moon!")
- .Build();
- EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
-}
-
-TEST_F(ResultRetrieverTest, ProjectionNestedLeafNodeFieldPath) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- // 1. Add two Email documents
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty(
- "sender",
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Person")
- .AddStringProperty("name", "Meg Ryan")
- .AddStringProperty("emailAddress", "shopgirl@aol.com")
- .Build())
- .AddStringProperty("name", "Hello World!")
- .AddStringProperty(
- "body", "Oh what a beautiful morning! Oh what a beautiful day!")
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(document_one));
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty(
- "sender", DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Person")
- .AddStringProperty("name", "Tom Hanks")
- .AddStringProperty("emailAddress", "ny152@aol.com")
- .Build())
- .AddStringProperty("name", "Goodnight Moon!")
- .AddStringProperty("body",
- "Count all the sheep and tell them 'Hello'.")
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(document_two));
-
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
- GetSectionId("Email", "body")};
- SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, hit_section_id_mask, /*score=*/0},
- {document_id2, hit_section_id_mask, /*score=*/0}};
-
- TypePropertyMask type_property_mask;
- type_property_mask.set_schema_type("Email");
- type_property_mask.add_paths("sender.name");
- std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
- type_projection_tree_map.insert(
- {"Email", ProjectionTree(type_property_mask)});
-
- SnippetContext snippet_context(
- /*query_terms_in=*/{},
- ResultSpecProto::SnippetSpecProto::default_instance(),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), std::move(type_projection_tree_map),
- /*num_previously_returned_in=*/0,
- /*num_per_page_in=*/2);
-
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- // 3. Verify that the returned results only contain the 'sender.name'
- // property.
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<SearchResultProto::ResultProto> result,
- result_retriever->RetrieveResults(page_result_state));
- ASSERT_THAT(result, SizeIs(2));
-
- DocumentProto projected_document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty("sender",
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Person")
- .AddStringProperty("name", "Meg Ryan")
- .Build())
- .Build();
- EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
-
- DocumentProto projected_document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty("sender",
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Person")
- .AddStringProperty("name", "Tom Hanks")
- .Build())
- .Build();
- EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
-}
-
-TEST_F(ResultRetrieverTest, ProjectionIntermediateNodeFieldPath) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- // 1. Add two Email documents
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty(
- "sender",
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Person")
- .AddStringProperty("name", "Meg Ryan")
- .AddStringProperty("emailAddress", "shopgirl@aol.com")
- .Build())
- .AddStringProperty("name", "Hello World!")
- .AddStringProperty(
- "body", "Oh what a beautiful morning! Oh what a beautiful day!")
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(document_one));
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty(
- "sender", DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Person")
- .AddStringProperty("name", "Tom Hanks")
- .AddStringProperty("emailAddress", "ny152@aol.com")
- .Build())
- .AddStringProperty("name", "Goodnight Moon!")
- .AddStringProperty("body",
- "Count all the sheep and tell them 'Hello'.")
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(document_two));
-
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
- GetSectionId("Email", "body")};
- SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, hit_section_id_mask, /*score=*/0},
- {document_id2, hit_section_id_mask, /*score=*/0}};
-
- TypePropertyMask type_property_mask;
- type_property_mask.set_schema_type("Email");
- type_property_mask.add_paths("sender");
- std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
- type_projection_tree_map.insert(
- {"Email", ProjectionTree(type_property_mask)});
-
- SnippetContext snippet_context(
- /*query_terms_in=*/{},
- ResultSpecProto::SnippetSpecProto::default_instance(),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), std::move(type_projection_tree_map),
- /*num_previously_returned_in=*/0,
- /*num_per_page_in=*/2);
-
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- // 3. Verify that the returned results only contain the 'sender'
- // property and all of the subproperties of 'sender'.
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<SearchResultProto::ResultProto> result,
- result_retriever->RetrieveResults(page_result_state));
- ASSERT_THAT(result, SizeIs(2));
-
- DocumentProto projected_document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty(
- "sender",
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Person")
- .AddStringProperty("name", "Meg Ryan")
- .AddStringProperty("emailAddress", "shopgirl@aol.com")
- .Build())
- .Build();
- EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
-
- DocumentProto projected_document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty(
- "sender", DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Person")
- .AddStringProperty("name", "Tom Hanks")
- .AddStringProperty("emailAddress", "ny152@aol.com")
- .Build())
- .Build();
- EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
-}
-
-TEST_F(ResultRetrieverTest, ProjectionMultipleNestedFieldPaths) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- // 1. Add two Email documents
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty(
- "sender",
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Person")
- .AddStringProperty("name", "Meg Ryan")
- .AddStringProperty("emailAddress", "shopgirl@aol.com")
- .Build())
- .AddStringProperty("name", "Hello World!")
- .AddStringProperty(
- "body", "Oh what a beautiful morning! Oh what a beautiful day!")
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(document_one));
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty(
- "sender", DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Person")
- .AddStringProperty("name", "Tom Hanks")
- .AddStringProperty("emailAddress", "ny152@aol.com")
- .Build())
- .AddStringProperty("name", "Goodnight Moon!")
- .AddStringProperty("body",
- "Count all the sheep and tell them 'Hello'.")
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(document_two));
-
- // 2. Setup the scored results.
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
- GetSectionId("Email", "body")};
- SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, hit_section_id_mask, /*score=*/0},
- {document_id2, hit_section_id_mask, /*score=*/0}};
-
- TypePropertyMask type_property_mask;
- type_property_mask.set_schema_type("Email");
- type_property_mask.add_paths("sender.name");
- type_property_mask.add_paths("sender.emailAddress");
- std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
- type_projection_tree_map.insert(
- {"Email", ProjectionTree(type_property_mask)});
-
- SnippetContext snippet_context(
- /*query_terms_in=*/{},
- ResultSpecProto::SnippetSpecProto::default_instance(),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), std::move(type_projection_tree_map),
- /*num_previously_returned_in=*/0,
- /*num_per_page_in=*/2);
-
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- // 3. Verify that the returned results only contain the 'sender.name' and
- // 'sender.address' properties.
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<SearchResultProto::ResultProto> result,
- result_retriever->RetrieveResults(page_result_state));
- ASSERT_THAT(result, SizeIs(2));
-
- DocumentProto projected_document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty(
- "sender",
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Person")
- .AddStringProperty("name", "Meg Ryan")
- .AddStringProperty("emailAddress", "shopgirl@aol.com")
- .Build())
- .Build();
- EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
-
- DocumentProto projected_document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty(
- "sender", DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Person")
- .AddStringProperty("name", "Tom Hanks")
- .AddStringProperty("emailAddress", "ny152@aol.com")
- .Build())
- .Build();
- EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
-}
-
-TEST_F(ResultRetrieverTest, ProjectionEmptyFieldPath) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- // 1. Add two Email documents
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddStringProperty("name", "Hello World!")
- .AddStringProperty(
- "body", "Oh what a beautiful morning! Oh what a beautiful day!")
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(document_one));
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddStringProperty("name", "Goodnight Moon!")
- .AddStringProperty("body",
- "Count all the sheep and tell them 'Hello'.")
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(document_two));
-
- // 2. Setup the scored results.
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
- GetSectionId("Email", "body")};
- SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, hit_section_id_mask, /*score=*/0},
- {document_id2, hit_section_id_mask, /*score=*/0}};
-
- TypePropertyMask type_property_mask;
- type_property_mask.set_schema_type("Email");
- std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
- type_projection_tree_map.insert(
- {"Email", ProjectionTree(type_property_mask)});
-
- SnippetContext snippet_context(
- /*query_terms_in=*/{},
- ResultSpecProto::SnippetSpecProto::default_instance(),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), std::move(type_projection_tree_map),
- /*num_previously_returned_in=*/0,
- /*num_per_page_in=*/2);
-
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- // 3. Verify that the returned results contain *no* properties.
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<SearchResultProto::ResultProto> result,
- result_retriever->RetrieveResults(page_result_state));
- ASSERT_THAT(result, SizeIs(2));
-
- DocumentProto projected_document_one = DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .Build();
- EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
-
- DocumentProto projected_document_two = DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .Build();
- EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
-}
-
-TEST_F(ResultRetrieverTest, ProjectionInvalidFieldPath) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- // 1. Add two Email documents
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddStringProperty("name", "Hello World!")
- .AddStringProperty(
- "body", "Oh what a beautiful morning! Oh what a beautiful day!")
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(document_one));
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddStringProperty("name", "Goodnight Moon!")
- .AddStringProperty("body",
- "Count all the sheep and tell them 'Hello'.")
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(document_two));
-
- // 2. Setup the scored results.
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
- GetSectionId("Email", "body")};
- SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, hit_section_id_mask, /*score=*/0},
- {document_id2, hit_section_id_mask, /*score=*/0}};
-
- TypePropertyMask type_property_mask;
- type_property_mask.set_schema_type("Email");
- type_property_mask.add_paths("nonExistentProperty");
- std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
- type_projection_tree_map.insert(
- {"Email", ProjectionTree(type_property_mask)});
-
- SnippetContext snippet_context(
- /*query_terms_in=*/{},
- ResultSpecProto::SnippetSpecProto::default_instance(),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), std::move(type_projection_tree_map),
- /*num_previously_returned_in=*/0,
- /*num_per_page_in=*/2);
-
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- // 3. Verify that the returned results contain *no* properties.
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<SearchResultProto::ResultProto> result,
- result_retriever->RetrieveResults(page_result_state));
- ASSERT_THAT(result, SizeIs(2));
-
- DocumentProto projected_document_one = DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .Build();
- EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
-
- DocumentProto projected_document_two = DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .Build();
- EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
-}
-
-TEST_F(ResultRetrieverTest, ProjectionValidAndInvalidFieldPath) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- // 1. Add two Email documents
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddStringProperty("name", "Hello World!")
- .AddStringProperty(
- "body", "Oh what a beautiful morning! Oh what a beautiful day!")
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(document_one));
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddStringProperty("name", "Goodnight Moon!")
- .AddStringProperty("body",
- "Count all the sheep and tell them 'Hello'.")
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(document_two));
-
- // 2. Setup the scored results.
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
- GetSectionId("Email", "body")};
- SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, hit_section_id_mask, /*score=*/0},
- {document_id2, hit_section_id_mask, /*score=*/0}};
-
- TypePropertyMask type_property_mask;
- type_property_mask.set_schema_type("Email");
- type_property_mask.add_paths("name");
- type_property_mask.add_paths("nonExistentProperty");
- std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
- type_projection_tree_map.insert(
- {"Email", ProjectionTree(type_property_mask)});
-
- SnippetContext snippet_context(
- /*query_terms_in=*/{},
- ResultSpecProto::SnippetSpecProto::default_instance(),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), std::move(type_projection_tree_map),
- /*num_previously_returned_in=*/0,
- /*num_per_page_in=*/2);
-
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- // 3. Verify that the returned results only contain the 'name' property.
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<SearchResultProto::ResultProto> result,
- result_retriever->RetrieveResults(page_result_state));
- ASSERT_THAT(result, SizeIs(2));
-
- DocumentProto projected_document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddStringProperty("name", "Hello World!")
- .Build();
- EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
-
- DocumentProto projected_document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddStringProperty("name", "Goodnight Moon!")
- .Build();
- EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
-}
-
-TEST_F(ResultRetrieverTest, ProjectionMultipleTypesNoWildcards) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- // 1. Add two documents
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddStringProperty("name", "Hello World!")
- .AddStringProperty(
- "body", "Oh what a beautiful morning! Oh what a beautiful day!")
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(document_one));
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Person")
- .AddStringProperty("name", "Joe Fox")
- .AddStringProperty("emailAddress", "ny152@aol.com")
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(document_two));
-
- // 2. Setup the scored results.
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
- GetSectionId("Email", "body")};
- SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, hit_section_id_mask, /*score=*/0},
- {document_id2, hit_section_id_mask, /*score=*/0}};
-
- TypePropertyMask type_property_mask;
- type_property_mask.set_schema_type("Email");
- type_property_mask.add_paths("name");
- std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
- type_projection_tree_map.insert(
- {"Email", ProjectionTree(type_property_mask)});
-
- SnippetContext snippet_context(
- /*query_terms_in=*/{},
- ResultSpecProto::SnippetSpecProto::default_instance(),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), std::move(type_projection_tree_map),
- /*num_previously_returned_in=*/0,
- /*num_per_page_in=*/2);
-
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- // 3. Verify that the returned Email results only contain the 'name'
- // property and the returned Person results have all of their properties.
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<SearchResultProto::ResultProto> result,
- result_retriever->RetrieveResults(page_result_state));
- ASSERT_THAT(result, SizeIs(2));
-
- DocumentProto projected_document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddStringProperty("name", "Hello World!")
- .Build();
- EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
-
- DocumentProto projected_document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Person")
- .AddStringProperty("name", "Joe Fox")
- .AddStringProperty("emailAddress", "ny152@aol.com")
- .Build();
- EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
-}
-
-TEST_F(ResultRetrieverTest, ProjectionMultipleTypesWildcard) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- // 1. Add two documents
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddStringProperty("name", "Hello World!")
- .AddStringProperty(
- "body", "Oh what a beautiful morning! Oh what a beautiful day!")
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(document_one));
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Person")
- .AddStringProperty("name", "Joe Fox")
- .AddStringProperty("emailAddress", "ny152@aol.com")
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(document_two));
-
- // 2. Setup the scored results.
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
- GetSectionId("Email", "body")};
- SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, hit_section_id_mask, /*score=*/0},
- {document_id2, hit_section_id_mask, /*score=*/0}};
-
- TypePropertyMask wildcard_type_property_mask;
- wildcard_type_property_mask.set_schema_type(
- std::string(ProjectionTree::kSchemaTypeWildcard));
- wildcard_type_property_mask.add_paths("name");
- std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
- type_projection_tree_map.insert(
- {std::string(ProjectionTree::kSchemaTypeWildcard),
- ProjectionTree(wildcard_type_property_mask)});
-
- SnippetContext snippet_context(
- /*query_terms_in=*/{},
- ResultSpecProto::SnippetSpecProto::default_instance(),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), std::move(type_projection_tree_map),
- /*num_previously_returned_in=*/0,
- /*num_per_page_in=*/2);
-
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- // 3. Verify that the returned Email results only contain the 'name'
- // property and the returned Person results only contain the 'name' property.
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<SearchResultProto::ResultProto> result,
- result_retriever->RetrieveResults(page_result_state));
- ASSERT_THAT(result, SizeIs(2));
-
- DocumentProto projected_document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddStringProperty("name", "Hello World!")
- .Build();
- EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
-
- DocumentProto projected_document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Person")
- .AddStringProperty("name", "Joe Fox")
- .Build();
- EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
-}
-
-TEST_F(ResultRetrieverTest, ProjectionMultipleTypesWildcardWithOneOverride) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- // 1. Add two documents
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddStringProperty("name", "Hello World!")
- .AddStringProperty(
- "body", "Oh what a beautiful morning! Oh what a beautiful day!")
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(document_one));
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Person")
- .AddStringProperty("name", "Joe Fox")
- .AddStringProperty("emailAddress", "ny152@aol.com")
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(document_two));
-
- // 2. Setup the scored results.
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
- GetSectionId("Email", "body")};
- SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, hit_section_id_mask, /*score=*/0},
- {document_id2, hit_section_id_mask, /*score=*/0}};
-
- TypePropertyMask email_type_property_mask;
- email_type_property_mask.set_schema_type("Email");
- email_type_property_mask.add_paths("body");
- TypePropertyMask wildcard_type_property_mask;
- wildcard_type_property_mask.set_schema_type(
- std::string(ProjectionTree::kSchemaTypeWildcard));
- wildcard_type_property_mask.add_paths("name");
- std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
- type_projection_tree_map.insert(
- {"Email", ProjectionTree(email_type_property_mask)});
- type_projection_tree_map.insert(
- {std::string(ProjectionTree::kSchemaTypeWildcard),
- ProjectionTree(wildcard_type_property_mask)});
-
- SnippetContext snippet_context(
- /*query_terms_in=*/{},
- ResultSpecProto::SnippetSpecProto::default_instance(),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), std::move(type_projection_tree_map),
- /*num_previously_returned_in=*/0,
- /*num_per_page_in=*/2);
-
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- // 3. Verify that the returned Email results only contain the 'body'
- // property and the returned Person results only contain the 'name' property.
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<SearchResultProto::ResultProto> result,
- result_retriever->RetrieveResults(page_result_state));
- ASSERT_THAT(result, SizeIs(2));
-
- DocumentProto projected_document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddStringProperty(
- "body", "Oh what a beautiful morning! Oh what a beautiful day!")
- .Build();
- EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
-
- DocumentProto projected_document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Person")
- .AddStringProperty("name", "Joe Fox")
- .Build();
- EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
-}
-
-TEST_F(ResultRetrieverTest, ProjectionSingleTypesWildcardAndOverride) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- // 1. Add two documents
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddStringProperty("name", "Hello World!")
- .AddStringProperty(
- "body", "Oh what a beautiful morning! Oh what a beautiful day!")
- .AddDocumentProperty(
- "sender",
- DocumentBuilder()
- .SetKey("namespace", "uri")
- .SetSchema("Person")
- .AddStringProperty("name", "Mr. Body")
- .AddStringProperty("emailAddress", "mr.body123@gmail.com")
- .Build())
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(document_one));
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Person")
- .AddStringProperty("name", "Joe Fox")
- .AddStringProperty("emailAddress", "ny152@aol.com")
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(document_two));
-
- // 2. Setup the scored results.
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
- GetSectionId("Email", "body")};
- SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, hit_section_id_mask, /*score=*/0},
- {document_id2, hit_section_id_mask, /*score=*/0}};
-
- TypePropertyMask email_type_property_mask;
- email_type_property_mask.set_schema_type("Email");
- email_type_property_mask.add_paths("sender.name");
- TypePropertyMask wildcard_type_property_mask;
- wildcard_type_property_mask.set_schema_type(
- std::string(ProjectionTree::kSchemaTypeWildcard));
- wildcard_type_property_mask.add_paths("name");
- std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
- type_projection_tree_map.insert(
- {"Email", ProjectionTree(email_type_property_mask)});
- type_projection_tree_map.insert(
- {std::string(ProjectionTree::kSchemaTypeWildcard),
- ProjectionTree(wildcard_type_property_mask)});
-
- SnippetContext snippet_context(
- /*query_terms_in=*/{},
- ResultSpecProto::SnippetSpecProto::default_instance(),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), std::move(type_projection_tree_map),
- /*num_previously_returned_in=*/0,
- /*num_per_page_in=*/2);
-
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- // 3. Verify that the returned Email results only contain the 'sender.name'
- // property and the returned Person results only contain the 'name' property.
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<SearchResultProto::ResultProto> result,
- result_retriever->RetrieveResults(page_result_state));
- ASSERT_THAT(result, SizeIs(2));
-
- DocumentProto projected_document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty("sender",
- DocumentBuilder()
- .SetKey("namespace", "uri")
- .SetSchema("Person")
- .AddStringProperty("name", "Mr. Body")
- .Build())
- .Build();
- EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
-
- DocumentProto projected_document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Person")
- .AddStringProperty("name", "Joe Fox")
- .Build();
- EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
-}
-
-TEST_F(ResultRetrieverTest,
- ProjectionSingleTypesWildcardAndOverrideNestedProperty) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- // 1. Add two documents
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddStringProperty("name", "Hello World!")
- .AddStringProperty(
- "body", "Oh what a beautiful morning! Oh what a beautiful day!")
- .AddDocumentProperty(
- "sender",
- DocumentBuilder()
- .SetKey("namespace", "uri")
- .SetSchema("Person")
- .AddStringProperty("name", "Mr. Body")
- .AddStringProperty("emailAddress", "mr.body123@gmail.com")
- .Build())
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(document_one));
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Person")
- .AddStringProperty("name", "Joe Fox")
- .AddStringProperty("emailAddress", "ny152@aol.com")
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(document_two));
-
- // 2. Setup the scored results.
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
- GetSectionId("Email", "body")};
- SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, hit_section_id_mask, /*score=*/0},
- {document_id2, hit_section_id_mask, /*score=*/0}};
-
- TypePropertyMask email_type_property_mask;
- email_type_property_mask.set_schema_type("Email");
- email_type_property_mask.add_paths("sender.name");
- TypePropertyMask wildcard_type_property_mask;
- wildcard_type_property_mask.set_schema_type(
- std::string(ProjectionTree::kSchemaTypeWildcard));
- wildcard_type_property_mask.add_paths("sender");
- std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
- type_projection_tree_map.insert(
- {"Email", ProjectionTree(email_type_property_mask)});
- type_projection_tree_map.insert(
- {std::string(ProjectionTree::kSchemaTypeWildcard),
- ProjectionTree(wildcard_type_property_mask)});
-
- SnippetContext snippet_context(
- /*query_terms_in=*/{},
- ResultSpecProto::SnippetSpecProto::default_instance(),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), std::move(type_projection_tree_map),
- /*num_previously_returned_in=*/0,
- /*num_per_page_in=*/2);
-
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- // 3. Verify that the returned Email results only contain the 'sender.name'
- // property and the returned Person results contain no properties.
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<SearchResultProto::ResultProto> result,
- result_retriever->RetrieveResults(page_result_state));
- ASSERT_THAT(result, SizeIs(2));
-
- DocumentProto projected_document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty("sender",
- DocumentBuilder()
- .SetKey("namespace", "uri")
- .SetSchema("Person")
- .AddStringProperty("name", "Mr. Body")
- .Build())
- .Build();
- EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
-
- DocumentProto projected_document_two = DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Person")
- .Build();
- EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
-}
-
-} // namespace
-
-} // namespace lib
-} // namespace icing
diff --git a/icing/result/result-state-manager.cc b/icing/result/result-state-manager.cc
index 2783fe2..382f7db 100644
--- a/icing/result/result-state-manager.cc
+++ b/icing/result/result-state-manager.cc
@@ -18,9 +18,8 @@
#include <queue>
#include <utility>
-#include "icing/proto/search.pb.h"
-#include "icing/query/query-terms.h"
#include "icing/result/page-result.h"
+#include "icing/result/result-adjustment-info.h"
#include "icing/result/result-retriever-v2.h"
#include "icing/result/result-state-v2.h"
#include "icing/scoring/scored-document-hits-ranker.h"
@@ -32,21 +31,19 @@ namespace icing {
namespace lib {
ResultStateManager::ResultStateManager(int max_total_hits,
- const DocumentStore& document_store,
- const Clock* clock)
+ const DocumentStore& document_store)
: document_store_(document_store),
max_total_hits_(max_total_hits),
num_total_hits_(0),
- random_generator_(GetSteadyTimeNanoseconds()),
- clock_(*clock) {}
+ random_generator_(GetSteadyTimeNanoseconds()) {}
libtextclassifier3::StatusOr<std::pair<uint64_t, PageResult>>
ResultStateManager::CacheAndRetrieveFirstPage(
std::unique_ptr<ScoredDocumentHitsRanker> ranker,
- SectionRestrictQueryTermsMap query_terms,
- const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
+ std::unique_ptr<ResultAdjustmentInfo> parent_adjustment_info,
+ std::unique_ptr<ResultAdjustmentInfo> child_adjustment_info,
const ResultSpecProto& result_spec, const DocumentStore& document_store,
- const ResultRetrieverV2& result_retriever) {
+ const ResultRetrieverV2& result_retriever, int64_t current_time_ms) {
if (ranker == nullptr) {
return absl_ports::InvalidArgumentError("Should not provide null ranker");
}
@@ -54,13 +51,13 @@ ResultStateManager::CacheAndRetrieveFirstPage(
// Create shared pointer of ResultState.
// ResultState should be created by ResultStateManager only.
std::shared_ptr<ResultStateV2> result_state = std::make_shared<ResultStateV2>(
- std::move(ranker), std::move(query_terms), search_spec, scoring_spec,
- result_spec, document_store);
+ std::move(ranker), std::move(parent_adjustment_info),
+ std::move(child_adjustment_info), result_spec, document_store);
// Retrieve docs outside of ResultStateManager critical section.
// Will enter ResultState critical section inside ResultRetriever.
auto [page_result, has_more_results] =
- result_retriever.RetrieveNextPage(*result_state);
+ result_retriever.RetrieveNextPage(*result_state, current_time_ms);
if (!has_more_results) {
// No more pages, won't store ResultState, returns directly
return std::make_pair(kInvalidNextPageToken, std::move(page_result));
@@ -87,37 +84,40 @@ ResultStateManager::CacheAndRetrieveFirstPage(
absl_ports::unique_lock l(&mutex_);
// Remove expired result states first.
- InternalInvalidateExpiredResultStates(kDefaultResultStateTtlInMs);
+ InternalInvalidateExpiredResultStates(kDefaultResultStateTtlInMs,
+ current_time_ms);
// Remove states to make room for this new state.
RemoveStatesIfNeeded(num_hits_to_add);
// Generate a new unique token and add it into result_state_map_.
- next_page_token = Add(std::move(result_state));
+ next_page_token = Add(std::move(result_state), current_time_ms);
}
return std::make_pair(next_page_token, std::move(page_result));
}
-uint64_t ResultStateManager::Add(std::shared_ptr<ResultStateV2> result_state) {
+uint64_t ResultStateManager::Add(std::shared_ptr<ResultStateV2> result_state,
+ int64_t current_time_ms) {
uint64_t new_token = GetUniqueToken();
result_state_map_.emplace(new_token, std::move(result_state));
// Tracks the insertion order
- token_queue_.push(
- std::make_pair(new_token, clock_.GetSystemTimeMilliseconds()));
+ token_queue_.push(std::make_pair(new_token, current_time_ms));
return new_token;
}
libtextclassifier3::StatusOr<std::pair<uint64_t, PageResult>>
ResultStateManager::GetNextPage(uint64_t next_page_token,
- const ResultRetrieverV2& result_retriever) {
+ const ResultRetrieverV2& result_retriever,
+ int64_t current_time_ms) {
std::shared_ptr<ResultStateV2> result_state = nullptr;
{
// ResultStateManager critical section
absl_ports::unique_lock l(&mutex_);
// Remove expired result states before fetching
- InternalInvalidateExpiredResultStates(kDefaultResultStateTtlInMs);
+ InternalInvalidateExpiredResultStates(kDefaultResultStateTtlInMs,
+ current_time_ms);
const auto& state_iterator = result_state_map_.find(next_page_token);
if (state_iterator == result_state_map_.end()) {
@@ -129,7 +129,7 @@ ResultStateManager::GetNextPage(uint64_t next_page_token,
// Retrieve docs outside of ResultStateManager critical section.
// Will enter ResultState critical section inside ResultRetriever.
auto [page_result, has_more_results] =
- result_retriever.RetrieveNextPage(*result_state);
+ result_retriever.RetrieveNextPage(*result_state, current_time_ms);
if (!has_more_results) {
{
@@ -234,10 +234,9 @@ void ResultStateManager::InternalInvalidateResultState(uint64_t token) {
}
void ResultStateManager::InternalInvalidateExpiredResultStates(
- int64_t result_state_ttl) {
- int64_t current_time = clock_.GetSystemTimeMilliseconds();
+ int64_t result_state_ttl, int64_t current_time_ms) {
while (!token_queue_.empty() &&
- current_time - token_queue_.front().second >= result_state_ttl) {
+ current_time_ms - token_queue_.front().second >= result_state_ttl) {
auto itr = result_state_map_.find(token_queue_.front().first);
if (itr != result_state_map_.end()) {
// We don't have to decrement num_total_hits_ here, since erasing the
diff --git a/icing/result/result-state-manager.h b/icing/result/result-state-manager.h
index e2bc797..a64ae2c 100644
--- a/icing/result/result-state-manager.h
+++ b/icing/result/result-state-manager.h
@@ -24,10 +24,9 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/mutex.h"
-#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
-#include "icing/query/query-terms.h"
#include "icing/result/page-result.h"
+#include "icing/result/result-adjustment-info.h"
#include "icing/result/result-retriever-v2.h"
#include "icing/result/result-state-v2.h"
#include "icing/scoring/scored-document-hits-ranker.h"
@@ -48,8 +47,7 @@ inline constexpr int64_t kDefaultResultStateTtlInMs = 1LL * 60 * 60 * 1000;
class ResultStateManager {
public:
explicit ResultStateManager(int max_total_hits,
- const DocumentStore& document_store,
- const Clock* clock);
+ const DocumentStore& document_store);
ResultStateManager(const ResultStateManager&) = delete;
ResultStateManager& operator=(const ResultStateManager&) = delete;
@@ -61,6 +59,10 @@ class ResultStateManager {
// result states if exceeding the cache size limit. next_page_token will be
// set to a default value kInvalidNextPageToken if there're no more pages.
//
+ // NOTE: parent_adjustment_info and child_adjustment_info can be nullptr if
+ // there is no requirement to apply adjustment (snippet, projection) to
+ // them.
+ //
// NOTE: it is possible to have empty result for the first page even if the
// ranker was not empty before the retrieval, since GroupResultLimiter
// may filter out all docs. In this case, the first page is also the
@@ -70,13 +72,12 @@ class ResultStateManager {
// A token and PageResult wrapped by std::pair on success
// INVALID_ARGUMENT if the input ranker is null or contains no results
libtextclassifier3::StatusOr<std::pair<uint64_t, PageResult>>
- CacheAndRetrieveFirstPage(std::unique_ptr<ScoredDocumentHitsRanker> ranker,
- SectionRestrictQueryTermsMap query_terms,
- const SearchSpecProto& search_spec,
- const ScoringSpecProto& scoring_spec,
- const ResultSpecProto& result_spec,
- const DocumentStore& document_store,
- const ResultRetrieverV2& result_retriever)
+ CacheAndRetrieveFirstPage(
+ std::unique_ptr<ScoredDocumentHitsRanker> ranker,
+ std::unique_ptr<ResultAdjustmentInfo> parent_adjustment_info,
+ std::unique_ptr<ResultAdjustmentInfo> child_adjustment_info,
+ const ResultSpecProto& result_spec, const DocumentStore& document_store,
+ const ResultRetrieverV2& result_retriever, int64_t current_time_ms)
ICING_LOCKS_EXCLUDED(mutex_);
// Retrieves and returns PageResult for the next page.
@@ -92,8 +93,8 @@ class ResultStateManager {
// A token and PageResult wrapped by std::pair on success
// NOT_FOUND if failed to find any more results
libtextclassifier3::StatusOr<std::pair<uint64_t, PageResult>> GetNextPage(
- uint64_t next_page_token, const ResultRetrieverV2& result_retriever)
- ICING_LOCKS_EXCLUDED(mutex_);
+ uint64_t next_page_token, const ResultRetrieverV2& result_retriever,
+ int64_t current_time_ms) ICING_LOCKS_EXCLUDED(mutex_);
// Invalidates the result state associated with the given next-page token.
void InvalidateResultState(uint64_t next_page_token)
@@ -135,15 +136,13 @@ class ResultStateManager {
// A random 64-bit number generator
std::mt19937_64 random_generator_ ICING_GUARDED_BY(mutex_);
- const Clock& clock_; // Does not own.
-
// Puts a new result state into the internal storage and returns a next-page
// token associated with it. The token is guaranteed to be unique among all
// currently valid tokens. When the maximum number of result states is
// reached, the oldest / firstly added result state will be removed to make
// room for the new state.
- uint64_t Add(std::shared_ptr<ResultStateV2> result_state)
- ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+ uint64_t Add(std::shared_ptr<ResultStateV2> result_state,
+ int64_t current_time_ms) ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Helper method to generate a next-page token that is unique among all
// existing tokens in token_queue_.
@@ -170,7 +169,8 @@ class ResultStateManager {
// Internal method to invalidate and remove expired result states / tokens
// currently in ResultStateManager that were created before
// current_time - result_state_ttl.
- void InternalInvalidateExpiredResultStates(int64_t result_state_ttl)
+ void InternalInvalidateExpiredResultStates(int64_t result_state_ttl,
+ int64_t current_time_ms)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
};
diff --git a/icing/result/result-state-manager_test.cc b/icing/result/result-state-manager_test.cc
index c8af5fe..75d1d93 100644
--- a/icing/result/result-state-manager_test.cc
+++ b/icing/result/result-state-manager_test.cc
@@ -20,6 +20,7 @@
#include "icing/file/filesystem.h"
#include "icing/portable/equals-proto.h"
#include "icing/result/page-result.h"
+#include "icing/result/result-adjustment-info.h"
#include "icing/result/result-retriever-v2.h"
#include "icing/schema/schema-store.h"
#include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
@@ -47,9 +48,6 @@ using ::testing::Not;
using ::testing::SizeIs;
using PageResultInfo = std::pair<uint64_t, PageResult>;
-// TODO(sungyc): Refactor helper functions below (builder classes or common test
-// utility).
-
ScoringSpecProto CreateScoringSpec() {
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
@@ -100,15 +98,23 @@ class ResultStateManagerTest : public testing::Test {
SchemaStore::Create(&filesystem_, test_dir_, clock_.get()));
SchemaProto schema;
schema.add_types()->set_schema_type("Document");
- ICING_ASSERT_OK(schema_store_->SetSchema(std::move(schema)));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ std::move(schema), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
/*max_term_byte_size=*/10000));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult result,
- DocumentStore::Create(&filesystem_, test_dir_, clock_.get(),
- schema_store_.get()));
+ DocumentStore::Create(
+ &filesystem_, test_dir_, clock_.get(), schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -159,6 +165,9 @@ class ResultStateManagerTest : public testing::Test {
DocumentStore& document_store() { return *document_store_; }
const DocumentStore& document_store() const { return *document_store_; }
+ SchemaStore& schema_store() { return *schema_store_; }
+ const SchemaStore& schema_store() const { return *schema_store_; }
+
const ResultRetrieverV2& result_retriever() const {
return *result_retriever_;
}
@@ -190,17 +199,16 @@ TEST_F(ResultStateManagerTest, ShouldCacheAndRetrieveFirstPageOnePage) {
std::move(scored_document_hits), /*is_descending=*/true);
ResultStateManager result_state_manager(
- /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(),
- clock());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info,
result_state_manager.CacheAndRetrieveFirstPage(
- std::move(ranker),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ std::move(ranker), /*parent_adjustment_info=*/nullptr,
+ /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/10, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
EXPECT_THAT(page_result_info.first, Eq(kInvalidNextPageToken));
@@ -236,18 +244,17 @@ TEST_F(ResultStateManagerTest, ShouldCacheAndRetrieveFirstPageMultiplePages) {
std::move(scored_document_hits), /*is_descending=*/true);
ResultStateManager result_state_manager(
- /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(),
- clock());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
// First page, 2 results
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info1,
result_state_manager.CacheAndRetrieveFirstPage(
- std::move(ranker),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ std::move(ranker), /*parent_adjustment_info=*/nullptr,
+ /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
EXPECT_THAT(page_result_info1.first, Not(Eq(kInvalidNextPageToken)));
ASSERT_THAT(page_result_info1.second.results, SizeIs(2));
EXPECT_THAT(page_result_info1.second.results.at(0).document(),
@@ -260,7 +267,8 @@ TEST_F(ResultStateManagerTest, ShouldCacheAndRetrieveFirstPageMultiplePages) {
// Second page, 2 results
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info2,
- result_state_manager.GetNextPage(next_page_token, result_retriever()));
+ result_state_manager.GetNextPage(next_page_token, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
EXPECT_THAT(page_result_info2.first, Eq(next_page_token));
ASSERT_THAT(page_result_info2.second.results, SizeIs(2));
EXPECT_THAT(page_result_info2.second.results.at(0).document(),
@@ -271,7 +279,8 @@ TEST_F(ResultStateManagerTest, ShouldCacheAndRetrieveFirstPageMultiplePages) {
// Third page, 1 result
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info3,
- result_state_manager.GetNextPage(next_page_token, result_retriever()));
+ result_state_manager.GetNextPage(next_page_token, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
EXPECT_THAT(page_result_info3.first, Eq(kInvalidNextPageToken));
ASSERT_THAT(page_result_info3.second.results, SizeIs(1));
EXPECT_THAT(page_result_info3.second.results.at(0).document(),
@@ -279,39 +288,38 @@ TEST_F(ResultStateManagerTest, ShouldCacheAndRetrieveFirstPageMultiplePages) {
// No results
EXPECT_THAT(
- result_state_manager.GetNextPage(next_page_token, result_retriever()),
+ result_state_manager.GetNextPage(next_page_token, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
TEST_F(ResultStateManagerTest, NullRankerShouldReturnError) {
ResultStateManager result_state_manager(
- /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(),
- clock());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
EXPECT_THAT(
result_state_manager.CacheAndRetrieveFirstPage(
- /*ranker=*/nullptr,
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*ranker=*/nullptr, /*parent_adjustment_info=*/nullptr,
+ /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST_F(ResultStateManagerTest, EmptyRankerShouldReturnEmptyFirstPage) {
ResultStateManager result_state_manager(
- /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(),
- clock());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info,
result_state_manager.CacheAndRetrieveFirstPage(
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::vector<ScoredDocumentHit>(), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
EXPECT_THAT(page_result_info.first, Eq(kInvalidNextPageToken));
EXPECT_THAT(page_result_info.second.results, IsEmpty());
@@ -327,8 +335,7 @@ TEST_F(ResultStateManagerTest, ShouldAllowEmptyFirstPage) {
{document_id2, kSectionIdMaskNone, /*score=*/1}};
ResultStateManager result_state_manager(
- /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(),
- clock());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
// Create a ResultSpec that limits "namespace" to 0 results.
ResultSpecProto result_spec =
@@ -347,9 +354,9 @@ TEST_F(ResultStateManagerTest, ShouldAllowEmptyFirstPage) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(), result_spec, document_store(),
- result_retriever()));
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
// If the first page has no result, then it should be the last page.
EXPECT_THAT(page_result_info.first, Eq(kInvalidNextPageToken));
EXPECT_THAT(page_result_info.second.results, IsEmpty());
@@ -371,8 +378,7 @@ TEST_F(ResultStateManagerTest, ShouldAllowEmptyLastPage) {
{document_id4, kSectionIdMaskNone, /*score=*/1}};
ResultStateManager result_state_manager(
- /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(),
- clock());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
// Create a ResultSpec that limits "namespace" to 2 results.
ResultSpecProto result_spec =
@@ -391,9 +397,9 @@ TEST_F(ResultStateManagerTest, ShouldAllowEmptyLastPage) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(), result_spec, document_store(),
- result_retriever()));
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
EXPECT_THAT(page_result_info1.first, Not(Eq(kInvalidNextPageToken)));
ASSERT_THAT(page_result_info1.second.results, SizeIs(2));
EXPECT_THAT(page_result_info1.second.results.at(0).document(),
@@ -407,7 +413,8 @@ TEST_F(ResultStateManagerTest, ShouldAllowEmptyLastPage) {
// limiter, so we should get an empty page.
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info2,
- result_state_manager.GetNextPage(next_page_token, result_retriever()));
+ result_state_manager.GetNextPage(next_page_token, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
EXPECT_THAT(page_result_info2.first, Eq(kInvalidNextPageToken));
EXPECT_THAT(page_result_info2.second.results, IsEmpty());
}
@@ -420,8 +427,7 @@ TEST_F(ResultStateManagerTest,
{/*document_id=*/3, /*document_id=*/4, /*document_id=*/5});
ResultStateManager result_state_manager(
- /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(),
- clock());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
SectionRestrictQueryTermsMap query_terms;
SearchSpecProto search_spec;
@@ -437,8 +443,12 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits1), /*is_descending=*/true),
- query_terms, search_spec, scoring_spec, result_spec, document_store(),
- result_retriever()));
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(search_spec, scoring_spec,
+ result_spec, &schema_store(),
+ query_terms),
+ /*child_adjustment_info=*/nullptr, result_spec, document_store(),
+ result_retriever(), clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info1.first, Not(Eq(kInvalidNextPageToken)));
// Set time as 1hr1s and add state 2.
@@ -449,8 +459,12 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits2), /*is_descending=*/true),
- query_terms, search_spec, scoring_spec, result_spec, document_store(),
- result_retriever()));
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(search_spec, scoring_spec,
+ result_spec, &schema_store(),
+ query_terms),
+ /*child_adjustment_info=*/nullptr, result_spec, document_store(),
+ result_retriever(), clock()->GetSystemTimeMilliseconds()));
// Calling CacheAndRetrieveFirstPage() on state 2 should invalidate the
// expired state 1 internally.
@@ -460,8 +474,9 @@ TEST_F(ResultStateManagerTest,
// CacheAndRetrieveFirstPage() instead of the following GetNextPage().
clock()->SetSystemTimeMilliseconds(1000);
// page_result_info1's token (page_result_info1.first) shouldn't be found.
- EXPECT_THAT(result_state_manager.GetNextPage(page_result_info1.first,
- result_retriever()),
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
@@ -473,8 +488,7 @@ TEST_F(ResultStateManagerTest,
{/*document_id=*/3, /*document_id=*/4, /*document_id=*/5});
ResultStateManager result_state_manager(
- /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(),
- clock());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
// Set time as 1s and add state 1.
clock()->SetSystemTimeMilliseconds(1000);
@@ -484,10 +498,10 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits1), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info1.first, Not(Eq(kInvalidNextPageToken)));
// Set time as 2s and add state 2.
@@ -498,10 +512,10 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits2), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info2.first, Not(Eq(kInvalidNextPageToken)));
// 1. Set time as 1hr1s.
@@ -512,14 +526,16 @@ TEST_F(ResultStateManagerTest,
// page_result_info2's token (page_result_info2.first) should be found
ICING_ASSERT_OK_AND_ASSIGN(page_result_info2,
result_state_manager.GetNextPage(
- page_result_info2.first, result_retriever()));
+ page_result_info2.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
// We test the behavior by setting time back to 2s, to make sure the
// invalidation of state 1 was done by the previous GetNextPage() instead of
// the following GetNextPage().
clock()->SetSystemTimeMilliseconds(2000);
// page_result_info1's token (page_result_info1.first) shouldn't be found.
- EXPECT_THAT(result_state_manager.GetNextPage(page_result_info1.first,
- result_retriever()),
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
@@ -531,8 +547,7 @@ TEST_F(ResultStateManagerTest,
{/*document_id=*/3, /*document_id=*/4, /*document_id=*/5});
ResultStateManager result_state_manager(
- /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(),
- clock());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
// Set time as 1s and add state.
clock()->SetSystemTimeMilliseconds(1000);
@@ -542,18 +557,19 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits1), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info.first, Not(Eq(kInvalidNextPageToken)));
// 1. Set time as 1hr1s.
// 2. Then calling GetNextPage() on the state shouldn't get anything.
clock()->SetSystemTimeMilliseconds(kDefaultResultStateTtlInMs + 1000);
// page_result_info's token (page_result_info.first) shouldn't be found.
- EXPECT_THAT(result_state_manager.GetNextPage(page_result_info.first,
- result_retriever()),
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
@@ -580,8 +596,7 @@ TEST_F(ResultStateManagerTest, ShouldInvalidateOneToken) {
{document_id6, kSectionIdMaskNone, /*score=*/1}};
ResultStateManager result_state_manager(
- /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(),
- clock());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info1,
@@ -589,10 +604,10 @@ TEST_F(ResultStateManagerTest, ShouldInvalidateOneToken) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits1), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info2,
@@ -600,23 +615,25 @@ TEST_F(ResultStateManagerTest, ShouldInvalidateOneToken) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits2), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
// Invalidate first result state by the token.
result_state_manager.InvalidateResultState(page_result_info1.first);
// page_result_info1's token (page_result_info1.first) shouldn't be found
- EXPECT_THAT(result_state_manager.GetNextPage(page_result_info1.first,
- result_retriever()),
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
// page_result_info2's token (page_result_info2.first) should still exist
ICING_ASSERT_OK_AND_ASSIGN(page_result_info2,
result_state_manager.GetNextPage(
- page_result_info2.first, result_retriever()));
+ page_result_info2.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
// Should get docs.
ASSERT_THAT(page_result_info2.second.results, SizeIs(1));
EXPECT_THAT(page_result_info2.second.results.at(0).document(),
@@ -630,8 +647,7 @@ TEST_F(ResultStateManagerTest, ShouldInvalidateAllTokens) {
{/*document_id=*/3, /*document_id=*/4, /*document_id=*/5});
ResultStateManager result_state_manager(
- /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(),
- clock());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info1,
@@ -639,10 +655,10 @@ TEST_F(ResultStateManagerTest, ShouldInvalidateAllTokens) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits1), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info2,
@@ -650,21 +666,23 @@ TEST_F(ResultStateManagerTest, ShouldInvalidateAllTokens) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits2), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
result_state_manager.InvalidateAllResultStates();
// page_result_info1's token (page_result_info1.first) shouldn't be found
- EXPECT_THAT(result_state_manager.GetNextPage(page_result_info1.first,
- result_retriever()),
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
// page_result_info2's token (page_result_info2.first) shouldn't be found
- EXPECT_THAT(result_state_manager.GetNextPage(page_result_info2.first,
- result_retriever()),
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info2.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
@@ -677,7 +695,7 @@ TEST_F(ResultStateManagerTest, ShouldRemoveOldestResultState) {
AddScoredDocuments({/*document_id=*/4, /*document_id=*/5});
ResultStateManager result_state_manager(/*max_total_hits=*/2,
- document_store(), clock());
+ document_store());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info1,
@@ -685,10 +703,10 @@ TEST_F(ResultStateManagerTest, ShouldRemoveOldestResultState) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits1), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info2,
@@ -696,10 +714,10 @@ TEST_F(ResultStateManagerTest, ShouldRemoveOldestResultState) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits2), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
// Adding state 3 should cause state 1 to be removed.
ICING_ASSERT_OK_AND_ASSIGN(
@@ -708,25 +726,28 @@ TEST_F(ResultStateManagerTest, ShouldRemoveOldestResultState) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits3), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
- EXPECT_THAT(result_state_manager.GetNextPage(page_result_info1.first,
- result_retriever()),
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
ICING_ASSERT_OK_AND_ASSIGN(page_result_info2,
result_state_manager.GetNextPage(
- page_result_info2.first, result_retriever()));
+ page_result_info2.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info2.second.results, SizeIs(1));
EXPECT_THAT(page_result_info2.second.results.at(0).document(),
EqualsProto(document_protos2.at(1)));
ICING_ASSERT_OK_AND_ASSIGN(page_result_info3,
result_state_manager.GetNextPage(
- page_result_info3.first, result_retriever()));
+ page_result_info3.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info3.second.results, SizeIs(1));
EXPECT_THAT(page_result_info3.second.results.at(0).document(),
EqualsProto(document_protos3.at(1)));
@@ -747,7 +768,7 @@ TEST_F(ResultStateManagerTest,
// result set of 2 hits. So each result will take up one hit of our three hit
// budget.
ResultStateManager result_state_manager(/*max_total_hits=*/3,
- document_store(), clock());
+ document_store());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info1,
@@ -755,10 +776,10 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits1), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info2,
@@ -766,10 +787,10 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits2), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info3,
@@ -777,10 +798,10 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits3), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
// Invalidates state 2, so that the number of hits current cached should be
// decremented to 2.
@@ -797,32 +818,36 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits4), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(page_result_info1,
result_state_manager.GetNextPage(
- page_result_info1.first, result_retriever()));
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info1.second.results, SizeIs(1));
EXPECT_THAT(page_result_info1.second.results.at(0).document(),
EqualsProto(document_protos1.at(1)));
- EXPECT_THAT(result_state_manager.GetNextPage(page_result_info2.first,
- result_retriever()),
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info2.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
ICING_ASSERT_OK_AND_ASSIGN(page_result_info3,
result_state_manager.GetNextPage(
- page_result_info3.first, result_retriever()));
+ page_result_info3.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info3.second.results, SizeIs(1));
EXPECT_THAT(page_result_info3.second.results.at(0).document(),
EqualsProto(document_protos3.at(1)));
ICING_ASSERT_OK_AND_ASSIGN(page_result_info4,
result_state_manager.GetNextPage(
- page_result_info4.first, result_retriever()));
+ page_result_info4.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info4.second.results, SizeIs(1));
EXPECT_THAT(page_result_info4.second.results.at(0).document(),
EqualsProto(document_protos4.at(1)));
@@ -843,7 +868,7 @@ TEST_F(ResultStateManagerTest,
// result set of 2 hits. So each result will take up one hit of our three hit
// budget.
ResultStateManager result_state_manager(/*max_total_hits=*/3,
- document_store(), clock());
+ document_store());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info1,
@@ -851,10 +876,10 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits1), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info2,
@@ -862,10 +887,10 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits2), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info3,
@@ -873,10 +898,10 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits3), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
// Invalidates all states so that the current hit count will be 0.
result_state_manager.InvalidateAllResultStates();
@@ -897,10 +922,10 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits4), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info5,
@@ -908,10 +933,10 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits5), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info6,
@@ -919,40 +944,46 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits6), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
- EXPECT_THAT(result_state_manager.GetNextPage(page_result_info1.first,
- result_retriever()),
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(result_state_manager.GetNextPage(page_result_info2.first,
- result_retriever()),
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info2.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(result_state_manager.GetNextPage(page_result_info3.first,
- result_retriever()),
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info3.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
ICING_ASSERT_OK_AND_ASSIGN(page_result_info4,
result_state_manager.GetNextPage(
- page_result_info4.first, result_retriever()));
+ page_result_info4.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info4.second.results, SizeIs(1));
EXPECT_THAT(page_result_info4.second.results.at(0).document(),
EqualsProto(document_protos4.at(1)));
ICING_ASSERT_OK_AND_ASSIGN(page_result_info5,
result_state_manager.GetNextPage(
- page_result_info5.first, result_retriever()));
+ page_result_info5.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info5.second.results, SizeIs(1));
EXPECT_THAT(page_result_info5.second.results.at(0).document(),
EqualsProto(document_protos5.at(1)));
ICING_ASSERT_OK_AND_ASSIGN(page_result_info6,
result_state_manager.GetNextPage(
- page_result_info6.first, result_retriever()));
+ page_result_info6.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info6.second.results, SizeIs(1));
EXPECT_THAT(page_result_info6.second.results.at(0).document(),
EqualsProto(document_protos6.at(1)));
@@ -974,7 +1005,7 @@ TEST_F(
// result set of 2 hits. So each result will take up one hit of our three hit
// budget.
ResultStateManager result_state_manager(/*max_total_hits=*/3,
- document_store(), clock());
+ document_store());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info1,
@@ -982,10 +1013,10 @@ TEST_F(
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits1), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info2,
@@ -993,10 +1024,10 @@ TEST_F(
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits2), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info3,
@@ -1004,10 +1035,10 @@ TEST_F(
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits3), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
// Invalidates state 2, so that the number of hits current cached should be
// decremented to 2.
@@ -1024,10 +1055,10 @@ TEST_F(
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits4), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
// If invalidating result state 2 correctly decremented the current hit count
// to 2 and adding state 4 correctly incremented it to 3, then adding this
@@ -1040,36 +1071,41 @@ TEST_F(
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits5), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
- EXPECT_THAT(result_state_manager.GetNextPage(page_result_info1.first,
- result_retriever()),
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(result_state_manager.GetNextPage(page_result_info2.first,
- result_retriever()),
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info2.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
ICING_ASSERT_OK_AND_ASSIGN(page_result_info3,
result_state_manager.GetNextPage(
- page_result_info3.first, result_retriever()));
+ page_result_info3.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info3.second.results, SizeIs(1));
EXPECT_THAT(page_result_info3.second.results.at(0).document(),
EqualsProto(document_protos3.at(1)));
ICING_ASSERT_OK_AND_ASSIGN(page_result_info4,
result_state_manager.GetNextPage(
- page_result_info4.first, result_retriever()));
+ page_result_info4.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info4.second.results, SizeIs(1));
EXPECT_THAT(page_result_info4.second.results.at(0).document(),
EqualsProto(document_protos4.at(1)));
ICING_ASSERT_OK_AND_ASSIGN(page_result_info5,
result_state_manager.GetNextPage(
- page_result_info5.first, result_retriever()));
+ page_result_info5.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info5.second.results, SizeIs(1));
EXPECT_THAT(page_result_info5.second.results.at(0).document(),
EqualsProto(document_protos5.at(1)));
@@ -1089,7 +1125,7 @@ TEST_F(ResultStateManagerTest, GetNextPageShouldDecreaseCurrentHitsCount) {
// result set of 2 hits. So each result will take up one hit of our three hit
// budget.
ResultStateManager result_state_manager(/*max_total_hits=*/3,
- document_store(), clock());
+ document_store());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info1,
@@ -1097,10 +1133,10 @@ TEST_F(ResultStateManagerTest, GetNextPageShouldDecreaseCurrentHitsCount) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits1), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info2,
@@ -1108,10 +1144,10 @@ TEST_F(ResultStateManagerTest, GetNextPageShouldDecreaseCurrentHitsCount) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits2), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info3,
@@ -1119,16 +1155,17 @@ TEST_F(ResultStateManagerTest, GetNextPageShouldDecreaseCurrentHitsCount) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits3), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
// GetNextPage for result state 1 should return its result and decrement the
// number of cached hits to 2.
ICING_ASSERT_OK_AND_ASSIGN(page_result_info1,
result_state_manager.GetNextPage(
- page_result_info1.first, result_retriever()));
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info1.second.results, SizeIs(1));
EXPECT_THAT(page_result_info1.second.results.at(0).document(),
EqualsProto(document_protos1.at(1)));
@@ -1144,32 +1181,36 @@ TEST_F(ResultStateManagerTest, GetNextPageShouldDecreaseCurrentHitsCount) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits4), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
- EXPECT_THAT(result_state_manager.GetNextPage(page_result_info1.first,
- result_retriever()),
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
ICING_ASSERT_OK_AND_ASSIGN(page_result_info2,
result_state_manager.GetNextPage(
- page_result_info2.first, result_retriever()));
+ page_result_info2.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info2.second.results, SizeIs(1));
EXPECT_THAT(page_result_info2.second.results.at(0).document(),
EqualsProto(document_protos2.at(1)));
ICING_ASSERT_OK_AND_ASSIGN(page_result_info3,
result_state_manager.GetNextPage(
- page_result_info3.first, result_retriever()));
+ page_result_info3.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info3.second.results, SizeIs(1));
EXPECT_THAT(page_result_info3.second.results.at(0).document(),
EqualsProto(document_protos3.at(1)));
ICING_ASSERT_OK_AND_ASSIGN(page_result_info4,
result_state_manager.GetNextPage(
- page_result_info4.first, result_retriever()));
+ page_result_info4.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info4.second.results, SizeIs(1));
EXPECT_THAT(page_result_info4.second.results.at(0).document(),
EqualsProto(document_protos4.at(1)));
@@ -1190,7 +1231,7 @@ TEST_F(ResultStateManagerTest,
// result set of 2 hits. So each result will take up one hit of our three hit
// budget.
ResultStateManager result_state_manager(/*max_total_hits=*/3,
- document_store(), clock());
+ document_store());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info1,
@@ -1198,10 +1239,10 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits1), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info2,
@@ -1209,10 +1250,10 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits2), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info3,
@@ -1220,16 +1261,17 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits3), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
// GetNextPage for result state 1 should return its result and decrement the
// number of cached hits to 2.
ICING_ASSERT_OK_AND_ASSIGN(page_result_info1,
result_state_manager.GetNextPage(
- page_result_info1.first, result_retriever()));
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info1.second.results, SizeIs(1));
EXPECT_THAT(page_result_info1.second.results.at(0).document(),
EqualsProto(document_protos1.at(1)));
@@ -1245,10 +1287,10 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits4), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
// If retrieving the next page for result state 1 correctly decremented the
// current hit count to 2 and adding state 4 correctly incremented it to 3,
@@ -1261,36 +1303,41 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits5), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
- EXPECT_THAT(result_state_manager.GetNextPage(page_result_info1.first,
- result_retriever()),
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(result_state_manager.GetNextPage(page_result_info2.first,
- result_retriever()),
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info2.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
ICING_ASSERT_OK_AND_ASSIGN(page_result_info3,
result_state_manager.GetNextPage(
- page_result_info3.first, result_retriever()));
+ page_result_info3.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info3.second.results, SizeIs(1));
EXPECT_THAT(page_result_info3.second.results.at(0).document(),
EqualsProto(document_protos3.at(1)));
ICING_ASSERT_OK_AND_ASSIGN(page_result_info4,
result_state_manager.GetNextPage(
- page_result_info4.first, result_retriever()));
+ page_result_info4.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info4.second.results, SizeIs(1));
EXPECT_THAT(page_result_info4.second.results.at(0).document(),
EqualsProto(document_protos4.at(1)));
ICING_ASSERT_OK_AND_ASSIGN(page_result_info5,
result_state_manager.GetNextPage(
- page_result_info5.first, result_retriever()));
+ page_result_info5.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info5.second.results, SizeIs(1));
EXPECT_THAT(page_result_info5.second.results.at(0).document(),
EqualsProto(document_protos5.at(1)));
@@ -1308,7 +1355,7 @@ TEST_F(ResultStateManagerTest,
// CacheAndRetrieveFirstPage). Each result state has a page size of 1. So 3
// hits will remain cached.
ResultStateManager result_state_manager(/*max_total_hits=*/4,
- document_store(), clock());
+ document_store());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info1,
@@ -1316,10 +1363,10 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits1), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info2,
@@ -1327,10 +1374,10 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits2), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
// Add a result state that is larger than the entire budget. This should
// result in all previous result states being evicted, the first hit from
@@ -1345,26 +1392,29 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits3), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
EXPECT_THAT(page_result_info3.first, Not(Eq(kInvalidNextPageToken)));
// GetNextPage for result state 1 and 2 should return NOT_FOUND.
- EXPECT_THAT(result_state_manager.GetNextPage(page_result_info1.first,
- result_retriever()),
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(result_state_manager.GetNextPage(page_result_info2.first,
- result_retriever()),
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info2.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
// Only the next four results in state 3 should be retrievable.
uint64_t next_page_token3 = page_result_info3.first;
ICING_ASSERT_OK_AND_ASSIGN(
page_result_info3,
- result_state_manager.GetNextPage(next_page_token3, result_retriever()));
+ result_state_manager.GetNextPage(next_page_token3, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
EXPECT_THAT(page_result_info3.first, Eq(next_page_token3));
ASSERT_THAT(page_result_info3.second.results, SizeIs(1));
EXPECT_THAT(page_result_info3.second.results.at(0).document(),
@@ -1372,7 +1422,8 @@ TEST_F(ResultStateManagerTest,
ICING_ASSERT_OK_AND_ASSIGN(
page_result_info3,
- result_state_manager.GetNextPage(next_page_token3, result_retriever()));
+ result_state_manager.GetNextPage(next_page_token3, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
EXPECT_THAT(page_result_info3.first, Eq(next_page_token3));
ASSERT_THAT(page_result_info3.second.results, SizeIs(1));
EXPECT_THAT(page_result_info3.second.results.at(0).document(),
@@ -1380,7 +1431,8 @@ TEST_F(ResultStateManagerTest,
ICING_ASSERT_OK_AND_ASSIGN(
page_result_info3,
- result_state_manager.GetNextPage(next_page_token3, result_retriever()));
+ result_state_manager.GetNextPage(next_page_token3, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
EXPECT_THAT(page_result_info3.first, Eq(next_page_token3));
ASSERT_THAT(page_result_info3.second.results, SizeIs(1));
EXPECT_THAT(page_result_info3.second.results.at(0).document(),
@@ -1388,7 +1440,8 @@ TEST_F(ResultStateManagerTest,
ICING_ASSERT_OK_AND_ASSIGN(
page_result_info3,
- result_state_manager.GetNextPage(next_page_token3, result_retriever()));
+ result_state_manager.GetNextPage(next_page_token3, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
// The final document should have been dropped because it exceeded the budget,
// so the next page token of the second last round should be
// kInvalidNextPageToken.
@@ -1399,7 +1452,8 @@ TEST_F(ResultStateManagerTest,
// Double check that next_page_token3 is not retrievable anymore.
EXPECT_THAT(
- result_state_manager.GetNextPage(next_page_token3, result_retriever()),
+ result_state_manager.GetNextPage(next_page_token3, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
@@ -1412,7 +1466,7 @@ TEST_F(ResultStateManagerTest,
/*document_id=*/3, /*document_id=*/4, /*document_id=*/5});
ResultStateManager result_state_manager(/*max_total_hits=*/4,
- document_store(), clock());
+ document_store());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info1,
@@ -1420,10 +1474,10 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits1), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
// Add a result state. Because state2 + state1 is larger than the budget,
// state1 should be evicted.
@@ -1435,19 +1489,21 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits2), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
// state1 should have been evicted and state2 should still be retrievable.
- EXPECT_THAT(result_state_manager.GetNextPage(page_result_info1.first,
- result_retriever()),
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
ICING_ASSERT_OK_AND_ASSIGN(page_result_info2,
result_state_manager.GetNextPage(
- page_result_info2.first, result_retriever()));
+ page_result_info2.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info2.second.results, SizeIs(1));
EXPECT_THAT(page_result_info2.second.results.at(0).document(),
EqualsProto(document_protos2.at(1)));
@@ -1463,7 +1519,7 @@ TEST_F(ResultStateManagerTest,
/*document_id=*/3, /*document_id=*/4});
ResultStateManager result_state_manager(/*max_total_hits=*/4,
- document_store(), clock());
+ document_store());
// The 5 input scored document hits will not be truncated. The first page of
// two hits will be returned immediately and the other three hits will fit
@@ -1474,10 +1530,10 @@ TEST_F(ResultStateManagerTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/true),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE),
- document_store(), result_retriever()));
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
// First page, 2 results
ASSERT_THAT(page_result_info1.second.results, SizeIs(2));
@@ -1491,7 +1547,8 @@ TEST_F(ResultStateManagerTest,
// Second page, 2 results.
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info2,
- result_state_manager.GetNextPage(next_page_token, result_retriever()));
+ result_state_manager.GetNextPage(next_page_token, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info2.second.results, SizeIs(2));
EXPECT_THAT(page_result_info2.second.results.at(0).document(),
EqualsProto(document_protos.at(2)));
@@ -1501,14 +1558,16 @@ TEST_F(ResultStateManagerTest,
// Third page, 1 result.
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info3,
- result_state_manager.GetNextPage(next_page_token, result_retriever()));
+ result_state_manager.GetNextPage(next_page_token, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info3.second.results, SizeIs(1));
EXPECT_THAT(page_result_info3.second.results.at(0).document(),
EqualsProto(document_protos.at(4)));
// Fourth page, 0 results.
EXPECT_THAT(
- result_state_manager.GetNextPage(next_page_token, result_retriever()),
+ result_state_manager.GetNextPage(next_page_token, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
diff --git a/icing/result/result-state-manager_thread-safety_test.cc b/icing/result/result-state-manager_thread-safety_test.cc
index 0da37d8..7e7e13c 100644
--- a/icing/result/result-state-manager_thread-safety_test.cc
+++ b/icing/result/result-state-manager_thread-safety_test.cc
@@ -26,7 +26,6 @@
#include "icing/result/result-state-manager.h"
#include "icing/schema/schema-store.h"
#include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
-#include "icing/scoring/scored-document-hits-ranker.h"
#include "icing/store/document-store.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
@@ -49,12 +48,6 @@ using ::testing::Not;
using ::testing::SizeIs;
using PageResultInfo = std::pair<uint64_t, PageResult>;
-ScoringSpecProto CreateScoringSpec() {
- ScoringSpecProto scoring_spec;
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
- return scoring_spec;
-}
-
ResultSpecProto CreateResultSpec(int num_per_page) {
ResultSpecProto result_spec;
result_spec.set_num_per_page(num_per_page);
@@ -98,15 +91,23 @@ class ResultStateManagerThreadSafetyTest : public testing::Test {
SchemaStore::Create(&filesystem_, test_dir_, clock_.get()));
SchemaProto schema;
schema.add_types()->set_schema_type("Document");
- ICING_ASSERT_OK(schema_store_->SetSchema(std::move(schema)));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ std::move(schema), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
/*max_term_byte_size=*/10000));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult result,
- DocumentStore::Create(&filesystem_, test_dir_, clock_.get(),
- schema_store_.get()));
+ DocumentStore::Create(
+ &filesystem_, test_dir_, clock_.get(), schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -152,7 +153,7 @@ TEST_F(ResultStateManagerThreadSafetyTest,
constexpr int kNumPerPage = 100;
ResultStateManager result_state_manager(/*max_total_hits=*/kNumDocuments,
- *document_store_, clock_.get());
+ *document_store_);
// Retrieve the first page.
// Documents are ordered by score *ascending*, so the first page should
@@ -163,9 +164,9 @@ TEST_F(ResultStateManagerThreadSafetyTest,
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/false),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(), CreateResultSpec(kNumPerPage), *document_store_,
- *result_retriever_));
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(kNumPerPage), *document_store_, *result_retriever_,
+ clock_->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info1.second.results, SizeIs(kNumPerPage));
for (int i = 0; i < kNumPerPage; ++i) {
ASSERT_THAT(page_result_info1.second.results[i].score(), Eq(i));
@@ -187,7 +188,8 @@ TEST_F(ResultStateManagerThreadSafetyTest,
normalizer_.get()));
ICING_ASSERT_OK_AND_ASSIGN(
PageResultInfo page_result_info,
- result_state_manager.GetNextPage(next_page_token, *result_retriever));
+ result_state_manager.GetNextPage(next_page_token, *result_retriever,
+ clock_->GetSystemTimeMilliseconds()));
page_results[thread_id] =
std::make_optional<PageResultInfo>(std::move(page_result_info));
};
@@ -253,7 +255,7 @@ TEST_F(ResultStateManagerThreadSafetyTest, InvalidateResultStateWhileUsing) {
constexpr int kNumPerPage = 100;
ResultStateManager result_state_manager(/*max_total_hits=*/kNumDocuments,
- *document_store_, clock_.get());
+ *document_store_);
// Retrieve the first page.
// Documents are ordered by score *ascending*, so the first page should
@@ -264,9 +266,9 @@ TEST_F(ResultStateManagerThreadSafetyTest, InvalidateResultStateWhileUsing) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits), /*is_descending=*/false),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(), CreateResultSpec(kNumPerPage), *document_store_,
- *result_retriever_));
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(kNumPerPage), *document_store_, *result_retriever_,
+ clock_->GetSystemTimeMilliseconds()));
ASSERT_THAT(page_result_info1.second.results, SizeIs(kNumPerPage));
for (int i = 0; i < kNumPerPage; ++i) {
ASSERT_THAT(page_result_info1.second.results[i].score(), Eq(i));
@@ -289,7 +291,8 @@ TEST_F(ResultStateManagerThreadSafetyTest, InvalidateResultStateWhileUsing) {
normalizer_.get()));
libtextclassifier3::StatusOr<PageResultInfo> page_result_info_or =
- result_state_manager.GetNextPage(next_page_token, *result_retriever);
+ result_state_manager.GetNextPage(next_page_token, *result_retriever,
+ clock_->GetSystemTimeMilliseconds());
if (page_result_info_or.ok()) {
page_results[thread_id] = std::make_optional<PageResultInfo>(
std::move(page_result_info_or).ValueOrDie());
@@ -366,8 +369,7 @@ TEST_F(ResultStateManagerThreadSafetyTest, MultipleResultStates) {
constexpr int kNumThreads = 50;
constexpr int kNumPerPage = 30;
ResultStateManager result_state_manager(
- /*max_total_hits=*/kNumDocuments * kNumThreads, *document_store_,
- clock_.get());
+ /*max_total_hits=*/kNumDocuments * kNumThreads, *document_store_);
// Create kNumThreads threads to:
// - Call CacheAndRetrieveFirstPage() once to create its own ResultState.
@@ -394,9 +396,10 @@ TEST_F(ResultStateManagerThreadSafetyTest, MultipleResultStates) {
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits_copy), /*is_descending=*/false),
- /*query_terms=*/{}, SearchSpecProto::default_instance(),
- CreateScoringSpec(), CreateResultSpec(kNumPerPage),
- *document_store_, *result_retriever));
+ /*parent_adjustment_info=*/nullptr,
+ /*child_adjustment_info=*/nullptr, CreateResultSpec(kNumPerPage),
+ *document_store_, *result_retriever,
+ clock_->GetSystemTimeMilliseconds()));
EXPECT_THAT(page_result_info1.second.results, SizeIs(kNumPerPage));
for (int i = 0; i < kNumPerPage; ++i) {
EXPECT_THAT(page_result_info1.second.results[i].score(), Eq(i));
@@ -415,9 +418,10 @@ TEST_F(ResultStateManagerThreadSafetyTest, MultipleResultStates) {
// each thread should retrieve 1, 2, 3, ..., kNumThreads pages.
int num_subsequent_pages_to_retrieve = thread_id;
for (int i = 0; i < num_subsequent_pages_to_retrieve; ++i) {
- ICING_ASSERT_OK_AND_ASSIGN(
- PageResultInfo page_result_info,
- result_state_manager.GetNextPage(next_page_token, *result_retriever));
+ ICING_ASSERT_OK_AND_ASSIGN(PageResultInfo page_result_info,
+ result_state_manager.GetNextPage(
+ next_page_token, *result_retriever,
+ clock_->GetSystemTimeMilliseconds()));
EXPECT_THAT(page_result_info.second.results, SizeIs(kNumPerPage));
for (int j = 0; j < kNumPerPage; ++j) {
EXPECT_THAT(page_result_info.second.results[j].score(),
diff --git a/icing/result/result-state-v2.cc b/icing/result/result-state-v2.cc
index e42620e..3aa9359 100644
--- a/icing/result/result-state-v2.cc
+++ b/icing/result/result-state-v2.cc
@@ -15,54 +15,35 @@
#include "icing/result/result-state-v2.h"
#include <atomic>
+#include <cstdint>
#include <memory>
+#include <string>
+#include <vector>
-#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
-#include "icing/proto/term.pb.h"
-#include "icing/result/projection-tree.h"
-#include "icing/result/snippet-context.h"
+#include "icing/result/result-adjustment-info.h"
#include "icing/scoring/scored-document-hits-ranker.h"
+#include "icing/store/document-store.h"
namespace icing {
namespace lib {
-namespace {
-SnippetContext CreateSnippetContext(SectionRestrictQueryTermsMap query_terms,
- const SearchSpecProto& search_spec,
- const ResultSpecProto& result_spec) {
- if (result_spec.snippet_spec().num_to_snippet() > 0 &&
- result_spec.snippet_spec().num_matches_per_property() > 0) {
- // Needs snippeting
- return SnippetContext(std::move(query_terms), result_spec.snippet_spec(),
- search_spec.term_match_type());
- }
- return SnippetContext(/*query_terms_in=*/{},
- ResultSpecProto::SnippetSpecProto::default_instance(),
- TermMatchType::UNKNOWN);
-}
-} // namespace
-
ResultStateV2::ResultStateV2(
std::unique_ptr<ScoredDocumentHitsRanker> scored_document_hits_ranker_in,
- SectionRestrictQueryTermsMap query_terms,
- const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
+ std::unique_ptr<ResultAdjustmentInfo> parent_adjustment_info,
+ std::unique_ptr<ResultAdjustmentInfo> child_adjustment_info,
const ResultSpecProto& result_spec, const DocumentStore& document_store)
: scored_document_hits_ranker(std::move(scored_document_hits_ranker_in)),
num_returned(0),
- snippet_context_(CreateSnippetContext(std::move(query_terms), search_spec,
- result_spec)),
+ parent_adjustment_info_(std::move(parent_adjustment_info)),
+ child_adjustment_info_(std::move(child_adjustment_info)),
num_per_page_(result_spec.num_per_page()),
num_total_bytes_per_page_threshold_(
result_spec.num_total_bytes_per_page_threshold()),
+ max_joined_children_per_parent_to_return_(
+ result_spec.max_joined_children_per_parent_to_return()),
num_total_hits_(nullptr),
result_group_type_(result_spec.result_group_type()) {
- for (const TypePropertyMask& type_field_mask :
- result_spec.type_property_masks()) {
- projection_tree_map_.insert(
- {type_field_mask.schema_type(), ProjectionTree(type_field_mask)});
- }
-
for (const ResultSpecProto::ResultGrouping& result_grouping :
result_spec.result_groupings()) {
int group_id = group_result_limits.size();
diff --git a/icing/result/result-state-v2.h b/icing/result/result-state-v2.h
index df2f070..919710e 100644
--- a/icing/result/result-state-v2.h
+++ b/icing/result/result-state-v2.h
@@ -22,13 +22,11 @@
#include <vector>
#include "icing/absl_ports/mutex.h"
-#include "icing/proto/scoring.pb.h"
+#include "icing/absl_ports/thread_annotations.h"
#include "icing/proto/search.pb.h"
-#include "icing/result/projection-tree.h"
-#include "icing/result/snippet-context.h"
+#include "icing/result/result-adjustment-info.h"
#include "icing/scoring/scored-document-hits-ranker.h"
#include "icing/store/document-store.h"
-#include "icing/store/namespace-id.h"
namespace icing {
namespace lib {
@@ -39,8 +37,8 @@ class ResultStateV2 {
public:
explicit ResultStateV2(
std::unique_ptr<ScoredDocumentHitsRanker> scored_document_hits_ranker_in,
- SectionRestrictQueryTermsMap query_terms,
- const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
+ std::unique_ptr<ResultAdjustmentInfo> parent_adjustment_info,
+ std::unique_ptr<ResultAdjustmentInfo> child_adjustment_info,
const ResultSpecProto& result_spec, const DocumentStore& document_store);
~ResultStateV2();
@@ -60,14 +58,28 @@ class ResultStateV2 {
void IncrementNumTotalHits(int increment_by)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex);
- const SnippetContext& snippet_context() const
+ // Returns a nullable pointer to parent adjustment info.
+ ResultAdjustmentInfo* parent_adjustment_info()
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex) {
+ return parent_adjustment_info_.get();
+ }
+
+ // Returns a nullable pointer to parent adjustment info.
+ const ResultAdjustmentInfo* parent_adjustment_info() const
ICING_SHARED_LOCKS_REQUIRED(mutex) {
- return snippet_context_;
+ return parent_adjustment_info_.get();
}
- const std::unordered_map<std::string, ProjectionTree>& projection_tree_map()
- const ICING_SHARED_LOCKS_REQUIRED(mutex) {
- return projection_tree_map_;
+ // Returns a nullable pointer to child adjustment info.
+ ResultAdjustmentInfo* child_adjustment_info()
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex) {
+ return child_adjustment_info_.get();
+ }
+
+ // Returns a nullable pointer to child adjustment info.
+ const ResultAdjustmentInfo* child_adjustment_info() const
+ ICING_SHARED_LOCKS_REQUIRED(mutex) {
+ return child_adjustment_info_.get();
}
const std::unordered_map<int32_t, int>& entry_id_group_id_map() const
@@ -75,7 +87,7 @@ class ResultStateV2 {
return entry_id_group_id_map_;
}
- int num_per_page() const ICING_SHARED_LOCKS_REQUIRED(mutex) {
+ int32_t num_per_page() const ICING_SHARED_LOCKS_REQUIRED(mutex) {
return num_per_page_;
}
@@ -84,6 +96,11 @@ class ResultStateV2 {
return num_total_bytes_per_page_threshold_;
}
+ int32_t max_joined_children_per_parent_to_return() const
+ ICING_SHARED_LOCKS_REQUIRED(mutex) {
+ return max_joined_children_per_parent_to_return_;
+ }
+
ResultSpecProto::ResultGroupingType result_group_type()
ICING_SHARED_LOCKS_REQUIRED(mutex) {
return result_group_type_;
@@ -110,11 +127,16 @@ class ResultStateV2 {
int num_returned ICING_GUARDED_BY(mutex);
private:
- // Information needed for snippeting.
- SnippetContext snippet_context_ ICING_GUARDED_BY(mutex);
+ // Adjustment information for parent documents, including snippet and
+ // projection. Can be nullptr if there is no adjustment info for parent
+ // documents.
+ std::unique_ptr<ResultAdjustmentInfo> parent_adjustment_info_
+ ICING_GUARDED_BY(mutex);
- // Information needed for projection.
- std::unordered_map<std::string, ProjectionTree> projection_tree_map_
+ // Adjustment information for child documents, including snippet and
+ // projection. This is only used for join query. Can be nullptr if there is no
+ // adjustment info for child documents.
+ std::unique_ptr<ResultAdjustmentInfo> child_adjustment_info_
ICING_GUARDED_BY(mutex);
// A map between result grouping entry id and the id of the group that it
@@ -123,7 +145,7 @@ class ResultStateV2 {
ICING_GUARDED_BY(mutex);
// Number of results to return in each page.
- int num_per_page_ ICING_GUARDED_BY(mutex);
+ int32_t num_per_page_ ICING_GUARDED_BY(mutex);
// The threshold of total bytes of all documents to cutoff, in order to limit
// # of bytes in a single page.
@@ -133,6 +155,10 @@ class ResultStateV2 {
// threshold too much.
int32_t num_total_bytes_per_page_threshold_ ICING_GUARDED_BY(mutex);
+ // Max # of joined child documents to be attached in the result for each
+ // parent document.
+ int32_t max_joined_children_per_parent_to_return_ ICING_GUARDED_BY(mutex);
+
// Pointer to a global counter to sum up the size of scored_document_hits in
// all ResultStates.
// Does not own.
diff --git a/icing/result/result-state-v2_test.cc b/icing/result/result-state-v2_test.cc
index 4f16e7f..0f88023 100644
--- a/icing/result/result-state-v2_test.cc
+++ b/icing/result/result-state-v2_test.cc
@@ -16,29 +16,27 @@
#include <atomic>
#include <cstdint>
+#include <limits>
#include <memory>
#include <string>
-#include <unordered_map>
-#include <unordered_set>
+#include <utility>
#include <vector>
+#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/absl_ports/mutex.h"
#include "icing/file/filesystem.h"
-#include "icing/portable/equals-proto.h"
+#include "icing/file/portable-file-backed-proto-log.h"
#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
#include "icing/proto/schema.pb.h"
-#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
-#include "icing/proto/term.pb.h"
-#include "icing/result/projection-tree.h"
-#include "icing/result/snippet-context.h"
#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
#include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
#include "icing/scoring/scored-document-hit.h"
-#include "icing/scoring/scored-document-hits-ranker.h"
+#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
-#include "icing/store/namespace-id.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
#include "icing/util/clock.h"
@@ -47,26 +45,11 @@ namespace icing {
namespace lib {
namespace {
-using ::icing::lib::portable_equals_proto::EqualsProto;
using ::testing::ElementsAre;
using ::testing::Eq;
-using ::testing::IsEmpty;
using ::testing::Pair;
using ::testing::UnorderedElementsAre;
-SearchSpecProto CreateSearchSpec(TermMatchType::Code match_type) {
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(match_type);
- return search_spec;
-}
-
-ScoringSpecProto CreateScoringSpec(bool is_descending_order) {
- ScoringSpecProto scoring_spec;
- scoring_spec.set_order_by(is_descending_order ? ScoringSpecProto::Order::DESC
- : ScoringSpecProto::Order::ASC);
- return scoring_spec;
-}
-
ResultSpecProto CreateResultSpec(
int num_per_page, ResultSpecProto::ResultGroupingType result_group_type) {
ResultSpecProto result_spec;
@@ -85,14 +68,22 @@ class ResultStateV2Test : public ::testing::Test {
SchemaStore::Create(&filesystem_, schema_store_base_dir_, &clock_));
SchemaProto schema;
schema.add_types()->set_schema_type("Document");
- ICING_ASSERT_OK(schema_store_->SetSchema(std::move(schema)));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ std::move(schema), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
doc_store_base_dir_ = GetTestTempDir() + "/document_store";
filesystem_.CreateDirectoryRecursively(doc_store_base_dir_.c_str());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult result,
- DocumentStore::Create(&filesystem_, doc_store_base_dir_, &clock_,
- schema_store_.get()));
+ DocumentStore::Create(
+ &filesystem_, doc_store_base_dir_, &clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(result.document_store);
num_total_hits_ = 0;
@@ -132,15 +123,15 @@ TEST_F(ResultStateV2Test, ShouldInitializeValuesAccordingToSpecs) {
ResultSpecProto result_spec =
CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
result_spec.set_num_total_bytes_per_page_threshold(4096);
+ result_spec.set_max_joined_children_per_parent_to_return(2048);
+ // Adjustment info is not important in this test.
ResultStateV2 result_state(
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
- std::vector<ScoredDocumentHit>(),
- /*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true), result_spec,
- document_store());
+ std::vector<ScoredDocumentHit>(), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, document_store());
absl_ports::shared_lock l(&result_state.mutex);
@@ -148,6 +139,8 @@ TEST_F(ResultStateV2Test, ShouldInitializeValuesAccordingToSpecs) {
EXPECT_THAT(result_state.num_per_page(), Eq(result_spec.num_per_page()));
EXPECT_THAT(result_state.num_total_bytes_per_page_threshold(),
Eq(result_spec.num_total_bytes_per_page_threshold()));
+ EXPECT_THAT(result_state.max_joined_children_per_parent_to_return(),
+ Eq(result_spec.max_joined_children_per_parent_to_return()));
}
TEST_F(ResultStateV2Test, ShouldInitializeValuesAccordingToDefaultSpecs) {
@@ -156,14 +149,14 @@ TEST_F(ResultStateV2Test, ShouldInitializeValuesAccordingToDefaultSpecs) {
ASSERT_THAT(default_result_spec.num_total_bytes_per_page_threshold(),
Eq(std::numeric_limits<int32_t>::max()));
+ // Adjustment info is not important in this test.
ResultStateV2 result_state(
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::vector<ScoredDocumentHit>(),
/*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true), default_result_spec,
- document_store());
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ default_result_spec, document_store());
absl_ports::shared_lock l(&result_state.mutex);
@@ -172,116 +165,9 @@ TEST_F(ResultStateV2Test, ShouldInitializeValuesAccordingToDefaultSpecs) {
Eq(default_result_spec.num_per_page()));
EXPECT_THAT(result_state.num_total_bytes_per_page_threshold(),
Eq(default_result_spec.num_total_bytes_per_page_threshold()));
-}
-
-TEST_F(ResultStateV2Test, ShouldReturnSnippetContextAccordingToSpecs) {
- ResultSpecProto result_spec =
- CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
- result_spec.mutable_snippet_spec()->set_num_to_snippet(5);
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(5);
- result_spec.mutable_snippet_spec()->set_max_window_utf32_length(5);
-
- SectionRestrictQueryTermsMap query_terms_map;
- query_terms_map.emplace("term1", std::unordered_set<std::string>());
-
- ResultStateV2 result_state(
- std::make_unique<
- PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
- std::vector<ScoredDocumentHit>(),
- /*is_descending=*/true),
- query_terms_map, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true), result_spec,
- document_store());
-
- absl_ports::shared_lock l(&result_state.mutex);
-
- const SnippetContext snippet_context = result_state.snippet_context();
-
- // Snippet context should be derived from the specs above.
- EXPECT_TRUE(snippet_context.query_terms.find("term1") !=
- snippet_context.query_terms.end());
- EXPECT_THAT(snippet_context.snippet_spec,
- EqualsProto(result_spec.snippet_spec()));
- EXPECT_THAT(snippet_context.match_type, Eq(TermMatchType::EXACT_ONLY));
-
- // The same copy can be fetched multiple times.
- const SnippetContext snippet_context2 = result_state.snippet_context();
- EXPECT_TRUE(snippet_context2.query_terms.find("term1") !=
- snippet_context2.query_terms.end());
- EXPECT_THAT(snippet_context2.snippet_spec,
- EqualsProto(result_spec.snippet_spec()));
- EXPECT_THAT(snippet_context2.match_type, Eq(TermMatchType::EXACT_ONLY));
-}
-
-TEST_F(ResultStateV2Test, NoSnippetingShouldReturnNull) {
- ResultSpecProto result_spec =
- CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
- // Setting num_to_snippet to 0 so that snippeting info won't be
- // stored.
- result_spec.mutable_snippet_spec()->set_num_to_snippet(0);
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(5);
- result_spec.mutable_snippet_spec()->set_max_window_utf32_length(5);
-
- SectionRestrictQueryTermsMap query_terms_map;
- query_terms_map.emplace("term1", std::unordered_set<std::string>());
-
- ResultStateV2 result_state(
- std::make_unique<
- PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
- std::vector<ScoredDocumentHit>(),
- /*is_descending=*/true),
- query_terms_map, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true), result_spec,
- document_store());
-
- absl_ports::shared_lock l(&result_state.mutex);
-
- const SnippetContext snippet_context = result_state.snippet_context();
- EXPECT_THAT(snippet_context.query_terms, IsEmpty());
EXPECT_THAT(
- snippet_context.snippet_spec,
- EqualsProto(ResultSpecProto::SnippetSpecProto::default_instance()));
- EXPECT_THAT(snippet_context.match_type, TermMatchType::UNKNOWN);
-}
-
-TEST_F(ResultStateV2Test, ShouldConstructProjectionTreeMapAccordingToSpecs) {
- // Create a ResultSpec with type property mask.
- ResultSpecProto result_spec =
- CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
- TypePropertyMask* email_type_property_mask =
- result_spec.add_type_property_masks();
- email_type_property_mask->set_schema_type("Email");
- email_type_property_mask->add_paths("sender.name");
- email_type_property_mask->add_paths("sender.emailAddress");
- TypePropertyMask* phone_type_property_mask =
- result_spec.add_type_property_masks();
- phone_type_property_mask->set_schema_type("Phone");
- phone_type_property_mask->add_paths("caller");
- TypePropertyMask* wildcard_type_property_mask =
- result_spec.add_type_property_masks();
- wildcard_type_property_mask->set_schema_type(
- std::string(ProjectionTree::kSchemaTypeWildcard));
- wildcard_type_property_mask->add_paths("wild.card");
-
- ResultStateV2 result_state(
- std::make_unique<
- PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
- std::vector<ScoredDocumentHit>(),
- /*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true), result_spec,
- document_store());
-
- absl_ports::shared_lock l(&result_state.mutex);
-
- const std::unordered_map<std::string, ProjectionTree>& projection_tree_map =
- result_state.projection_tree_map();
- EXPECT_THAT(projection_tree_map,
- UnorderedElementsAre(
- Pair("Email", ProjectionTree(*email_type_property_mask)),
- Pair("Phone", ProjectionTree(*phone_type_property_mask)),
- Pair(std::string(ProjectionTree::kSchemaTypeWildcard),
- ProjectionTree(*wildcard_type_property_mask))));
+ result_state.max_joined_children_per_parent_to_return(),
+ Eq(default_result_spec.max_joined_children_per_parent_to_return()));
}
TEST_F(ResultStateV2Test,
@@ -342,14 +228,14 @@ TEST_F(ResultStateV2Test,
int32_t entry_id3, document_store().GetResultGroupingEntryId(
result_grouping_type, "namespace3", "Document"));
+ // Adjustment info is not important in this test.
ResultStateV2 result_state(
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::vector<ScoredDocumentHit>(),
/*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true), result_spec,
- document_store());
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, document_store());
absl_ports::shared_lock l(&result_state.mutex);
@@ -374,14 +260,14 @@ TEST_F(ResultStateV2Test, ShouldUpdateNumTotalHits) {
AddScoredDocument(/*document_id=*/4),
AddScoredDocument(/*document_id=*/3)};
+ // Adjustment info is not important in this test.
// Creates a ResultState with 5 ScoredDocumentHits.
ResultStateV2 result_state(
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits),
/*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/5, ResultSpecProto::NAMESPACE),
document_store());
@@ -408,14 +294,14 @@ TEST_F(ResultStateV2Test, ShouldUpdateNumTotalHitsWhenDestructed) {
num_total_hits() = 2;
{
+ // Adjustment info is not important in this test.
// Creates a ResultState with 5 ScoredDocumentHits.
ResultStateV2 result_state1(
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits1),
/*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/5, ResultSpecProto::NAMESPACE),
document_store());
@@ -425,14 +311,14 @@ TEST_F(ResultStateV2Test, ShouldUpdateNumTotalHitsWhenDestructed) {
ASSERT_THAT(num_total_hits(), Eq(7));
{
+ // Adjustment info is not important in this test.
// Creates another ResultState with 2 ScoredDocumentHits.
ResultStateV2 result_state2(
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits2),
/*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/5, ResultSpecProto::NAMESPACE),
document_store());
@@ -457,13 +343,13 @@ TEST_F(ResultStateV2Test, ShouldNotUpdateNumTotalHitsWhenNotRegistered) {
// Creates a ResultState with 5 ScoredDocumentHits.
{
+ // Adjustment info is not important in this test.
ResultStateV2 result_state(
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits),
/*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/5, ResultSpecProto::NAMESPACE),
document_store());
@@ -488,14 +374,14 @@ TEST_F(ResultStateV2Test, ShouldDecrementOriginalNumTotalHitsWhenReregister) {
AddScoredDocument(/*document_id=*/4),
AddScoredDocument(/*document_id=*/3)};
+ // Adjustment info is not important in this test.
// Creates a ResultState with 5 ScoredDocumentHits.
ResultStateV2 result_state(
std::make_unique<
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
std::move(scored_document_hits),
/*is_descending=*/true),
- /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
CreateResultSpec(/*num_per_page=*/5, ResultSpecProto::NAMESPACE),
document_store());
@@ -520,4 +406,4 @@ TEST_F(ResultStateV2Test, ShouldDecrementOriginalNumTotalHitsWhenReregister) {
} // namespace
} // namespace lib
-} // namespace icing \ No newline at end of file
+} // namespace icing
diff --git a/icing/result/snippet-retriever.cc b/icing/result/snippet-retriever.cc
index 8044b8d..fcaba4c 100644
--- a/icing/result/snippet-retriever.cc
+++ b/icing/result/snippet-retriever.cc
@@ -27,13 +27,12 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
-#include "icing/absl_ports/str_join.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/search.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/query/query-terms.h"
+#include "icing/schema/property-util.h"
#include "icing/schema/schema-store.h"
-#include "icing/schema/section-manager.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
#include "icing/tokenization/language-segmenter.h"
@@ -51,31 +50,13 @@ namespace lib {
namespace {
-const PropertyProto* GetProperty(const DocumentProto& document,
- std::string_view property_name) {
- for (const PropertyProto& property : document.properties()) {
- if (property.name() == property_name) {
- return &property;
- }
- }
- return nullptr;
-}
-
-inline std::string AddPropertyToPath(const std::string& current_path,
- std::string_view property) {
- if (current_path.empty()) {
- return std::string(property);
- }
- return absl_ports::StrCat(current_path, kPropertySeparator, property);
-}
-
inline std::string AddIndexToPath(int values_size, int index,
const std::string& property_path) {
if (values_size == 1) {
return property_path;
}
- return absl_ports::StrCat(property_path, kLBracket, std::to_string(index),
- kRBracket);
+ return absl_ports::StrCat(
+ property_path, property_util::ConvertToPropertyExprIndexStr(index));
}
// Returns a string of the normalized text of the input Token. Normalization
@@ -508,94 +489,96 @@ void GetEntriesFromProperty(const PropertyProto* current_property,
std::string_view value = current_property->string_values(i);
std::unique_ptr<Tokenizer::Iterator> iterator =
tokenizer->Tokenize(value).ValueOrDie();
+ // All iterators are moved through positions sequentially. Constructing them
+ // each time resets them to the beginning of the string. This means that,
+ // for t tokens and in a string of n chars, each MoveToUtf8 call from the
+ // beginning of the string is on average O(n/2), whereas calling MoveToUtf8
+ // from the token immediately prior to the desired one is O(n/t).
+ // Constructing each outside of the while-loop ensures that performance will
+ // be O(t * (n/t)) = O(n) rather than O(t * n / 2).
CharacterIterator start_itr(value);
CharacterIterator end_itr(value);
+ CharacterIterator reset_itr(value);
+ bool encountered_error = false;
while (iterator->Advance()) {
std::vector<Token> batch_tokens = iterator->GetTokens();
if (batch_tokens.empty()) {
continue;
}
- // As snippet matching may move iterator around, we save a reset iterator
- // so that we can reset to the initial iterator state, and continue
- // Advancing in order in the next round.
- CharacterIterator reset_itr(value);
+ bool needs_reset = false;
reset_itr.MoveToUtf8(batch_tokens.at(0).text.begin() - value.begin());
-
- for (const Token& token : batch_tokens) {
+ start_itr = reset_itr;
+ end_itr = start_itr;
+ for (int i = 0; i < batch_tokens.size(); ++i) {
+ const Token& token = batch_tokens.at(i);
CharacterIterator submatch_end = matcher->Matches(token);
// If the token matched a query term, then submatch_end will point to an
// actual position within token.text.
- if (submatch_end.utf8_index() != -1) {
- if (!start_itr.MoveToUtf8(token.text.begin() - value.begin())) {
- // We can't get the char_iterator to a valid position, so there's no
- // way for us to provide valid utf-16 indices. There's nothing more
- // we can do here, so just return whatever we've built up so far.
- if (!snippet_entry.snippet_matches().empty()) {
- *snippet_proto->add_entries() = std::move(snippet_entry);
- }
- return;
- }
- if (!end_itr.MoveToUtf8(token.text.end() - value.begin())) {
- // Same as above
- if (!snippet_entry.snippet_matches().empty()) {
- *snippet_proto->add_entries() = std::move(snippet_entry);
- }
- return;
- }
- SectionData data = {property_path, value};
- auto match_or = RetrieveMatch(match_options->snippet_spec, data,
- iterator.get(), start_itr, end_itr);
- if (!match_or.ok()) {
- if (absl_ports::IsAborted(match_or.status())) {
- // Only an aborted. We can't get this match, but we might be able
- // to retrieve others. Just continue.
- continue;
- } else {
- // Probably an internal error. The tokenizer iterator is probably
- // in an invalid state. There's nothing more we can do here, so
- // just return whatever we've built up so far.
- if (!snippet_entry.snippet_matches().empty()) {
- *snippet_proto->add_entries() = std::move(snippet_entry);
- }
- return;
- }
- }
- SnippetMatchProto match = std::move(match_or).ValueOrDie();
- // submatch_end refers to a position *within* token.text.
- // This, conveniently enough, means that index that submatch_end
- // points to is the length of the submatch (because the submatch
- // starts at 0 in token.text).
- match.set_submatch_byte_length(submatch_end.utf8_index());
- match.set_submatch_utf16_length(submatch_end.utf16_index());
- // Add the values for the submatch.
- snippet_entry.mutable_snippet_matches()->Add(std::move(match));
-
- if (--match_options->max_matches_remaining <= 0) {
- *snippet_proto->add_entries() = std::move(snippet_entry);
- return;
+ if (submatch_end.utf8_index() == -1) {
+ continue;
+ }
+ // As snippet matching may move iterator around, we save a reset
+ // iterator so that we can reset to the initial iterator state, and
+ // continue Advancing in order in the next round.
+ if (!start_itr.MoveToUtf8(token.text.begin() - value.begin())) {
+ encountered_error = true;
+ break;
+ }
+ if (!end_itr.MoveToUtf8(token.text.end() - value.begin())) {
+ encountered_error = true;
+ break;
+ }
+ SectionData data = {property_path, value};
+ auto match_or = RetrieveMatch(match_options->snippet_spec, data,
+ iterator.get(), start_itr, end_itr);
+ if (!match_or.ok()) {
+ if (absl_ports::IsAborted(match_or.status())) {
+ // Only an aborted. We can't get this match, but we might be able
+ // to retrieve others. Just continue.
+ continue;
+ } else {
+ encountered_error = true;
+ break;
}
}
+ SnippetMatchProto match = std::move(match_or).ValueOrDie();
+ if (match.window_byte_length() > 0) {
+ needs_reset = true;
+ }
+ // submatch_end refers to a position *within* token.text.
+ // This, conveniently enough, means that index that submatch_end
+ // points to is the length of the submatch (because the submatch
+ // starts at 0 in token.text).
+ match.set_submatch_byte_length(submatch_end.utf8_index());
+ match.set_submatch_utf16_length(submatch_end.utf16_index());
+ // Add the values for the submatch.
+ snippet_entry.mutable_snippet_matches()->Add(std::move(match));
+
+ if (--match_options->max_matches_remaining <= 0) {
+ *snippet_proto->add_entries() = std::move(snippet_entry);
+ return;
+ }
}
- // RetrieveMatch calls DetermineWindowStart/End, which may change the
- // position of the iterator. So, reset the iterator back to the original
- // position. The first token of the token batch will be the token to reset
- // to.
-
- bool success = false;
- if (reset_itr.utf8_index() > 0) {
- success =
- iterator->ResetToTokenStartingAfter(reset_itr.utf32_index() - 1);
- } else {
- success = iterator->ResetToStart();
+ if (encountered_error) {
+ break;
}
- if (!success) {
- if (!snippet_entry.snippet_matches().empty()) {
- *snippet_proto->add_entries() = std::move(snippet_entry);
+ // RetrieveMatch may call DetermineWindowStart/End if windowing is
+ // requested, which may change the position of the iterator. So, reset the
+ // iterator back to the original position. The first token of the token
+ // batch will be the token to reset to.
+ if (needs_reset) {
+ if (reset_itr.utf8_index() > 0) {
+ encountered_error =
+ !iterator->ResetToTokenStartingAfter(reset_itr.utf32_index() - 1);
+ } else {
+ encountered_error = !iterator->ResetToStart();
}
- return;
+ }
+ if (encountered_error) {
+ break;
}
}
if (!snippet_entry.snippet_matches().empty()) {
@@ -632,14 +615,14 @@ void RetrieveSnippetForSection(
SnippetProto* snippet_proto) {
std::string_view next_property_name = section_path.at(section_path_index);
const PropertyProto* current_property =
- GetProperty(document, next_property_name);
+ property_util::GetPropertyProto(document, next_property_name);
if (current_property == nullptr) {
ICING_VLOG(1) << "No property " << next_property_name << " found at path "
<< current_path;
return;
}
- std::string property_path =
- AddPropertyToPath(current_path, next_property_name);
+ std::string property_path = property_util::ConcatenatePropertyPathExpr(
+ current_path, next_property_name);
if (section_path_index == section_path.size() - 1) {
// We're at the end. Let's check our values.
GetEntriesFromProperty(current_property, property_path, matcher, tokenizer,
@@ -704,7 +687,7 @@ SnippetProto SnippetRetriever::RetrieveSnippet(
}
const SectionMetadata* metadata = section_metadata_or.ValueOrDie();
std::vector<std::string_view> section_path =
- absl_ports::StrSplit(metadata->path, kPropertySeparator);
+ property_util::SplitPropertyPathExpr(metadata->path);
// Match type must be as restrictive as possible. Prefix matches for a
// snippet should only be included if both the query is Prefix and the
diff --git a/icing/result/snippet-retriever_benchmark.cc b/icing/result/snippet-retriever_benchmark.cc
new file mode 100644
index 0000000..e574325
--- /dev/null
+++ b/icing/result/snippet-retriever_benchmark.cc
@@ -0,0 +1,333 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "testing/base/public/benchmark.h"
+#include "gmock/gmock.h"
+#include "third_party/absl/flags/flag.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/result/snippet-retriever.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/random-string.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/transform/normalizer-factory.h"
+#include "icing/util/clock.h"
+#include "icing/util/logging.h"
+#include "unicode/uloc.h"
+
+// Run on a Linux workstation:
+// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/result:snippet-retriever_benchmark
+//
+// $ blaze-bin/icing/result/snippet-retriever_benchmark
+// --benchmark_filter=all
+//
+// Run on an Android device:
+// Make target //icing/tokenization:language-segmenter depend on
+// //third_party/icu
+//
+// Make target //icing/transform:normalizer depend on
+// //third_party/icu
+//
+// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
+// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/result:snippet-retriever_benchmark
+//
+// $ adb push blaze-bin/icing/result/snippet-retriever_benchmark
+// /data/local/tmp/
+//
+// $ adb shell /data/local/tmp/snippet-retriever_benchmark
+// --benchmark_filter=all --adb
+
+// Flag to tell the benchmark that it'll be run on an Android device via adb,
+// the benchmark will set up data files accordingly.
+ABSL_FLAG(bool, adb, false, "run benchmark via ADB on an Android device");
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::SizeIs;
+
+void BM_SnippetOneProperty(benchmark::State& state) {
+ bool run_via_adb = absl::GetFlag(FLAGS_adb);
+ if (!run_via_adb) {
+ ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark";
+ const std::string schema_dir = base_dir + "/schema";
+ Filesystem filesystem;
+ filesystem.DeleteDirectoryRecursively(base_dir.c_str());
+ if (!filesystem.CreateDirectoryRecursively(schema_dir.c_str())) {
+ ICING_LOG(ERROR) << "Failed to create test directories";
+ }
+
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ std::unique_ptr<LanguageSegmenter> language_segmenter =
+ language_segmenter_factory::Create(std::move(options)).ValueOrDie();
+ std::unique_ptr<Normalizer> normalizer =
+ normalizer_factory::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int>::max())
+ .ValueOrDie();
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("type1").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ Clock clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem, schema_dir, &clock));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ auto snippet_retriever =
+ SnippetRetriever::Create(schema_store.get(), language_segmenter.get(),
+ normalizer.get())
+ .ValueOrDie();
+
+ int num_matches = state.range(0);
+ int total_terms = state.range(1);
+
+ std::default_random_engine random;
+ std::vector<std::string> language =
+ CreateLanguages(/*language_size=*/1000, &random);
+ std::uniform_int_distribution<size_t> uniform(0u, language.size() - 1);
+ std::uniform_real_distribution<double> uniform_double(0.0, 1.0);
+
+ std::string text;
+ int num_actual_matches = 0;
+ double match_chance;
+ while (total_terms-- > 0) {
+ std::string term;
+ match_chance = static_cast<double>(num_matches) / total_terms;
+ if (uniform_double(random) <= match_chance) {
+ --num_matches;
+ ++num_actual_matches;
+ term = "foo";
+ } else {
+ term = language.at(uniform(random));
+ }
+ absl_ports::StrAppend(&text, " ", term);
+ }
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "uri1")
+ .SetSchema("type1")
+ .AddStringProperty("prop1", text)
+ .Build();
+ SectionRestrictQueryTermsMap query_terms = {{"", {"foo"}}};
+ ResultSpecProto::SnippetSpecProto snippet_spec;
+ snippet_spec.set_num_to_snippet(100000);
+ snippet_spec.set_num_matches_per_property(100000);
+ snippet_spec.set_max_window_utf32_length(64);
+
+ SectionIdMask section_id_mask = 0x01;
+ SnippetProto snippet_proto;
+ for (auto _ : state) {
+ snippet_proto = snippet_retriever->RetrieveSnippet(
+ query_terms, TERM_MATCH_PREFIX, snippet_spec, document,
+ section_id_mask);
+ ASSERT_THAT(snippet_proto.entries(), SizeIs(1));
+ ASSERT_THAT(snippet_proto.entries(0).snippet_matches(),
+ SizeIs(num_actual_matches));
+ }
+
+ // Destroy the schema store before the whole directory is removed because they
+ // persist data in destructor.
+ schema_store.reset();
+ filesystem.DeleteDirectoryRecursively(base_dir.c_str());
+}
+BENCHMARK(BM_SnippetOneProperty)
+ // Arguments: num_matches, total_terms
+ ->ArgPair(1, 1)
+ ->ArgPair(1, 16) // single match
+ ->ArgPair(2, 16) // ~10% matches
+ ->ArgPair(3, 16) // ~20% matches
+ ->ArgPair(8, 16) // 50% matches
+ ->ArgPair(16, 16) // 100% matches
+ ->ArgPair(1, 128) // single match
+ ->ArgPair(13, 128) // ~10% matches
+ ->ArgPair(26, 128) // ~20% matches
+ ->ArgPair(64, 128) // 50% matches
+ ->ArgPair(128, 128) // 100% matches
+ ->ArgPair(1, 512) // single match
+ ->ArgPair(51, 512) // ~10% matches
+ ->ArgPair(102, 512) // ~20% matches
+ ->ArgPair(256, 512) // 50% matches
+ ->ArgPair(512, 512) // 100% matches
+ ->ArgPair(1, 1024) // single match
+ ->ArgPair(102, 1024) // ~10% matches
+ ->ArgPair(205, 1024) // ~20% matches
+ ->ArgPair(512, 1024) // 50% matches
+ ->ArgPair(1024, 1024) // 100% matches
+ ->ArgPair(1, 4096) // single match
+ ->ArgPair(410, 4096) // ~10% matches
+ ->ArgPair(819, 4096) // ~20% matches
+ ->ArgPair(2048, 4096) // 50% matches
+ ->ArgPair(4096, 4096) // 100% matches
+ ->ArgPair(1, 16384) // single match
+ ->ArgPair(1638, 16384) // ~10% matches
+ ->ArgPair(3277, 16384) // ~20% matches
+ ->ArgPair(8192, 16384) // 50% matches
+ ->ArgPair(16384, 16384); // 100% matches
+
+void BM_SnippetRfcOneProperty(benchmark::State& state) {
+ bool run_via_adb = absl::GetFlag(FLAGS_adb);
+ if (!run_via_adb) {
+ ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark";
+ const std::string schema_dir = base_dir + "/schema";
+ Filesystem filesystem;
+ filesystem.DeleteDirectoryRecursively(base_dir.c_str());
+ if (!filesystem.CreateDirectoryRecursively(schema_dir.c_str())) {
+ ICING_LOG(ERROR) << "Failed to create test directories";
+ }
+
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ std::unique_ptr<LanguageSegmenter> language_segmenter =
+ language_segmenter_factory::Create(std::move(options)).ValueOrDie();
+ std::unique_ptr<Normalizer> normalizer =
+ normalizer_factory::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int>::max())
+ .ValueOrDie();
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("type1").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ Clock clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem, schema_dir, &clock));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ auto snippet_retriever =
+ SnippetRetriever::Create(schema_store.get(), language_segmenter.get(),
+ normalizer.get())
+ .ValueOrDie();
+
+ int num_matches = state.range(0);
+ int total_terms = state.range(1);
+
+ std::default_random_engine random;
+ std::vector<std::string> language =
+ CreateLanguages(/*language_size=*/1000, &random);
+ std::uniform_int_distribution<size_t> uniform(0u, language.size() - 1);
+ std::uniform_real_distribution<double> uniform_double(0.0, 1.0);
+
+ std::string text;
+ int num_actual_matches = 0;
+ double match_chance;
+ while (total_terms-- > 0) {
+ std::string term;
+ match_chance = static_cast<double>(num_matches) / total_terms;
+ if (uniform_double(random) <= match_chance) {
+ --num_matches;
+ ++num_actual_matches;
+ term = "foo@google.com";
+ } else {
+ term = absl_ports::StrCat(language.at(uniform(random)), "@google.com");
+ }
+ absl_ports::StrAppend(&text, ",", term);
+ }
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "uri1")
+ .SetSchema("type1")
+ .AddStringProperty("prop1", text)
+ .Build();
+ SectionRestrictQueryTermsMap query_terms = {{"", {"foo"}}};
+ ResultSpecProto::SnippetSpecProto snippet_spec;
+ snippet_spec.set_num_to_snippet(100000);
+ snippet_spec.set_num_matches_per_property(100000);
+ snippet_spec.set_max_window_utf32_length(64);
+
+ SectionIdMask section_id_mask = 0x01;
+ SnippetProto snippet_proto;
+ for (auto _ : state) {
+ snippet_proto = snippet_retriever->RetrieveSnippet(
+ query_terms, TERM_MATCH_PREFIX, snippet_spec, document,
+ section_id_mask);
+ ASSERT_THAT(snippet_proto.entries(), SizeIs(1));
+ ASSERT_THAT(snippet_proto.entries(0).snippet_matches(),
+ SizeIs(num_actual_matches));
+ }
+
+ // Destroy the schema store before the whole directory is removed because they
+ // persist data in destructor.
+ schema_store.reset();
+ filesystem.DeleteDirectoryRecursively(base_dir.c_str());
+}
+BENCHMARK(BM_SnippetRfcOneProperty)
+ // Arguments: num_matches, total_terms
+ ->ArgPair(1, 1)
+ ->ArgPair(1, 16) // single match
+ ->ArgPair(2, 16) // ~10% matches
+ ->ArgPair(3, 16) // ~20% matches
+ ->ArgPair(8, 16) // 50% matches
+ ->ArgPair(16, 16) // 100% matches
+ ->ArgPair(1, 128) // single match
+ ->ArgPair(13, 128) // ~10% matches
+ ->ArgPair(26, 128) // ~20% matches
+ ->ArgPair(64, 128) // 50% matches
+ ->ArgPair(128, 128) // 100% matches
+ ->ArgPair(1, 512) // single match
+ ->ArgPair(51, 512) // ~10% matches
+ ->ArgPair(102, 512) // ~20% matches
+ ->ArgPair(256, 512) // 50% matches
+ ->ArgPair(512, 512) // 100% matches
+ ->ArgPair(1, 1024) // single match
+ ->ArgPair(102, 1024) // ~10% matches
+ ->ArgPair(205, 1024) // ~20% matches
+ ->ArgPair(512, 1024) // 50% matches
+ ->ArgPair(1024, 1024) // 100% matches
+ ->ArgPair(1, 4096) // single match
+ ->ArgPair(410, 4096) // ~10% matches
+ ->ArgPair(819, 4096) // ~20% matches
+ ->ArgPair(2048, 4096) // 50% matches
+ ->ArgPair(4096, 4096) // 100% matches
+ ->ArgPair(1, 16384) // single match
+ ->ArgPair(1638, 16384) // ~10% matches
+ ->ArgPair(3277, 16384) // ~20% matches
+ ->ArgPair(8192, 16384) // 50% matches
+ ->ArgPair(16384, 16384); // 100% matches
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/snippet-retriever_test.cc b/icing/result/snippet-retriever_test.cc
index 80d00d5..8d81b43 100644
--- a/icing/result/snippet-retriever_test.cc
+++ b/icing/result/snippet-retriever_test.cc
@@ -113,7 +113,9 @@ class SnippetRetrieverTest : public testing::Test {
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
/*max_term_byte_size=*/10000));
@@ -1021,7 +1023,8 @@ TEST_F(SnippetRetrieverTest, SnippetingTestOneLevel) {
.SetCardinality(CARDINALITY_REPEATED)))
.Build();
ICING_ASSERT_OK(schema_store_->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true));
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
snippet_retriever_,
SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
@@ -1111,7 +1114,8 @@ TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevel) {
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
ICING_ASSERT_OK(schema_store_->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true));
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
snippet_retriever_,
SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
@@ -1217,7 +1221,8 @@ TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevelRepeated) {
.SetCardinality(CARDINALITY_REPEATED)))
.Build();
ICING_ASSERT_OK(schema_store_->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true));
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
snippet_retriever_,
SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
@@ -1331,7 +1336,8 @@ TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevelSingleValue) {
.SetCardinality(CARDINALITY_REPEATED)))
.Build();
ICING_ASSERT_OK(schema_store_->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true));
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
snippet_retriever_,
SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
@@ -1604,7 +1610,8 @@ TEST_F(SnippetRetrieverTest, SnippettingVerbatimAscii) {
.SetCardinality(CARDINALITY_REPEATED)))
.Build();
ICING_ASSERT_OK(schema_store_->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true));
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
snippet_retriever_,
SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
@@ -1657,7 +1664,8 @@ TEST_F(SnippetRetrieverTest, SnippettingVerbatimCJK) {
.SetCardinality(CARDINALITY_REPEATED)))
.Build();
ICING_ASSERT_OK(schema_store_->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true));
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
snippet_retriever_,
SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
@@ -1715,7 +1723,8 @@ TEST_F(SnippetRetrieverTest, SnippettingRfc822Ascii) {
.SetCardinality(CARDINALITY_REPEATED)))
.Build();
ICING_ASSERT_OK(schema_store_->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true));
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
snippet_retriever_,
@@ -1790,7 +1799,8 @@ TEST_F(SnippetRetrieverTest, SnippettingRfc822CJK) {
.SetCardinality(CARDINALITY_REPEATED)))
.Build();
ICING_ASSERT_OK(schema_store_->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true));
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
snippet_retriever_,
@@ -1835,13 +1845,11 @@ TEST_F(SnippetRetrieverTest, SnippettingRfc822CJK) {
TEST_F(SnippetRetrieverTest, SnippettingUrlAscii) {
SchemaProto schema =
SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- .SetType("urlType")
- .AddProperty(PropertyConfigBuilder()
- .SetName("url")
- .SetDataTypeString(MATCH_PREFIX,
- TOKENIZER_URL)
- .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder().SetType("urlType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("url")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_URL)
+ .SetCardinality(CARDINALITY_REPEATED)))
.Build();
ICING_ASSERT_OK(schema_store_->SetSchema(
schema, /*ignore_errors_and_delete_documents=*/true));
diff --git a/icing/schema-builder.h b/icing/schema-builder.h
index 8d3aecb..c74505e 100644
--- a/icing/schema-builder.h
+++ b/icing/schema-builder.h
@@ -44,6 +44,8 @@ constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_VERBATIM =
StringIndexingConfig::TokenizerType::VERBATIM;
constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_RFC822 =
StringIndexingConfig::TokenizerType::RFC822;
+constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_URL =
+ StringIndexingConfig::TokenizerType::URL;
constexpr TermMatchType::Code TERM_MATCH_UNKNOWN = TermMatchType::UNKNOWN;
constexpr TermMatchType::Code TERM_MATCH_EXACT = TermMatchType::EXACT_ONLY;
@@ -125,6 +127,29 @@ class PropertyConfigBuilder {
property_.set_schema_type(std::string(schema_type));
property_.mutable_document_indexing_config()->set_index_nested_properties(
index_nested_properties);
+ property_.mutable_document_indexing_config()
+ ->clear_indexable_nested_properties_list();
+ return *this;
+ }
+
+ PropertyConfigBuilder& SetDataTypeDocument(
+ std::string_view schema_type,
+ std::initializer_list<std::string> indexable_nested_properties_list) {
+ property_.set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property_.set_schema_type(std::string(schema_type));
+ property_.mutable_document_indexing_config()->set_index_nested_properties(
+ false);
+ for (const std::string& property : indexable_nested_properties_list) {
+ property_.mutable_document_indexing_config()
+ ->add_indexable_nested_properties_list(property);
+ }
+ return *this;
+ }
+
+ PropertyConfigBuilder& SetJoinable(
+ JoinableConfig::ValueType::Code join_value_type, bool propagate_delete) {
+ property_.mutable_joinable_config()->set_value_type(join_value_type);
+ property_.mutable_joinable_config()->set_propagate_delete(propagate_delete);
return *this;
}
@@ -151,6 +176,11 @@ class SchemaTypeConfigBuilder {
return *this;
}
+ SchemaTypeConfigBuilder& AddParentType(std::string_view parent_type) {
+ type_config_.add_parent_types(std::string(parent_type));
+ return *this;
+ }
+
SchemaTypeConfigBuilder& SetVersion(int version) {
type_config_.set_version(version);
return *this;
diff --git a/icing/schema/backup-schema-producer.cc b/icing/schema/backup-schema-producer.cc
new file mode 100644
index 0000000..d0a0554
--- /dev/null
+++ b/icing/schema/backup-schema-producer.cc
@@ -0,0 +1,164 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/backup-schema-producer.h"
+
+#include <string_view>
+#include <unordered_map>
+#include <vector>
+
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema/property-util.h"
+#include "icing/schema/section.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Creates a map of property to indexed id count based on the list of indexed
+// properties provided by metadata_list.
+// For all non-document properties, the value will always be 1.
+// For document properties, the value will be the number of nested properties
+// that are indexed with that document type.
+std::unordered_map<std::string_view, int> CreateIndexedIdCountMap(
+ const std::vector<SectionMetadata>* metadata_list) {
+ std::unordered_map<std::string_view, int> property_indexed_id_count_map;
+ for (const SectionMetadata& metadata : *metadata_list) {
+ std::string_view top_level_property;
+ size_t separator_pos =
+ metadata.path.find(property_util::kPropertyPathSeparator);
+ if (separator_pos == std::string::npos) {
+ top_level_property = metadata.path;
+ } else {
+ top_level_property =
+ std::string_view(metadata.path.c_str(), separator_pos);
+ }
+ int& count = property_indexed_id_count_map[top_level_property];
+ ++count;
+ }
+ return property_indexed_id_count_map;
+}
+
+// Returns the indices (within schema.types()) of all types that are rollback
+// incompatible (old code cannot handle these types if they are unmodified).
+//
+// Currently, this means types that:
+// 1. Use RFC822 tokenization for any properties
+// 2. Use more than 16 indexed properties
+libtextclassifier3::StatusOr<std::vector<int>>
+GetRollbackIncompatibleTypeIndices(const SchemaProto& schema,
+ const SectionManager& type_manager) {
+ std::vector<int> invalid_type_indices;
+ for (int i = 0; i < schema.types_size(); ++i) {
+ const SchemaTypeConfigProto& type = schema.types(i);
+ bool rollback_incompatible = false;
+ for (const PropertyConfigProto& property : type.properties()) {
+ if (property.string_indexing_config().tokenizer_type() ==
+ StringIndexingConfig::TokenizerType::RFC822) {
+ rollback_incompatible = true;
+ break;
+ }
+ }
+ if (rollback_incompatible) {
+ invalid_type_indices.push_back(i);
+ continue;
+ }
+
+ ICING_ASSIGN_OR_RETURN(const std::vector<SectionMetadata>* metadata_list,
+ type_manager.GetMetadataList(type.schema_type()));
+ if (metadata_list->size() > kOldTotalNumSections) {
+ invalid_type_indices.push_back(i);
+ }
+ }
+ return invalid_type_indices;
+}
+
+} // namespace
+
+/* static */ libtextclassifier3::StatusOr<BackupSchemaProducer>
+BackupSchemaProducer::Create(const SchemaProto& schema,
+ const SectionManager& type_manager) {
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<int> invalid_type_indices,
+ GetRollbackIncompatibleTypeIndices(schema, type_manager));
+ if (invalid_type_indices.empty()) {
+ return BackupSchemaProducer();
+ }
+
+ SchemaProto backup_schema(schema);
+ std::unordered_map<std::string_view, int> type_indexed_property_count;
+ for (int i : invalid_type_indices) {
+ SchemaTypeConfigProto* type = backup_schema.mutable_types(i);
+
+ // This should never cause an error - every type should have an entry in the
+ // type_manager.
+ ICING_ASSIGN_OR_RETURN(const std::vector<SectionMetadata>* metadata_list,
+ type_manager.GetMetadataList(type->schema_type()));
+ int num_indexed_sections = metadata_list->size();
+ std::unordered_map<std::string_view, int> property_indexed_id_count_map;
+ if (num_indexed_sections > kOldTotalNumSections) {
+ property_indexed_id_count_map = CreateIndexedIdCountMap(metadata_list);
+ }
+
+ // Step 1. Switch all properties with RFC tokenizer as unindexed.
+ for (PropertyConfigProto& property : *type->mutable_properties()) {
+ // If the property uses the RFC tokenizer, then we need to set it to NONE
+ // and set match type UNKNOWN.
+ if (property.string_indexing_config().tokenizer_type() ==
+ StringIndexingConfig::TokenizerType::RFC822) {
+ property.clear_string_indexing_config();
+ --num_indexed_sections;
+ property_indexed_id_count_map.erase(property.property_name());
+ }
+ }
+
+ // Step 2. If there are any types that exceed the old indexed property
+ // limit, then mark indexed properties as unindexed until we're back under
+ // the limit.
+ if (num_indexed_sections <= kOldTotalNumSections) {
+ continue;
+ }
+
+ // We expect that the last properties were the ones added most recently and
+ // are the least crucial, so we do removal in reverse order. This is a bit
+ // arbitrary, but we don't really have sufficient information to make this
+ // judgment anyways.
+ for (auto itr = type->mutable_properties()->rbegin();
+ itr != type->mutable_properties()->rend(); ++itr) {
+ auto indexed_count_itr =
+ property_indexed_id_count_map.find(itr->property_name());
+ if (indexed_count_itr == property_indexed_id_count_map.end()) {
+ continue;
+ }
+
+ // Mark this property as unindexed and subtract all indexed property ids
+ // consumed by this property.
+ PropertyConfigProto& property = *itr;
+ property.clear_document_indexing_config();
+ property.clear_string_indexing_config();
+ property.clear_integer_indexing_config();
+ num_indexed_sections -= indexed_count_itr->second;
+ if (num_indexed_sections <= kOldTotalNumSections) {
+ break;
+ }
+ }
+ }
+ return BackupSchemaProducer(std::move(backup_schema));
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/backup-schema-producer.h b/icing/schema/backup-schema-producer.h
new file mode 100644
index 0000000..61dcde6
--- /dev/null
+++ b/icing/schema/backup-schema-producer.h
@@ -0,0 +1,55 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCHEMA_BACKUP_SCHEMA_PRODUCER_H_
+#define ICING_SCHEMA_BACKUP_SCHEMA_PRODUCER_H_
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema/section-manager.h"
+#include "icing/schema/section.h"
+
+namespace icing {
+namespace lib {
+
+class BackupSchemaProducer {
+ public:
+ // Creates a BackupSchemaProducer based off of schema.
+ // If schema doesn't require a backup schema (because it is fully
+ // rollback-proof) then no copies will be made and `is_backup_necessary` will
+ // return false.
+ // If schema *does* require a backup schema, then `is_backup_necessary` will
+ // return true and the backup schema can be retrieved by calling `Produce`.
+ // Returns:
+ // - On success, a BackupSchemaProducer
+ // - INTERNAL_ERROR if the schema is inconsistent with the type_manager.
+ static libtextclassifier3::StatusOr<BackupSchemaProducer> Create(
+ const SchemaProto& schema, const SectionManager& type_manager);
+
+ SchemaProto Produce() && { return std::move(cached_schema_); }
+
+ bool is_backup_necessary() const { return !cached_schema_.types().empty(); }
+
+ private:
+ BackupSchemaProducer() = default;
+ explicit BackupSchemaProducer(SchemaProto&& schema)
+ : cached_schema_(std::move(schema)) {}
+
+ SchemaProto cached_schema_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCHEMA_BACKUP_SCHEMA_PRODUCER_H_
diff --git a/icing/schema/backup-schema-producer_test.cc b/icing/schema/backup-schema-producer_test.cc
new file mode 100644
index 0000000..dbd033f
--- /dev/null
+++ b/icing/schema/backup-schema-producer_test.cc
@@ -0,0 +1,737 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/backup-schema-producer.h"
+
+#include <string>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-type-manager.h"
+#include "icing/schema/schema-util.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
+#include "icing/store/key-mapper.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::Pointee;
+using ::testing::SizeIs;
+
+class BackupSchemaProducerTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ test_dir_ = GetTestTempDir() + "/icing";
+ schema_store_dir_ = test_dir_ + "/schema_store";
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+ }
+
+ void TearDown() override {
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
+ }
+
+ Filesystem filesystem_;
+ std::string test_dir_;
+ std::string schema_store_dir_;
+};
+
+TEST_F(BackupSchemaProducerTest, EmptySchema) {
+ SchemaProto empty;
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(empty, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(empty,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(false));
+}
+
+TEST_F(BackupSchemaProducerTest, NoIndexedPropertySchema) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .SetDataType(TYPE_INT64)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeB")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop3")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument(
+ "TypeA", /*index_nested_properties=*/false))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop4")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataType(TYPE_STRING)))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+ ASSERT_THAT(type_id_mapper->Put("TypeB", 1), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(false));
+}
+
+TEST_F(BackupSchemaProducerTest, RollbackCompatibleSchema) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeB")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop3")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument(
+ "TypeA", /*index_nested_properties=*/true))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop4")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_VERBATIM)))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+ ASSERT_THAT(type_id_mapper->Put("TypeB", 1), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(false));
+}
+
+TEST_F(BackupSchemaProducerTest, RemoveRfc822) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("TypeA").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822)))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true));
+ SchemaProto backup = std::move(backup_producer).Produce();
+
+ SchemaProto expected_backup =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("TypeA").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING)))
+ .Build();
+ EXPECT_THAT(backup, portable_equals_proto::EqualsProto(expected_backup));
+}
+
+TEST_F(BackupSchemaProducerTest, MakeExtraStringIndexedPropertiesUnindexed) {
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ SchemaTypeConfigProto type =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(indexed_string_property_builder.SetName("prop1"))
+ .AddProperty(indexed_string_property_builder.SetName("prop2"))
+ .AddProperty(indexed_string_property_builder.SetName("prop3"))
+ .AddProperty(indexed_string_property_builder.SetName("prop4"))
+ .AddProperty(indexed_string_property_builder.SetName("prop5"))
+ .AddProperty(indexed_string_property_builder.SetName("prop6"))
+ .AddProperty(indexed_string_property_builder.SetName("prop7"))
+ .AddProperty(indexed_string_property_builder.SetName("prop8"))
+ .AddProperty(indexed_string_property_builder.SetName("prop9"))
+ .AddProperty(indexed_string_property_builder.SetName("prop10"))
+ .AddProperty(indexed_string_property_builder.SetName("prop11"))
+ .AddProperty(indexed_string_property_builder.SetName("prop12"))
+ .AddProperty(indexed_string_property_builder.SetName("prop13"))
+ .AddProperty(indexed_string_property_builder.SetName("prop14"))
+ .AddProperty(indexed_string_property_builder.SetName("prop15"))
+ .AddProperty(indexed_string_property_builder.SetName("prop16"))
+ .AddProperty(indexed_string_property_builder.SetName("prop17"))
+ .AddProperty(indexed_string_property_builder.SetName("prop18"))
+ .AddProperty(indexed_string_property_builder.SetName("prop19"))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type).Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true));
+ SchemaProto backup = std::move(backup_producer).Produce();
+
+ PropertyConfigBuilder unindexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING);
+ SchemaTypeConfigProto expected_type =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(indexed_string_property_builder.SetName("prop1"))
+ .AddProperty(indexed_string_property_builder.SetName("prop2"))
+ .AddProperty(indexed_string_property_builder.SetName("prop3"))
+ .AddProperty(indexed_string_property_builder.SetName("prop4"))
+ .AddProperty(indexed_string_property_builder.SetName("prop5"))
+ .AddProperty(indexed_string_property_builder.SetName("prop6"))
+ .AddProperty(indexed_string_property_builder.SetName("prop7"))
+ .AddProperty(indexed_string_property_builder.SetName("prop8"))
+ .AddProperty(indexed_string_property_builder.SetName("prop9"))
+ .AddProperty(indexed_string_property_builder.SetName("prop10"))
+ .AddProperty(indexed_string_property_builder.SetName("prop11"))
+ .AddProperty(indexed_string_property_builder.SetName("prop12"))
+ .AddProperty(indexed_string_property_builder.SetName("prop13"))
+ .AddProperty(indexed_string_property_builder.SetName("prop14"))
+ .AddProperty(indexed_string_property_builder.SetName("prop15"))
+ .AddProperty(unindexed_string_property_builder.SetName("prop16"))
+ .AddProperty(unindexed_string_property_builder.SetName("prop17"))
+ .AddProperty(unindexed_string_property_builder.SetName("prop18"))
+ .AddProperty(unindexed_string_property_builder.SetName("prop19"))
+ .Build();
+ SchemaProto expected_backup = SchemaBuilder().AddType(expected_type).Build();
+ EXPECT_THAT(backup, portable_equals_proto::EqualsProto(expected_backup));
+}
+
+TEST_F(BackupSchemaProducerTest, MakeExtraIntIndexedPropertiesUnindexed) {
+ PropertyConfigBuilder indexed_int_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE);
+ SchemaTypeConfigProto type =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_int_property_builder.SetName("prop0"))
+ .AddProperty(indexed_int_property_builder.SetName("prop1"))
+ .AddProperty(indexed_int_property_builder.SetName("prop2"))
+ .AddProperty(indexed_int_property_builder.SetName("prop3"))
+ .AddProperty(indexed_int_property_builder.SetName("prop4"))
+ .AddProperty(indexed_int_property_builder.SetName("prop5"))
+ .AddProperty(indexed_int_property_builder.SetName("prop6"))
+ .AddProperty(indexed_int_property_builder.SetName("prop7"))
+ .AddProperty(indexed_int_property_builder.SetName("prop8"))
+ .AddProperty(indexed_int_property_builder.SetName("prop9"))
+ .AddProperty(indexed_int_property_builder.SetName("prop10"))
+ .AddProperty(indexed_int_property_builder.SetName("prop11"))
+ .AddProperty(indexed_int_property_builder.SetName("prop12"))
+ .AddProperty(indexed_int_property_builder.SetName("prop13"))
+ .AddProperty(indexed_int_property_builder.SetName("prop14"))
+ .AddProperty(indexed_int_property_builder.SetName("prop15"))
+ .AddProperty(indexed_int_property_builder.SetName("prop16"))
+ .AddProperty(indexed_int_property_builder.SetName("prop17"))
+ .AddProperty(indexed_int_property_builder.SetName("prop18"))
+ .AddProperty(indexed_int_property_builder.SetName("prop19"))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type).Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true));
+ SchemaProto backup = std::move(backup_producer).Produce();
+
+ PropertyConfigBuilder unindexed_int_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_INT64);
+ SchemaTypeConfigProto expected_type =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_int_property_builder.SetName("prop0"))
+ .AddProperty(indexed_int_property_builder.SetName("prop1"))
+ .AddProperty(indexed_int_property_builder.SetName("prop2"))
+ .AddProperty(indexed_int_property_builder.SetName("prop3"))
+ .AddProperty(indexed_int_property_builder.SetName("prop4"))
+ .AddProperty(indexed_int_property_builder.SetName("prop5"))
+ .AddProperty(indexed_int_property_builder.SetName("prop6"))
+ .AddProperty(indexed_int_property_builder.SetName("prop7"))
+ .AddProperty(indexed_int_property_builder.SetName("prop8"))
+ .AddProperty(indexed_int_property_builder.SetName("prop9"))
+ .AddProperty(indexed_int_property_builder.SetName("prop10"))
+ .AddProperty(indexed_int_property_builder.SetName("prop11"))
+ .AddProperty(indexed_int_property_builder.SetName("prop12"))
+ .AddProperty(indexed_int_property_builder.SetName("prop13"))
+ .AddProperty(indexed_int_property_builder.SetName("prop14"))
+ .AddProperty(indexed_int_property_builder.SetName("prop15"))
+ .AddProperty(unindexed_int_property_builder.SetName("prop16"))
+ .AddProperty(unindexed_int_property_builder.SetName("prop17"))
+ .AddProperty(unindexed_int_property_builder.SetName("prop18"))
+ .AddProperty(unindexed_int_property_builder.SetName("prop19"))
+ .Build();
+ SchemaProto expected_backup = SchemaBuilder().AddType(expected_type).Build();
+ EXPECT_THAT(backup, portable_equals_proto::EqualsProto(expected_backup));
+}
+
+TEST_F(BackupSchemaProducerTest, MakeExtraDocumentIndexedPropertiesUnindexed) {
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ SchemaTypeConfigProto typeB =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeB")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(indexed_string_property_builder.SetName("prop1"))
+ .AddProperty(indexed_string_property_builder.SetName("prop2"))
+ .AddProperty(indexed_string_property_builder.SetName("prop3"))
+ .AddProperty(indexed_string_property_builder.SetName("prop4"))
+ .AddProperty(indexed_string_property_builder.SetName("prop5"))
+ .AddProperty(indexed_string_property_builder.SetName("prop6"))
+ .AddProperty(indexed_string_property_builder.SetName("prop7"))
+ .AddProperty(indexed_string_property_builder.SetName("prop8"))
+ .AddProperty(indexed_string_property_builder.SetName("prop9"))
+ .Build();
+
+ PropertyConfigBuilder indexed_document_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("TypeB", /*index_nested_properties=*/true);
+ SchemaTypeConfigProto typeA =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_document_property_builder.SetName("propA"))
+ .AddProperty(indexed_document_property_builder.SetName("propB"))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(typeA).AddType(typeB).Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+ ASSERT_THAT(type_id_mapper->Put("TypeB", 1), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true));
+ SchemaProto backup = std::move(backup_producer).Produce();
+
+ PropertyConfigProto unindexed_document_property =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_DOCUMENT)
+ .Build();
+ unindexed_document_property.set_schema_type("TypeB");
+ PropertyConfigBuilder unindexed_document_property_builder(
+ unindexed_document_property);
+ SchemaTypeConfigProto expected_typeA =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_document_property_builder.SetName("propA"))
+ .AddProperty(unindexed_document_property_builder.SetName("propB"))
+ .Build();
+ SchemaProto expected_backup =
+ SchemaBuilder().AddType(expected_typeA).AddType(typeB).Build();
+ EXPECT_THAT(backup, portable_equals_proto::EqualsProto(expected_backup));
+}
+
+TEST_F(
+ BackupSchemaProducerTest,
+ MakeExtraDocumentIndexedPropertiesWithIndexableNestedPropertiesListUnindexed) {
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ PropertyConfigBuilder indexed_int_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE);
+ SchemaTypeConfigProto typeB =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeB")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(indexed_int_property_builder.SetName("prop1"))
+ .AddProperty(indexed_string_property_builder.SetName("prop2"))
+ .AddProperty(indexed_int_property_builder.SetName("prop3"))
+ .AddProperty(indexed_string_property_builder.SetName("prop4"))
+ .AddProperty(indexed_int_property_builder.SetName("prop5"))
+ .AddProperty(indexed_string_property_builder.SetName("prop6"))
+ .AddProperty(indexed_int_property_builder.SetName("prop7"))
+ .AddProperty(indexed_string_property_builder.SetName("prop8"))
+ .AddProperty(indexed_int_property_builder.SetName("prop9"))
+ .Build();
+
+ // Create indexed document property by using indexable nested properties list.
+ PropertyConfigBuilder indexed_document_property_with_list_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument(
+ "TypeB", /*indexable_nested_properties_list=*/{
+ "prop0", "prop1", "prop2", "prop3", "prop4", "prop5",
+ "unknown1", "unknown2", "unknown3"});
+ SchemaTypeConfigProto typeA =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(
+ indexed_document_property_with_list_builder.SetName("propA"))
+ .AddProperty(
+ indexed_document_property_with_list_builder.SetName("propB"))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(typeA).AddType(typeB).Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+ ASSERT_THAT(type_id_mapper->Put("TypeB", 1), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+ ASSERT_THAT(schema_type_manager->section_manager().GetMetadataList("TypeA"),
+ IsOkAndHolds(Pointee(SizeIs(18))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true));
+ SchemaProto backup = std::move(backup_producer).Produce();
+
+ PropertyConfigProto unindexed_document_property =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_DOCUMENT)
+ .Build();
+ unindexed_document_property.set_schema_type("TypeB");
+ PropertyConfigBuilder unindexed_document_property_builder(
+ unindexed_document_property);
+
+ // "propA" and "propB" both have 9 sections respectively, so we have to drop
+ // "propB" indexing config to make total # of sections <= 16.
+ SchemaTypeConfigProto expected_typeA =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(
+ indexed_document_property_with_list_builder.SetName("propA"))
+ .AddProperty(unindexed_document_property_builder.SetName("propB"))
+ .Build();
+ SchemaProto expected_backup =
+ SchemaBuilder().AddType(expected_typeA).AddType(typeB).Build();
+ EXPECT_THAT(backup, portable_equals_proto::EqualsProto(expected_backup));
+}
+
+TEST_F(BackupSchemaProducerTest, MakeRfcPropertiesUnindexedFirst) {
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ SchemaTypeConfigProto typeA =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(indexed_string_property_builder.SetName("prop1"))
+ .AddProperty(indexed_string_property_builder.SetName("prop2"))
+ .AddProperty(indexed_string_property_builder.SetName("prop3"))
+ .AddProperty(indexed_string_property_builder.SetName("prop4"))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .AddProperty(indexed_string_property_builder.SetName("prop6"))
+ .AddProperty(indexed_string_property_builder.SetName("prop7"))
+ .AddProperty(indexed_string_property_builder.SetName("prop8"))
+ .AddProperty(indexed_string_property_builder.SetName("prop9"))
+ .AddProperty(indexed_string_property_builder.SetName("prop10"))
+ .AddProperty(indexed_string_property_builder.SetName("prop11"))
+ .AddProperty(indexed_string_property_builder.SetName("prop12"))
+ .AddProperty(indexed_string_property_builder.SetName("prop13"))
+ .AddProperty(indexed_string_property_builder.SetName("prop14"))
+ .AddProperty(indexed_string_property_builder.SetName("prop15"))
+ .AddProperty(indexed_string_property_builder.SetName("prop16"))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(typeA).Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true));
+ SchemaProto backup = std::move(backup_producer).Produce();
+
+ SchemaTypeConfigProto expected_typeA =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(indexed_string_property_builder.SetName("prop1"))
+ .AddProperty(indexed_string_property_builder.SetName("prop2"))
+ .AddProperty(indexed_string_property_builder.SetName("prop3"))
+ .AddProperty(indexed_string_property_builder.SetName("prop4"))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING))
+ .AddProperty(indexed_string_property_builder.SetName("prop6"))
+ .AddProperty(indexed_string_property_builder.SetName("prop7"))
+ .AddProperty(indexed_string_property_builder.SetName("prop8"))
+ .AddProperty(indexed_string_property_builder.SetName("prop9"))
+ .AddProperty(indexed_string_property_builder.SetName("prop10"))
+ .AddProperty(indexed_string_property_builder.SetName("prop11"))
+ .AddProperty(indexed_string_property_builder.SetName("prop12"))
+ .AddProperty(indexed_string_property_builder.SetName("prop13"))
+ .AddProperty(indexed_string_property_builder.SetName("prop14"))
+ .AddProperty(indexed_string_property_builder.SetName("prop15"))
+ .AddProperty(indexed_string_property_builder.SetName("prop16"))
+ .Build();
+ SchemaProto expected_backup = SchemaBuilder().AddType(expected_typeA).Build();
+ EXPECT_THAT(backup, portable_equals_proto::EqualsProto(expected_backup));
+}
+
+TEST_F(BackupSchemaProducerTest, MakeExtraPropertiesUnindexedMultipleTypes) {
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ PropertyConfigBuilder indexed_int_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE);
+ SchemaTypeConfigProto typeB =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeB")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(indexed_int_property_builder.SetName("prop1"))
+ .AddProperty(indexed_string_property_builder.SetName("prop2"))
+ .AddProperty(indexed_int_property_builder.SetName("prop3"))
+ .AddProperty(indexed_string_property_builder.SetName("prop4"))
+ .Build();
+
+ PropertyConfigBuilder indexed_document_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("TypeB", /*index_nested_properties=*/true);
+ PropertyConfigBuilder indexed_document_property_with_list_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument(
+ "TypeB", /*indexable_nested_properties_list=*/{
+ "prop0", "prop4", "unknown1", "unknown2", "unknown3"});
+ SchemaTypeConfigProto typeA =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_string_property_builder.SetName("propA"))
+ .AddProperty(
+ indexed_document_property_with_list_builder.SetName("propB"))
+ .AddProperty(indexed_string_property_builder.SetName("propC"))
+ .AddProperty(indexed_document_property_builder.SetName("propD"))
+ .AddProperty(indexed_string_property_builder.SetName("propE"))
+ .AddProperty(indexed_int_property_builder.SetName("propF"))
+ .AddProperty(indexed_document_property_builder.SetName("propG"))
+ .AddProperty(indexed_string_property_builder.SetName("propH"))
+ .AddProperty(indexed_int_property_builder.SetName("propI"))
+ .AddProperty(
+ indexed_document_property_with_list_builder.SetName("propJ"))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(typeA).AddType(typeB).Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+ ASSERT_THAT(type_id_mapper->Put("TypeB", 1), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+ ASSERT_THAT(schema_type_manager->section_manager().GetMetadataList("TypeA"),
+ IsOkAndHolds(Pointee(SizeIs(26))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true));
+ SchemaProto backup = std::move(backup_producer).Produce();
+
+ PropertyConfigBuilder unindexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING);
+ PropertyConfigBuilder unindexed_int_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_INT64);
+ PropertyConfigProto unindexed_document_property =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_DOCUMENT)
+ .Build();
+ unindexed_document_property.set_schema_type("TypeB");
+ PropertyConfigBuilder unindexed_document_property_builder(
+ unindexed_document_property);
+
+ // On version 0 (Android T):
+ // - Only "propA", "propC", "propD.prop0", "propD.prop1", "propD.prop2",
+ // "propD.prop3", "propD.prop4", "propE", "propF" will be assigned sections.
+ // - Unlike version 2, "propB.prop0", "propB.prop4", "propB.unknown1",
+ // "propB.unknown2", "propB.unknown3" will be ignored because version 0
+ // doesn't recognize indexable nested properties list.
+ // - So there will be only 9 sections on version 0. We still have potential to
+ // avoid dropping "propG", "propH", "propI" indexing configs on version 0
+ // (in this case it will be 16 sections), but it is ok to make it simple as
+ // long as total # of sections <= 16.
+ SchemaTypeConfigProto expected_typeA =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_string_property_builder.SetName("propA"))
+ .AddProperty(
+ indexed_document_property_with_list_builder.SetName("propB"))
+ .AddProperty(indexed_string_property_builder.SetName("propC"))
+ .AddProperty(indexed_document_property_builder.SetName("propD"))
+ .AddProperty(indexed_string_property_builder.SetName("propE"))
+ .AddProperty(indexed_int_property_builder.SetName("propF"))
+ .AddProperty(unindexed_document_property_builder.SetName("propG"))
+ .AddProperty(unindexed_string_property_builder.SetName("propH"))
+ .AddProperty(unindexed_int_property_builder.SetName("propI"))
+ .AddProperty(unindexed_document_property_builder.SetName("propJ"))
+ .Build();
+ SchemaProto expected_backup =
+ SchemaBuilder().AddType(expected_typeA).AddType(typeB).Build();
+ EXPECT_THAT(backup, portable_equals_proto::EqualsProto(expected_backup));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/joinable-property-manager-builder_test.cc b/icing/schema/joinable-property-manager-builder_test.cc
new file mode 100644
index 0000000..ac48faa
--- /dev/null
+++ b/icing/schema/joinable-property-manager-builder_test.cc
@@ -0,0 +1,446 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/joinable-property-manager.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
+#include "icing/store/key-mapper.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::Pointee;
+
+class JoinablePropertyManagerBuilderTest : public ::testing::Test {
+ protected:
+ void SetUp() override { test_dir_ = GetTestTempDir() + "/icing"; }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ std::string test_dir_;
+};
+
+TEST_F(JoinablePropertyManagerBuilderTest, Build) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("SchemaTypeOne", 0));
+ ICING_ASSERT_OK(schema_type_mapper->Put("SchemaTypeTwo", 1));
+
+ PropertyConfigProto prop_foo =
+ PropertyConfigBuilder()
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+ PropertyConfigProto prop_bar =
+ PropertyConfigBuilder()
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+ PropertyConfigProto prop_baz =
+ PropertyConfigBuilder()
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+
+ JoinablePropertyManager::Builder builder(*schema_type_mapper);
+ // Add "foo" and "bar" to "SchemaTypeOne" (schema_type_id = 0).
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, prop_foo, /*property_path=*/"foo"));
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, prop_bar, /*property_path=*/"bar"));
+ // Add "baz" to "SchemaTypeTwo" (schema_type_id = 1).
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/1, prop_baz, /*property_path=*/"baz"));
+
+ std::unique_ptr<JoinablePropertyManager> joinable_property_manager =
+ std::move(builder).Build();
+ // Check "SchemaTypeOne"
+ EXPECT_THAT(
+ joinable_property_manager->GetMetadataList("SchemaTypeOne"),
+ IsOkAndHolds(Pointee(ElementsAre(
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0, /*expected_property_path=*/"foo", prop_foo),
+ EqualsJoinablePropertyMetadata(/*expected_id=*/1,
+ /*expected_property_path=*/"bar",
+ prop_bar)))));
+ // Check "SchemaTypeTwo"
+ EXPECT_THAT(
+ joinable_property_manager->GetMetadataList("SchemaTypeTwo"),
+ IsOkAndHolds(Pointee(ElementsAre(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0, /*expected_property_path=*/"baz", prop_baz)))));
+}
+
+TEST_F(JoinablePropertyManagerBuilderTest, TooManyPropertiesShouldFail) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("SchemaType", 0));
+
+ JoinablePropertyManager::Builder builder(*schema_type_mapper);
+ // Add kTotalNumJoinableProperties joinable properties
+ for (int i = 0; i < kTotalNumJoinableProperties; i++) {
+ PropertyConfigProto property_config =
+ PropertyConfigBuilder()
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config,
+ /*property_path=*/"property" + std::to_string(i)));
+ }
+
+ // Add another joinable property. This should fail.
+ PropertyConfigProto property_config =
+ PropertyConfigBuilder()
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+ EXPECT_THAT(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config,
+ /*property_path=*/"propertyExceed"),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE,
+ HasSubstr("Too many properties")));
+}
+
+TEST_F(JoinablePropertyManagerBuilderTest, InvalidSchemaTypeIdShouldFail) {
+ // Create a schema type mapper with invalid schema type id.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("SchemaType", 0));
+
+ PropertyConfigProto property_config =
+ PropertyConfigBuilder()
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+
+ JoinablePropertyManager::Builder builder(*schema_type_mapper);
+ EXPECT_THAT(
+ builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/-1, property_config, /*property_path=*/"property"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(JoinablePropertyManagerBuilderTest,
+ SchemaTypeIdInconsistentWithSchemaTypeMapperSizeShouldFail) {
+ // Create a schema type mapper with schema type id = 2, but size of mapper is
+ // 2.
+ // Since JoinablePropertyManagerBuilder expects 2 schema type ids = [0, 1],
+ // building with schema type id = 2 should fail even though id = 2 is in
+ // schema type mapper.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("SchemaTypeOne", 0));
+ ICING_ASSERT_OK(schema_type_mapper->Put("SchemaTypeTwo", 2));
+
+ PropertyConfigProto property_config =
+ PropertyConfigBuilder()
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+
+ JoinablePropertyManager::Builder builder(*schema_type_mapper);
+ EXPECT_THAT(
+ builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/2, property_config, /*property_path=*/"property"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(JoinablePropertyManagerBuilderTest,
+ NonStringPropertiesWithQualifiedIdJoinableConfigShouldNotProcess) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("SchemaTypeOne", 0));
+ ICING_ASSERT_OK(schema_type_mapper->Put("SchemaTypeTwo", 1));
+
+ // Create non-string properties with QUALIFIED_ID joinable value type.
+ std::vector<PropertyConfigProto> properties = {
+ PropertyConfigBuilder()
+ .SetName("int1")
+ .SetDataType(TYPE_INT64)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("int2")
+ .SetDataType(TYPE_INT64)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("double1")
+ .SetDataType(TYPE_DOUBLE)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("double2")
+ .SetDataType(TYPE_DOUBLE)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("boolean1")
+ .SetDataType(TYPE_BOOLEAN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("boolean2")
+ .SetDataType(TYPE_BOOLEAN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("bytes1")
+ .SetDataType(TYPE_BYTES)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("bytes2")
+ .SetDataType(TYPE_BYTES)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("document1")
+ .SetDataTypeDocument(/*schema_type=*/"SchemaTypeTwo",
+ /*index_nested_properties=*/true)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("document2")
+ .SetDataTypeDocument(/*schema_type=*/"SchemaTypeTwo",
+ /*index_nested_properties=*/true)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build()};
+
+ JoinablePropertyManager::Builder builder(*schema_type_mapper);
+ for (const PropertyConfigProto& property_config : properties) {
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config,
+ std::string(property_config.property_name())));
+ }
+
+ std::unique_ptr<JoinablePropertyManager> joinable_property_manager =
+ std::move(builder).Build();
+ EXPECT_THAT(joinable_property_manager->GetMetadataList("SchemaTypeOne"),
+ IsOkAndHolds(Pointee(IsEmpty())));
+}
+
+class JoinablePropertyManagerBuilderWithJoinablePropertyTest
+ : public JoinablePropertyManagerBuilderTest,
+ public ::testing::WithParamInterface<PropertyConfigProto> {};
+
+TEST_P(JoinablePropertyManagerBuilderWithJoinablePropertyTest, Build) {
+ static constexpr std::string_view kSchemaType = "type";
+ static constexpr std::string_view kPropertyPath = "foo.bar";
+ const PropertyConfigProto& property_config = GetParam();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put(kSchemaType, 0));
+
+ JoinablePropertyManager::Builder builder(*schema_type_mapper);
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config, std::string(kPropertyPath)));
+
+ std::unique_ptr<JoinablePropertyManager> joinable_property_manager =
+ std::move(builder).Build();
+ EXPECT_THAT(
+ joinable_property_manager->GetMetadataList(std::string(kSchemaType)),
+ IsOkAndHolds(Pointee(ElementsAre(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0, kPropertyPath, property_config)))));
+}
+
+// The following type is considered joinable:
+// - String with QUALIFIED_ID joinable value type
+INSTANTIATE_TEST_SUITE_P(
+ JoinablePropertyManagerBuilderWithJoinablePropertyTest,
+ JoinablePropertyManagerBuilderWithJoinablePropertyTest,
+ testing::Values(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ // Indexable string can be configured joinable as well. For
+ // convenience, just test one indexable string config.
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build()));
+
+class JoinablePropertyManagerBuilderWithNonJoinablePropertyTest
+ : public JoinablePropertyManagerBuilderTest,
+ public ::testing::WithParamInterface<PropertyConfigProto> {};
+
+TEST_P(JoinablePropertyManagerBuilderWithNonJoinablePropertyTest, Build) {
+ static constexpr std::string_view kSchemaType = "type";
+ static constexpr std::string_view kPropertyPath = "foo.bar";
+ const PropertyConfigProto& property_config = GetParam();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put(kSchemaType, 0));
+
+ JoinablePropertyManager::Builder builder(*schema_type_mapper);
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config, std::string(kPropertyPath)));
+
+ std::unique_ptr<JoinablePropertyManager> joinable_property_manager =
+ std::move(builder).Build();
+ EXPECT_THAT(
+ joinable_property_manager->GetMetadataList(std::string(kSchemaType)),
+ IsOkAndHolds(Pointee(IsEmpty())));
+}
+
+// All types without JoinableConfig (i.e. joinable value type = NONE by default)
+// are considered non-joinable. Other mismatching types (e.g. non-string
+// properties with QUALIFIED_ID joinable value type) were tested individually
+// above.
+INSTANTIATE_TEST_SUITE_P(
+ JoinablePropertyManagerBuilderWithNonJoinablePropertyTest,
+ JoinablePropertyManagerBuilderWithNonJoinablePropertyTest,
+ testing::Values(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ // Indexable but non-joinable string
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_DOUBLE)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_BOOLEAN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_BYTES)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeDocument("anotherSchema",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeDocument("anotherSchema",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build()));
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/joinable-property-manager.cc b/icing/schema/joinable-property-manager.cc
new file mode 100644
index 0000000..1606abb
--- /dev/null
+++ b/icing/schema/joinable-property-manager.cc
@@ -0,0 +1,203 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/joinable-property-manager.h"
+
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/schema/property-util.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Helper function to append a new joinable property metadata
+libtextclassifier3::Status AppendNewJoinablePropertyMetadata(
+ JoinablePropertyManager::JoinablePropertyMetadataListWrapper*
+ metadata_list_wrapper,
+ std::string&& concatenated_path,
+ PropertyConfigProto::DataType::Code data_type,
+ JoinableConfig::ValueType::Code value_type) {
+ // Validates next joinable property id, makes sure that joinable property id
+ // is the same as the list index so that we could find any joinable property
+ // metadata by id in O(1) later.
+ JoinablePropertyId new_id = static_cast<JoinablePropertyId>(
+ metadata_list_wrapper->metadata_list.size());
+ if (!IsJoinablePropertyIdValid(new_id)) {
+ // Max number of joinable properties reached
+ return absl_ports::OutOfRangeError(
+ IcingStringUtil::StringPrintf("Too many properties to be joinable, max "
+ "number of properties allowed: %d",
+ kTotalNumJoinableProperties));
+ }
+
+ // Creates joinable property metadata
+ metadata_list_wrapper->metadata_list.push_back(JoinablePropertyMetadata(
+ new_id, data_type, value_type, std::move(concatenated_path)));
+ metadata_list_wrapper->property_path_to_id_map.insert(
+ {metadata_list_wrapper->metadata_list.back().path, new_id});
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename T>
+void AppendJoinablePropertyContent(
+ JoinablePropertyMetadata joinable_property_metadata,
+ libtextclassifier3::StatusOr<std::vector<T>>&& joinable_property_content_or,
+ std::vector<JoinableProperty<T>>& joinable_property_out) {
+ if (!joinable_property_content_or.ok()) {
+ return;
+ }
+
+ std::vector<T> joinable_property_content =
+ std::move(joinable_property_content_or).ValueOrDie();
+ if (!joinable_property_content.empty()) {
+ // Adds to result vector if joinable property is found in document
+ joinable_property_out.emplace_back(std::move(joinable_property_metadata),
+ std::move(joinable_property_content));
+ }
+}
+
+} // namespace
+
+libtextclassifier3::Status
+JoinablePropertyManager::Builder::ProcessSchemaTypePropertyConfig(
+ SchemaTypeId schema_type_id, const PropertyConfigProto& property_config,
+ std::string&& property_path) {
+ if (schema_type_id < 0 ||
+ schema_type_id >=
+ static_cast<int64_t>(joinable_property_metadata_cache_.size())) {
+ return absl_ports::InvalidArgumentError("Invalid schema type id");
+ }
+
+ switch (property_config.data_type()) {
+ case PropertyConfigProto::DataType::STRING: {
+ if (property_config.joinable_config().value_type() ==
+ JoinableConfig::ValueType::QUALIFIED_ID) {
+ ICING_RETURN_IF_ERROR(AppendNewJoinablePropertyMetadata(
+ &joinable_property_metadata_cache_[schema_type_id],
+ std::move(property_path), PropertyConfigProto::DataType::STRING,
+ JoinableConfig::ValueType::QUALIFIED_ID));
+ }
+ break;
+ }
+ default: {
+ // Skip other data types.
+ break;
+ }
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<JoinablePropertyGroup>
+JoinablePropertyManager::ExtractJoinableProperties(
+ const DocumentProto& document) const {
+ ICING_ASSIGN_OR_RETURN(
+ const std::vector<JoinablePropertyMetadata>* metadata_list,
+ GetMetadataList(document.schema()));
+ JoinablePropertyGroup joinable_property_group;
+ for (const JoinablePropertyMetadata& joinable_property_metadata :
+ *metadata_list) {
+ switch (joinable_property_metadata.data_type) {
+ case PropertyConfigProto::DataType::STRING: {
+ if (joinable_property_metadata.value_type ==
+ JoinableConfig::ValueType::QUALIFIED_ID) {
+ AppendJoinablePropertyContent(
+ joinable_property_metadata,
+ property_util::ExtractPropertyValuesFromDocument<
+ std::string_view>(document, joinable_property_metadata.path),
+ joinable_property_group.qualified_id_properties);
+ }
+ break;
+ }
+ default: {
+ // Skip other data types.
+ break;
+ }
+ }
+ }
+ return joinable_property_group;
+}
+
+libtextclassifier3::StatusOr<const JoinablePropertyMetadata*>
+JoinablePropertyManager::GetJoinablePropertyMetadata(
+ SchemaTypeId schema_type_id, const std::string& property_path) const {
+ if (schema_type_id < 0 ||
+ schema_type_id >=
+ static_cast<int64_t>(joinable_property_metadata_cache_.size())) {
+ return absl_ports::InvalidArgumentError("Invalid schema type id");
+ }
+
+ const auto iter = joinable_property_metadata_cache_[schema_type_id]
+ .property_path_to_id_map.find(property_path);
+ if (iter == joinable_property_metadata_cache_[schema_type_id]
+ .property_path_to_id_map.end()) {
+ return nullptr;
+ }
+
+ JoinablePropertyId joinable_property_id = iter->second;
+ return &joinable_property_metadata_cache_[schema_type_id]
+ .metadata_list[joinable_property_id];
+}
+
+libtextclassifier3::StatusOr<const JoinablePropertyMetadata*>
+JoinablePropertyManager::GetJoinablePropertyMetadata(
+ SchemaTypeId schema_type_id,
+ JoinablePropertyId joinable_property_id) const {
+ if (schema_type_id < 0 ||
+ schema_type_id >=
+ static_cast<int64_t>(joinable_property_metadata_cache_.size())) {
+ return absl_ports::InvalidArgumentError("Invalid schema type id");
+ }
+ if (!IsJoinablePropertyIdValid(joinable_property_id)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Invalid joinable property id %d", joinable_property_id));
+ }
+
+ const std::vector<JoinablePropertyMetadata>& metadata_list =
+ joinable_property_metadata_cache_[schema_type_id].metadata_list;
+ if (joinable_property_id >= metadata_list.size()) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Joinable property with id %d doesn't exist in type config id %d",
+ joinable_property_id, schema_type_id));
+ }
+
+ // The index of metadata list is the same as the joinable property id, so we
+ // can use joinable property id as the index.
+ return &metadata_list[joinable_property_id];
+}
+
+libtextclassifier3::StatusOr<const std::vector<JoinablePropertyMetadata>*>
+JoinablePropertyManager::GetMetadataList(
+ const std::string& type_config_name) const {
+ ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
+ schema_type_mapper_.Get(type_config_name));
+ return &joinable_property_metadata_cache_.at(schema_type_id).metadata_list;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/joinable-property-manager.h b/icing/schema/joinable-property-manager.h
new file mode 100644
index 0000000..3ee5963
--- /dev/null
+++ b/icing/schema/joinable-property-manager.h
@@ -0,0 +1,160 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCHEMA_JOINABLE_PROPERTY_MANAGER_H_
+#define ICING_SCHEMA_JOINABLE_PROPERTY_MANAGER_H_
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/proto/document.pb.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/key-mapper.h"
+
+namespace icing {
+namespace lib {
+
+// This class provides joinable-property-related operations. It assigns joinable
+// properties according to JoinableConfig and extracts joinable property values
+// from documents.
+class JoinablePropertyManager {
+ public:
+ // A wrapper class that contains a vector of metadatas and property path to
+ // JoinablePropertyId reverse lookup map.
+ struct JoinablePropertyMetadataListWrapper {
+ std::vector<JoinablePropertyMetadata> metadata_list;
+ std::unordered_map<std::string, JoinablePropertyId> property_path_to_id_map;
+ };
+
+ // Builder class to create a JoinablePropertyManager which does not take
+ // ownership of any input components, and all pointers must refer to valid
+ // objects that outlive the created JoinablePropertyManager instance.
+ class Builder {
+ public:
+ explicit Builder(const KeyMapper<SchemaTypeId>& schema_type_mapper)
+ : schema_type_mapper_(schema_type_mapper),
+ joinable_property_metadata_cache_(schema_type_mapper.num_keys()) {}
+
+ // Checks and appends a new JoinablePropertyMetadata for the schema type id
+ // if the given property config is joinable.
+ //
+ // Returns:
+ // - OK on success
+ // - INVALID_ARGUMENT_ERROR if schema type id is invalid (not in range [0,
+ // schema_type_mapper_.num_keys() - 1])
+ // - OUT_OF_RANGE_ERROR if # of joinable properties in a single Schema
+ // exceeds the threshold (kTotalNumJoinableProperties)
+ libtextclassifier3::Status ProcessSchemaTypePropertyConfig(
+ SchemaTypeId schema_type_id, const PropertyConfigProto& property_config,
+ std::string&& property_path);
+
+ // Builds and returns a JoinablePropertyManager instance.
+ std::unique_ptr<JoinablePropertyManager> Build() && {
+ return std::unique_ptr<JoinablePropertyManager>(
+ new JoinablePropertyManager(
+ schema_type_mapper_,
+ std::move(joinable_property_metadata_cache_)));
+ }
+
+ private:
+ const KeyMapper<SchemaTypeId>& schema_type_mapper_; // Does not own.
+ std::vector<JoinablePropertyMetadataListWrapper>
+ joinable_property_metadata_cache_;
+ };
+
+ JoinablePropertyManager(const JoinablePropertyManager&) = delete;
+ JoinablePropertyManager& operator=(const JoinablePropertyManager&) = delete;
+
+ // Extracts all joinable property contents of different types from the given
+ // document and group them by joinable value type.
+ // - Joinable properties are sorted by joinable property id in ascending
+ // order.
+ // - Joinable property ids start from 0.
+ // - Joinable properties with empty content won't be returned.
+ //
+ // Returns:
+ // - A JoinablePropertyGroup instance on success
+ // - NOT_FOUND_ERROR if the type config name of document is not present in
+ // schema_type_mapper_
+ libtextclassifier3::StatusOr<JoinablePropertyGroup> ExtractJoinableProperties(
+ const DocumentProto& document) const;
+
+ // Returns the JoinablePropertyMetadata associated with property_path that's
+ // in the SchemaTypeId.
+ //
+ // Returns:
+ // - Valid pointer to JoinablePropertyMetadata on success
+ // - nullptr if property_path doesn't exist (or is not joinable) in the
+ // joinable metadata list of the schema
+ // - INVALID_ARGUMENT_ERROR if schema type id is invalid
+ libtextclassifier3::StatusOr<const JoinablePropertyMetadata*>
+ GetJoinablePropertyMetadata(SchemaTypeId schema_type_id,
+ const std::string& property_path) const;
+
+ // Returns the JoinablePropertyMetadata associated with the JoinablePropertyId
+ // that's in the SchemaTypeId.
+ //
+ // Returns:
+ // - Valid pointer to JoinablePropertyMetadata on success
+ // - INVALID_ARGUMENT_ERROR if schema type id or JoinablePropertyId is
+ // invalid
+ libtextclassifier3::StatusOr<const JoinablePropertyMetadata*>
+ GetJoinablePropertyMetadata(SchemaTypeId schema_type_id,
+ JoinablePropertyId joinable_property_id) const;
+
+ // Returns:
+ // - On success, the joinable property metadatas for the specified type
+ // - NOT_FOUND_ERROR if the type config name is not present in
+ // schema_type_mapper_
+ libtextclassifier3::StatusOr<const std::vector<JoinablePropertyMetadata>*>
+ GetMetadataList(const std::string& type_config_name) const;
+
+ private:
+ explicit JoinablePropertyManager(
+ const KeyMapper<SchemaTypeId>& schema_type_mapper,
+ std::vector<JoinablePropertyMetadataListWrapper>&&
+ joinable_property_metadata_cache)
+ : schema_type_mapper_(schema_type_mapper),
+ joinable_property_metadata_cache_(joinable_property_metadata_cache) {}
+
+ // Maps schema types to a densely-assigned unique id.
+ const KeyMapper<SchemaTypeId>& schema_type_mapper_; // Does not own
+
+ // The index of joinable_property_metadata_cache_ corresponds to a schema
+ // type's SchemaTypeId. At that SchemaTypeId index, we store a
+ // JoinablePropertyMetadataListWrapper instance. The metadata list's index
+ // corresponds to a joinable property's JoinablePropertyId. At the
+ // JoinablePropertyId index, we store the JoinablePropertyMetadata of that
+ // joinable property.
+ //
+ // For example, suppose "email" has a SchemaTypeId of 0 and it has a joinable
+ // property called "senderQualifiedId" with a JoinablePropertyId of 1. Then
+ // the "senderQualifiedId" property's JoinablePropertyMetadata will be at
+ // joinable_property_metadata_cache_[0].metadata_list[1], and
+ // joinable_property_metadata_cache_[0]
+ // .property_path_to_id_map["senderQualifiedId"]
+ // will be 1.
+ const std::vector<JoinablePropertyMetadataListWrapper>
+ joinable_property_metadata_cache_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCHEMA_JOINABLE_PROPERTY_MANAGER_H_
diff --git a/icing/schema/joinable-property-manager_test.cc b/icing/schema/joinable-property-manager_test.cc
new file mode 100644
index 0000000..ceaaa18
--- /dev/null
+++ b/icing/schema/joinable-property-manager_test.cc
@@ -0,0 +1,519 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/joinable-property-manager.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/schema/schema-type-manager.h"
+#include "icing/schema/schema-util.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
+#include "icing/store/key-mapper.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::IsNull;
+using ::testing::Pointee;
+using ::testing::SizeIs;
+
+// type and property names of Email
+static constexpr char kTypeEmail[] = "Email";
+// joinable
+static constexpr char kPropertyReceiverQualifiedId[] = "receiverQualifiedId";
+static constexpr char kPropertySenderQualifiedId[] = "senderQualifiedId";
+// non-joinable
+static constexpr char kPropertyAttachment[] = "attachment";
+static constexpr char kPropertySubject[] = "subject";
+static constexpr char kPropertyText[] = "text";
+static constexpr char kPropertyTimestamp[] = "timestamp";
+
+// type and property names of Conversation
+static constexpr char kTypeConversation[] = "Conversation";
+// joinable
+static constexpr char kPropertyEmails[] = "emails";
+static constexpr char kPropertyGroupQualifiedId[] = "groupQualifiedId";
+// non-joinable
+static constexpr char kPropertyName[] = "name";
+static constexpr char kPropertyNumber[] = "number";
+
+constexpr int64_t kDefaultTimestamp = 1663274901;
+
+PropertyConfigProto CreateSenderQualifiedIdPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertySenderQualifiedId)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+}
+
+PropertyConfigProto CreateReceiverQualifiedIdPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyReceiverQualifiedId)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+}
+
+PropertyConfigProto CreateGroupQualifiedIdPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyGroupQualifiedId)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+}
+
+SchemaTypeConfigProto CreateEmailTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType(kTypeEmail)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertySubject)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyText)
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyAttachment)
+ .SetDataType(TYPE_BYTES)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyTimestamp)
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(CreateSenderQualifiedIdPropertyConfig())
+ .AddProperty(CreateReceiverQualifiedIdPropertyConfig())
+ .Build();
+}
+
+SchemaTypeConfigProto CreateConversationTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType(kTypeConversation)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyNumber)
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(CreateGroupQualifiedIdPropertyConfig())
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyEmails)
+ .SetDataTypeDocument(kTypeEmail, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+}
+
+class JoinablePropertyManagerTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ test_dir_ = GetTestTempDir() + "/icing";
+
+ type_config_map_.emplace(kTypeEmail, CreateEmailTypeConfig());
+ type_config_map_.emplace(kTypeConversation, CreateConversationTypeConfig());
+
+ email_document_ =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema(kTypeEmail)
+ .AddStringProperty(kPropertySubject, "the subject")
+ .AddStringProperty(kPropertyText, "the text")
+ .AddStringProperty(kPropertySenderQualifiedId, "pkg$db/ns#Person1")
+ .AddStringProperty(kPropertyReceiverQualifiedId,
+ "pkg$db/ns#Person2")
+ .AddBytesProperty(kPropertyAttachment, "attachment")
+ .AddInt64Property(kPropertyTimestamp, kDefaultTimestamp)
+ .Build();
+
+ conversation_document_ =
+ DocumentBuilder()
+ .SetKey("icing", "conversation/1")
+ .SetSchema(kTypeConversation)
+ .AddStringProperty(kPropertyName, "the conversation")
+ .AddInt64Property(kPropertyNumber, 2)
+ .AddDocumentProperty(kPropertyEmails,
+ DocumentProto(email_document_))
+ .AddStringProperty(kPropertyGroupQualifiedId,
+ "pkg$db/ns#GroupQualifiedId1")
+ .Build();
+
+ // DynamicTrieKeyMapper uses 3 internal arrays for bookkeeping. Give each
+ // one 128KiB so the total DynamicTrieKeyMapper should get 384KiB
+ int key_mapper_size = 3 * 128 * 1024;
+ ICING_ASSERT_OK_AND_ASSIGN(schema_type_mapper_,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_, key_mapper_size));
+ ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeEmail, 0));
+ ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeConversation, 1));
+ }
+
+ void TearDown() override {
+ schema_type_mapper_.reset();
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ std::string test_dir_;
+ SchemaUtil::TypeConfigMap type_config_map_;
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper_;
+
+ DocumentProto email_document_;
+ DocumentProto conversation_document_;
+};
+
+TEST_F(JoinablePropertyManagerTest, ExtractJoinableProperties) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ // Extracts all joinable properties from 'Email' document
+ ICING_ASSERT_OK_AND_ASSIGN(JoinablePropertyGroup joinable_property_group,
+ schema_type_manager->joinable_property_manager()
+ .ExtractJoinableProperties(email_document_));
+
+ // Qualified Id joinable properties
+ EXPECT_THAT(joinable_property_group.qualified_id_properties, SizeIs(2));
+
+ EXPECT_THAT(
+ joinable_property_group.qualified_id_properties[0].metadata,
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0, /*expected_property_path=*/"receiverQualifiedId",
+ CreateReceiverQualifiedIdPropertyConfig()));
+ EXPECT_THAT(joinable_property_group.qualified_id_properties[0].values,
+ ElementsAre("pkg$db/ns#Person2"));
+
+ EXPECT_THAT(
+ joinable_property_group.qualified_id_properties[1].metadata,
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/1, /*expected_property_path=*/"senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()));
+ EXPECT_THAT(joinable_property_group.qualified_id_properties[1].values,
+ ElementsAre("pkg$db/ns#Person1"));
+}
+
+TEST_F(JoinablePropertyManagerTest, ExtractJoinablePropertiesNested) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ // Extracts all joinable properties from 'Conversation' document
+ ICING_ASSERT_OK_AND_ASSIGN(
+ JoinablePropertyGroup joinable_property_group,
+ schema_type_manager->joinable_property_manager()
+ .ExtractJoinableProperties(conversation_document_));
+
+ // Qualified Id joinable properties
+ EXPECT_THAT(joinable_property_group.qualified_id_properties, SizeIs(3));
+
+ EXPECT_THAT(joinable_property_group.qualified_id_properties[0].metadata,
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0,
+ /*expected_property_path=*/"emails.receiverQualifiedId",
+ CreateReceiverQualifiedIdPropertyConfig()));
+ EXPECT_THAT(joinable_property_group.qualified_id_properties[0].values,
+ ElementsAre("pkg$db/ns#Person2"));
+
+ EXPECT_THAT(joinable_property_group.qualified_id_properties[1].metadata,
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/1,
+ /*expected_property_path=*/"emails.senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()));
+ EXPECT_THAT(joinable_property_group.qualified_id_properties[1].values,
+ ElementsAre("pkg$db/ns#Person1"));
+
+ EXPECT_THAT(
+ joinable_property_group.qualified_id_properties[2].metadata,
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/2, /*expected_property_path=*/"groupQualifiedId",
+ CreateGroupQualifiedIdPropertyConfig()));
+ EXPECT_THAT(joinable_property_group.qualified_id_properties[2].values,
+ ElementsAre("pkg$db/ns#GroupQualifiedId1"));
+}
+
+TEST_F(JoinablePropertyManagerTest,
+ ExtractJoinablePropertiesShouldIgnoreEmptyContents) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ // Create an email document without receiverQualifiedId.
+ DocumentProto another_email_document =
+ DocumentBuilder()
+ .SetKey("icing", "email/2")
+ .SetSchema(kTypeEmail)
+ .AddStringProperty(kPropertySubject, "the subject")
+ .AddStringProperty(kPropertyText, "the text")
+ .AddBytesProperty(kPropertyAttachment, "attachment")
+ .AddStringProperty(kPropertySenderQualifiedId, "pkg$db/ns#Person1")
+ .AddInt64Property(kPropertyTimestamp, kDefaultTimestamp)
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ JoinablePropertyGroup joinable_property_group,
+ schema_type_manager->joinable_property_manager()
+ .ExtractJoinableProperties(another_email_document));
+
+ // ExtractJoinableProperties should ignore receiverQualifiedId and not append
+ // a JoinableProperty instance of it into the vector.
+ EXPECT_THAT(joinable_property_group.qualified_id_properties, SizeIs(1));
+ EXPECT_THAT(
+ joinable_property_group.qualified_id_properties[0].metadata,
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/1, /*expected_property_path=*/"senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()));
+ EXPECT_THAT(joinable_property_group.qualified_id_properties[0].values,
+ ElementsAre("pkg$db/ns#Person1"));
+}
+
+TEST_F(JoinablePropertyManagerTest, GetJoinablePropertyMetadata) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ // Email (joinable property id -> joinable property path):
+ // 0 -> receiverQualifiedId
+ // 1 -> senderQualifiedId
+ EXPECT_THAT(
+ schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/0,
+ /*joinable_property_id=*/0),
+ IsOkAndHolds(Pointee(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0, /*expected_property_path=*/"receiverQualifiedId",
+ CreateReceiverQualifiedIdPropertyConfig()))));
+ EXPECT_THAT(
+ schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/0,
+ /*joinable_property_id=*/1),
+ IsOkAndHolds(Pointee(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/1, /*expected_property_path=*/"senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()))));
+
+ // Conversation (joinable property id -> joinable property path):
+ // 0 -> emails.receiverQualifiedId
+ // 1 -> emails.senderQualifiedId
+ // 2 -> groupQualifiedId
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/1,
+ /*joinable_property_id=*/0),
+ IsOkAndHolds(Pointee(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0,
+ /*expected_property_path=*/"emails.receiverQualifiedId",
+ CreateReceiverQualifiedIdPropertyConfig()))));
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/1,
+ /*joinable_property_id=*/1),
+ IsOkAndHolds(Pointee(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/1,
+ /*expected_property_path=*/"emails.senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()))));
+ EXPECT_THAT(
+ schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/1,
+ /*joinable_property_id=*/2),
+ IsOkAndHolds(Pointee(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/2, /*expected_property_path=*/"groupQualifiedId",
+ CreateGroupQualifiedIdPropertyConfig()))));
+}
+
+TEST_F(JoinablePropertyManagerTest,
+ GetJoinablePropertyMetadataInvalidSchemaTypeId) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+ ASSERT_THAT(type_config_map_, SizeIs(2));
+
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/-1,
+ /*joinable_property_id=*/0),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/2,
+ /*joinable_property_id=*/0),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(JoinablePropertyManagerTest,
+ GetJoinablePropertyMetadataInvalidJoinablePropertyId) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ // Email (joinable property id -> joinable property path):
+ // 0 -> receiverQualifiedId
+ // 1 -> senderQualifiedId
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/0,
+ /*joinable_property_id=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/0,
+ /*joinable_property_id=*/2),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Conversation (joinable property id -> joinable property path):
+ // 0 -> emails.receiverQualifiedId
+ // 1 -> emails.senderQualifiedId
+ // 2 -> groupQualifiedId
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/1,
+ /*joinable_property_id=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/1,
+ /*joinable_property_id=*/3),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(JoinablePropertyManagerTest, GetJoinablePropertyMetadataByPath) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ // Email (joinable property id -> joinable property path):
+ // 0 -> receiverQualifiedId
+ // 1 -> senderQualifiedId
+ EXPECT_THAT(
+ schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/0,
+ "receiverQualifiedId"),
+ IsOkAndHolds(Pointee(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0, /*expected_property_path=*/"receiverQualifiedId",
+ CreateReceiverQualifiedIdPropertyConfig()))));
+ EXPECT_THAT(
+ schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/0,
+ "senderQualifiedId"),
+ IsOkAndHolds(Pointee(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/1, /*expected_property_path=*/"senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()))));
+
+ // Conversation (joinable property id -> joinable property path):
+ // 0 -> emails.receiverQualifiedId
+ // 1 -> emails.senderQualifiedId
+ // 2 -> groupQualifiedId
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/1,
+ "emails.receiverQualifiedId"),
+ IsOkAndHolds(Pointee(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0,
+ /*expected_property_path=*/"emails.receiverQualifiedId",
+ CreateReceiverQualifiedIdPropertyConfig()))));
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/1,
+ "emails.senderQualifiedId"),
+ IsOkAndHolds(Pointee(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/1,
+ /*expected_property_path=*/"emails.senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()))));
+ EXPECT_THAT(
+ schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/1,
+ "groupQualifiedId"),
+ IsOkAndHolds(Pointee(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/2, /*expected_property_path=*/"groupQualifiedId",
+ CreateGroupQualifiedIdPropertyConfig()))));
+}
+
+TEST_F(JoinablePropertyManagerTest,
+ GetJoinablePropertyMetadataByPathInvalidSchemaTypeId) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+ ASSERT_THAT(type_config_map_, SizeIs(2));
+
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/-1,
+ "receiverQualifiedId"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/2,
+ "receiverQualifiedId"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(JoinablePropertyManagerTest, GetJoinablePropertyMetadataByPathNotExist) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ EXPECT_THAT(
+ schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/0, "nonExistingPath"),
+ IsOkAndHolds(IsNull()));
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/1,
+ "emails.nonExistingPath"),
+ IsOkAndHolds(IsNull()));
+}
+
+// Note: valid GetMetadataList has been tested in
+// JoinablePropertyManagerBuildTest.
+TEST_F(JoinablePropertyManagerTest, GetMetadataListInvalidSchemaTypeName) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ EXPECT_THAT(schema_type_manager->joinable_property_manager().GetMetadataList(
+ "NonExistingSchemaTypeName"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/joinable-property.h b/icing/schema/joinable-property.h
new file mode 100644
index 0000000..057bb74
--- /dev/null
+++ b/icing/schema/joinable-property.h
@@ -0,0 +1,132 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCHEMA_JOINABLE_PROPERTY_H_
+#define ICING_SCHEMA_JOINABLE_PROPERTY_H_
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/proto/schema.pb.h"
+
+namespace icing {
+namespace lib {
+
+using JoinablePropertyId = int8_t;
+
+// 6 bits for 64 values.
+inline constexpr int kJoinablePropertyIdBits = 6;
+inline constexpr JoinablePropertyId kTotalNumJoinableProperties =
+ (INT8_C(1) << kJoinablePropertyIdBits);
+inline constexpr JoinablePropertyId kInvalidJoinablePropertyId =
+ kTotalNumJoinableProperties;
+inline constexpr JoinablePropertyId kMaxJoinablePropertyId =
+ kTotalNumJoinableProperties - 1;
+inline constexpr JoinablePropertyId kMinJoinablePropertyId = 0;
+
+constexpr bool IsJoinablePropertyIdValid(
+ JoinablePropertyId joinable_property_id) {
+ return joinable_property_id >= kMinJoinablePropertyId &&
+ joinable_property_id <= kMaxJoinablePropertyId;
+}
+
+static_assert(
+ kJoinablePropertyIdBits < 8 * sizeof(JoinablePropertyId),
+ "Cannot exhaust all bits of JoinablePropertyId since it is a signed "
+ "integer and the most significant bit should be preserved.");
+
+struct JoinablePropertyMetadata {
+ // Dot-joined property names, representing the location of joinable property
+ // inside an document. E.g. "property1.property2".
+ std::string path;
+
+ // A unique id of joinable property.
+ JoinablePropertyId id;
+
+ // Data type of this joinable property values. Currently we only support
+ // STRING.
+ PropertyConfigProto::DataType::Code data_type;
+
+ // How values will be used as a joining matcher.
+ //
+ // JoinableConfig::ValueType::QUALIFIED_ID:
+ // Value in this property is a joinable (string) qualified id. Qualified id
+ // is composed of namespace and uri, and it will be used as the identifier
+ // of the parent document. Note: it is invalid to use this value type with
+ // non-string DataType.
+ JoinableConfig::ValueType::Code value_type;
+
+ explicit JoinablePropertyMetadata(
+ JoinablePropertyId id_in,
+ PropertyConfigProto::DataType::Code data_type_in,
+ JoinableConfig::ValueType::Code value_type_in, std::string&& path_in)
+ : path(std::move(path_in)),
+ id(id_in),
+ data_type(data_type_in),
+ value_type(value_type_in) {}
+
+ JoinablePropertyMetadata(const JoinablePropertyMetadata& other) = default;
+ JoinablePropertyMetadata& operator=(const JoinablePropertyMetadata& other) =
+ default;
+
+ JoinablePropertyMetadata(JoinablePropertyMetadata&& other) = default;
+ JoinablePropertyMetadata& operator=(JoinablePropertyMetadata&& other) =
+ default;
+
+ bool operator==(const JoinablePropertyMetadata& rhs) const {
+ return path == rhs.path && id == rhs.id && data_type == rhs.data_type &&
+ value_type == rhs.value_type;
+ }
+};
+
+// JoinableProperty is an icing internal concept similar to document property
+// values (contents), but with extra metadata. the data type of value is
+// specified by template.
+//
+// Current supported data types:
+// - std::string_view (PropertyConfigProto::DataType::STRING)
+template <typename T>
+struct JoinableProperty {
+ JoinablePropertyMetadata metadata;
+ std::vector<T> values;
+
+ explicit JoinableProperty(JoinablePropertyMetadata&& metadata_in,
+ std::vector<T>&& values_in)
+ : metadata(std::move(metadata_in)), values(std::move(values_in)) {}
+
+ PropertyConfigProto::DataType::Code data_type() const {
+ return metadata.data_type;
+ }
+
+ JoinableConfig::ValueType::Code value_type() const {
+ return metadata.value_type;
+ }
+};
+
+// Groups of different type joinable properties. Callers can access joinable
+// properties with types they want and avoid going through non-desired ones.
+//
+// REQUIRES: lifecycle of the property must be longer than this object, since we
+// use std::string_view for extracting its string_values.
+struct JoinablePropertyGroup {
+ std::vector<JoinableProperty<std::string_view>> qualified_id_properties;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCHEMA_JOINABLE_PROPERTY_H_
diff --git a/icing/schema/property-util.cc b/icing/schema/property-util.cc
new file mode 100644
index 0000000..67ff748
--- /dev/null
+++ b/icing/schema/property-util.cc
@@ -0,0 +1,137 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/property-util.h"
+
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/absl_ports/str_join.h"
+#include "icing/proto/document.pb.h"
+
+namespace icing {
+namespace lib {
+
+namespace property_util {
+
+std::string ConvertToPropertyExprIndexStr(int index) {
+ if (index == kWildcardPropertyIndex) {
+ return "";
+ }
+ return absl_ports::StrCat(kLBracket, std::to_string(index), kRBracket);
+}
+
+std::string ConcatenatePropertyPathExpr(std::string_view property_path_expr1,
+ std::string_view property_path_expr2) {
+ if (property_path_expr1.empty()) {
+ return std::string(property_path_expr2);
+ }
+ if (property_path_expr2.empty()) {
+ return std::string(property_path_expr1);
+ }
+ return absl_ports::StrCat(property_path_expr1, kPropertyPathSeparator,
+ property_path_expr2);
+}
+
+std::vector<std::string_view> SplitPropertyPathExpr(
+ std::string_view property_path_expr) {
+ return absl_ports::StrSplit(property_path_expr, kPropertyPathSeparator);
+}
+
+PropertyInfo ParsePropertyNameExpr(std::string_view property_name_expr) {
+ size_t l_bracket = property_name_expr.find(kLBracket);
+ if (l_bracket == std::string_view::npos ||
+ l_bracket >= property_name_expr.length()) {
+ return PropertyInfo(std::string(property_name_expr),
+ kWildcardPropertyIndex);
+ }
+ size_t r_bracket = property_name_expr.find(kRBracket, l_bracket);
+ if (r_bracket == std::string_view::npos || r_bracket - l_bracket < 2) {
+ return PropertyInfo(std::string(property_name_expr),
+ kWildcardPropertyIndex);
+ }
+ std::string index_string = std::string(
+ property_name_expr.substr(l_bracket + 1, r_bracket - l_bracket - 1));
+ return PropertyInfo(std::string(property_name_expr.substr(0, l_bracket)),
+ std::stoi(index_string));
+}
+
+std::vector<PropertyInfo> ParsePropertyPathExpr(
+ std::string_view property_path_expr) {
+ std::vector<std::string_view> property_name_exprs =
+ SplitPropertyPathExpr(property_path_expr);
+
+ std::vector<PropertyInfo> property_infos;
+ property_infos.reserve(property_name_exprs.size());
+ for (std::string_view property_name_expr : property_name_exprs) {
+ property_infos.push_back(ParsePropertyNameExpr(property_name_expr));
+ }
+ return property_infos;
+}
+
+bool IsParentPropertyPath(std::string_view property_path_expr1,
+ std::string_view property_path_expr2) {
+ if (property_path_expr2.length() < property_path_expr1.length()) {
+ return false;
+ }
+ if (property_path_expr1 !=
+ property_path_expr2.substr(0, property_path_expr1.length())) {
+ return false;
+ }
+ if (property_path_expr2.length() > property_path_expr1.length() &&
+ property_path_expr2[property_path_expr1.length()] !=
+ kPropertyPathSeparator[0]) {
+ return false;
+ }
+ return true;
+}
+
+const PropertyProto* GetPropertyProto(const DocumentProto& document,
+ std::string_view property_name) {
+ for (const PropertyProto& property : document.properties()) {
+ if (property.name() == property_name) {
+ return &property;
+ }
+ }
+ return nullptr;
+}
+
+template <>
+libtextclassifier3::StatusOr<std::vector<std::string>>
+ExtractPropertyValues<std::string>(const PropertyProto& property) {
+ return std::vector<std::string>(property.string_values().begin(),
+ property.string_values().end());
+}
+
+template <>
+libtextclassifier3::StatusOr<std::vector<std::string_view>>
+ExtractPropertyValues<std::string_view>(const PropertyProto& property) {
+ return std::vector<std::string_view>(property.string_values().begin(),
+ property.string_values().end());
+}
+
+template <>
+libtextclassifier3::StatusOr<std::vector<int64_t>>
+ExtractPropertyValues<int64_t>(const PropertyProto& property) {
+ return std::vector<int64_t>(property.int64_values().begin(),
+ property.int64_values().end());
+}
+
+} // namespace property_util
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/property-util.h b/icing/schema/property-util.h
new file mode 100644
index 0000000..7557879
--- /dev/null
+++ b/icing/schema/property-util.h
@@ -0,0 +1,212 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCHEMA_PROPERTY_UTIL_H_
+#define ICING_SCHEMA_PROPERTY_UTIL_H_
+
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/proto/document.pb.h"
+
+namespace icing {
+namespace lib {
+
+namespace property_util {
+
+// Definition:
+// - Expr (short for expression): with or without index.
+// - property_name: one level of property name without index. E.g. "abc", "def".
+// - property_name_expr: one level of property name with or without index. E.g.
+// "abc", "abc[0]", "def[1]".
+// - property_path: multiple levels (including one) of property names without
+// indices. E.g. "abc", "abc.def".
+// - property_path_expr: multiple levels (including one) of property name
+// expressions. E.g. "abc", "abc[0]", "abc.def",
+// "abc[0].def", "abc[0].def[1]".
+//
+// Set relationship graph (A -> B: A is a subset of B):
+//
+// property_path -> property_path_expr
+// ^ ^
+// | |
+// property_name -> property_name_expr
+inline constexpr std::string_view kPropertyPathSeparator = ".";
+inline constexpr std::string_view kLBracket = "[";
+inline constexpr std::string_view kRBracket = "]";
+
+inline constexpr int kWildcardPropertyIndex = -1;
+
+struct PropertyInfo {
+ std::string name;
+ int index;
+
+ explicit PropertyInfo(std::string name_in, int index_in)
+ : name(std::move(name_in)), index(index_in) {}
+};
+
+// Converts a property (value) index to string, wrapped by kLBracket and
+// kRBracket.
+//
+// REQUIRES: index should be valid or kWildcardPropertyIndex.
+//
+// Returns:
+// - "" if index is kWildcardPropertyIndex.
+// - kLBracket + std::to_string(index) + kRBracket for all non
+// kWildcardPropertyIndex indices.
+std::string ConvertToPropertyExprIndexStr(int index);
+
+// Concatenates 2 property path expressions.
+//
+// Returns:
+// - property_path_expr1 + "." + property_path_expr2 if both are not empty.
+// - property_path_expr1 if property_path_expr2 is empty.
+// - property_path_expr2 if property_path_expr1 is empty.
+// - "" if both are empty.
+std::string ConcatenatePropertyPathExpr(std::string_view property_path_expr1,
+ std::string_view property_path_expr2);
+
+// Splits a property path expression into multiple property name expressions.
+//
+// Returns: a vector of property name expressions.
+std::vector<std::string_view> SplitPropertyPathExpr(
+ std::string_view property_path_expr);
+
+// Parses a property name expression into (property name, property index). If
+// the index expression is missing, then the returned property index will be
+// kWildcardPropertyIndex.
+//
+// Examples:
+// - ParsePropertyNameExpr("foo") will return ("foo",
+// kWildcardPropertyIndex).
+// - ParsePropertyNameExpr("foo[5]") will return ("foo", 5).
+//
+// Returns: a PropertyInfo instance.
+PropertyInfo ParsePropertyNameExpr(std::string_view property_name_expr);
+
+// Parses a property path expression into multiple (property name, property
+// index). It is similar to ParsePropertyPathExpr, except property path
+// expression can contain multiple name expressions.
+//
+// Examples:
+// - ParsePropertyPathExpr("foo") will return [("foo",
+// kWildcardPropertyIndex)].
+// - ParsePropertyPathExpr("foo[5]") will return [("foo", 5)].
+// - ParsePropertyPathExpr("foo.bar[2]") will return [("foo",
+// kWildcardPropertyIndex), ("bar", 2)]
+//
+// Returns: a vector of PropertyInfo instances.
+std::vector<PropertyInfo> ParsePropertyPathExpr(
+ std::string_view property_path_expr);
+
+// A property path property_path_expr1 is considered a parent of another
+// property path property_path_expr2 if:
+// 1. property_path_expr2 == property_path_expr1, OR
+// 2. property_path_expr2 consists of the entire path of property_path_expr1
+// + "." + [some other property path].
+//
+// Note that this can only be used for property name strings that do not
+// contain the property index.
+//
+// Examples:
+// - IsParentPropertyPath("foo", "foo") will return true.
+// - IsParentPropertyPath("foo", "foo.bar") will return true.
+// - IsParentPropertyPath("foo", "bar.foo") will return false.
+// - IsParentPropertyPath("foo.bar", "foo.foo.bar") will return false.
+//
+// Returns: true if property_path_expr1 is a parent property path of
+// property_path_expr2.
+bool IsParentPropertyPath(std::string_view property_path_expr1,
+ std::string_view property_path_expr2);
+
+// Gets the desired PropertyProto from the document by given property name.
+// Since the input parameter is property name, this function only deals with
+// the first level of properties in the document and cannot deal with nested
+// documents.
+//
+// Returns:
+// - const PropertyInfo* if property name exists in the document.
+// - nullptr if property name not found.
+const PropertyProto* GetPropertyProto(const DocumentProto& document,
+ std::string_view property_name);
+
+template <typename T>
+libtextclassifier3::StatusOr<std::vector<T>> ExtractPropertyValues(
+ const PropertyProto& property) {
+ return absl_ports::UnimplementedError(
+ "Unimplemented template type for ExtractPropertyValues");
+}
+
+template <>
+libtextclassifier3::StatusOr<std::vector<std::string>>
+ExtractPropertyValues<std::string>(const PropertyProto& property);
+
+template <>
+libtextclassifier3::StatusOr<std::vector<std::string_view>>
+ExtractPropertyValues<std::string_view>(const PropertyProto& property);
+
+template <>
+libtextclassifier3::StatusOr<std::vector<int64_t>>
+ExtractPropertyValues<int64_t>(const PropertyProto& property);
+
+template <typename T>
+libtextclassifier3::StatusOr<std::vector<T>> ExtractPropertyValuesFromDocument(
+ const DocumentProto& document, std::string_view property_path) {
+ // Finds the first property name in property_path
+ size_t separator_position = property_path.find(kPropertyPathSeparator);
+ std::string_view current_property_name =
+ (separator_position == std::string::npos)
+ ? property_path
+ : property_path.substr(0, separator_position);
+
+ const PropertyProto* property_proto =
+ GetPropertyProto(document, current_property_name);
+ if (property_proto == nullptr) {
+ // Property name not found, it could be one of the following 2 cases:
+ // 1. The property is optional and it's not in the document
+ // 2. The property name is invalid
+ return std::vector<T>();
+ }
+
+ if (separator_position == std::string::npos) {
+ // Current property name is the last one in property path.
+ return ExtractPropertyValues<T>(*property_proto);
+ }
+
+ // Extracts property values recursively
+ std::string_view sub_property_path =
+ property_path.substr(separator_position + 1);
+ std::vector<T> nested_document_content;
+ for (const DocumentProto& nested_document :
+ property_proto->document_values()) {
+ auto content_or = ExtractPropertyValuesFromDocument<T>(nested_document,
+ sub_property_path);
+ if (content_or.ok()) {
+ std::vector<T> content = std::move(content_or).ValueOrDie();
+ std::move(content.begin(), content.end(),
+ std::back_inserter(nested_document_content));
+ }
+ }
+ return nested_document_content;
+}
+
+} // namespace property_util
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCHEMA_PROPERTY_UTIL_H_
diff --git a/icing/schema/property-util_test.cc b/icing/schema/property-util_test.cc
new file mode 100644
index 0000000..eddcc84
--- /dev/null
+++ b/icing/schema/property-util_test.cc
@@ -0,0 +1,253 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/property-util.h"
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/proto/document.pb.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::IsEmpty;
+
+static constexpr std::string_view kTypeTest = "Test";
+static constexpr std::string_view kPropertySingleString = "singleString";
+static constexpr std::string_view kPropertyRepeatedString = "repeatedString";
+static constexpr std::string_view kPropertySingleInteger = "singleInteger";
+static constexpr std::string_view kPropertyRepeatedInteger = "repeatedInteger";
+
+static constexpr std::string_view kTypeNestedTest = "NestedTest";
+static constexpr std::string_view kPropertyStr = "str";
+static constexpr std::string_view kPropertyNestedDocument = "nestedDocument";
+
+TEST(PropertyUtilTest, IsParentPropertyPath) {
+ EXPECT_TRUE(property_util::IsParentPropertyPath("foo", "foo"));
+ EXPECT_TRUE(property_util::IsParentPropertyPath("foo", "foo.bar"));
+ EXPECT_TRUE(property_util::IsParentPropertyPath("foo", "foo.bar.foo"));
+ EXPECT_TRUE(property_util::IsParentPropertyPath("foo", "foo.foo.bar"));
+ EXPECT_TRUE(property_util::IsParentPropertyPath("foo.bar", "foo.bar.foo"));
+
+ EXPECT_FALSE(property_util::IsParentPropertyPath("foo", "foofoo.bar"));
+ EXPECT_FALSE(property_util::IsParentPropertyPath("foo.bar", "foo.foo.bar"));
+ EXPECT_FALSE(property_util::IsParentPropertyPath("foo.bar", "foofoo.bar"));
+ EXPECT_FALSE(property_util::IsParentPropertyPath("foo.bar.foo", "foo"));
+ EXPECT_FALSE(property_util::IsParentPropertyPath("foo.bar.foo", "foo.bar"));
+ EXPECT_FALSE(
+ property_util::IsParentPropertyPath("foo.foo.bar", "foo.bar.foo"));
+ EXPECT_FALSE(property_util::IsParentPropertyPath("foo", "foo#bar.foo"));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesTypeString) {
+ PropertyProto property;
+ property.mutable_string_values()->Add("Hello, world");
+ property.mutable_string_values()->Add("Foo");
+ property.mutable_string_values()->Add("Bar");
+
+ EXPECT_THAT(property_util::ExtractPropertyValues<std::string>(property),
+ IsOkAndHolds(ElementsAre("Hello, world", "Foo", "Bar")));
+
+ EXPECT_THAT(property_util::ExtractPropertyValues<std::string_view>(property),
+ IsOkAndHolds(ElementsAre("Hello, world", "Foo", "Bar")));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesTypeInteger) {
+ PropertyProto property;
+ property.mutable_int64_values()->Add(123);
+ property.mutable_int64_values()->Add(-456);
+ property.mutable_int64_values()->Add(0);
+
+ EXPECT_THAT(property_util::ExtractPropertyValues<int64_t>(property),
+ IsOkAndHolds(ElementsAre(123, -456, 0)));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesMismatchedType) {
+ PropertyProto property;
+ property.mutable_int64_values()->Add(123);
+ property.mutable_int64_values()->Add(-456);
+ property.mutable_int64_values()->Add(0);
+
+ EXPECT_THAT(property_util::ExtractPropertyValues<std::string_view>(property),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesEmpty) {
+ PropertyProto property;
+ EXPECT_THAT(property_util::ExtractPropertyValues<std::string>(property),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(property_util::ExtractPropertyValues<std::string_view>(property),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(property_util::ExtractPropertyValues<int64_t>(property),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesTypeUnimplemented) {
+ PropertyProto property;
+ EXPECT_THAT(property_util::ExtractPropertyValues<int32_t>(property),
+ StatusIs(libtextclassifier3::StatusCode::UNIMPLEMENTED));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesFromDocument) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "test/1")
+ .SetSchema(std::string(kTypeTest))
+ .AddStringProperty(std::string(kPropertySingleString), "single")
+ .AddStringProperty(std::string(kPropertyRepeatedString), "repeated1",
+ "repeated2", "repeated3")
+ .AddInt64Property(std::string(kPropertySingleInteger), 123)
+ .AddInt64Property(std::string(kPropertyRepeatedInteger), 1, 2, 3)
+ .Build();
+
+ // Single string
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ document, /*property_path=*/kPropertySingleString),
+ IsOkAndHolds(ElementsAre("single")));
+ // Repeated string
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ document, /*property_path=*/kPropertyRepeatedString),
+ IsOkAndHolds(ElementsAre("repeated1", "repeated2", "repeated3")));
+ // Single integer
+ EXPECT_THAT(property_util::ExtractPropertyValuesFromDocument<int64_t>(
+ document, /*property_path=*/kPropertySingleInteger),
+ IsOkAndHolds(ElementsAre(123)));
+ // Repeated integer
+ EXPECT_THAT(property_util::ExtractPropertyValuesFromDocument<int64_t>(
+ document, /*property_path=*/kPropertyRepeatedInteger),
+ IsOkAndHolds(ElementsAre(1, 2, 3)));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesFromDocumentNested) {
+ DocumentProto nested_document =
+ DocumentBuilder()
+ .SetKey("icing", "nested/1")
+ .SetSchema(std::string(kTypeNestedTest))
+ .AddStringProperty(std::string(kPropertyStr), "a", "b", "c")
+ .AddDocumentProperty(
+ std::string(kPropertyNestedDocument),
+ DocumentBuilder()
+ .SetSchema(std::string(kTypeTest))
+ .AddStringProperty(std::string(kPropertySingleString),
+ "single1")
+ .AddStringProperty(std::string(kPropertyRepeatedString),
+ "repeated1", "repeated2", "repeated3")
+ .AddInt64Property(std::string(kPropertySingleInteger), 123)
+ .AddInt64Property(std::string(kPropertyRepeatedInteger), 1, 2,
+ 3)
+ .Build(),
+ DocumentBuilder()
+ .SetSchema(std::string(kTypeTest))
+ .AddStringProperty(std::string(kPropertySingleString),
+ "single2")
+ .AddStringProperty(std::string(kPropertyRepeatedString),
+ "repeated4", "repeated5", "repeated6")
+ .AddInt64Property(std::string(kPropertySingleInteger), 456)
+ .AddInt64Property(std::string(kPropertyRepeatedInteger), 4, 5,
+ 6)
+ .Build())
+ .Build();
+
+ // Since there are 2 nested documents, all of values at leaf will be returned.
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ nested_document, /*property_path=*/"nestedDocument.singleString"),
+ IsOkAndHolds(ElementsAre("single1", "single2")));
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ nested_document, /*property_path=*/"nestedDocument.repeatedString"),
+ IsOkAndHolds(ElementsAre("repeated1", "repeated2", "repeated3",
+ "repeated4", "repeated5", "repeated6")));
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<int64_t>(
+ nested_document, /*property_path=*/"nestedDocument.singleInteger"),
+ IsOkAndHolds(ElementsAre(123, 456)));
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<int64_t>(
+ nested_document, /*property_path=*/"nestedDocument.repeatedInteger"),
+ IsOkAndHolds(ElementsAre(1, 2, 3, 4, 5, 6)));
+
+ // Test the property at first level
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ nested_document, kPropertyStr),
+ IsOkAndHolds(ElementsAre("a", "b", "c")));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesFromDocumentNonExistingPaths) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "test/1")
+ .SetSchema(std::string(kTypeTest))
+ .AddStringProperty(std::string(kPropertySingleString), "single")
+ .AddStringProperty(std::string(kPropertyRepeatedString), "repeated1",
+ "repeated2", "repeated3")
+ .AddInt64Property(std::string(kPropertySingleInteger), 123)
+ .AddInt64Property(std::string(kPropertyRepeatedInteger), 1, 2, 3)
+ .Build();
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ document, /*property_path=*/"invalid"),
+ IsOkAndHolds(IsEmpty()));
+
+ DocumentProto nested_document =
+ DocumentBuilder()
+ .SetKey("icing", "nested/1")
+ .SetSchema(std::string(kTypeNestedTest))
+ .AddStringProperty(std::string(kPropertyStr), "a", "b", "c")
+ .AddDocumentProperty(std::string(kPropertyNestedDocument),
+ DocumentProto(document), DocumentProto(document))
+ .Build();
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ nested_document, /*property_path=*/kPropertySingleString),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ nested_document, /*property_path=*/"nestedDocument.invalid"),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesFromDocumentTypeUnimplemented) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "test/1")
+ .SetSchema(std::string(kTypeTest))
+ .AddStringProperty(std::string(kPropertySingleString), "single")
+ .AddStringProperty(std::string(kPropertyRepeatedString), "repeated1",
+ "repeated2", "repeated3")
+ .AddInt64Property(std::string(kPropertySingleInteger), 123)
+ .AddInt64Property(std::string(kPropertyRepeatedInteger), 1, 2, 3)
+ .Build();
+ EXPECT_THAT(property_util::ExtractPropertyValuesFromDocument<int32_t>(
+ document, /*property_path=*/kPropertySingleString),
+ StatusIs(libtextclassifier3::StatusCode::UNIMPLEMENTED));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/schema-property-iterator.cc b/icing/schema/schema-property-iterator.cc
new file mode 100644
index 0000000..8fc245c
--- /dev/null
+++ b/icing/schema/schema-property-iterator.cc
@@ -0,0 +1,198 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/schema-property-iterator.h"
+
+#include <algorithm>
+#include <string>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema/property-util.h"
+
+namespace icing {
+namespace lib {
+
+libtextclassifier3::Status SchemaPropertyIterator::Advance() {
+ while (!levels_.empty()) {
+ if (!levels_.back().Advance()) {
+ // When finishing iterating all properties of the current level, pop it
+ // from the stack (levels_), return to the previous level and resume the
+ // iteration.
+ parent_type_config_names_.erase(
+ parent_type_config_names_.find(levels_.back().GetSchemaTypeName()));
+ levels_.pop_back();
+ continue;
+ }
+
+ const PropertyConfigProto& curr_property_config =
+ levels_.back().GetCurrentPropertyConfig();
+ std::string curr_property_path = levels_.back().GetCurrentPropertyPath();
+
+ // Iterate through the sorted_top_level_indexable_nested_properties_ in
+ // order until we find the first element that is >= curr_property_path.
+ while (current_top_level_indexable_nested_properties_idx_ <
+ sorted_top_level_indexable_nested_properties_.size() &&
+ sorted_top_level_indexable_nested_properties_.at(
+ current_top_level_indexable_nested_properties_idx_) <
+ curr_property_path) {
+ // If an element in sorted_top_level_indexable_nested_properties_ < the
+ // current property path, it means that we've already iterated past the
+ // possible position for it without seeing it.
+ // It's not a valid property path in our schema definition. Add it to
+ // unknown_indexable_nested_properties_ and advance
+ // current_top_level_indexable_nested_properties_idx_.
+ unknown_indexable_nested_property_paths_.push_back(
+ sorted_top_level_indexable_nested_properties_.at(
+ current_top_level_indexable_nested_properties_idx_));
+ ++current_top_level_indexable_nested_properties_idx_;
+ }
+
+ if (curr_property_config.data_type() !=
+ PropertyConfigProto::DataType::DOCUMENT) {
+ // We've advanced to a leaf property.
+ // Set whether this property is indexable according to its level's
+ // indexable config. If this property is declared in
+ // indexable_nested_properties_list of the top-level schema, it is also
+ // nested indexable.
+ std::string* current_indexable_nested_prop =
+ current_top_level_indexable_nested_properties_idx_ <
+ sorted_top_level_indexable_nested_properties_.size()
+ ? &sorted_top_level_indexable_nested_properties_.at(
+ current_top_level_indexable_nested_properties_idx_)
+ : nullptr;
+ if (current_indexable_nested_prop == nullptr ||
+ *current_indexable_nested_prop > curr_property_path) {
+ // Current property is not in the indexable list. Set it as indexable if
+ // its schema level is indexable AND it is an indexable property.
+ bool is_property_indexable =
+ levels_.back().GetLevelNestedIndexable() &&
+ SchemaUtil::IsIndexedProperty(curr_property_config);
+ levels_.back().SetCurrentPropertyIndexable(is_property_indexable);
+ } else if (*current_indexable_nested_prop == curr_property_path) {
+ // Current property is in the indexable list. Set its indexable config
+ // to true. This property will consume a sectionId regardless of whether
+ // or not it is actually indexable.
+ levels_.back().SetCurrentPropertyIndexable(true);
+ ++current_top_level_indexable_nested_properties_idx_;
+ }
+ return libtextclassifier3::Status::OK;
+ }
+
+ // - When advancing to a TYPE_DOCUMENT property, it means it is a nested
+ // schema and we need to traverse the next level. Look up SchemaTypeConfig
+ // (by the schema name) by type_config_map_, and push a new level into
+ // levels_.
+ // - Each level has to record the index of property it is currently at, so
+ // we can resume the iteration when returning back to it. Also other
+ // essential info will be maintained in LevelInfo as well.
+ auto nested_type_config_iter =
+ type_config_map_.find(curr_property_config.schema_type());
+ if (nested_type_config_iter == type_config_map_.end()) {
+ // This should never happen because our schema should already be
+ // validated by this point.
+ return absl_ports::NotFoundError(absl_ports::StrCat(
+ "Type config not found: ", curr_property_config.schema_type()));
+ }
+ const SchemaTypeConfigProto& nested_type_config =
+ nested_type_config_iter->second;
+
+ if (levels_.back().GetLevelNestedIndexable()) {
+ // We should set sorted_top_level_indexable_nested_properties_ to the list
+ // defined by the current level.
+ // GetLevelNestedIndexable() is true either because:
+ // 1. We're looking at a document property of the top-level schema --
+ // The first LevelInfo for the iterator is initialized with
+ // all_nested_properties_indexable_ = true.
+ // 2. All previous levels set index_nested_properties = true:
+ // This indicates that upper-level schema types want to follow nested
+ // properties definition of its document subtypes. If this is the first
+ // subtype level that defines a list, we should set it as
+ // top_level_indexable_nested_properties_ for the current top-level
+ // schema.
+ sorted_top_level_indexable_nested_properties_.clear();
+ sorted_top_level_indexable_nested_properties_.reserve(
+ curr_property_config.document_indexing_config()
+ .indexable_nested_properties_list()
+ .size());
+ for (const std::string& property :
+ curr_property_config.document_indexing_config()
+ .indexable_nested_properties_list()) {
+ // Concat the current property name to each property to get the full
+ // property path expression for each indexable nested property.
+ sorted_top_level_indexable_nested_properties_.push_back(
+ property_util::ConcatenatePropertyPathExpr(curr_property_path,
+ property));
+ }
+ current_top_level_indexable_nested_properties_idx_ = 0;
+ // Sort elements and dedupe
+ std::sort(sorted_top_level_indexable_nested_properties_.begin(),
+ sorted_top_level_indexable_nested_properties_.end());
+ auto last =
+ std::unique(sorted_top_level_indexable_nested_properties_.begin(),
+ sorted_top_level_indexable_nested_properties_.end());
+ sorted_top_level_indexable_nested_properties_.erase(
+ last, sorted_top_level_indexable_nested_properties_.end());
+ }
+
+ bool is_cycle =
+ parent_type_config_names_.find(nested_type_config.schema_type()) !=
+ parent_type_config_names_.end();
+ bool is_parent_property_path =
+ current_top_level_indexable_nested_properties_idx_ <
+ sorted_top_level_indexable_nested_properties_.size() &&
+ property_util::IsParentPropertyPath(
+ curr_property_path,
+ sorted_top_level_indexable_nested_properties_.at(
+ current_top_level_indexable_nested_properties_idx_));
+ if (is_cycle && !is_parent_property_path) {
+ // Cycle detected. The schema definition is guaranteed to be valid here
+ // since it must have already been validated during SchemaUtil::Validate,
+ // which would have rejected any schema with bad cycles.
+ //
+ // There are no properties in the indexable_nested_properties_list that
+ // are a part of this circular reference.
+ // We do not need to iterate this type further so we simply move on to
+ // other properties in the parent type.
+ continue;
+ }
+
+ bool all_nested_properties_indexable =
+ levels_.back().GetLevelNestedIndexable() &&
+ curr_property_config.document_indexing_config()
+ .index_nested_properties();
+ levels_.push_back(LevelInfo(nested_type_config,
+ std::move(curr_property_path),
+ all_nested_properties_indexable));
+ parent_type_config_names_.insert(nested_type_config.schema_type());
+ }
+
+ // Before returning, move all remaining uniterated properties from
+ // sorted_top_level_indexable_nested_properties_ into
+ // unknown_indexable_nested_properties_.
+ std::move(sorted_top_level_indexable_nested_properties_.begin() +
+ current_top_level_indexable_nested_properties_idx_,
+ sorted_top_level_indexable_nested_properties_.end(),
+ std::back_inserter(unknown_indexable_nested_property_paths_));
+
+ return absl_ports::OutOfRangeError("End of iterator");
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/schema-property-iterator.h b/icing/schema/schema-property-iterator.h
new file mode 100644
index 0000000..66b8f32
--- /dev/null
+++ b/icing/schema/schema-property-iterator.h
@@ -0,0 +1,222 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCHEMA_SCHEMA_PROPERTY_ITERATOR_H_
+#define ICING_SCHEMA_SCHEMA_PROPERTY_ITERATOR_H_
+
+#include <algorithm>
+#include <numeric>
+#include <string>
+#include <string_view>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema/property-util.h"
+#include "icing/schema/schema-util.h"
+
+namespace icing {
+namespace lib {
+
+// SchemaPropertyIterator: a class for iterating through all properties of a
+// given SchemaTypeConfigProto in lexicographical order. Only leaf
+// (non-document-type) properties will be returned, and for document type
+// properties, the iterator will traverse down to the next nested level of
+// schema.
+//
+// REQUIRED: The schema in which this SchemaTypeConfigProto is defined must have
+// already passed the validation step during SetSchema.
+class SchemaPropertyIterator {
+ public:
+ explicit SchemaPropertyIterator(
+ const SchemaTypeConfigProto& base_schema_type_config,
+ const SchemaUtil::TypeConfigMap& type_config_map)
+ : type_config_map_(type_config_map) {
+ levels_.push_back(LevelInfo(base_schema_type_config,
+ /*base_property_path=*/"",
+ /*all_nested_properties_indexable=*/true));
+ parent_type_config_names_.insert(base_schema_type_config.schema_type());
+ }
+
+ // Gets the current property config.
+ //
+ // REQUIRES: The preceding call for Advance() is OK.
+ const PropertyConfigProto& GetCurrentPropertyConfig() const {
+ return levels_.back().GetCurrentPropertyConfig();
+ }
+
+ // Gets the current property path.
+ //
+ // REQUIRES: The preceding call for Advance() is OK.
+ std::string GetCurrentPropertyPath() const {
+ return levels_.back().GetCurrentPropertyPath();
+ }
+
+ // Returns whether the current property is indexable. This would be true if
+ // either the current level is nested indexable, or if the current property is
+ // declared indexable in the indexable_nested_properties_list of the top-level
+ // schema type.
+ //
+ // REQUIRES: The preceding call for Advance() is OK.
+ bool GetCurrentPropertyIndexable() const {
+ return levels_.back().GetCurrentPropertyIndexable();
+ }
+
+ // Returns whether the current schema level is nested indexable. If this is
+ // true, all properties in the level are indexed.
+ //
+ // REQUIRES: The preceding call for Advance() is OK.
+ bool GetLevelNestedIndexable() const {
+ return levels_.back().GetLevelNestedIndexable();
+ }
+
+ // The set of indexable nested properties that are defined in the
+ // indexable_nested_properties_list but are not found in the schema
+ // definition. These properties still consume sectionIds, but will not be
+ // indexed.
+ const std::vector<std::string>& unknown_indexable_nested_property_paths()
+ const {
+ return unknown_indexable_nested_property_paths_;
+ }
+
+ // Advances to the next leaf property.
+ //
+ // Returns:
+ // - OK on success
+ // - OUT_OF_RANGE_ERROR if there is no more leaf property
+ // - INVALID_ARGUMENT_ERROR if cycle dependency is detected in the nested
+ // schema
+ // - NOT_FOUND_ERROR if any nested schema name is not found in
+ // type_config_map
+ libtextclassifier3::Status Advance();
+
+ private:
+ // An inner class for maintaining the iterating state of a (nested) level.
+ // Nested SchemaTypeConfig is a tree structure, so we have to traverse it
+ // recursively to all leaf properties.
+ class LevelInfo {
+ public:
+ explicit LevelInfo(const SchemaTypeConfigProto& schema_type_config,
+ std::string base_property_path,
+ bool all_nested_properties_indexable)
+ : schema_type_config_(schema_type_config),
+ base_property_path_(std::move(base_property_path)),
+ sorted_property_indices_(schema_type_config.properties_size()),
+ current_vec_idx_(-1),
+ sorted_property_indexable_(schema_type_config.properties_size()),
+ all_nested_properties_indexable_(all_nested_properties_indexable) {
+ // Index sort property by lexicographical order.
+ std::iota(sorted_property_indices_.begin(),
+ sorted_property_indices_.end(),
+ /*value=*/0);
+ std::sort(
+ sorted_property_indices_.begin(), sorted_property_indices_.end(),
+ [&schema_type_config](int lhs_idx, int rhs_idx) -> bool {
+ return schema_type_config.properties(lhs_idx).property_name() <
+ schema_type_config.properties(rhs_idx).property_name();
+ });
+ }
+
+ bool Advance() {
+ return ++current_vec_idx_ < sorted_property_indices_.size();
+ }
+
+ const PropertyConfigProto& GetCurrentPropertyConfig() const {
+ return schema_type_config_.properties(
+ sorted_property_indices_[current_vec_idx_]);
+ }
+
+ std::string GetCurrentPropertyPath() const {
+ return property_util::ConcatenatePropertyPathExpr(
+ base_property_path_, GetCurrentPropertyConfig().property_name());
+ }
+
+ bool GetLevelNestedIndexable() const {
+ return all_nested_properties_indexable_;
+ }
+
+ bool GetCurrentPropertyIndexable() const {
+ return sorted_property_indexable_[current_vec_idx_];
+ }
+
+ void SetCurrentPropertyIndexable(bool indexable) {
+ sorted_property_indexable_[current_vec_idx_] = indexable;
+ }
+
+ std::string_view GetSchemaTypeName() const {
+ return schema_type_config_.schema_type();
+ }
+
+ private:
+ const SchemaTypeConfigProto& schema_type_config_; // Does not own
+
+ // Concatenated property path of all parent levels.
+ std::string base_property_path_;
+
+ // We perform index sort (comparing property name) in order to iterate all
+ // leaf properties in lexicographical order. This vector is for storing
+ // these sorted indices.
+ std::vector<int> sorted_property_indices_;
+ int current_vec_idx_;
+
+ // Vector indicating whether each property in the current level is
+ // indexable. We can declare different indexable settings for properties in
+ // the same level using indexable_nested_properties_list.
+ //
+ // Element indices in this vector correspond to property indices in the
+ // sorted order.
+ std::vector<bool> sorted_property_indexable_;
+
+ // Indicates if all properties in the current level is nested indexable.
+ // This would be true for a level if the document declares
+ // index_nested_properties=true. If any of parent document type
+ // property sets its flag false, then this would be false for all its child
+ // properties.
+ bool all_nested_properties_indexable_;
+ };
+
+ const SchemaUtil::TypeConfigMap& type_config_map_; // Does not own
+
+ // For maintaining the stack of recursive nested schema type traversal. We use
+ // std::vector instead of std::stack to avoid memory allocate and free too
+ // frequently.
+ std::vector<LevelInfo> levels_;
+
+ // Maintaining all traversed parent schema type config names of the current
+ // stack (levels_). It is used to detect nested schema cycle dependency.
+ std::unordered_multiset<std::string_view> parent_type_config_names_;
+
+ // Sorted list of indexable nested properties for the top-level schema.
+ std::vector<std::string> sorted_top_level_indexable_nested_properties_;
+
+ // Current iteration index in the sorted_top_level_indexable_nested_properties
+ // list.
+ int current_top_level_indexable_nested_properties_idx_ = 0;
+
+ // Vector of indexable nested properties defined in the
+ // indexable_nested_properties_list, but not found in the schema definition.
+ // These properties still consume sectionIds, but will not be indexed.
+ // Properties are inserted into this vector in sorted order.
+ //
+ // TODO(b/289152024): Implement support for indexing these properties if they
+ // are in the child types of polymorphic nested properties.
+ std::vector<std::string> unknown_indexable_nested_property_paths_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCHEMA_SCHEMA_PROPERTY_ITERATOR_H_
diff --git a/icing/schema/schema-property-iterator_test.cc b/icing/schema/schema-property-iterator_test.cc
new file mode 100644
index 0000000..2b0226d
--- /dev/null
+++ b/icing/schema/schema-property-iterator_test.cc
@@ -0,0 +1,3905 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/schema-property-iterator.h"
+
+#include <initializer_list>
+#include <string>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-util.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using portable_equals_proto::EqualsProto;
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
+
+TEST(SchemaPropertyIteratorTest,
+ SingleLevelSchemaTypeConfigShouldIterateInCorrectOrder) {
+ std::string schema_type_name = "Schema";
+
+ SchemaTypeConfigProto schema_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Google").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder().SetName("Youtube").SetDataType(
+ TYPE_BYTES))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Alphabet")
+ .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name, schema_type_config}};
+
+ SchemaPropertyIterator iterator(schema_type_config, type_config_map);
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Alphabet"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config.properties(2)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Youtube"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config.properties(1)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(iterator.unknown_indexable_nested_property_paths(), IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest,
+ NestedSchemaTypeConfigShouldIterateInCorrectOrder) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+ std::string schema_type_name3 = "SchemaThree";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Google").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder().SetName("Youtube").SetDataType(
+ TYPE_BYTES))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Alphabet")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(PropertyConfigBuilder().SetName("Foo").SetDataTypeString(
+ TERM_MATCH_UNKNOWN, TOKENIZER_NONE))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Bar").SetDataTypeDocument(
+ schema_type_name1, /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto schema_type_config3 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name3)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Hello").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("World").SetDataTypeDocument(
+ schema_type_name1, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Icing").SetDataTypeDocument(
+ schema_type_name2, /*index_nested_properties=*/true))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name1, schema_type_config1},
+ {schema_type_name2, schema_type_config2},
+ {schema_type_name3, schema_type_config3}};
+
+ // SchemaThree: {
+ // "Hello": TYPE_STRING,
+ // "World": TYPE_DOCUMENT SchemaOne {
+ // "Google": TYPE_STRING,
+ // "Youtube": TYPE_BYTES,
+ // "Alphabet": TYPE_INT64,
+ // },
+ // "Icing": TYPE_DOCUMENT SchemaTwo {
+ // "Foo": TYPE_STRING,
+ // "Bar": TYPE_DOCUMENT SchemaOne {
+ // "Google": TYPE_STRING,
+ // "Youtube": TYPE_BYTES,
+ // "Alphabet": TYPE_INT64,
+ // },
+ // },
+ // }
+ SchemaPropertyIterator iterator(schema_type_config3, type_config_map);
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Hello"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config3.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Icing.Bar.Alphabet"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Icing.Bar.Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Icing.Bar.Youtube"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Icing.Foo"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("World.Alphabet"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("World.Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("World.Youtube"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(iterator.unknown_indexable_nested_property_paths(), IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest,
+ NonExistingNestedSchemaTypeConfigShouldGetNotFoundError) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(PropertyConfigBuilder().SetName("Google").SetDataType(
+ TYPE_STRING))
+ .AddProperty(PropertyConfigBuilder().SetName("Youtube").SetDataType(
+ TYPE_BYTES))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Alphabet")
+ .SetDataType(TYPE_INT64))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Foo").SetDataTypeDocument(
+ schema_type_name1, /*index_nested_properties=*/true))
+ .Build();
+ // Remove the second level (schema_type_config1) from type_config_map.
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name2, schema_type_config2}};
+
+ SchemaPropertyIterator iterator(schema_type_config2, type_config_map);
+ // Since Foo is a document type property with schema type = "SchemaOne" and
+ // "SchemaOne" is not in type_config_map, Advance() should return NOT_FOUND
+ // error.
+ EXPECT_THAT(iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST(SchemaPropertyIteratorTest,
+ SchemaTypeConfigWithEmptyPropertyShouldGetOutOfRangeErrorAtFirstAdvance) {
+ std::string schema_type_name = "Schema";
+
+ SchemaTypeConfigProto schema_type_config =
+ SchemaTypeConfigBuilder().SetType(schema_type_name).Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name, schema_type_config}};
+
+ SchemaPropertyIterator iterator(schema_type_config, type_config_map);
+ EXPECT_THAT(iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(iterator.unknown_indexable_nested_property_paths(), IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest, NestedIndexable) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+ std::string schema_type_name3 = "SchemaThree";
+ std::string schema_type_name4 = "SchemaFour";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Google").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Bar").SetDataTypeDocument(
+ schema_type_name1, /*index_nested_properties=*/true))
+ .AddProperty(PropertyConfigBuilder().SetName("Foo").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config3 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name3)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Bar").SetDataTypeDocument(
+ schema_type_name1,
+ /*index_nested_properties=*/false))
+ .AddProperty(PropertyConfigBuilder().SetName("Foo").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config4 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name4)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Baz1").SetDataTypeDocument(
+ schema_type_name2, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Baz2").SetDataTypeDocument(
+ schema_type_name2, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Baz3").SetDataTypeDocument(
+ schema_type_name3, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Baz4").SetDataTypeDocument(
+ schema_type_name3, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Hello1").SetDataTypeDocument(
+ schema_type_name1, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Hello2").SetDataTypeDocument(
+ schema_type_name1, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("World").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name1, schema_type_config1},
+ {schema_type_name2, schema_type_config2},
+ {schema_type_name3, schema_type_config3},
+ {schema_type_name4, schema_type_config4}};
+
+ // SchemaFour: {
+ // "Baz1": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=true SchemaTwo {
+ // "Bar": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=true SchemaOne {
+ // "Google": TYPE_STRING INDEXABLE,
+ // },
+ // "Foo": TYPE_STRING INDEXABLE,
+ // },
+ // "Baz2": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=false SchemaTwo {
+ // "Bar": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=true SchemaOne {
+ // "Google": TYPE_STRING INDEXABLE,
+ // },
+ // "Foo": TYPE_STRING INDEXABLE,
+ // },
+ // "Baz3": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=true SchemaThree {
+ // "Bar": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=false SchemaOne {
+ // "Google": TYPE_STRING INDEXABLE,
+ // },
+ // "Foo": TYPE_STRING INDEXABLE,
+ // },
+ // "Baz4": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=false SchemaThree {
+ // "Bar": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=false SchemaOne {
+ // "Google": TYPE_STRING INDEXABLE,
+ // },
+ // "Foo": TYPE_STRING INDEXABLE,
+ // },
+ // "Hello": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=false SchemaOne {
+ // "Google": TYPE_STRING INDEXABLE,
+ // },
+ // "World": TYPE_STRING INDEXABLE,
+ // }
+ SchemaPropertyIterator iterator(schema_type_config4, type_config_map);
+
+ // Baz1 to Baz4: 2 levels of nested document type property.
+ // For Baz1, all levels set index_nested_properties = true, so all leaf
+ // properties should be nested indexable.
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Baz1.Bar.Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Baz1.Foo"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ // For Baz2, the parent level sets index_nested_properties = false, so all
+ // leaf properties in child levels should be nested unindexable even if
+ // they've set their index_nested_properties = true.
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Baz2.Bar.Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Baz2.Foo"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ // For Baz3, the parent level sets index_nested_properties = true, but the
+ // child level sets index_nested_properties = false.
+ // - Leaf properties in the parent level should be nested indexable.
+ // - Leaf properties in the child level should be nested unindexable.
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Baz3.Bar.Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Baz3.Foo"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ // For Baz4, all levels set index_nested_properties = false, so all leaf
+ // properties should be nested unindexable.
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Baz4.Bar.Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Baz4.Foo"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ // Verify 1 and 0 level of nested document type properties.
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Hello1.Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Hello2.Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("World"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config4.properties(6)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(iterator.unknown_indexable_nested_property_paths(), IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest,
+ IndexableNestedPropertiesList_singleNestedLevel) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop1")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop3")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schema1prop4")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schema1prop5")
+ .SetDataType(TYPE_BOOLEAN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop1")
+ .SetDataTypeDocument(
+ schema_type_name1,
+ /*indexable_nested_properties_list=*/{"schema1prop2",
+ "schema1prop3",
+ "schema1prop5"}))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schema2prop3")
+ .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name1, schema_type_config1},
+ {schema_type_name2, schema_type_config2}};
+
+ // Order of iteration for Schema2:
+ // {"schema2prop1.schema1prop1", "schema2prop1.schema1prop2",
+ // "schema2prop1.schema1prop3", "schema2prop1.schema1prop4",
+ // "schema2prop1.schema1prop5", "schema2prop2", "schema2prop3"}
+ //
+ // Indexable properties:
+ // {"schema2prop1.schema1prop2", "schema2prop1.schema1prop3",
+ // "schema2prop1.schema1prop5", "schema2prop2"}.
+ //
+ // "schema2prop1.schema1prop4" is indexable by its indexing-config, but is not
+ // considered indexable for Schema2 because Schema2 sets its
+ // index_nested_properties config to false, and "schema1prop4" is not
+ // in the indexable_nested_properties_list for schema2prop1.
+ //
+ // "schema2prop1.schema1prop1", "schema2prop1.schema1prop3" and
+ // "schema2prop1.schema1prop5" are non-indexable by its indexing-config.
+ // However "schema2prop1.schema1prop3" and "schema2prop1.schema1prop5" are
+ // indexed as it appears in the indexable_list.
+ SchemaPropertyIterator schema2_iterator(schema_type_config2, type_config_map);
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop3"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop4"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(3)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop5"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(4)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(), Eq("schema2prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(), Eq("schema2prop3"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(2)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema2_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema2_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Iterate through schema1 properties. Schema1 only has non-document type leaf
+ // properties, so its properties will be assigned indexable or not according
+ // to their indexing configs.
+ SchemaPropertyIterator schema1_iterator(schema_type_config1, type_config_map);
+
+ EXPECT_THAT(schema1_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyPath(), Eq("schema1prop1"));
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema1_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyPath(), Eq("schema1prop2"));
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema1_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyPath(), Eq("schema1prop3"));
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema1_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyPath(), Eq("schema1prop4"));
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(3)));
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema1_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyPath(), Eq("schema1prop5"));
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(4)));
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema1_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema1_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest,
+ IndexableNestedPropertiesList_indexBooleanTrueDoesNotAffectOtherLevels) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+ std::string schema_type_name3 = "SchemaThree";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop1")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop3")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop1")
+ .SetDataTypeDocument(schema_type_name1,
+ /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop3")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config3 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name3)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema3prop3")
+ .SetDataTypeDocument(
+ schema_type_name1,
+ /*indexable_nested_properties_list=*/{"schema1prop1",
+ "schema1prop3"}))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schema3prop1")
+ .SetDataTypeDocument(
+ schema_type_name2,
+ /*indexable_nested_properties_list=*/
+ {"schema2prop2", "schema2prop1.schema1prop1",
+ "schema2prop1.schema1prop3"}))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema3prop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name1, schema_type_config1},
+ {schema_type_name2, schema_type_config2},
+ {schema_type_name3, schema_type_config3}};
+
+ // Order of iteration for Schema3:
+ // {"schema3prop1.schema2prop1.schema1prop1",
+ // "schema3prop1.schema2prop1.schema1prop2",
+ // "schema3prop1.schema2prop1.schema1prop3",
+ // "schema3prop1.schema2prop2", "schema3prop1.schema2prop3", "schema3prop2",
+ // "schema3prop3.schema1prop1", "schema3prop3.schema1prop2",
+ // "schema3prop3.schema1prop3"}.
+ //
+ // Indexable properties:
+ // {"schema3prop1.schema2prop1.schema1prop1",
+ // "schema3prop1.schema2prop1.schema1prop3",
+ // "schema3prop1.schema2prop2", "schema3prop2", "schema3prop3.schema1prop1",
+ // "schema3prop3.schema1prop3"}
+ //
+ // Schema2 setting index_nested_properties=true does not affect nested
+ // properties indexing for Schema3.
+ SchemaPropertyIterator schema3_iterator(schema_type_config3, type_config_map);
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop3"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop3"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(), Eq("schema3prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config3.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop3.schema1prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop3.schema1prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop3.schema1prop3"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema3_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for Schema2:
+ // {"schema2prop1.schema1prop1", "schema2prop1.schema1prop2",
+ // "schema2prop1.schema1prop3", "schema2prop2", "schema2prop3"}
+ //
+ // Indexable properties:
+ // {"schema2prop1.schema1prop1", "schema2prop1.schema1prop2",
+ // "schema2prop1.schema1prop3", "schema2prop2", "schema2prop3"}
+ //
+ // All properties are indexed because index_nested_properties=true for
+ // Schema2.schema2prop1. Schema3's indexable_nested_properties setting does
+ // not affect this.
+ SchemaPropertyIterator schema2_iterator(schema_type_config2, type_config_map);
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop3"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(), Eq("schema2prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(), Eq("schema2prop3"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(2)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema2_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest,
+ IndexableNestedPropertiesList_indexBooleanFalseDoesNotAffectOtherLevels) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+ std::string schema_type_name3 = "SchemaThree";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop1")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop1")
+ .SetDataTypeDocument(schema_type_name1,
+ /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto schema_type_config3 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name3)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema3prop1")
+ .SetDataTypeDocument(schema_type_name2,
+ /*indexable_nested_properties_list=*/
+ std::initializer_list<std::string>{
+ "schema2prop1.schema1prop2"}))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name1, schema_type_config1},
+ {schema_type_name2, schema_type_config2},
+ {schema_type_name3, schema_type_config3}};
+
+ // Order of iteration for Schema3:
+ // {"schema3prop1.schema2prop1.schema1prop1",
+ // "schema3prop1.schema2prop1.schema1prop2"}.
+ //
+ // Indexable properties: {"schema3prop1.schema2prop1.schema1prop2"}
+ //
+ // Schema2 setting index_nested_properties=false, does not affect Schema3's
+ // indexable list.
+ SchemaPropertyIterator schema3_iterator(schema_type_config3, type_config_map);
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema3_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for Schema2:
+ // {"schema2prop1.schema1prop1", "schema2prop1.schema1prop2"}
+ //
+ // Indexable properties: None
+ //
+ // The indexable list for Schema3 does not propagate to Schema2.
+ SchemaPropertyIterator schema2_iterator(schema_type_config2, type_config_map);
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema2_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema2_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest,
+ IndexableNestedPropertiesList_indexableSetDoesNotAffectOtherLevels) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+ std::string schema_type_name3 = "SchemaThree";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop1")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop3")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop1")
+ .SetDataTypeDocument(
+ schema_type_name1,
+ /*indexable_nested_properties_list=*/
+ std::initializer_list<std::string>{"schema1prop2"}))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop3")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config3 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name3)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema3prop3")
+ .SetDataTypeDocument(
+ schema_type_name1,
+ /*indexable_nested_properties_list=*/{"schema1prop1",
+ "schema1prop3"}))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schema3prop1")
+ .SetDataTypeDocument(
+ schema_type_name2,
+ /*indexable_nested_properties_list=*/
+ {"schema2prop2", "schema2prop1.schema1prop1",
+ "schema2prop1.schema1prop3"}))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema3prop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name1, schema_type_config1},
+ {schema_type_name2, schema_type_config2},
+ {schema_type_name3, schema_type_config3}};
+
+ // Order of iteration for Schema3:
+ // {"schema3prop1.schema2prop1.schema1prop1",
+ // "schema3prop1.schema2prop1.schema1prop2",
+ // "schema3prop1.schema2prop1.schema1prop3",
+ // "schema3prop1.schema2prop2", "schema3prop1.schema2prop3", "schema3prop2",
+ // "schema3prop3.schema1prop1", "schema3prop3.schema1prop2",
+ // "schema3prop3.schema1prop3"}.
+ //
+ // Indexable properties:
+ // {"schema3prop1.schema2prop1.schema1prop1",
+ // "schema3prop1.schema2prop1.schema1prop3",
+ // "schema3prop1.schema2prop2", "schema3prop2", "schema3prop3.schema1prop1",
+ // "schema3prop3.schema1prop3"}
+ //
+ // Schema2 setting indexable_nested_properties_list={schema1prop2} does not
+ // affect nested properties indexing for Schema3.
+ SchemaPropertyIterator schema3_iterator(schema_type_config3, type_config_map);
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop3"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop3"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(), Eq("schema3prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config3.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop3.schema1prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop3.schema1prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop3.schema1prop3"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema3_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for Schema2:
+ // {"schema2prop1.schema1prop1", "schema2prop1.schema1prop2",
+ // "schema2prop1.schema1prop3", "schema2prop2", "schema2prop3"}
+ //
+ // Indexable properties:
+ // {"schema2prop1.schema1prop2", "schema2prop2", "schema2prop3"}
+ //
+ // Indexable_nested_properties set for Schema3.schema3prop1 does not propagate
+ // to Schema2.
+ SchemaPropertyIterator schema2_iterator(schema_type_config2, type_config_map);
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop3"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(), Eq("schema2prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(), Eq("schema2prop3"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(2)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema2_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+TEST(
+ SchemaPropertyIteratorTest,
+ IndexableNestedPropertiesList_upperLevelIndexTrueIndexesListOfNestedLevel) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+ std::string schema_type_name3 = "SchemaThree";
+ std::string schema_type_name4 = "SchemaFour";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop1")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop1")
+ .SetDataTypeDocument(
+ schema_type_name1,
+ /*indexable_nested_properties_list=*/
+ std::initializer_list<std::string>{"schema1prop2"}))
+ .Build();
+ SchemaTypeConfigProto schema_type_config3 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name3)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema3prop1")
+ .SetDataTypeDocument(schema_type_name2,
+ /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto schema_type_config4 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name4)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema4prop1")
+ .SetDataTypeDocument(schema_type_name3,
+ /*index_nested_properties=*/true))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name1, schema_type_config1},
+ {schema_type_name2, schema_type_config2},
+ {schema_type_name3, schema_type_config3},
+ {schema_type_name4, schema_type_config4}};
+
+ // Order of iteration for Schema4:
+ // {"schema4prop1.schema3prop1.schema2prop1.schema1prop1",
+ // "schema4prop1.schema3prop1.schema2prop1.schema1prop2"}.
+ //
+ // Indexable properties: {schema4prop1.schema3prop1.schema2prop1.schema1prop2}
+ //
+ // Both Schema4 and Schema3 sets index_nested_properties=true, so they both
+ // want to follow the indexing behavior of its subtype.
+ // Schema2 is the first subtype to define an indexing config, so we index its
+ // list for both Schema3 and Schema4 even though it sets
+ // index_nested_properties=false.
+ SchemaPropertyIterator schema4_iterator(schema_type_config4, type_config_map);
+
+ EXPECT_THAT(schema4_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyPath(),
+ Eq("schema4prop1.schema3prop1.schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema4_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyPath(),
+ Eq("schema4prop1.schema3prop1.schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema4_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema4_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for Schema3:
+ // {"schema3prop1.schema2prop1.schema1prop1",
+ // "schema3prop1.schema2prop1.schema1prop2"}.
+ //
+ // Indexable properties: {schema3prop1.schema2prop1.schema1prop2}
+ SchemaPropertyIterator schema3_iterator(schema_type_config3, type_config_map);
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema3_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for Schema2:
+ // {"schema2prop1.schema1prop1", "schema2prop1.schema1prop2"}
+ //
+ // Indexable properties:
+ // {"schema2prop1.schema1prop2"}
+ //
+ // Schema3 setting index_nested_properties=true does not propagate to Schema2.
+ SchemaPropertyIterator schema2_iterator(schema_type_config2, type_config_map);
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema2_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest,
+ IndexableNestedPropertiesList_unknownPropPaths) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+ std::string schema_type_name3 = "SchemaThree";
+ std::string schema_type_name4 = "SchemaFour";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop1")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop1")
+ .SetDataTypeDocument(schema_type_name1,
+ /*indexable_nested_properties_list=*/
+ {"schema1prop2", "schema1prop2.foo",
+ "foo.bar", "zzz", "aaa.zzz"}))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop2")
+ .SetDataTypeDocument(
+ schema_type_name1,
+ /*indexable_nested_properties_list=*/
+ {"schema1prop1", "schema1prop2", "unknown.path"}))
+ .Build();
+ SchemaTypeConfigProto schema_type_config3 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name3)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema3prop1")
+ .SetDataTypeDocument(
+ schema_type_name2,
+ /*indexable_nested_properties_list=*/
+ {"schema3prop1", "schema2prop1", "schema1prop2",
+ "schema2prop1.schema1prop2", "schema2prop1.zzz", "zzz"}))
+ .Build();
+ SchemaTypeConfigProto schema_type_config4 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name4)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema4prop1")
+ .SetDataTypeDocument(schema_type_name3,
+ /*index_nested_properties=*/true))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name1, schema_type_config1},
+ {schema_type_name2, schema_type_config2},
+ {schema_type_name3, schema_type_config3},
+ {schema_type_name4, schema_type_config4}};
+
+ // Order of iteration for Schema4:
+ // "schema4prop1.schema3prop1.schema2prop1.schema1prop1",
+ // "schema4prop1.schema3prop1.schema2prop1.schema1prop2" (indexable),
+ // "schema4prop1.schema3prop1.schema2prop2.schema1prop1",
+ // "schema4prop1.schema3prop1.schema2prop2.schema1prop2"
+ //
+ // Unknown property paths from schema3 will also be included for schema4,
+ // since schema4 sets index_nested_properties=true.
+ // This includes everything in schema3prop1's list except
+ // "schema2prop1.schema1prop2".
+ SchemaPropertyIterator schema4_iterator(schema_type_config4, type_config_map);
+
+ EXPECT_THAT(schema4_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyPath(),
+ Eq("schema4prop1.schema3prop1.schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema4_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyPath(),
+ Eq("schema4prop1.schema3prop1.schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema4_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyPath(),
+ Eq("schema4prop1.schema3prop1.schema2prop2.schema1prop1"));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema4_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyPath(),
+ Eq("schema4prop1.schema3prop1.schema2prop2.schema1prop2"));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema4_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema4_iterator.unknown_indexable_nested_property_paths(),
+ testing::ElementsAre("schema4prop1.schema3prop1.schema1prop2",
+ "schema4prop1.schema3prop1.schema2prop1",
+ "schema4prop1.schema3prop1.schema2prop1.zzz",
+ "schema4prop1.schema3prop1.schema3prop1",
+ "schema4prop1.schema3prop1.zzz"));
+
+ // Order of iteration for Schema3:
+ // "schema3prop1.schema2prop1.schema1prop1",
+ // "schema3prop1.schema2prop1.schema1prop2" (indexable),
+ // "schema3prop1.schema2prop2.schema1prop1",
+ // "schema3prop1.schema2prop2.schema1prop2"
+ //
+ // Unknown properties (in order):
+ // "schema3prop1.schema1prop2", "schema3prop1.schema2prop1" (not a leaf prop),
+ // "schema3prop1.schema2prop1.zzz", "schema3prop1.schema3prop1",
+ // "schema3prop1.zzz"
+ SchemaPropertyIterator schema3_iterator(schema_type_config3, type_config_map);
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop2.schema1prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop2.schema1prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema3_iterator.unknown_indexable_nested_property_paths(),
+ testing::ElementsAre(
+ "schema3prop1.schema1prop2", "schema3prop1.schema2prop1",
+ "schema3prop1.schema2prop1.zzz", "schema3prop1.schema3prop1",
+ "schema3prop1.zzz"));
+
+ // Order of iteration for Schema2:
+ // "schema2prop1.schema1prop1",
+ // "schema2prop1.schema1prop2" (indexable),
+ // "schema2prop2.schema1prop1" (indexable),
+ // "schema2prop2.schema1prop2" (indexable)
+ //
+ // Unknown properties (in order):
+ // "schema2prop1.aaa.zzz", "schema2prop1.foo.bar",
+ // "schema2prop1.schema1prop2.foo", "schema2prop1.zzz",
+ // "schema2prop2.unknown.path"
+ SchemaPropertyIterator schema2_iterator(schema_type_config2, type_config_map);
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop2.schema1prop1"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop2.schema1prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(
+ schema2_iterator.unknown_indexable_nested_property_paths(),
+ testing::ElementsAre("schema2prop1.aaa.zzz", "schema2prop1.foo.bar",
+ "schema2prop1.schema1prop2.foo", "schema2prop1.zzz",
+ "schema2prop2.unknown.path"));
+}
+
+TEST(SchemaPropertyIteratorTest,
+ IndexableNestedPropertiesListDuplicateElements) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+ std::string schema_type_name3 = "SchemaThree";
+ std::string schema_type_name4 = "SchemaFour";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop1")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop1")
+ .SetDataTypeDocument(
+ schema_type_name1,
+ /*indexable_nested_properties_list=*/
+ {"schema1prop2", "schema1prop2", "schema1prop2.foo",
+ "schema1prop2.foo", "foo.bar", "foo.bar", "foo.bar",
+ "zzz", "zzz", "aaa.zzz", "schema1prop2"}))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schema2prop2")
+ .SetDataTypeDocument(
+ schema_type_name1,
+ /*indexable_nested_properties_list=*/
+ {"schema1prop1", "schema1prop2", "unknown.path",
+ "unknown.path", "unknown.path", "unknown.path",
+ "schema1prop1"}))
+ .Build();
+ SchemaTypeConfigProto schema_type_config3 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name3)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema3prop1")
+ .SetDataTypeDocument(
+ schema_type_name2,
+ /*indexable_nested_properties_list=*/
+ {"schema3prop1", "schema3prop1", "schema2prop1",
+ "schema2prop1", "schema1prop2", "schema1prop2",
+ "schema2prop1.schema1prop2", "schema2prop1.schema1prop2",
+ "schema2prop1.zzz", "zzz", "zzz"}))
+ .Build();
+ SchemaTypeConfigProto schema_type_config4 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name4)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema4prop1")
+ .SetDataTypeDocument(schema_type_name3,
+ /*index_nested_properties=*/true))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name1, schema_type_config1},
+ {schema_type_name2, schema_type_config2},
+ {schema_type_name3, schema_type_config3},
+ {schema_type_name4, schema_type_config4}};
+
+ // The results of this test case is the same as the previous test case. This
+ // is to test that the indexable-list is deduped correctly.
+
+ // Order of iteration for Schema4:
+ // "schema4prop1.schema3prop1.schema2prop1.schema1prop1",
+ // "schema4prop1.schema3prop1.schema2prop1.schema1prop2" (indexable),
+ // "schema4prop1.schema3prop1.schema2prop2.schema1prop1",
+ // "schema4prop1.schema3prop1.schema2prop2.schema1prop2"
+ //
+ // Unknown property paths from schema3 will also be included for schema4,
+ // since schema4 sets index_nested_properties=true.
+ // This includes everything in schema3prop1's list except
+ // "schema2prop1.schema1prop2".
+ SchemaPropertyIterator schema4_iterator(schema_type_config4, type_config_map);
+
+ EXPECT_THAT(schema4_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyPath(),
+ Eq("schema4prop1.schema3prop1.schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema4_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyPath(),
+ Eq("schema4prop1.schema3prop1.schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema4_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyPath(),
+ Eq("schema4prop1.schema3prop1.schema2prop2.schema1prop1"));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema4_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyPath(),
+ Eq("schema4prop1.schema3prop1.schema2prop2.schema1prop2"));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema4_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema4_iterator.unknown_indexable_nested_property_paths(),
+ testing::ElementsAre("schema4prop1.schema3prop1.schema1prop2",
+ "schema4prop1.schema3prop1.schema2prop1",
+ "schema4prop1.schema3prop1.schema2prop1.zzz",
+ "schema4prop1.schema3prop1.schema3prop1",
+ "schema4prop1.schema3prop1.zzz"));
+
+ // Order of iteration for Schema3:
+ // "schema3prop1.schema2prop1.schema1prop1",
+ // "schema3prop1.schema2prop1.schema1prop2" (indexable),
+ // "schema3prop1.schema2prop2.schema1prop1",
+ // "schema3prop1.schema2prop2.schema1prop2"
+ //
+ // Unknown properties (in order):
+ // "schema2prop1.aaa.zzz", "schema2prop1.foo.bar",
+ // "schema2prop1.schema1prop2.foo", "schema2prop1.zzz",
+ // "schema2prop2.unknown.path"
+ SchemaPropertyIterator schema3_iterator(schema_type_config3, type_config_map);
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop2.schema1prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop2.schema1prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema3_iterator.unknown_indexable_nested_property_paths(),
+ testing::ElementsAre(
+ "schema3prop1.schema1prop2", "schema3prop1.schema2prop1",
+ "schema3prop1.schema2prop1.zzz", "schema3prop1.schema3prop1",
+ "schema3prop1.zzz"));
+
+ // Order of iteration for Schema2:
+ // "schema2prop1.schema1prop1",
+ // "schema2prop1.schema1prop2" (indexable),
+ // "schema2prop2.schema1prop1" (indexable),
+ // "schema2prop2.schema1prop2" (indexable)
+ //
+ // Unknown properties (in order):
+ // "schema2prop1.aaa.zzz", "schema2prop1.foo.bar",
+ // "schema2prop1.schema1prop2.foo", "schema2prop1.zzz",
+ // "schema2prop2.unknown.path"
+ SchemaPropertyIterator schema2_iterator(schema_type_config2, type_config_map);
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop2.schema1prop1"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop2.schema1prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(
+ schema2_iterator.unknown_indexable_nested_property_paths(),
+ testing::ElementsAre("schema2prop1.aaa.zzz", "schema2prop1.foo.bar",
+ "schema2prop1.schema1prop2.foo", "schema2prop1.zzz",
+ "schema2prop2.unknown.path"));
+}
+
+TEST(SchemaPropertyIteratorTest,
+ IndexableNestedProperties_duplicatePropertyNamesInDifferentProperties) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+ std::string schema_type_name3 = "SchemaThree";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("prop1").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("prop2").SetDataTypeString(
+ TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("prop3").SetDataTypeString(
+ TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("prop1").SetDataTypeDocument(
+ schema_type_name1,
+ /*indexable_nested_properties_list=*/
+ std::initializer_list<std::string>{"prop2"}))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("prop2").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("prop3").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config3 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name3)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("prop3").SetDataTypeDocument(
+ schema_type_name1,
+ /*indexable_nested_properties_list=*/
+ {"prop1", "prop3"}))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("prop1").SetDataTypeDocument(
+ schema_type_name2,
+ /*indexable_nested_properties_list=*/
+ {"prop2", "prop1.prop1", "prop1.prop3"}))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("prop2").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("prop4").SetDataTypeDocument(
+ schema_type_name1,
+ /*indexable_nested_properties_list=*/
+ {"prop2", "prop3"}))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name1, schema_type_config1},
+ {schema_type_name2, schema_type_config2},
+ {schema_type_name3, schema_type_config3}};
+
+ // Order of iteration for Schema3:
+ // {"prop1.prop1.prop1", "prop1.prop1.prop2", "prop1.prop1.prop3",
+ // "prop1.prop2", "prop1.prop3", "prop2",
+ // "prop3.prop1", "prop3.prop2", "prop3.prop3",
+ // "prop4.prop1", "prop4.prop2", "prop4.prop3"}.
+ //
+ // Indexable properties:
+ // {"prop1.prop1.prop1", "prop1.prop1.prop3", "prop1.prop2", "prop2",
+ // "prop3.prop1", "prop3.prop3", "prop4.prop2", "prop4.prop3"}
+ //
+ // Properties do not affect other properties with the same name from different
+ // properties.
+ SchemaPropertyIterator schema3_iterator(schema_type_config3, type_config_map);
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("prop1.prop1.prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("prop1.prop1.prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("prop1.prop1.prop3"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(), Eq("prop1.prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(), Eq("prop1.prop3"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(), Eq("prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config3.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(), Eq("prop3.prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(), Eq("prop3.prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(), Eq("prop3.prop3"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(), Eq("prop4.prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(), Eq("prop4.prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(), Eq("prop4.prop3"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema3_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for Schema2:
+ // {"prop1.prop1", "prop1.prop2",
+ // "prop1.prop3", "prop2", "prop3"}
+ //
+ // Indexable properties:
+ // {"prop1.prop2", "prop1.prop3", "prop2", "prop3"}
+ //
+ // Indexable_nested_properties set for Schema3.prop1 does not propagate
+ // to Schema2.
+ SchemaPropertyIterator schema2_iterator(schema_type_config2, type_config_map);
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(), Eq("prop1.prop1"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(), Eq("prop1.prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(), Eq("prop1.prop3"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(), Eq("prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(), Eq("prop3"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(2)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema2_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+TEST(SchemaPropertyIteratorTest, SingleLevelCycle) {
+ std::string schema_a = "A";
+ std::string schema_b = "B";
+
+ // Create schema with A -> B -> B -> B...
+ SchemaTypeConfigProto schema_type_config_a =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_a)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaAprop1")
+ .SetDataTypeDocument(
+ schema_b, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_b =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_b)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaBprop1")
+ .SetDataTypeDocument(
+ schema_b, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaBprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_a, schema_type_config_a}, {schema_b, schema_type_config_b}};
+
+ // Order of iteration for schema A:
+ // {"schemaAprop1.schemaBprop2", "schemaAprop2"}, both indexable
+ SchemaPropertyIterator schema_a_iterator(schema_type_config_a,
+ type_config_map);
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), Eq("schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_a_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for schema B:
+ // {"schemaBprop2"}, indexable.
+ SchemaPropertyIterator schema_b_iterator(schema_type_config_b,
+ type_config_map);
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), Eq("schemaBprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_b_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest, MultipleLevelCycle) {
+ std::string schema_a = "A";
+ std::string schema_b = "B";
+ std::string schema_c = "C";
+
+ // Create schema with A -> B -> C -> A -> B -> C...
+ SchemaTypeConfigProto schema_type_config_a =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_a)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaAprop1")
+ .SetDataTypeDocument(
+ schema_b, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_b =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_b)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaBprop1")
+ .SetDataTypeDocument(
+ schema_c, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaBprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_c =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_c)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaCprop1")
+ .SetDataTypeDocument(
+ schema_a, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaCprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_a, schema_type_config_a},
+ {schema_b, schema_type_config_b},
+ {schema_c, schema_type_config_c}};
+
+ // Order of iteration for schema A:
+ // {"schemaAprop1.schemaBprop1.schemaCprop2", "schemaAprop1.schemaBprop2",
+ // "schemaAprop2"}, all indexable
+ SchemaPropertyIterator schema_a_iterator(schema_type_config_a,
+ type_config_map);
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), Eq("schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_a_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for schema B:
+ // {"schemaBprop1.schemaCprop1.schemaAprop2", "schemaBprop1.schemaCprop2",
+ // "schemaBprop2"}
+ //
+ // Indexable properties: {"schemaBprop1.schemaCprop2", "schemaBprop2"}
+ SchemaPropertyIterator schema_b_iterator(schema_type_config_b,
+ type_config_map);
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), Eq("schemaBprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_b_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for schema C:
+ // {"schemaCprop1.schemaAprop1.schemaBprop2", "schemaCprop1.schemaAprop2",
+ // "schemaCprop2"}
+ //
+ // Indexable properties: {"schemaCprop2"}
+ SchemaPropertyIterator schema_c_iterator(schema_type_config_c,
+ type_config_map);
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(), Eq("schemaCprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_c_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest, SingleLevelCycleWithIndexableList) {
+ std::string schema_a = "A";
+ std::string schema_b = "B";
+
+ // Create schema with A -> B -> B -> B...
+ SchemaTypeConfigProto schema_type_config_a =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_a)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaAprop1")
+ .SetDataTypeDocument(
+ schema_b, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_b =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_b)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaBprop1")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaBprop2")
+ .SetDataTypeDocument(
+ schema_b, /*indexable_nested_properties_list=*/
+ {"schemaBprop1", "schemaBprop2.schemaBprop1",
+ "schemaBprop2.schemaBprop3",
+ "schemaBprop2.schemaBprop2.schemaBprop3"}))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaBprop3")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_a, schema_type_config_a}, {schema_b, schema_type_config_b}};
+
+ // Order of iteration and whether each property is indexable for schema A:
+ // {"schemaAprop1.schemaBprop1" (true),
+ // "schemaAprop1.schemaBprop2.schemaBprop1" (true),
+ // "schemaAprop1.schemaBprop2.schemaBprop2.schemaBprop1" (true),
+ // "schemaAprop1.schemaBprop2.schemaBprop2.schemaBprop2.schemaBprop1" (false),
+ // "schemaAprop1.schemaBprop2.schemaBprop2.schemaBprop2.schemaBprop3" (true),
+ // "schemaAprop1.schemaBprop2.schemaBprop2.schemaBprop3" (true),
+ // "schemaAprop1.schemaBprop2.schemaBprop3" (false),
+ // "schemaAprop1.schemaBprop3" (true),
+ // "schemaAprop2" (true)}
+ SchemaPropertyIterator schema_a_iterator(schema_type_config_a,
+ type_config_map);
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(0)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2.schemaBprop1"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(0)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2.schemaBprop2.schemaBprop1"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(0)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2.schemaBprop2.schemaBprop2.schemaBprop1"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(0)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2.schemaBprop2.schemaBprop2.schemaBprop3"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(2)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2.schemaBprop2.schemaBprop3"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(2)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2.schemaBprop3"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(2)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop3"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(2)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), Eq("schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_a_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for schema B:
+ // {"schemaBprop1" (true),
+ // "schemaBprop2.schemaBprop1" (true),
+ // "schemaBprop2.schemaBprop2.schemaBprop1" (true),
+ // "schemaBprop2.schemaBprop2.schemaBprop2.schemaBprop1" (false),
+ // "schemaBprop2.schemaBprop2.schemaBprop2.schemaBprop3" (true),
+ // "schemaBprop2.schemaBprop2.schemaBprop3" (true),
+ // "schemaBprop2.schemaBprop3" (false),
+ // "schemaBprop3" (true)}
+ SchemaPropertyIterator schema_b_iterator(schema_type_config_b,
+ type_config_map);
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), Eq("schemaBprop1"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(0)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop2.schemaBprop1"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(0)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop2.schemaBprop2.schemaBprop1"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(0)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop2.schemaBprop2.schemaBprop2.schemaBprop1"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(0)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop2.schemaBprop2.schemaBprop2.schemaBprop3"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(2)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop2.schemaBprop2.schemaBprop3"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(2)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop2.schemaBprop3"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(2)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), Eq("schemaBprop3"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(2)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_b_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest, MultipleCycles) {
+ std::string schema_a = "A";
+ std::string schema_b = "B";
+ std::string schema_c = "C";
+ std::string schema_d = "D";
+
+ // Create the following schema:
+ // D <--> A <--- C
+ // \ ^
+ // v /
+ // B
+ // Schema type A has two cycles: A-B-C-A and A-D-A
+ SchemaTypeConfigProto schema_type_config_a =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_a)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaAprop1")
+ .SetDataTypeDocument(
+ schema_b, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaAprop3")
+ .SetDataTypeDocument(
+ schema_d, /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_b =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_b)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaBprop1")
+ .SetDataTypeDocument(
+ schema_c, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaBprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_c =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_c)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaCprop1")
+ .SetDataTypeDocument(
+ schema_a, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaCprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_d =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_d)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaDprop1")
+ .SetDataTypeDocument(
+ schema_a, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaDprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_a, schema_type_config_a},
+ {schema_b, schema_type_config_b},
+ {schema_c, schema_type_config_c},
+ {schema_d, schema_type_config_d}};
+
+ // Order of iteration for schema A:
+ // {"schemaAprop1.schemaBprop1.schemaCprop2", "schemaAprop1.schemaBprop2",
+ // "schemaAprop2", "schemaAprop3.schemaDprop2"}, all indexable
+ SchemaPropertyIterator schema_a_iterator(schema_type_config_a,
+ type_config_map);
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), Eq("schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_a_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for schema B:
+ // {"schemaBprop1.schemaCprop1.schemaAprop2",
+ // "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2",
+ // "schemaBprop1.schemaCprop2", "schemaBprop2"}
+ //
+ // Indexable properties: {"schemaBprop1.schemaCprop2", "schemaBprop2"}
+ SchemaPropertyIterator schema_b_iterator(schema_type_config_b,
+ type_config_map);
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), Eq("schemaBprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_b_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for schema C:
+ // {"schemaCprop1.schemaAprop1.schemaBprop2", "schemaCprop1.schemaAprop2",
+ // "schemaCprop1.schemaAprop3.schemaDprop2", "schemaCprop2"}
+ //
+ // Indexable properties: {"schemaCprop2"}
+ SchemaPropertyIterator schema_c_iterator(schema_type_config_c,
+ type_config_map);
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(), Eq("schemaCprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_c_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for schema D:
+ // {"schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2",
+ // "schemaDprop1.schemaAprop1.schemaBprop2", "schemaDprop1.schemaAprop2",
+ // "schemaDprop2"}
+ //
+ // Indexable properties: {"schemaDprop2"}
+ SchemaPropertyIterator schema_d_iterator(schema_type_config_d,
+ type_config_map);
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(), Eq("schemaDprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_d_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest, MultipleCyclesWithIndexableList) {
+ std::string schema_a = "A";
+ std::string schema_b = "B";
+ std::string schema_c = "C";
+ std::string schema_d = "D";
+
+ // Create the following schema:
+ // D <--> A <--- C
+ // \ ^
+ // v /
+ // B
+ // Schema type A has two cycles: A-B-C-A and A-D-A
+ SchemaTypeConfigProto schema_type_config_a =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_a)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop1")
+ .SetDataTypeDocument(
+ schema_b, /*indexable_nested_properties_list=*/
+ {"schemaBprop2", "schemaBprop1.schemaCprop1.schemaAprop2",
+ "schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2",
+ "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2",
+ "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1."
+ "schemaAprop2"}))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop3")
+ .SetDataTypeDocument(
+ schema_d, /*indexable_nested_properties_list=*/
+ {"schemaDprop2", "schemaDprop1.schemaAprop2",
+ "schemaDprop1.schemaAprop1.schemaBprop2",
+ "schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2",
+ "schemaDprop1.schemaAprop3.schemaDprop2"}))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_b =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_b)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaBprop1")
+ .SetDataTypeDocument(
+ schema_c, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaBprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_c =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_c)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaCprop1")
+ .SetDataTypeDocument(
+ schema_a, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaCprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_d =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_d)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaDprop1")
+ .SetDataTypeDocument(
+ schema_a, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaDprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_a, schema_type_config_a},
+ {schema_b, schema_type_config_b},
+ {schema_c, schema_type_config_c},
+ {schema_d, schema_type_config_d}};
+
+ // Order of iteration and whether each property is indexable for schema A:
+ // "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2" (true),
+ // "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop2" (true),
+ // "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop2"
+ // (true), "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"
+ // (true), "schemaAprop1.schemaBprop1.schemaCprop2" (false),
+ // "schemaAprop1.schemaBprop2" (true),
+ // "schemaAprop2" (true),
+ // "schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2" (true),
+ // "schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop2" (true),
+ // "schemaAprop3.schemaDprop1.schemaAprop2" (true),
+ // "schemaAprop3.schemaDprop1.schemaAprop3.schemaDprop2" (true),
+ // "schemaAprop3.schemaDprop2" (true)
+ SchemaPropertyIterator schema_a_iterator(schema_type_config_a,
+ type_config_map);
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3."
+ "schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), Eq("schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_a_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration and whether each property is indexable for schema B:
+ // "schemaBprop1.schemaCprop1.schemaAprop2" (false),
+ // "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2" (false),
+ // "schemaBprop1.schemaCprop2" (true),
+ // "schemaBprop2" (true)
+ SchemaPropertyIterator schema_b_iterator(schema_type_config_b,
+ type_config_map);
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), Eq("schemaBprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_b_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for schema C:
+ // "schemaCprop1.schemaAprop1.schemaBprop2" (false),
+ // "schemaCprop1.schemaAprop2" (false),
+ // "schemaCprop1.schemaAprop3.schemaDprop2" (false),
+ // "schemaCprop2" (true)
+ SchemaPropertyIterator schema_c_iterator(schema_type_config_c,
+ type_config_map);
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(), Eq("schemaCprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_c_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for schema D:
+ // "schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2" (false),
+ // "schemaDprop1.schemaAprop1.schemaBprop2" (false),
+ // "schemaDprop1.schemaAprop2" (false),
+ // "schemaDprop2" (true)
+ SchemaPropertyIterator schema_d_iterator(schema_type_config_d,
+ type_config_map);
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(), Eq("schemaDprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_d_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest, MultipleCyclesWithIndexableList_allIndexTrue) {
+ std::string schema_a = "A";
+ std::string schema_b = "B";
+ std::string schema_c = "C";
+ std::string schema_d = "D";
+
+ // Create the following schema:
+ // D <--> A <--- C
+ // \ ^
+ // v /
+ // B
+ // Schema type A has two cycles: A-B-C-A and A-D-A
+ SchemaTypeConfigProto schema_type_config_a =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_a)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop1")
+ .SetDataTypeDocument(
+ schema_b, /*indexable_nested_properties_list=*/
+ {"schemaBprop2", "schemaBprop1.schemaCprop1.schemaAprop2",
+ "schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2",
+ "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2",
+ "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1."
+ "schemaAprop2"}))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop3")
+ .SetDataTypeDocument(
+ schema_d, /*indexable_nested_properties_list=*/
+ {"schemaDprop2", "schemaDprop1.schemaAprop2",
+ "schemaDprop1.schemaAprop1.schemaBprop2",
+ "schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2",
+ "schemaDprop1.schemaAprop3.schemaDprop2"}))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_b =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_b)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaBprop1")
+ .SetDataTypeDocument(
+ schema_c, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaBprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_c =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_c)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaCprop1")
+ .SetDataTypeDocument(
+ schema_a, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaCprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_d =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_d)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaDprop1")
+ .SetDataTypeDocument(
+ schema_a, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaDprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_a, schema_type_config_a},
+ {schema_b, schema_type_config_b},
+ {schema_c, schema_type_config_c},
+ {schema_d, schema_type_config_d}};
+
+ // Order of iteration and whether each property is indexable for schema A:
+ // "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2" (true),
+ // "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop2" (true),
+ // "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop2"
+ // (true), "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"
+ // (true), "schemaAprop1.schemaBprop1.schemaCprop2" (false),
+ // "schemaAprop1.schemaBprop2" (true),
+ // "schemaAprop2" (true),
+ // "schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2" (true),
+ // "schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop2" (true),
+ // "schemaAprop3.schemaDprop1.schemaAprop2" (true),
+ // "schemaAprop3.schemaDprop1.schemaAprop3.schemaDprop2" (true),
+ // "schemaAprop3.schemaDprop2" (true)
+ SchemaPropertyIterator schema_a_iterator(schema_type_config_a,
+ type_config_map);
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3."
+ "schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), Eq("schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_a_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration and whether each property is indexable for schema B:
+ // "schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2"
+ // (true),
+ // "schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop2"
+ // (true),
+ // "schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop2"
+ // (true),
+ // "schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"
+ // (true), "schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop2"
+ // (false), "schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2" (true),
+ // "schemaBprop1.schemaCprop1.schemaAprop2" (true),
+ // "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2"
+ // (true),
+ // "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop2"
+ // (true), "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop2"
+ // (true),
+ // "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop3.schemaDprop2"
+ // (true), "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2" (true)
+ // "schemaBprop1.schemaCprop2" (true)
+ // "schemaBprop2" (true)
+
+ SchemaPropertyIterator schema_b_iterator(schema_type_config_b,
+ type_config_map);
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop1."
+ "schemaCprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop1."
+ "schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop1."
+ "schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop1."
+ "schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1."
+ "schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1."
+ "schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1."
+ "schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), Eq("schemaBprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_b_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration and whether each property is indexable for schema C:
+ // "schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2"
+ // (true), "schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop2"
+ // (true),
+ // "schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop2"
+ // (true),
+ // "schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"
+ // (true),
+ // "schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop2" (false),
+ // "schemaCprop1.schemaAprop1.schemaBprop2" (true),
+ // "schemaCprop1.schemaAprop2" (true),
+ // "schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2"
+ // (true),
+ // "schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop2" (true),
+ // "schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop2" (true),
+ // "schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop3.schemaDprop2" (true),
+ // "schemaCprop1.schemaAprop3.schemaDprop2" (true)
+ // "schemaCprop2" (true)
+ SchemaPropertyIterator schema_c_iterator(schema_type_config_c,
+ type_config_map);
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop1."
+ "schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop1."
+ "schemaAprop3.schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop1."
+ "schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop1."
+ "schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(), Eq("schemaCprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_c_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration and whether each property is indexable for schema D:
+ // "schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2"
+ // (true), "schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop2"
+ // (true),
+ // "schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop2"
+ // (true),
+ // "schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"
+ // (true), "schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2" (false),
+ // "schemaDprop1.schemaAprop1.schemaBprop2" (true),
+ // "schemaDprop1.schemaAprop2" (true),
+ // "schemaDprop1.schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2"
+ // (true), "schemaDprop1.schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop2"
+ // (true), "schemaDprop1.schemaAprop3.schemaDprop1.schemaAprop2" (true),
+ // "schemaDprop1.schemaAprop3.schemaDprop1.schemaAprop3.schemaDprop2" (true),
+ // "schemaDprop1.schemaAprop3.schemaDprop2" (true),
+ // "schemaDprop2" (true)
+ SchemaPropertyIterator schema_d_iterator(schema_type_config_d,
+ type_config_map);
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop1."
+ "schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop1."
+ "schemaAprop3.schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop1."
+ "schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop3.schemaDprop1.schemaAprop1."
+ "schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop3.schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop3.schemaDprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(), Eq("schemaDprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_d_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest,
+ MultipleCyclesWithIndexableList_unknownPropPaths) {
+ std::string schema_a = "A";
+ std::string schema_b = "B";
+ std::string schema_c = "C";
+ std::string schema_d = "D";
+
+ // Create the following schema:
+ // D <--> A <--- C
+ // \ ^
+ // v /
+ // B
+ // Schema type A has two cycles: A-B-C-A and A-D-A
+ SchemaTypeConfigProto schema_type_config_a =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_a)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop1")
+ .SetDataTypeDocument(
+ schema_b, /*indexable_nested_properties_list=*/
+ {"schemaBprop2", "schemaBprop1.schemaCprop1.schemaAprop2",
+ "schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2",
+ "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2",
+ "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1."
+ "schemaAprop2",
+ "schemaBprop1.schemaCprop1",
+ "schemaBprop1.schemaCprop1.schemaAprop3", "schemaAprop2",
+ "schemaBprop2.schemaCprop2", "schemaBprop1.foo.bar",
+ "foo", "foo", "bar"}))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop3")
+ .SetDataTypeDocument(
+ schema_d, /*indexable_nested_properties_list=*/
+ {"schemaDprop2", "schemaDprop1.schemaAprop2",
+ "schemaDprop1.schemaAprop1.schemaBprop2",
+ "schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2",
+ "schemaDprop1.schemaAprop3.schemaDprop2", "schemaBprop2",
+ "bar", "schemaDprop2.foo", "schemaDprop1",
+ "schemaAprop3.schemaDprop2"}))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_b =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_b)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaBprop1")
+ .SetDataTypeDocument(
+ schema_c, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaBprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_c =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_c)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaCprop1")
+ .SetDataTypeDocument(
+ schema_a, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaCprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_d =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_d)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaDprop1")
+ .SetDataTypeDocument(
+ schema_a, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaDprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_a, schema_type_config_a},
+ {schema_b, schema_type_config_b},
+ {schema_c, schema_type_config_c},
+ {schema_d, schema_type_config_d}};
+
+ // Order of iteration and whether each property is indexable for schema A:
+ // "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2" (true),
+ // "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop2" (true),
+ // "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop2"
+ // (true), "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"
+ // (true), "schemaAprop1.schemaBprop1.schemaCprop2" (false),
+ // "schemaAprop1.schemaBprop2" (true),
+ // "schemaAprop2" (true),
+ // "schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2" (true),
+ // "schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop2" (true),
+ // "schemaAprop3.schemaDprop1.schemaAprop2" (true),
+ // "schemaAprop3.schemaDprop1.schemaAprop3.schemaDprop2" (true),
+ // "schemaAprop3.schemaDprop2" (true)
+ //
+ // The following properties listed in the indexable_list are not defined
+ // in the schema and should not be seen during iteration. These should appear
+ // in the unknown_indexable_nested_properties_ set.
+ // "schemaAprop1.bar",
+ // "schemaAprop1.foo",
+ // "schemaAprop1.schemaAprop2",
+ // "schemaAprop1.schemaBprop1.foo.bar",
+ // "schemaAprop1.schemaBprop1.schemaCprop1",
+ // "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3",
+ // "schemaAprop1.schemaBprop2.schemaCprop2",
+ // "schemaAprop3.bar",
+ // "schemaAprop3.schemaAprop3.schemaDprop2",
+ // "schemaAprop3.schemaBprop2",
+ // "schemaAprop3.schemaDprop1",
+ // "schemaAprop3.schemaDprop2.foo"
+ SchemaPropertyIterator schema_a_iterator(schema_type_config_a,
+ type_config_map);
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3."
+ "schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), Eq("schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(
+ schema_a_iterator.unknown_indexable_nested_property_paths(),
+ ElementsAre(
+ "schemaAprop1.bar", "schemaAprop1.foo", "schemaAprop1.schemaAprop2",
+ "schemaAprop1.schemaBprop1.foo.bar",
+ "schemaAprop1.schemaBprop1.schemaCprop1",
+ "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3",
+ "schemaAprop1.schemaBprop2.schemaCprop2", "schemaAprop3.bar",
+ "schemaAprop3.schemaAprop3.schemaDprop2", "schemaAprop3.schemaBprop2",
+ "schemaAprop3.schemaDprop1", "schemaAprop3.schemaDprop2.foo"));
+
+ // Order of iteration and whether each property is indexable for schema B:
+ // "schemaBprop1.schemaCprop1.schemaAprop2" (false),
+ // "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2" (false),
+ // "schemaBprop1.schemaCprop2" (true),
+ // "schemaBprop2" (true)
+ SchemaPropertyIterator schema_b_iterator(schema_type_config_b,
+ type_config_map);
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), Eq("schemaBprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_b_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for schema C:
+ // "schemaCprop1.schemaAprop1.schemaBprop2" (false),
+ // "schemaCprop1.schemaAprop2" (false),
+ // "schemaCprop1.schemaAprop3.schemaDprop2" (false),
+ // "schemaCprop2" (true)
+ SchemaPropertyIterator schema_c_iterator(schema_type_config_c,
+ type_config_map);
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(), Eq("schemaCprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_c_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for schema D:
+ // "schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2" (false),
+ // "schemaDprop1.schemaAprop1.schemaBprop2" (false),
+ // "schemaDprop1.schemaAprop2" (false),
+ // "schemaDprop2" (true)
+ SchemaPropertyIterator schema_d_iterator(schema_type_config_d,
+ type_config_map);
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(), Eq("schemaDprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_d_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest, TopLevelCycleWithMultipleIndexableLists) {
+ std::string schema_a = "A";
+ std::string schema_b = "B";
+ std::string schema_c = "C";
+ std::string schema_d = "D";
+
+ // Create the following schema:
+ // A <-> A -> B
+ // A has a top-level property that is a self-reference.
+ SchemaTypeConfigProto schema_type_config_a =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_a)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaAprop1")
+ .SetDataTypeDocument(
+ schema_b, /*indexable_nested_properties_list=*/
+ {"schemaBprop1", "schemaBprop2"}))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaAprop2")
+ .SetDataTypeDocument(
+ schema_a, /*indexable_nested_properties_list=*/
+ {"schemaAprop1.schemaBprop2",
+ "schemaAprop1.schemaBprop3"}))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop3")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_b =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_b)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaBprop1")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaBprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaBprop3")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_a, schema_type_config_a}, {schema_b, schema_type_config_b}};
+
+ // Order of iteration for Schema A:
+ // "schemaAprop1.schemaBprop1" (true)
+ // "schemaAprop1.schemaBprop2" (true)
+ // "schemaAprop1.schemaBprop3" (false)
+ // "schemaAprop2.schemaAprop1.schemaBprop1" (false)
+ // "schemaAprop2.schemaAprop1.schemaBprop2" (true)
+ // "schemaAprop2.schemaAprop1.schemaBprop3" (true)
+ // "schemaAprop2.schemaAprop3" (false)
+ // "schemaAprop3" (true)
+ SchemaPropertyIterator schema_a_iterator(schema_type_config_a,
+ type_config_map);
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(0)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop3"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(2)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop2.schemaAprop1.schemaBprop1"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(0)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop2.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop2.schemaAprop1.schemaBprop3"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(2)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop2.schemaAprop3"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(2)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), Eq("schemaAprop3"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(2)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_a_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc
index ddd9e3b..a389d13 100644
--- a/icing/schema/schema-store.cc
+++ b/icing/schema/schema-store.cc
@@ -15,11 +15,14 @@
#include "icing/schema/schema-store.h"
#include <algorithm>
+#include <cinttypes>
#include <cstdint>
+#include <limits>
#include <memory>
#include <string>
#include <string_view>
#include <unordered_map>
+#include <unordered_set>
#include <utility>
#include <vector>
@@ -30,13 +33,18 @@
#include "icing/file/destructible-directory.h"
#include "icing/file/file-backed-proto.h"
#include "icing/file/filesystem.h"
+#include "icing/file/version-util.h"
#include "icing/proto/debug.pb.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/logging.pb.h"
#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
#include "icing/proto/storage.pb.h"
+#include "icing/schema/backup-schema-producer.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/schema/property-util.h"
+#include "icing/schema/schema-type-manager.h"
#include "icing/schema/schema-util.h"
-#include "icing/schema/section-manager.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/dynamic-trie-key-mapper.h"
@@ -51,6 +59,7 @@ namespace {
constexpr char kSchemaStoreHeaderFilename[] = "schema_store_header";
constexpr char kSchemaFilename[] = "schema.pb";
+constexpr char kOverlaySchemaFilename[] = "overlay_schema.pb";
constexpr char kSchemaTypeMapperFilename[] = "schema_type_mapper";
// A DynamicTrieKeyMapper stores its data across 3 arrays internally. Giving
@@ -58,15 +67,19 @@ constexpr char kSchemaTypeMapperFilename[] = "schema_type_mapper";
// 384KiB.
constexpr int32_t kSchemaTypeMapperMaxSize = 3 * 128 * 1024; // 384 KiB
-const std::string MakeHeaderFilename(const std::string& base_dir) {
+std::string MakeHeaderFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kSchemaStoreHeaderFilename);
}
-const std::string MakeSchemaFilename(const std::string& base_dir) {
+std::string MakeSchemaFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kSchemaFilename);
}
-const std::string MakeSchemaTypeMapperFilename(const std::string& base_dir) {
+std::string MakeOverlaySchemaFilename(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", kOverlaySchemaFilename);
+}
+
+std::string MakeSchemaTypeMapperFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kSchemaTypeMapperFilename);
}
@@ -107,6 +120,61 @@ std::unordered_set<SchemaTypeId> SchemaTypeIdsChanged(
} // namespace
+/* static */ libtextclassifier3::StatusOr<SchemaStore::Header>
+SchemaStore::Header::Read(const Filesystem* filesystem,
+ const std::string& path) {
+ Header header;
+ ScopedFd sfd(filesystem->OpenForRead(path.c_str()));
+ if (!sfd.is_valid()) {
+ return absl_ports::NotFoundError("SchemaStore header doesn't exist");
+ }
+
+ // If file is sizeof(LegacyHeader), then it must be LegacyHeader.
+ int64_t file_size = filesystem->GetFileSize(sfd.get());
+ if (file_size == sizeof(LegacyHeader)) {
+ LegacyHeader legacy_header;
+ if (!filesystem->Read(path.c_str(), &legacy_header,
+ sizeof(legacy_header))) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Couldn't read: ", path));
+ }
+ if (legacy_header.magic != Header::kMagic) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Invalid header kMagic for file: ", path));
+ }
+ header.set_checksum(legacy_header.checksum);
+ } else if (file_size == sizeof(Header)) {
+ if (!filesystem->Read(path.c_str(), &header, sizeof(header))) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Couldn't read: ", path));
+ }
+ if (header.magic() != Header::kMagic) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Invalid header kMagic for file: ", path));
+ }
+ } else {
+ int legacy_header_size = sizeof(LegacyHeader);
+ int header_size = sizeof(Header);
+ return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "Unexpected header size %" PRId64 ". Expected %d or %d", file_size,
+ legacy_header_size, header_size));
+ }
+ return header;
+}
+
+libtextclassifier3::Status SchemaStore::Header::Write(
+ const Filesystem* filesystem, const std::string& path) {
+ ScopedFd scoped_fd(filesystem->OpenForWrite(path.c_str()));
+ // This should overwrite the header.
+ if (!scoped_fd.is_valid() ||
+ !filesystem->Write(scoped_fd.get(), this, sizeof(*this)) ||
+ !filesystem->DataSync(scoped_fd.get())) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to write SchemaStore header: ", path));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> SchemaStore::Create(
const Filesystem* filesystem, const std::string& base_dir,
const Clock* clock, InitializeStatsProto* initialize_stats) {
@@ -139,6 +207,106 @@ libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> SchemaStore::Create(
return schema_store;
}
+/* static */ libtextclassifier3::Status SchemaStore::DiscardOverlaySchema(
+ const Filesystem* filesystem, const std::string& base_dir, Header& header) {
+ std::string header_filename = MakeHeaderFilename(base_dir);
+ if (header.overlay_created()) {
+ header.SetOverlayInfo(
+ /*overlay_created=*/false,
+ /*min_overlay_version_compatibility=*/ std::numeric_limits<
+ int32_t>::max());
+ ICING_RETURN_IF_ERROR(header.Write(filesystem, header_filename));
+ }
+ std::string schema_overlay_filename = MakeOverlaySchemaFilename(base_dir);
+ if (!filesystem->DeleteFile(schema_overlay_filename.c_str())) {
+ return absl_ports::InternalError(
+ "Unable to delete stale schema overlay file.");
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+/* static */ libtextclassifier3::Status SchemaStore::MigrateSchema(
+ const Filesystem* filesystem, const std::string& base_dir,
+ version_util::StateChange version_state_change, int32_t new_version) {
+ if (!filesystem->DirectoryExists(base_dir.c_str())) {
+ // Situations when schema store directory doesn't exist:
+ // - Initializing new Icing instance: don't have to do anything now. The
+ // directory will be created later.
+ // - Lose schema store: there is nothing we can do now. The logic will be
+ // handled later by initializing.
+ //
+ // Therefore, just simply return OK here.
+ return libtextclassifier3::Status::OK;
+ }
+
+ std::string overlay_schema_filename = MakeOverlaySchemaFilename(base_dir);
+ if (!filesystem->FileExists(overlay_schema_filename.c_str())) {
+ // The overlay doesn't exist. So there should be nothing particularly
+ // interesting to worry about.
+ return libtextclassifier3::Status::OK;
+ }
+
+ std::string header_filename = MakeHeaderFilename(base_dir);
+ libtextclassifier3::StatusOr<Header> header_or;
+ switch (version_state_change) {
+ // No necessary actions for normal upgrades or no version change. The data
+ // that was produced by the previous version is fully compatible with this
+ // version and there's no stale data for us to clean up.
+ // The same is true for a normal rollforward. A normal rollforward implies
+ // that the previous version was one that understood the concept of the
+ // overlay schema and would have already discarded it if it was unusable.
+ case version_util::StateChange::kVersionZeroUpgrade:
+ // fallthrough
+ case version_util::StateChange::kUpgrade:
+ // fallthrough
+ case version_util::StateChange::kRollForward:
+ // fallthrough
+ case version_util::StateChange::kCompatible:
+ return libtextclassifier3::Status::OK;
+ case version_util::StateChange::kVersionZeroRollForward:
+ // We've rolled forward. The schema overlay file, if it exists, is
+ // possibly stale. We must throw it out.
+ header_or = Header::Read(filesystem, header_filename);
+ if (!header_or.ok()) {
+ return header_or.status();
+ }
+ return SchemaStore::DiscardOverlaySchema(filesystem, base_dir,
+ header_or.ValueOrDie());
+ case version_util::StateChange::kRollBack:
+ header_or = Header::Read(filesystem, header_filename);
+ if (!header_or.ok()) {
+ return header_or.status();
+ }
+ if (header_or.ValueOrDie().min_overlay_version_compatibility() <=
+ new_version) {
+ // We've been rolled back, but the overlay schema claims that it
+ // supports this version. So we can safely return.
+ return libtextclassifier3::Status::OK;
+ }
+ // We've been rolled back to a version that the overlay schema doesn't
+ // support. We must throw it out.
+ return SchemaStore::DiscardOverlaySchema(filesystem, base_dir,
+ header_or.ValueOrDie());
+ case version_util::StateChange::kUndetermined:
+ // It's not clear what version we're on, but the base schema should always
+ // be safe to use. Throw out the overlay.
+ header_or = Header::Read(filesystem, header_filename);
+ if (!header_or.ok()) {
+ return header_or.status();
+ }
+ return SchemaStore::DiscardOverlaySchema(filesystem, base_dir,
+ header_or.ValueOrDie());
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+/* static */ libtextclassifier3::Status SchemaStore::DiscardDerivedFiles(
+ const Filesystem* filesystem, const std::string& base_dir) {
+ // Schema type mapper
+ return DynamicTrieKeyMapper<SchemaTypeId>::Delete(
+ *filesystem, MakeSchemaTypeMapperFilename(base_dir));
+}
+
SchemaStore::SchemaStore(const Filesystem* filesystem, std::string base_dir,
const Clock* clock)
: filesystem_(filesystem),
@@ -149,7 +317,7 @@ SchemaStore::SchemaStore(const Filesystem* filesystem, std::string base_dir,
SchemaStore::~SchemaStore() {
if (has_schema_successfully_set_ && schema_file_ != nullptr &&
- schema_type_mapper_ != nullptr && section_manager_ != nullptr) {
+ schema_type_mapper_ != nullptr && schema_type_manager_ != nullptr) {
if (!PersistToDisk().ok()) {
ICING_LOG(ERROR) << "Error persisting to disk in SchemaStore destructor";
}
@@ -157,6 +325,7 @@ SchemaStore::~SchemaStore() {
}
libtextclassifier3::Status SchemaStore::Initialize(SchemaProto new_schema) {
+ ICING_RETURN_IF_ERROR(LoadSchema());
if (!absl_ports::IsNotFound(GetSchema().status())) {
return absl_ports::FailedPreconditionError(
"Incorrectly tried to initialize schema store with a new schema, when "
@@ -164,11 +333,13 @@ libtextclassifier3::Status SchemaStore::Initialize(SchemaProto new_schema) {
}
ICING_RETURN_IF_ERROR(schema_file_->Write(
std::make_unique<SchemaProto>(std::move(new_schema))));
- return InitializeInternal(/*initialize_stats=*/nullptr);
+ return InitializeInternal(/*create_overlay_if_necessary=*/true,
+ /*initialize_stats=*/nullptr);
}
libtextclassifier3::Status SchemaStore::Initialize(
InitializeStatsProto* initialize_stats) {
+ ICING_RETURN_IF_ERROR(LoadSchema());
auto schema_proto_or = GetSchema();
if (absl_ports::IsNotFound(schema_proto_or.status())) {
// Don't have an existing schema proto, that's fine
@@ -177,11 +348,69 @@ libtextclassifier3::Status SchemaStore::Initialize(
// Real error when trying to read the existing schema
return schema_proto_or.status();
}
- return InitializeInternal(initialize_stats);
+ return InitializeInternal(/*create_overlay_if_necessary=*/false,
+ initialize_stats);
+}
+
+libtextclassifier3::Status SchemaStore::LoadSchema() {
+ libtextclassifier3::StatusOr<Header> header_or =
+ Header::Read(filesystem_, MakeHeaderFilename(base_dir_));
+ bool header_exists = false;
+ if (!header_or.ok() && !absl_ports::IsNotFound(header_or.status())) {
+ return header_or.status();
+ } else if (!header_or.ok()) {
+ header_ = std::make_unique<Header>();
+ } else {
+ header_exists = true;
+ header_ = std::make_unique<Header>(std::move(header_or).ValueOrDie());
+ }
+
+ std::string overlay_schema_filename = MakeOverlaySchemaFilename(base_dir_);
+ bool overlay_schema_file_exists =
+ filesystem_->FileExists(overlay_schema_filename.c_str());
+
+ libtextclassifier3::Status base_schema_state = schema_file_->Read().status();
+ if (!base_schema_state.ok() && !absl_ports::IsNotFound(base_schema_state)) {
+ return base_schema_state;
+ }
+
+ // There are three valid cases:
+ // 1. Everything is missing. This is an empty schema store.
+ if (!base_schema_state.ok() && !overlay_schema_file_exists &&
+ !header_exists) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ // 2. There never was a overlay schema. The header exists, the base schema
+ // exists and the header says the overlay schema shouldn't exist
+ if (base_schema_state.ok() && !overlay_schema_file_exists && header_exists &&
+ !header_->overlay_created()) {
+ // Nothing else to do. Just return safely.
+ return libtextclassifier3::Status::OK;
+ }
+
+ // 3. There is an overlay schema and a base schema and a header. The header
+ // says that the overlay schema should exist.
+ if (base_schema_state.ok() && overlay_schema_file_exists && header_exists &&
+ header_->overlay_created()) {
+ overlay_schema_file_ = std::make_unique<FileBackedProto<SchemaProto>>(
+ *filesystem_, MakeOverlaySchemaFilename(base_dir_));
+ return libtextclassifier3::Status::OK;
+ }
+
+ // Something has gone wrong. We've lost part of the schema ground truth.
+ // Return an error.
+ bool overlay_created = header_->overlay_created();
+ bool base_schema_exists = base_schema_state.ok();
+ return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "Unable to properly load schema. Header {exists:%d, overlay_created:%d}, "
+ "base schema exists: %d, overlay_schema_exists: %d",
+ header_exists, overlay_created, base_schema_exists,
+ overlay_schema_file_exists));
}
libtextclassifier3::Status SchemaStore::InitializeInternal(
- InitializeStatsProto* initialize_stats) {
+ bool create_overlay_if_necessary, InitializeStatsProto* initialize_stats) {
if (!InitializeDerivedFiles().ok()) {
ICING_VLOG(3)
<< "Couldn't find derived files or failed to initialize them, "
@@ -191,7 +420,7 @@ libtextclassifier3::Status SchemaStore::InitializeInternal(
initialize_stats->set_schema_store_recovery_cause(
InitializeStatsProto::IO_ERROR);
}
- ICING_RETURN_IF_ERROR(RegenerateDerivedFiles());
+ ICING_RETURN_IF_ERROR(RegenerateDerivedFiles(create_overlay_if_necessary));
if (initialize_stats != nullptr) {
initialize_stats->set_schema_store_recovery_latency_ms(
regenerate_timer->GetElapsedMilliseconds());
@@ -207,24 +436,6 @@ libtextclassifier3::Status SchemaStore::InitializeInternal(
}
libtextclassifier3::Status SchemaStore::InitializeDerivedFiles() {
- if (!HeaderExists()) {
- // Without a header, we don't know if things are consistent between each
- // other so the caller should just regenerate everything from ground truth.
- return absl_ports::InternalError("SchemaStore header doesn't exist");
- }
-
- SchemaStore::Header header;
- if (!filesystem_->Read(MakeHeaderFilename(base_dir_).c_str(), &header,
- sizeof(header))) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Couldn't read: ", MakeHeaderFilename(base_dir_)));
- }
-
- if (header.magic != SchemaStore::Header::kMagic) {
- return absl_ports::InternalError(absl_ports::StrCat(
- "Invalid header kMagic for file: ", MakeHeaderFilename(base_dir_)));
- }
-
ICING_ASSIGN_OR_RETURN(
schema_type_mapper_,
DynamicTrieKeyMapper<SchemaTypeId>::Create(
@@ -232,78 +443,109 @@ libtextclassifier3::Status SchemaStore::InitializeDerivedFiles() {
kSchemaTypeMapperMaxSize));
ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
- if (checksum.Get() != header.checksum) {
+ if (checksum.Get() != header_->checksum()) {
return absl_ports::InternalError(
"Combined checksum of SchemaStore was inconsistent");
}
- // Update our in-memory data structures
- type_config_map_.clear();
- ICING_ASSIGN_OR_RETURN(const SchemaProto* schema_proto, GetSchema());
- for (const SchemaTypeConfigProto& type_config : schema_proto->types()) {
- // Update our type_config_map_
- type_config_map_.emplace(type_config.schema_type(), type_config);
- }
- ICING_ASSIGN_OR_RETURN(
- section_manager_,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
-
+ BuildInMemoryCache();
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status SchemaStore::RegenerateDerivedFiles() {
+libtextclassifier3::Status SchemaStore::RegenerateDerivedFiles(
+ bool create_overlay_if_necessary) {
ICING_ASSIGN_OR_RETURN(const SchemaProto* schema_proto, GetSchema());
ICING_RETURN_IF_ERROR(ResetSchemaTypeMapper());
- type_config_map_.clear();
for (const SchemaTypeConfigProto& type_config : schema_proto->types()) {
- // Update our type_config_map_
- type_config_map_.emplace(type_config.schema_type(), type_config);
-
// Assign a SchemaTypeId to the type
ICING_RETURN_IF_ERROR(schema_type_mapper_->Put(
type_config.schema_type(), schema_type_mapper_->num_keys()));
}
-
- ICING_ASSIGN_OR_RETURN(
- section_manager_,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
+ BuildInMemoryCache();
+
+ if (create_overlay_if_necessary) {
+ ICING_ASSIGN_OR_RETURN(
+ BackupSchemaProducer producer,
+ BackupSchemaProducer::Create(*schema_proto,
+ schema_type_manager_->section_manager()));
+
+ if (producer.is_backup_necessary()) {
+ SchemaProto base_schema = std::move(producer).Produce();
+
+ // The overlay schema should be written to the overlay file location.
+ overlay_schema_file_ = std::make_unique<FileBackedProto<SchemaProto>>(
+ *filesystem_, MakeOverlaySchemaFilename(base_dir_));
+ auto schema_ptr = std::make_unique<SchemaProto>(std::move(*schema_proto));
+ ICING_RETURN_IF_ERROR(overlay_schema_file_->Write(std::move(schema_ptr)));
+
+ // The base schema should be written to the original file
+ auto base_schema_ptr =
+ std::make_unique<SchemaProto>(std::move(base_schema));
+ ICING_RETURN_IF_ERROR(schema_file_->Write(std::move(base_schema_ptr)));
+
+ // LINT.IfChange(min_overlay_version_compatibility)
+ // Although the current version is 2, the schema is compatible with
+ // version 1, so min_overlay_version_compatibility should be 1.
+ int32_t min_overlay_version_compatibility = version_util::kVersionOne;
+ // LINT.ThenChange(//depot/google3/icing/file/version-util.h:kVersion)
+ header_->SetOverlayInfo(
+ /*overlay_created=*/true, min_overlay_version_compatibility);
+ // Rebuild in memory data - references to the old schema will be invalid
+ // now.
+ BuildInMemoryCache();
+ }
+ }
// Write the header
ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
- ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
-
- return libtextclassifier3::Status::OK;
+ header_->set_checksum(checksum.Get());
+ return header_->Write(filesystem_, MakeHeaderFilename(base_dir_));
}
-bool SchemaStore::HeaderExists() {
- if (!filesystem_->FileExists(MakeHeaderFilename(base_dir_).c_str())) {
- return false;
- }
-
- int64_t file_size =
- filesystem_->GetFileSize(MakeHeaderFilename(base_dir_).c_str());
-
- // If it's been truncated to size 0 before, we consider it to be a new file
- return file_size != 0 && file_size != Filesystem::kBadFileSize;
-}
-
-libtextclassifier3::Status SchemaStore::UpdateHeader(const Crc32& checksum) {
- // Write the header
- SchemaStore::Header header;
- header.magic = SchemaStore::Header::kMagic;
- header.checksum = checksum.Get();
+libtextclassifier3::Status SchemaStore::BuildInMemoryCache() {
+ ICING_ASSIGN_OR_RETURN(const SchemaProto* schema_proto, GetSchema());
+ ICING_ASSIGN_OR_RETURN(
+ SchemaUtil::InheritanceMap inheritance_map,
+ SchemaUtil::BuildTransitiveInheritanceGraph(*schema_proto));
- ScopedFd scoped_fd(
- filesystem_->OpenForWrite(MakeHeaderFilename(base_dir_).c_str()));
- // This should overwrite the header.
- if (!scoped_fd.is_valid() ||
- !filesystem_->Write(scoped_fd.get(), &header, sizeof(header)) ||
- !filesystem_->DataSync(scoped_fd.get())) {
- return absl_ports::InternalError(absl_ports::StrCat(
- "Failed to write SchemaStore header: ", MakeHeaderFilename(base_dir_)));
+ reverse_schema_type_mapper_.clear();
+ type_config_map_.clear();
+ schema_subtype_id_map_.clear();
+ for (const SchemaTypeConfigProto& type_config : schema_proto->types()) {
+ std::string_view type_name = type_config.schema_type();
+ ICING_ASSIGN_OR_RETURN(SchemaTypeId type_id,
+ schema_type_mapper_->Get(type_name));
+
+ // Build reverse_schema_type_mapper_
+ reverse_schema_type_mapper_.insert({type_id, std::string(type_name)});
+
+ // Build type_config_map_
+ type_config_map_.insert({std::string(type_name), type_config});
+
+ // Build schema_subtype_id_map_
+ std::unordered_set<SchemaTypeId>& subtype_id_set =
+ schema_subtype_id_map_[type_id];
+ // Find all child types
+ auto child_types_names = inheritance_map.find(type_name);
+ if (child_types_names != inheritance_map.end()) {
+ subtype_id_set.reserve(child_types_names->second.size() + 1);
+ for (const auto& [child_type_name, is_direct_child] :
+ child_types_names->second) {
+ ICING_ASSIGN_OR_RETURN(SchemaTypeId child_type_id,
+ schema_type_mapper_->Get(child_type_name));
+ subtype_id_set.insert(child_type_id);
+ }
+ }
+ // Every type is a subtype of itself.
+ subtype_id_set.insert(type_id);
}
+
+ // Build schema_type_manager_
+ ICING_ASSIGN_OR_RETURN(
+ schema_type_manager_,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
return libtextclassifier3::Status::OK;
}
@@ -330,7 +572,8 @@ libtextclassifier3::Status SchemaStore::ResetSchemaTypeMapper() {
}
libtextclassifier3::StatusOr<Crc32> SchemaStore::ComputeChecksum() const {
- auto schema_proto_or = GetSchema();
+ // Base schema checksum
+ auto schema_proto_or = schema_file_->Read();
if (absl_ports::IsNotFound(schema_proto_or.status())) {
return Crc32();
}
@@ -338,11 +581,23 @@ libtextclassifier3::StatusOr<Crc32> SchemaStore::ComputeChecksum() const {
Crc32 schema_checksum;
schema_checksum.Append(schema_proto->SerializeAsString());
+ Crc32 overlay_schema_checksum;
+ if (overlay_schema_file_ != nullptr) {
+ auto schema_proto_or = schema_file_->Read();
+ if (schema_proto_or.ok()) {
+ ICING_ASSIGN_OR_RETURN(schema_proto, schema_proto_or);
+ overlay_schema_checksum.Append(schema_proto->SerializeAsString());
+ }
+ }
+
ICING_ASSIGN_OR_RETURN(Crc32 schema_type_mapper_checksum,
schema_type_mapper_->ComputeChecksum());
Crc32 total_checksum;
total_checksum.Append(std::to_string(schema_checksum.Get()));
+ if (overlay_schema_file_ != nullptr) {
+ total_checksum.Append(std::to_string(overlay_schema_checksum.Get()));
+ }
total_checksum.Append(std::to_string(schema_type_mapper_checksum.Get()));
return total_checksum;
@@ -350,6 +605,9 @@ libtextclassifier3::StatusOr<Crc32> SchemaStore::ComputeChecksum() const {
libtextclassifier3::StatusOr<const SchemaProto*> SchemaStore::GetSchema()
const {
+ if (overlay_schema_file_ != nullptr) {
+ return overlay_schema_file_->Read();
+ }
return schema_file_->Read();
}
@@ -359,20 +617,19 @@ libtextclassifier3::StatusOr<const SchemaProto*> SchemaStore::GetSchema()
// SetSchema(SchemaProto&& new_schema)
libtextclassifier3::StatusOr<const SchemaStore::SetSchemaResult>
SchemaStore::SetSchema(const SchemaProto& new_schema,
- bool ignore_errors_and_delete_documents) {
- return SetSchema(SchemaProto(new_schema), ignore_errors_and_delete_documents);
+ bool ignore_errors_and_delete_documents,
+ bool allow_circular_schema_definitions) {
+ return SetSchema(SchemaProto(new_schema), ignore_errors_and_delete_documents,
+ allow_circular_schema_definitions);
}
libtextclassifier3::StatusOr<const SchemaStore::SetSchemaResult>
SchemaStore::SetSchema(SchemaProto&& new_schema,
- bool ignore_errors_and_delete_documents) {
- ICING_ASSIGN_OR_RETURN(SchemaUtil::DependencyMap new_dependency_map,
- SchemaUtil::Validate(new_schema));
- // TODO(b/256022027): validate and extract joinable properties.
- // - Joinable config in non-string properties should be ignored, since
- // currently we only support string joining.
- // - If set joinable, the property itself and all of its parent (nested doc)
- // properties should not have REPEATED cardinality.
+ bool ignore_errors_and_delete_documents,
+ bool allow_circular_schema_definitions) {
+ ICING_ASSIGN_OR_RETURN(
+ SchemaUtil::DependentMap new_dependent_map,
+ SchemaUtil::Validate(new_schema, allow_circular_schema_definitions));
SetSchemaResult result;
@@ -401,13 +658,15 @@ SchemaStore::SetSchema(SchemaProto&& new_schema,
// Different schema, track the differences and see if we can still write it
SchemaUtil::SchemaDelta schema_delta =
SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
- new_dependency_map);
+ new_dependent_map);
result.schema_types_new_by_name = std::move(schema_delta.schema_types_new);
result.schema_types_changed_fully_compatible_by_name =
std::move(schema_delta.schema_types_changed_fully_compatible);
result.schema_types_index_incompatible_by_name =
std::move(schema_delta.schema_types_index_incompatible);
+ result.schema_types_join_incompatible_by_name =
+ std::move(schema_delta.schema_types_join_incompatible);
for (const auto& schema_type : schema_delta.schema_types_deleted) {
// We currently don't support deletions, so mark this as not possible.
@@ -495,6 +754,10 @@ libtextclassifier3::Status SchemaStore::ApplySchemaChange(
// Manually set them to the correct paths.
base_dir_ = std::move(old_base_dir);
schema_file_->SetSwappedFilepath(MakeSchemaFilename(base_dir_));
+ if (overlay_schema_file_ != nullptr) {
+ overlay_schema_file_->SetSwappedFilepath(
+ MakeOverlaySchemaFilename(base_dir_));
+ }
return libtextclassifier3::Status::OK;
}
@@ -517,33 +780,57 @@ libtextclassifier3::StatusOr<SchemaTypeId> SchemaStore::GetSchemaTypeId(
return schema_type_mapper_->Get(schema_type);
}
-libtextclassifier3::StatusOr<std::vector<std::string_view>>
-SchemaStore::GetStringSectionContent(const DocumentProto& document,
- std::string_view section_path) const {
+libtextclassifier3::StatusOr<const std::string*> SchemaStore::GetSchemaType(
+ SchemaTypeId schema_type_id) const {
ICING_RETURN_IF_ERROR(CheckSchemaSet());
- return section_manager_->GetSectionContent<std::string_view>(document,
- section_path);
+ if (const auto it = reverse_schema_type_mapper_.find(schema_type_id);
+ it == reverse_schema_type_mapper_.end()) {
+ return absl_ports::InvalidArgumentError("Invalid schema type id");
+ } else {
+ return &it->second;
+ }
}
-libtextclassifier3::StatusOr<std::vector<std::string_view>>
-SchemaStore::GetStringSectionContent(const DocumentProto& document,
- SectionId section_id) const {
- ICING_RETURN_IF_ERROR(CheckSchemaSet());
- return section_manager_->GetSectionContent<std::string_view>(document,
- section_id);
+libtextclassifier3::StatusOr<const std::unordered_set<SchemaTypeId>*>
+SchemaStore::GetSchemaTypeIdsWithChildren(std::string_view schema_type) const {
+ ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
+ GetSchemaTypeId(schema_type));
+ auto iter = schema_subtype_id_map_.find(schema_type_id);
+ if (iter == schema_subtype_id_map_.end()) {
+ // This should never happen, unless there is an inconsistency or IO error.
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Schema type '", schema_type, "' is not found in the subtype map."));
+ }
+ return &iter->second;
}
libtextclassifier3::StatusOr<const SectionMetadata*>
SchemaStore::GetSectionMetadata(SchemaTypeId schema_type_id,
SectionId section_id) const {
ICING_RETURN_IF_ERROR(CheckSchemaSet());
- return section_manager_->GetSectionMetadata(schema_type_id, section_id);
+ return schema_type_manager_->section_manager().GetSectionMetadata(
+ schema_type_id, section_id);
}
libtextclassifier3::StatusOr<SectionGroup> SchemaStore::ExtractSections(
const DocumentProto& document) const {
ICING_RETURN_IF_ERROR(CheckSchemaSet());
- return section_manager_->ExtractSections(document);
+ return schema_type_manager_->section_manager().ExtractSections(document);
+}
+
+libtextclassifier3::StatusOr<const JoinablePropertyMetadata*>
+SchemaStore::GetJoinablePropertyMetadata(
+ SchemaTypeId schema_type_id, const std::string& property_path) const {
+ ICING_RETURN_IF_ERROR(CheckSchemaSet());
+ return schema_type_manager_->joinable_property_manager()
+ .GetJoinablePropertyMetadata(schema_type_id, property_path);
+}
+
+libtextclassifier3::StatusOr<JoinablePropertyGroup>
+SchemaStore::ExtractJoinableProperties(const DocumentProto& document) const {
+ ICING_RETURN_IF_ERROR(CheckSchemaSet());
+ return schema_type_manager_->joinable_property_manager()
+ .ExtractJoinableProperties(document);
}
libtextclassifier3::Status SchemaStore::PersistToDisk() {
@@ -553,9 +840,8 @@ libtextclassifier3::Status SchemaStore::PersistToDisk() {
ICING_RETURN_IF_ERROR(schema_type_mapper_->PersistToDisk());
// Write the header
ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
- ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
-
- return libtextclassifier3::Status::OK;
+ header_->set_checksum(checksum.Get());
+ return header_->Write(filesystem_, MakeHeaderFilename(base_dir_));
}
SchemaStoreStorageInfoProto SchemaStore::GetStorageInfo() const {
@@ -569,7 +855,8 @@ SchemaStoreStorageInfoProto SchemaStore::GetStorageInfo() const {
int num_types_sections_exhausted = 0;
for (const SchemaTypeConfigProto& type : schema->types()) {
auto sections_list_or =
- section_manager_->GetMetadataList(type.schema_type());
+ schema_type_manager_->section_manager().GetMetadataList(
+ type.schema_type());
if (!sections_list_or.ok()) {
continue;
}
@@ -587,7 +874,51 @@ SchemaStoreStorageInfoProto SchemaStore::GetStorageInfo() const {
libtextclassifier3::StatusOr<const std::vector<SectionMetadata>*>
SchemaStore::GetSectionMetadata(const std::string& schema_type) const {
- return section_manager_->GetMetadataList(schema_type);
+ return schema_type_manager_->section_manager().GetMetadataList(schema_type);
+}
+
+bool SchemaStore::IsPropertyDefinedInSchema(
+ SchemaTypeId schema_type_id, const std::string& property_path) const {
+ auto schema_name_itr = reverse_schema_type_mapper_.find(schema_type_id);
+ if (schema_name_itr == reverse_schema_type_mapper_.end()) {
+ return false;
+ }
+ const std::string* current_type_name = &schema_name_itr->second;
+
+ std::vector<std::string_view> property_path_parts =
+ property_util::SplitPropertyPathExpr(property_path);
+ for (int i = 0; i < property_path_parts.size(); ++i) {
+ auto type_config_itr = type_config_map_.find(*current_type_name);
+ if (type_config_itr == type_config_map_.end()) {
+ return false;
+ }
+ std::string_view property_name = property_path_parts.at(i);
+ const PropertyConfigProto* selected_property = nullptr;
+ for (const PropertyConfigProto& property :
+ type_config_itr->second.properties()) {
+ if (property.property_name() == property_name) {
+ selected_property = &property;
+ break;
+ }
+ }
+ if (selected_property == nullptr) {
+ return false;
+ }
+ if (i == property_path_parts.size() - 1) {
+ // We've found a property at the final part of the path.
+ return true;
+ }
+ if (selected_property->data_type() !=
+ PropertyConfigProto::DataType::DOCUMENT) {
+ // If this isn't final part of the path, but this property isn't a
+ // document, so we know that this path doesn't exist.
+ return false;
+ }
+ current_type_name = &selected_property->schema_type();
+ }
+
+ // We should never reach this point.
+ return false;
}
libtextclassifier3::StatusOr<SchemaDebugInfoProto> SchemaStore::GetDebugInfo()
@@ -602,5 +933,55 @@ libtextclassifier3::StatusOr<SchemaDebugInfoProto> SchemaStore::GetDebugInfo()
return debug_info;
}
+std::vector<SchemaStore::ExpandedTypePropertyMask>
+SchemaStore::ExpandTypePropertyMasks(
+ const google::protobuf::RepeatedPtrField<TypePropertyMask>& type_property_masks)
+ const {
+ std::unordered_map<SchemaTypeId, ExpandedTypePropertyMask> result_map;
+ for (const TypePropertyMask& type_field_mask : type_property_masks) {
+ if (type_field_mask.schema_type() == kSchemaTypeWildcard) {
+ ExpandedTypePropertyMask entry{type_field_mask.schema_type(),
+ /*paths=*/{}};
+ entry.paths.insert(type_field_mask.paths().begin(),
+ type_field_mask.paths().end());
+ result_map.insert({kInvalidSchemaTypeId, std::move(entry)});
+ } else {
+ auto schema_type_ids_or =
+ GetSchemaTypeIdsWithChildren(type_field_mask.schema_type());
+ // If we can't find the SchemaTypeIds, just throw it away
+ if (!schema_type_ids_or.ok()) {
+ continue;
+ }
+ const std::unordered_set<SchemaTypeId>* schema_type_ids =
+ schema_type_ids_or.ValueOrDie();
+ for (SchemaTypeId schema_type_id : *schema_type_ids) {
+ auto schema_type_name_iter =
+ reverse_schema_type_mapper_.find(schema_type_id);
+ if (schema_type_name_iter == reverse_schema_type_mapper_.end()) {
+ // This should never happen, unless there is an inconsistency or IO
+ // error.
+ ICING_LOG(ERROR) << "Got unknown schema type id: " << schema_type_id;
+ continue;
+ }
+
+ auto iter = result_map.find(schema_type_id);
+ if (iter == result_map.end()) {
+ ExpandedTypePropertyMask entry{schema_type_name_iter->second,
+ /*paths=*/{}};
+ iter = result_map.insert({schema_type_id, std::move(entry)}).first;
+ }
+ iter->second.paths.insert(type_field_mask.paths().begin(),
+ type_field_mask.paths().end());
+ }
+ }
+ }
+ std::vector<ExpandedTypePropertyMask> result;
+ result.reserve(result_map.size());
+ for (auto& entry : result_map) {
+ result.push_back(std::move(entry.second));
+ }
+ return result;
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/schema/schema-store.h b/icing/schema/schema-store.h
index d5a7c6f..88968b1 100644
--- a/icing/schema/schema-store.h
+++ b/icing/schema/schema-store.h
@@ -16,23 +16,30 @@
#define ICING_SCHEMA_SCHEMA_STORE_H_
#include <cstdint>
+#include <cstring>
+#include <limits>
#include <memory>
#include <string>
#include <string_view>
+#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
#include "icing/file/file-backed-proto.h"
#include "icing/file/filesystem.h"
+#include "icing/file/version-util.h"
#include "icing/proto/debug.pb.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/logging.pb.h"
#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
#include "icing/proto/storage.pb.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/schema/schema-type-manager.h"
#include "icing/schema/schema-util.h"
-#include "icing/schema/section-manager.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/key-mapper.h"
@@ -49,9 +56,7 @@ namespace lib {
// should always call Get* from the SchemaStore.
class SchemaStore {
public:
- struct Header {
- static constexpr int32_t kMagic = 0x72650d0a;
-
+ struct LegacyHeader {
// Holds the magic as a quick sanity check against file corruption.
int32_t magic;
@@ -59,6 +64,69 @@ class SchemaStore {
uint32_t checksum;
};
+ class Header {
+ public:
+ static constexpr int32_t kMagic = 0x72650d0a;
+
+ explicit Header()
+ : magic_(kMagic),
+ checksum_(0),
+ overlay_created_(false),
+ min_overlay_version_compatibility_(
+ std::numeric_limits<int32_t>::max()) {
+ memset(overlay_created_padding_, 0, kOverlayCreatedPaddingSize);
+ memset(padding_, 0, kPaddingSize);
+ }
+
+ // RETURNS:
+ // - On success, a valid Header instance
+ // - NOT_FOUND if header file doesn't exist
+ // - INTERNAL if unable to read header
+ static libtextclassifier3::StatusOr<Header> Read(
+ const Filesystem* filesystem, const std::string& path);
+
+ libtextclassifier3::Status Write(const Filesystem* filesystem,
+ const std::string& path);
+
+ int32_t magic() const { return magic_; }
+
+ uint32_t checksum() const { return checksum_; }
+ void set_checksum(uint32_t checksum) { checksum_ = checksum; }
+
+ bool overlay_created() const { return overlay_created_; }
+
+ int32_t min_overlay_version_compatibility() const {
+ return min_overlay_version_compatibility_;
+ }
+
+ void SetOverlayInfo(bool overlay_created,
+ int32_t min_overlay_version_compatibility) {
+ overlay_created_ = overlay_created;
+ min_overlay_version_compatibility_ = min_overlay_version_compatibility;
+ }
+
+ private:
+ // Holds the magic as a quick sanity check against file corruption.
+ int32_t magic_;
+
+ // Checksum of the SchemaStore's sub-component's checksums.
+ uint32_t checksum_;
+
+ bool overlay_created_;
+ // Three bytes of padding due to the fact that
+ // min_overlay_version_compatibility_ has an alignof() == 4 and the offset
+ // of overlay_created_padding_ == 9.
+ static constexpr int kOverlayCreatedPaddingSize = 3;
+ uint8_t overlay_created_padding_[kOverlayCreatedPaddingSize];
+
+ int32_t min_overlay_version_compatibility_;
+
+ static constexpr int kPaddingSize = 1008;
+ // Padding exists just to reserve space for additional values.
+ uint8_t padding_[kPaddingSize];
+ };
+ static_assert(sizeof(Header) == 1024);
+
// Holds information on what may have been affected by the new schema. This is
// generally data that other classes may depend on from the SchemaStore,
// so that we can know if we should go update those classes as well.
@@ -113,8 +181,20 @@ class SchemaStore {
// but invalidated the index. Represented by the `schema_type` field in the
// SchemaTypeConfigProto.
std::unordered_set<std::string> schema_types_index_incompatible_by_name;
+
+ // Schema types that were changed in a way that was backwards compatible,
+ // but invalidated the joinable cache. Represented by the `schema_type`
+ // field in the SchemaTypeConfigProto.
+ std::unordered_set<std::string> schema_types_join_incompatible_by_name;
+ };
+
+ struct ExpandedTypePropertyMask {
+ std::string schema_type;
+ std::unordered_set<std::string> paths;
};
+ static constexpr std::string_view kSchemaTypeWildcard = "*";
+
// Factory function to create a SchemaStore which does not take ownership
// of any input components, and all pointers must refer to valid objects that
// outlive the created SchemaStore instance. The base_dir must already exist.
@@ -131,6 +211,23 @@ class SchemaStore {
const Filesystem* filesystem, const std::string& base_dir,
const Clock* clock, InitializeStatsProto* initialize_stats = nullptr);
+ // Migrates schema files (backup v.s. new schema) according to version state
+ // change.
+ //
+ // Returns:
+ // OK on success or nothing to migrate
+ static libtextclassifier3::Status MigrateSchema(
+ const Filesystem* filesystem, const std::string& base_dir,
+ version_util::StateChange version_state_change, int32_t new_version);
+
+ // Discards all derived data in the schema store.
+ //
+ // Returns:
+ // OK on success or nothing to discard
+ // INTERNAL_ERROR on any I/O errors
+ static libtextclassifier3::Status DiscardDerivedFiles(
+ const Filesystem* filesystem, const std::string& base_dir);
+
SchemaStore(SchemaStore&&) = default;
SchemaStore& operator=(SchemaStore&&) = default;
@@ -162,10 +259,12 @@ class SchemaStore {
// INTERNAL_ERROR on any IO errors
libtextclassifier3::StatusOr<const SetSchemaResult> SetSchema(
const SchemaProto& new_schema,
- bool ignore_errors_and_delete_documents = false);
+ bool ignore_errors_and_delete_documents,
+ bool allow_circular_schema_definitions);
libtextclassifier3::StatusOr<const SetSchemaResult> SetSchema(
SchemaProto&& new_schema,
- bool ignore_errors_and_delete_documents = false);
+ bool ignore_errors_and_delete_documents,
+ bool allow_circular_schema_definitions);
// Get the SchemaTypeConfigProto of schema_type name.
//
@@ -177,6 +276,15 @@ class SchemaStore {
libtextclassifier3::StatusOr<const SchemaTypeConfigProto*>
GetSchemaTypeConfig(std::string_view schema_type) const;
+ // Returns the schema type of the passed in SchemaTypeId
+ //
+ // Returns:
+ // schema type on success
+ // FAILED_PRECONDITION if schema hasn't been set yet
+ // INVALID_ARGUMENT if schema type id is invalid
+ libtextclassifier3::StatusOr<const std::string*> GetSchemaType(
+ SchemaTypeId schema_type_id) const;
+
// Returns the SchemaTypeId of the passed in schema type
//
// Returns:
@@ -187,44 +295,36 @@ class SchemaStore {
libtextclassifier3::StatusOr<SchemaTypeId> GetSchemaTypeId(
std::string_view schema_type) const;
- // Finds content of a section by section path (e.g. property1.property2)
+ // Similar to GetSchemaTypeId but will return a set of SchemaTypeId to also
+ // include child types.
//
// Returns:
- // A string of content on success
+ // A set of SchemaTypeId on success
// FAILED_PRECONDITION if schema hasn't been set yet
- // NOT_FOUND if:
- // 1. Property is optional and not found in the document
- // 2. section_path is invalid
- // 3. Content is empty
- libtextclassifier3::StatusOr<std::vector<std::string_view>>
- GetStringSectionContent(const DocumentProto& document,
- std::string_view section_path) const;
-
- // Finds content of a section by id
- //
- // Returns:
- // A string of content on success
- // FAILED_PRECONDITION if schema hasn't been set yet
- // INVALID_ARGUMENT if section id is invalid
- // NOT_FOUND if type config name of document not found
- libtextclassifier3::StatusOr<std::vector<std::string_view>>
- GetStringSectionContent(const DocumentProto& document,
- SectionId section_id) const;
+ // NOT_FOUND_ERROR if we don't know about the schema type
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<const std::unordered_set<SchemaTypeId>*>
+ GetSchemaTypeIdsWithChildren(std::string_view schema_type) const;
// Returns the SectionMetadata associated with the SectionId that's in the
// SchemaTypeId.
//
// Returns:
- // pointer to SectionMetadata on success
+ // Valid pointer to SectionMetadata on success
// FAILED_PRECONDITION if schema hasn't been set yet
- // INVALID_ARGUMENT if schema type id or section is invalid
+ // INVALID_ARGUMENT if schema type id or section id is invalid
libtextclassifier3::StatusOr<const SectionMetadata*> GetSectionMetadata(
SchemaTypeId schema_type_id, SectionId section_id) const;
+ // Returns true if a property is defined in the said schema, regardless of
+ // whether it is indexed or not.
+ bool IsPropertyDefinedInSchema(SchemaTypeId schema_type_id,
+ const std::string& property) const;
+
// Extracts all sections of different types from the given document and group
// them by type.
// - Each Section vector is sorted by section Id in ascending order. The
- // sorted section Ids may not be continuous, since not all section Ids are
+ // sorted section ids may not be continuous, since not all sections are
// present in the document.
// - Sections with empty content won't be returned.
// - For example, we may extract:
@@ -238,6 +338,34 @@ class SchemaStore {
libtextclassifier3::StatusOr<SectionGroup> ExtractSections(
const DocumentProto& document) const;
+ // Returns the JoinablePropertyMetadata associated with property_path that's
+ // in the SchemaTypeId.
+ //
+ // Returns:
+ // Valid pointer to JoinablePropertyMetadata on success
+ // nullptr if property_path doesn't exist (or is not joinable) in the
+ // joinable metadata list of the schema
+ // FAILED_PRECONDITION if schema hasn't been set yet
+ // INVALID_ARGUMENT if schema type id is invalid
+ libtextclassifier3::StatusOr<const JoinablePropertyMetadata*>
+ GetJoinablePropertyMetadata(SchemaTypeId schema_type_id,
+ const std::string& property_path) const;
+
+ // Extracts all joinable property contents of different types from the given
+ // document and group them by joinable value type.
+ // - Joinable properties are sorted by joinable property id in ascending
+ // order. The sorted joinable property ids may not be continuous, since not
+ // all joinable properties are present in the document.
+ // - Joinable property ids start from 0.
+ // - Joinable properties with empty content won't be returned.
+ //
+ // Returns:
+ // A JoinablePropertyGroup instance on success
+ // FAILED_PRECONDITION if schema hasn't been set yet
+ // NOT_FOUND if the type config name of document not found
+ libtextclassifier3::StatusOr<JoinablePropertyGroup> ExtractJoinableProperties(
+ const DocumentProto& document) const;
+
// Syncs all the data changes to disk.
//
// Returns:
@@ -272,6 +400,23 @@ class SchemaStore {
// INTERNAL_ERROR on IO errors, crc compute error
libtextclassifier3::StatusOr<SchemaDebugInfoProto> GetDebugInfo() const;
+ // Expands the provided type_property_masks into a vector of
+ // ExpandedTypePropertyMasks to account for polymorphism. If both a parent
+ // type and one of its child type appears in the masks, the parent type's
+ // paths will be merged into the child's.
+ //
+ // For example, assume that we have two schema types A and B, and we have
+ // - A is the parent type of B
+ // - Paths of A: {P1, P2}
+ // - Paths of B: {P3}
+ //
+ // Then, we will have the following in the result.
+ // - Expanded paths of A: {P1, P2}
+ // - Expanded paths of B: {P1, P2, P3}
+ std::vector<ExpandedTypePropertyMask> ExpandTypePropertyMasks(
+ const google::protobuf::RepeatedPtrField<TypePropertyMask>& type_property_masks)
+ const;
+
private:
// Factory function to create a SchemaStore and set its schema. The created
// instance does not take ownership of any input components and all pointers
@@ -292,6 +437,15 @@ class SchemaStore {
explicit SchemaStore(const Filesystem* filesystem, std::string base_dir,
const Clock* clock);
+ // Deletes the overlay schema and ensures that the Header is correctly set.
+ //
+ // RETURNS:
+ // OK on success
+ // INTERNAL_ERROR on any IO errors
+ static libtextclassifier3::Status DiscardOverlaySchema(
+ const Filesystem* filesystem, const std::string& base_dir,
+ Header& header);
+
// Verifies that there is no error retrieving a previously set schema. Then
// initializes like normal.
//
@@ -315,7 +469,7 @@ class SchemaStore {
// OK on success
// INTERNAL_ERROR on IO error
libtextclassifier3::Status InitializeInternal(
- InitializeStatsProto* initialize_stats);
+ bool create_overlay_if_necessary, InitializeStatsProto* initialize_stats);
// Creates sub-components and verifies the integrity of each sub-component.
//
@@ -330,11 +484,16 @@ class SchemaStore {
// OK on success
// NOT_FOUND_ERROR if a schema proto has not been set
// INTERNAL_ERROR on any IO errors
- libtextclassifier3::Status RegenerateDerivedFiles();
+ libtextclassifier3::Status RegenerateDerivedFiles(
+ bool create_overlay_if_necessary);
- // Checks if the header exists already. This does not create the header file
- // if it doesn't exist.
- bool HeaderExists();
+ // Build type_config_map_, schema_subtype_id_map_, and schema_type_manager_.
+ //
+ // Returns:
+ // OK on success
+ // NOT_FOUND_ERROR if a schema proto has not been set
+ // INTERNAL_ERROR on any IO errors
+ libtextclassifier3::Status BuildInMemoryCache();
// Update and replace the header file. Creates the header file if it doesn't
// exist.
@@ -367,6 +526,15 @@ class SchemaStore {
: absl_ports::FailedPreconditionError("Schema not set yet.");
}
+ // Correctly loads the Header, schema_file_ and (if present) the
+ // overlay_schema_file_.
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL if an IO error is encountered when reading the Header or
+ // schemas.
+ // Or an invalid schema configuration is present.
+ libtextclassifier3::Status LoadSchema();
+
const Filesystem* filesystem_;
std::string base_dir_;
const Clock* clock_;
@@ -379,16 +547,37 @@ class SchemaStore {
// Cached schema
std::unique_ptr<FileBackedProto<SchemaProto>> schema_file_;
+ // This schema holds the definition of any schema types that are not
+ // compatible with older versions of Icing code.
+ std::unique_ptr<FileBackedProto<SchemaProto>> overlay_schema_file_;
+
+ // Maps schema types to a densely-assigned unique id.
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper_;
+
+ // Maps schema type ids to the corresponding schema type. This is an inverse
+ // map of schema_type_mapper_.
+ std::unordered_map<SchemaTypeId, std::string> reverse_schema_type_mapper_;
+
// A hash map of (type config name -> type config), allows faster lookup of
// type config in schema. The O(1) type config access makes schema-related and
// section-related operations faster.
SchemaUtil::TypeConfigMap type_config_map_;
- // Maps schema types to a densely-assigned unique id.
- std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper_;
-
- // Manager of indexed section related metadata.
- std::unique_ptr<const SectionManager> section_manager_;
+ // Maps from each type id to all of its subtype ids.
+ // T2 is a subtype of T1, if and only if one of the following conditions is
+ // met:
+ // - T2 is T1
+ // - T2 extends T1
+ // - There exists a type U, such that T2 is a subtype of U, and U is a subtype
+ // of T1
+ std::unordered_map<SchemaTypeId, std::unordered_set<SchemaTypeId>>
+ schema_subtype_id_map_;
+
+ // Manager of section (indexable property) and joinable property related
+ // metadata for all Schemas.
+ std::unique_ptr<const SchemaTypeManager> schema_type_manager_;
+
+ std::unique_ptr<Header> header_;
};
} // namespace lib
diff --git a/icing/schema/schema-store_test.cc b/icing/schema/schema-store_test.cc
index da04931..8cc7008 100644
--- a/icing/schema/schema-store_test.cc
+++ b/icing/schema/schema-store_test.cc
@@ -25,9 +25,11 @@
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/file/mock-filesystem.h"
+#include "icing/file/version-util.h"
#include "icing/portable/equals-proto.h"
#include "icing/proto/debug.pb.h"
#include "icing/proto/document.pb.h"
+#include "icing/proto/logging.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
@@ -56,6 +58,7 @@ using ::testing::Not;
using ::testing::Pointee;
using ::testing::Return;
using ::testing::SizeIs;
+using ::testing::UnorderedElementsAre;
constexpr int64_t kDefaultTimestamp = 12345678;
@@ -114,7 +117,7 @@ TEST_F(SchemaStoreTest, SchemaStoreMoveConstructible) {
// Create an instance of SchemaStore.
SchemaProto schema =
SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("TypeA").AddProperty(
+ .AddType(SchemaTypeConfigBuilder().SetType("type_a").AddProperty(
PropertyConfigBuilder()
.SetName("prop1")
.SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
@@ -125,7 +128,9 @@ TEST_F(SchemaStoreTest, SchemaStoreMoveConstructible) {
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(Crc32 expected_checksum,
schema_store->ComputeChecksum());
@@ -138,7 +143,7 @@ TEST_F(SchemaStoreTest, SchemaStoreMoveConstructible) {
SectionMetadata expected_metadata(/*id_in=*/0, TYPE_STRING, TOKENIZER_PLAIN,
TERM_MATCH_EXACT, NUMERIC_MATCH_UNKNOWN,
"prop1");
- EXPECT_THAT(move_constructed_schema_store.GetSectionMetadata("TypeA"),
+ EXPECT_THAT(move_constructed_schema_store.GetSectionMetadata("type_a"),
IsOkAndHolds(Pointee(ElementsAre(expected_metadata))));
}
@@ -146,7 +151,7 @@ TEST_F(SchemaStoreTest, SchemaStoreMoveAssignment) {
// Create an instance of SchemaStore.
SchemaProto schema1 =
SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("TypeA").AddProperty(
+ .AddType(SchemaTypeConfigBuilder().SetType("type_a").AddProperty(
PropertyConfigBuilder()
.SetName("prop1")
.SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
@@ -157,14 +162,16 @@ TEST_F(SchemaStoreTest, SchemaStoreMoveAssignment) {
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
- ICING_ASSERT_OK(schema_store->SetSchema(schema1));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema1, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(Crc32 expected_checksum,
schema_store->ComputeChecksum());
// Construct another instance of SchemaStore
SchemaProto schema2 =
SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("TypeB").AddProperty(
+ .AddType(SchemaTypeConfigBuilder().SetType("type_b").AddProperty(
PropertyConfigBuilder()
.SetName("prop2")
.SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
@@ -174,7 +181,9 @@ TEST_F(SchemaStoreTest, SchemaStoreMoveAssignment) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> move_assigned_schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
- ICING_ASSERT_OK(schema_store->SetSchema(schema2));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema2, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Move assign the first instance into the second one.
*move_assigned_schema_store = std::move(*schema_store);
@@ -185,7 +194,7 @@ TEST_F(SchemaStoreTest, SchemaStoreMoveAssignment) {
SectionMetadata expected_metadata(/*id_in=*/0, TYPE_STRING, TOKENIZER_PLAIN,
TERM_MATCH_EXACT, NUMERIC_MATCH_UNKNOWN,
"prop1");
- EXPECT_THAT(move_assigned_schema_store->GetSectionMetadata("TypeA"),
+ EXPECT_THAT(move_assigned_schema_store->GetSectionMetadata("type_a"),
IsOkAndHolds(Pointee(ElementsAre(expected_metadata))));
}
@@ -199,7 +208,9 @@ TEST_F(SchemaStoreTest, CorruptSchemaError) {
SchemaStore::SetSchemaResult result;
result.success = true;
result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -237,7 +248,9 @@ TEST_F(SchemaStoreTest, RecoverCorruptDerivedFileOk) {
SchemaStore::SetSchemaResult result;
result.success = true;
result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -254,9 +267,56 @@ TEST_F(SchemaStoreTest, RecoverCorruptDerivedFileOk) {
absl_ports::StrCat(schema_store_dir_, "/schema_type_mapper");
filesystem_.DeleteDirectoryRecursively(schema_type_mapper_dir.c_str());
+ InitializeStatsProto initialize_stats;
+ fake_clock_.SetTimerElapsedMilliseconds(123);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_,
+ &initialize_stats));
+ EXPECT_THAT(initialize_stats.schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_stats.schema_store_recovery_latency_ms(), Eq(123));
+
+ // Everything looks fine, ground truth and derived data
+ ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+ schema_store->GetSchema());
+ EXPECT_THAT(*actual_schema, EqualsProto(schema_));
+ EXPECT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0));
+}
+
+TEST_F(SchemaStoreTest, RecoverDiscardDerivedFilesOk) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ // Set it for the first time
+ SchemaStore::SetSchemaResult result;
+ result.success = true;
+ result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOkAndHolds(EqualsSetSchemaResult(result)));
+ ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+ schema_store->GetSchema());
+ EXPECT_THAT(*actual_schema, EqualsProto(schema_));
+
+ EXPECT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0));
+ }
+
+ ICING_ASSERT_OK(
+ SchemaStore::DiscardDerivedFiles(&filesystem_, schema_store_dir_));
+
+ InitializeStatsProto initialize_stats;
+ fake_clock_.SetTimerElapsedMilliseconds(123);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_,
+ &initialize_stats));
+ EXPECT_THAT(initialize_stats.schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_stats.schema_store_recovery_latency_ms(), Eq(123));
// Everything looks fine, ground truth and derived data
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
@@ -275,7 +335,9 @@ TEST_F(SchemaStoreTest, RecoverBadChecksumOk) {
SchemaStore::SetSchemaResult result;
result.success = true;
result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -289,7 +351,7 @@ TEST_F(SchemaStoreTest, RecoverBadChecksumOk) {
// of derived files from ground truth.
const std::string header_file =
absl_ports::StrCat(schema_store_dir_, "/schema_store_header");
- SchemaStore::Header header;
+ SchemaStore::LegacyHeader header;
header.magic = SchemaStore::Header::kMagic;
header.checksum = 10; // Arbitrary garbage checksum
filesystem_.DeleteFile(header_file.c_str());
@@ -320,6 +382,9 @@ TEST_F(SchemaStoreTest, CreateNoPreviousSchemaOk) {
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(store->GetSectionMetadata(/*schema_type_id=*/0, /*section_id=*/0),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(store->GetJoinablePropertyMetadata(/*schema_type_id=*/0,
+ /*property_path=*/"A"),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
// The apis to extract content from a document should fail gracefully.
DocumentProto doc;
@@ -327,12 +392,10 @@ TEST_F(SchemaStoreTest, CreateNoPreviousSchemaOk) {
prop->set_name("name");
prop->add_string_values("foo bar baz");
- EXPECT_THAT(store->GetStringSectionContent(doc, /*section_id=*/0),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(store->GetStringSectionContent(doc, "name"),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(store->ExtractSections(doc),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(store->ExtractJoinableProperties(doc),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
// The apis to persist and checksum data should succeed.
EXPECT_THAT(store->ComputeChecksum(), IsOkAndHolds(Crc32()));
@@ -347,7 +410,9 @@ TEST_F(SchemaStoreTest, CreateWithPreviousSchemaOk) {
SchemaStore::SetSchemaResult result;
result.success = true;
result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
schema_store.reset();
@@ -373,7 +438,9 @@ TEST_F(SchemaStoreTest, MultipleCreateOk) {
SchemaStore::SetSchemaResult result;
result.success = true;
result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
// Verify that our in-memory structures are ok
@@ -418,7 +485,9 @@ TEST_F(SchemaStoreTest, SetNewSchemaOk) {
SchemaStore::SetSchemaResult result;
result.success = true;
result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -434,7 +503,9 @@ TEST_F(SchemaStoreTest, SetSameSchemaOk) {
SchemaStore::SetSchemaResult result;
result.success = true;
result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -443,7 +514,9 @@ TEST_F(SchemaStoreTest, SetSameSchemaOk) {
// And one more for fun
result = SchemaStore::SetSchemaResult();
result.success = true;
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema_));
@@ -458,7 +531,9 @@ TEST_F(SchemaStoreTest, SetIncompatibleSchemaOk) {
SchemaStore::SetSchemaResult result;
result.success = true;
result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -472,7 +547,9 @@ TEST_F(SchemaStoreTest, SetIncompatibleSchemaOk) {
result.success = false;
result.schema_types_deleted_by_name.emplace("email");
result.schema_types_deleted_by_id.emplace(0);
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
}
@@ -489,7 +566,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithAddedTypeOk) {
SchemaStore::SetSchemaResult result;
result.success = true;
result.schema_types_new_by_name.insert("email");
- EXPECT_THAT(schema_store->SetSchema(schema),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -504,7 +583,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithAddedTypeOk) {
result = SchemaStore::SetSchemaResult();
result.success = true;
result.schema_types_new_by_name.insert("new_type");
- EXPECT_THAT(schema_store->SetSchema(schema),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema));
@@ -526,7 +607,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithDeletedTypeOk) {
result.success = true;
result.schema_types_new_by_name.insert("email");
result.schema_types_new_by_name.insert("message");
- EXPECT_THAT(schema_store->SetSchema(schema),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -551,7 +634,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithDeletedTypeOk) {
old_email_schema_type_id);
// Can't set the incompatible schema
- EXPECT_THAT(schema_store->SetSchema(schema),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(incompatible_result)));
SchemaStore::SetSchemaResult force_result;
@@ -562,7 +647,8 @@ TEST_F(SchemaStoreTest, SetSchemaWithDeletedTypeOk) {
// Force set the incompatible schema
EXPECT_THAT(schema_store->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true),
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(force_result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema));
@@ -584,7 +670,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithReorderedTypesOk) {
result.success = true;
result.schema_types_new_by_name.insert("email");
result.schema_types_new_by_name.insert("message");
- EXPECT_THAT(schema_store->SetSchema(schema),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -605,7 +693,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithReorderedTypesOk) {
1); // Old SchemaTypeId of "message"
// Set the compatible schema
- EXPECT_THAT(schema_store->SetSchema(schema),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema));
@@ -630,7 +720,9 @@ TEST_F(SchemaStoreTest, IndexedPropertyChangeRequiresReindexingOk) {
SchemaStore::SetSchemaResult result;
result.success = true;
result.schema_types_new_by_name.insert("email");
- EXPECT_THAT(schema_store->SetSchema(schema),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -649,7 +741,9 @@ TEST_F(SchemaStoreTest, IndexedPropertyChangeRequiresReindexingOk) {
result = SchemaStore::SetSchemaResult();
result.success = true;
result.schema_types_index_incompatible_by_name.insert("email");
- EXPECT_THAT(schema_store->SetSchema(schema),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema));
@@ -697,8 +791,11 @@ TEST_F(SchemaStoreTest, IndexNestedDocumentsChangeRequiresReindexingOk) {
result.success = true;
result.schema_types_new_by_name.insert("email");
result.schema_types_new_by_name.insert("person");
- EXPECT_THAT(schema_store->SetSchema(no_nested_index_schema),
- IsOkAndHolds(EqualsSetSchemaResult(result)));
+ EXPECT_THAT(
+ schema_store->SetSchema(no_nested_index_schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(no_nested_index_schema));
@@ -708,8 +805,11 @@ TEST_F(SchemaStoreTest, IndexNestedDocumentsChangeRequiresReindexingOk) {
result = SchemaStore::SetSchemaResult();
result.success = true;
result.schema_types_index_incompatible_by_name.insert("person");
- EXPECT_THAT(schema_store->SetSchema(nested_index_schema),
- IsOkAndHolds(EqualsSetSchemaResult(result)));
+ EXPECT_THAT(
+ schema_store->SetSchema(nested_index_schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(nested_index_schema));
@@ -718,8 +818,11 @@ TEST_F(SchemaStoreTest, IndexNestedDocumentsChangeRequiresReindexingOk) {
result = SchemaStore::SetSchemaResult();
result.success = true;
result.schema_types_index_incompatible_by_name.insert("person");
- EXPECT_THAT(schema_store->SetSchema(no_nested_index_schema),
- IsOkAndHolds(EqualsSetSchemaResult(result)));
+ EXPECT_THAT(
+ schema_store->SetSchema(no_nested_index_schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(no_nested_index_schema));
}
@@ -743,7 +846,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) {
SchemaStore::SetSchemaResult result;
result.success = true;
result.schema_types_new_by_name.insert("email");
- EXPECT_THAT(schema_store->SetSchema(schema),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -769,7 +874,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) {
old_email_schema_type_id);
// Can't set the incompatible schema
- EXPECT_THAT(schema_store->SetSchema(schema),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(incompatible_result)));
SchemaStore::SetSchemaResult force_result;
@@ -780,7 +887,8 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) {
// Force set the incompatible schema
EXPECT_THAT(schema_store->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true),
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(force_result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema));
@@ -802,7 +910,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleNestedTypesOk) {
.SetCardinality(CARDINALITY_REPEATED));
SchemaProto old_schema =
SchemaBuilder().AddType(contact_point_repeated_label).Build();
- ICING_EXPECT_OK(schema_store->SetSchema(old_schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ old_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId old_contact_point_type_id,
schema_store->GetSchemaTypeId("ContactPoint"));
@@ -838,7 +948,8 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleNestedTypesOk) {
expected_result.schema_types_new_by_name.insert("Person");
EXPECT_THAT(
schema_store->SetSchema(new_schema,
- /*ignore_errors_and_delete_documents=*/false),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(expected_result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -849,7 +960,8 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleNestedTypesOk) {
expected_result.success = true;
EXPECT_THAT(
schema_store->SetSchema(new_schema,
- /*ignore_errors_and_delete_documents=*/true),
+ /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(expected_result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(new_schema));
@@ -872,7 +984,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithIndexIncompatibleNestedTypesOk) {
.SetCardinality(CARDINALITY_REPEATED));
SchemaProto old_schema =
SchemaBuilder().AddType(contact_point_prefix_label).Build();
- ICING_EXPECT_OK(schema_store->SetSchema(old_schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ old_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// 2. Create a type that references the ContactPoint type and make a index
// backwards incompatible change to ContactPoint
@@ -904,7 +1018,8 @@ TEST_F(SchemaStoreTest, SetSchemaWithIndexIncompatibleNestedTypesOk) {
expected_result.schema_types_new_by_name.insert("Person");
EXPECT_THAT(
schema_store->SetSchema(new_schema,
- /*ignore_errors_and_delete_documents=*/false),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(expected_result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -927,7 +1042,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithCompatibleNestedTypesOk) {
.SetCardinality(CARDINALITY_OPTIONAL));
SchemaProto old_schema =
SchemaBuilder().AddType(contact_point_optional_label).Build();
- ICING_EXPECT_OK(schema_store->SetSchema(old_schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ old_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// 2. Create a type that references the ContactPoint type and make a backwards
// compatible change to ContactPoint
@@ -959,7 +1076,139 @@ TEST_F(SchemaStoreTest, SetSchemaWithCompatibleNestedTypesOk) {
"ContactPoint");
expected_result.schema_types_new_by_name.insert("Person");
EXPECT_THAT(schema_store->SetSchema(
- new_schema, /*ignore_errors_and_delete_documents=*/false),
+ new_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOkAndHolds(EqualsSetSchemaResult(expected_result)));
+ ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+ schema_store->GetSchema());
+ EXPECT_THAT(*actual_schema, EqualsProto(new_schema));
+}
+
+TEST_F(SchemaStoreTest, SetSchemaWithAddedIndexableNestedTypeOk) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ // 1. Create a ContactPoint type with a optional property, and a type that
+ // references the ContactPoint type.
+ SchemaTypeConfigBuilder contact_point =
+ SchemaTypeConfigBuilder()
+ .SetType("ContactPoint")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("label")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED));
+ SchemaTypeConfigBuilder person =
+ SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("contactPoints")
+ .SetDataTypeDocument("ContactPoint",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED));
+ SchemaProto old_schema =
+ SchemaBuilder().AddType(contact_point).AddType(person).Build();
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ old_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // 2. Add another nested document property to "Person" that has type
+ // "ContactPoint"
+ SchemaTypeConfigBuilder new_person =
+ SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("contactPoints")
+ .SetDataTypeDocument("ContactPoint",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("anotherContactPoint")
+ .SetDataTypeDocument("ContactPoint",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED));
+ SchemaProto new_schema =
+ SchemaBuilder().AddType(contact_point).AddType(new_person).Build();
+
+ // 3. Set to new schema. "Person" should be index-incompatible since we need
+ // to index an additional property: 'anotherContactPoint.label'.
+ // - "Person" is also considered join-incompatible since the added nested
+ // document property could also contain a joinable property.
+ SchemaStore::SetSchemaResult expected_result;
+ expected_result.success = true;
+ expected_result.schema_types_index_incompatible_by_name.insert("Person");
+ expected_result.schema_types_join_incompatible_by_name.insert("Person");
+
+ EXPECT_THAT(schema_store->SetSchema(
+ new_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOkAndHolds(EqualsSetSchemaResult(expected_result)));
+ ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+ schema_store->GetSchema());
+ EXPECT_THAT(*actual_schema, EqualsProto(new_schema));
+}
+
+TEST_F(SchemaStoreTest, SetSchemaWithAddedJoinableNestedTypeOk) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ // 1. Create a ContactPoint type with a optional property, and a type that
+ // references the ContactPoint type.
+ SchemaTypeConfigBuilder contact_point =
+ SchemaTypeConfigBuilder()
+ .SetType("ContactPoint")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("label")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED));
+ SchemaTypeConfigBuilder person =
+ SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("contactPoints")
+ .SetDataTypeDocument("ContactPoint",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL));
+ SchemaProto old_schema =
+ SchemaBuilder().AddType(contact_point).AddType(person).Build();
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ old_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // 2. Add another nested document property to "Person" that has type
+ // "ContactPoint", but make it non-indexable
+ SchemaTypeConfigBuilder new_person =
+ SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("contactPoints")
+ .SetDataTypeDocument("ContactPoint",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("anotherContactPoint")
+ .SetDataTypeDocument("ContactPoint",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL));
+ SchemaProto new_schema =
+ SchemaBuilder().AddType(contact_point).AddType(new_person).Build();
+
+ // 3. Set to new schema. "Person" should be join-incompatible but
+ // index-compatible.
+ SchemaStore::SetSchemaResult expected_result;
+ expected_result.success = true;
+ expected_result.schema_types_join_incompatible_by_name.insert("Person");
+
+ EXPECT_THAT(schema_store->SetSchema(
+ new_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(expected_result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -987,7 +1236,9 @@ TEST_F(SchemaStoreTest, GetSchemaTypeId) {
result.success = true;
result.schema_types_new_by_name.insert(first_type);
result.schema_types_new_by_name.insert(second_type);
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
EXPECT_THAT(schema_store->GetSchemaTypeId(first_type), IsOkAndHolds(0));
@@ -1011,7 +1262,9 @@ TEST_F(SchemaStoreTest, ComputeChecksumSameBetweenCalls) {
SchemaProto foo_schema =
SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build();
- ICING_EXPECT_OK(schema_store->SetSchema(foo_schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ foo_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, schema_store->ComputeChecksum());
@@ -1027,7 +1280,9 @@ TEST_F(SchemaStoreTest, ComputeChecksumSameAcrossInstances) {
SchemaProto foo_schema =
SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build();
- ICING_EXPECT_OK(schema_store->SetSchema(foo_schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ foo_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, schema_store->ComputeChecksum());
@@ -1048,7 +1303,9 @@ TEST_F(SchemaStoreTest, ComputeChecksumChangesOnModification) {
SchemaProto foo_schema =
SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build();
- ICING_EXPECT_OK(schema_store->SetSchema(foo_schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ foo_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, schema_store->ComputeChecksum());
@@ -1059,7 +1316,9 @@ TEST_F(SchemaStoreTest, ComputeChecksumChangesOnModification) {
.AddType(SchemaTypeConfigBuilder().SetType("bar"))
.Build();
- ICING_EXPECT_OK(schema_store->SetSchema(foo_bar_schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ foo_bar_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
EXPECT_THAT(schema_store->ComputeChecksum(), IsOkAndHolds(Not(Eq(checksum))));
}
@@ -1081,7 +1340,9 @@ TEST_F(SchemaStoreTest, PersistToDiskPreservesAcrossInstances) {
SchemaProto schema =
SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build();
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Persisting shouldn't change anything
ICING_EXPECT_OK(schema_store->PersistToDisk());
@@ -1094,7 +1355,9 @@ TEST_F(SchemaStoreTest, PersistToDiskPreservesAcrossInstances) {
schema = SchemaBuilder(schema)
.AddType(SchemaTypeConfigBuilder().SetType("bar"))
.Build();
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Should also persist on destruction
schema_store.reset();
@@ -1137,7 +1400,9 @@ TEST_F(SchemaStoreTest, SchemaStoreStorageInfoProto) {
result.success = true;
result.schema_types_new_by_name.insert("email");
result.schema_types_new_by_name.insert("fullSectionsType");
- EXPECT_THAT(schema_store->SetSchema(schema),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
SchemaStoreStorageInfoProto storage_info = schema_store->GetStorageInfo();
@@ -1154,7 +1419,9 @@ TEST_F(SchemaStoreTest, GetDebugInfo) {
// Set schema
ASSERT_THAT(
- schema_store->SetSchema(schema_),
+ schema_store->SetSchema(schema_,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(SchemaStore::SetSchemaResult{
.success = true,
.schema_types_new_by_name = {schema_.types(0).schema_type()}})));
@@ -1190,7 +1457,9 @@ TEST_F(SchemaStoreTest, InitializeRegenerateDerivedFilesFailure) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("Type"))
.Build();
- ICING_ASSERT_OK(schema_store->SetSchema(std::move(schema)));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ std::move(schema), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
}
auto mock_filesystem = std::make_unique<MockFilesystem>();
@@ -1225,7 +1494,9 @@ TEST_F(SchemaStoreTest, SetSchemaRegenerateDerivedFilesFailure) {
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
SchemaProto schema = SchemaBuilder().AddType(type).Build();
- ICING_ASSERT_OK(schema_store->SetSchema(std::move(schema)));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ std::move(schema), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
}
{
@@ -1243,8 +1514,11 @@ TEST_F(SchemaStoreTest, SetSchemaRegenerateDerivedFilesFailure) {
.AddType(type)
.AddType(SchemaTypeConfigBuilder().SetType("Type2"))
.Build();
- EXPECT_THAT(schema_store->SetSchema(std::move(schema)),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ EXPECT_THAT(
+ schema_store->SetSchema(std::move(schema),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
DocumentProto document =
DocumentBuilder()
.SetSchema("Type")
@@ -1272,6 +1546,1649 @@ TEST_F(SchemaStoreTest, SetSchemaRegenerateDerivedFilesFailure) {
}
}
+TEST_F(SchemaStoreTest, CanCheckForPropertiesDefinedInSchema) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ // Set it for the first time
+ SchemaStore::SetSchemaResult result;
+ result.success = true;
+ result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
+
+ // Don't use schema_ defined in the test suite, as we want to make sure that
+ // the test is written correctly without referring to what the suite has
+ // defined.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(
+ // Add an indexed property so we generate
+ // section metadata on it
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOkAndHolds(EqualsSetSchemaResult(result)));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId schema_id,
+ schema_store->GetSchemaTypeId("email"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(schema_id, "subject"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(schema_id, "timestamp"));
+ EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(schema_id, "foobar"));
+}
+
+TEST_F(SchemaStoreTest, GetSchemaTypeIdsWithChildren) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ // Create a schema with the following inheritance relation:
+ // A
+ // / \
+ // B E
+ // / \
+ // C D
+ // |
+ // F
+ SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder().SetType("C").AddParentType("B").Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder().SetType("D").AddParentType("B").Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder().SetType("E").AddParentType("A").Build();
+ SchemaTypeConfigProto type_f =
+ SchemaTypeConfigBuilder().SetType("F").AddParentType("D").Build();
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .AddType(type_e)
+ .AddType(type_f)
+ .Build();
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Get schema type id for each type.
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_a_id,
+ schema_store->GetSchemaTypeId("A"));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_b_id,
+ schema_store->GetSchemaTypeId("B"));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_c_id,
+ schema_store->GetSchemaTypeId("C"));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_d_id,
+ schema_store->GetSchemaTypeId("D"));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_e_id,
+ schema_store->GetSchemaTypeId("E"));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_f_id,
+ schema_store->GetSchemaTypeId("F"));
+
+ // Check the results from GetSchemaTypeIdsWithChildren
+ EXPECT_THAT(
+ schema_store->GetSchemaTypeIdsWithChildren("A"),
+ IsOkAndHolds(Pointee(UnorderedElementsAre(
+ type_a_id, type_b_id, type_c_id, type_d_id, type_e_id, type_f_id))));
+ EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("B"),
+ IsOkAndHolds(Pointee(UnorderedElementsAre(
+ type_b_id, type_c_id, type_d_id, type_f_id))));
+ EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("C"),
+ IsOkAndHolds(Pointee(UnorderedElementsAre(type_c_id))));
+ EXPECT_THAT(
+ schema_store->GetSchemaTypeIdsWithChildren("D"),
+ IsOkAndHolds(Pointee(UnorderedElementsAre(type_d_id, type_f_id))));
+ EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("E"),
+ IsOkAndHolds(Pointee(UnorderedElementsAre(type_e_id))));
+ EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("F"),
+ IsOkAndHolds(Pointee(UnorderedElementsAre(type_f_id))));
+}
+
+TEST_F(SchemaStoreTest, DiamondGetSchemaTypeIdsWithChildren) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ // Create a schema with the following inheritance relation:
+ // A
+ // / \
+ // B E
+ // / \ /
+ // C D
+ // \ /
+ // F
+ SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder().SetType("C").AddParentType("B").Build();
+ SchemaTypeConfigProto type_d = SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddParentType("B")
+ .AddParentType("E")
+ .Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder().SetType("E").AddParentType("A").Build();
+ SchemaTypeConfigProto type_f = SchemaTypeConfigBuilder()
+ .SetType("F")
+ .AddParentType("C")
+ .AddParentType("D")
+ .Build();
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .AddType(type_e)
+ .AddType(type_f)
+ .Build();
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Get schema type id for each type.
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_a_id,
+ schema_store->GetSchemaTypeId("A"));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_b_id,
+ schema_store->GetSchemaTypeId("B"));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_c_id,
+ schema_store->GetSchemaTypeId("C"));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_d_id,
+ schema_store->GetSchemaTypeId("D"));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_e_id,
+ schema_store->GetSchemaTypeId("E"));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_f_id,
+ schema_store->GetSchemaTypeId("F"));
+
+ // Check the results from GetSchemaTypeIdsWithChildren
+ EXPECT_THAT(
+ schema_store->GetSchemaTypeIdsWithChildren("A"),
+ IsOkAndHolds(Pointee(UnorderedElementsAre(
+ type_a_id, type_b_id, type_c_id, type_d_id, type_e_id, type_f_id))));
+ EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("B"),
+ IsOkAndHolds(Pointee(UnorderedElementsAre(
+ type_b_id, type_c_id, type_d_id, type_f_id))));
+ EXPECT_THAT(
+ schema_store->GetSchemaTypeIdsWithChildren("C"),
+ IsOkAndHolds(Pointee(UnorderedElementsAre(type_c_id, type_f_id))));
+ EXPECT_THAT(
+ schema_store->GetSchemaTypeIdsWithChildren("D"),
+ IsOkAndHolds(Pointee(UnorderedElementsAre(type_d_id, type_f_id))));
+ EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("E"),
+ IsOkAndHolds(Pointee(
+ UnorderedElementsAre(type_e_id, type_d_id, type_f_id))));
+ EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("F"),
+ IsOkAndHolds(Pointee(UnorderedElementsAre(type_f_id))));
+}
+
+TEST_F(SchemaStoreTest, IndexableFieldsAreDefined) {
+ SchemaTypeConfigProto email_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("recipients")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("recipientIds")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(email_type).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+ constexpr SchemaTypeId kTypeEmailSchemaId = 0;
+
+ // Indexables.
+ EXPECT_TRUE(
+ schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId, "subject"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId,
+ "senderQualifiedId"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId,
+ "recipients"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId,
+ "recipientIds"));
+ EXPECT_TRUE(
+ schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId, "timestamp"));
+}
+
+TEST_F(SchemaStoreTest, JoinableFieldsAreDefined) {
+ SchemaTypeConfigProto email_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("tagQualifiedId")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(email_type).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+ constexpr SchemaTypeId kTypeEmailSchemaId = 0;
+
+ // Joinables.
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId,
+ "tagQualifiedId"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId,
+ "senderQualifiedId"));
+}
+
+TEST_F(SchemaStoreTest, NonIndexableFieldsAreDefined) {
+ SchemaTypeConfigProto email_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("attachment")
+ .SetDataType(TYPE_BYTES)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("nonindexableInteger")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(email_type).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+ constexpr SchemaTypeId kTypeEmailSchemaId = 0;
+
+ // Non-indexables.
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId,
+ "attachment"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId,
+ "nonindexableInteger"));
+ EXPECT_TRUE(
+ schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId, "text"));
+}
+
+TEST_F(SchemaStoreTest, NonExistentFieldsAreUndefined) {
+ SchemaTypeConfigProto email_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("nonindexableInteger")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(email_type).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+ constexpr SchemaTypeId kTypeEmailSchemaId = 0;
+
+ // Non-existents.
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId, "foobar"));
+ EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId,
+ "timestamp.foo"));
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId, "time"));
+}
+
+TEST_F(SchemaStoreTest, NestedIndexableFieldsAreDefined) {
+ SchemaTypeConfigProto email_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("tagQualifiedId")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+
+ SchemaTypeConfigProto conversation_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Conversation")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ "Email", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("nestedNonIndexable")
+ .SetDataTypeDocument("Email",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema =
+ SchemaBuilder().AddType(email_type).AddType(conversation_type).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+ constexpr SchemaTypeId kTypeConversationSchemaId = 1;
+
+ // Indexables.
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeConversationSchemaId,
+ "emails.subject"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeConversationSchemaId,
+ "emails.timestamp"));
+}
+
+TEST_F(SchemaStoreTest, NestedJoinableFieldsAreDefined) {
+ SchemaTypeConfigProto email_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("tagQualifiedId")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+
+ SchemaTypeConfigProto conversation_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Conversation")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ "Email", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("nestedNonIndexable")
+ .SetDataTypeDocument("Email",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema =
+ SchemaBuilder().AddType(email_type).AddType(conversation_type).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+ constexpr SchemaTypeId kTypeConversationSchemaId = 1;
+
+ // Joinables.
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeConversationSchemaId,
+ "emails.tagQualifiedId"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(
+ kTypeConversationSchemaId, "nestedNonIndexable.tagQualifiedId"));
+}
+
+TEST_F(SchemaStoreTest, NestedNonIndexableFieldsAreDefined) {
+ SchemaTypeConfigProto email_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("tagQualifiedId")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+
+ SchemaTypeConfigProto conversation_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Conversation")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ "Email", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("nestedNonIndexable")
+ .SetDataTypeDocument("Email",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema =
+ SchemaBuilder().AddType(email_type).AddType(conversation_type).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+ constexpr SchemaTypeId kTypeConversationSchemaId = 1;
+
+ // Non-indexables.
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeConversationSchemaId,
+ "emails.text"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(
+ kTypeConversationSchemaId, "nestedNonIndexable.subject"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(
+ kTypeConversationSchemaId, "nestedNonIndexable.text"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(
+ kTypeConversationSchemaId, "nestedNonIndexable.timestamp"));
+}
+
+TEST_F(SchemaStoreTest, NestedNonExistentFieldsAreUndefined) {
+ SchemaTypeConfigProto email_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("tagQualifiedId")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+
+ SchemaTypeConfigProto conversation_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Conversation")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ "Email", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("nestedNonIndexable")
+ .SetDataTypeDocument("Email",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema =
+ SchemaBuilder().AddType(email_type).AddType(conversation_type).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+ constexpr SchemaTypeId kTypeConversationSchemaId = 1;
+
+ // Non-existents.
+ EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(
+ kTypeConversationSchemaId, "emails.foobar"));
+ EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(
+ kTypeConversationSchemaId, "nestedNonIndexable.foobar"));
+ EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(
+ kTypeConversationSchemaId, "emails.timestamp.foo"));
+ EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(
+ kTypeConversationSchemaId, "emails.time"));
+}
+
+TEST_F(SchemaStoreTest, IntermediateDocumentPropertiesAreDefined) {
+ SchemaTypeConfigProto email_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("tagQualifiedId")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+
+ SchemaTypeConfigProto conversation_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Conversation")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ "Email", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("nestedNonIndexable")
+ .SetDataTypeDocument("Email",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema =
+ SchemaBuilder().AddType(email_type).AddType(conversation_type).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+ constexpr SchemaTypeId kTypeConversationSchemaId = 1;
+
+ // Intermediate documents props.
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeConversationSchemaId,
+ "emails"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeConversationSchemaId,
+ "nestedNonIndexable"));
+}
+
+TEST_F(SchemaStoreTest, CyclePathsAreDefined) {
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+ constexpr SchemaTypeId kTypeASchemaId = 0;
+ constexpr SchemaTypeId kTypeBSchemaId = 1;
+
+ // A's top-level properties
+ EXPECT_TRUE(
+ schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "subject"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b"));
+
+ // A's nested properties in B
+ EXPECT_TRUE(
+ schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b.body"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b.a"));
+
+ // A's nested properties in B's nested property in A
+ EXPECT_TRUE(
+ schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b.a.subject"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b.a.b"));
+
+ // B's top-level properties
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "body"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a"));
+
+ // B's nested properties in A
+ EXPECT_TRUE(
+ schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a.subject"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a.b"));
+
+ // B's nested properties in A's nested property in B
+ EXPECT_TRUE(
+ schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a.b.body"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a.b.a"));
+}
+
+TEST_F(SchemaStoreTest, WrongTypeCyclePathsAreUndefined) {
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+ constexpr SchemaTypeId kTypeASchemaId = 0;
+ constexpr SchemaTypeId kTypeBSchemaId = 1;
+
+ // The same paths as above, but we check the wrong types instead.
+ // A's top-level properties
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "subject"));
+ EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "b"));
+
+ // A's nested properties in B
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "b.body"));
+ EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "b.a"));
+
+ // A's nested properties in B's nested property in A
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "b.a.subject"));
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "b.a.b"));
+
+ // B's top-level properties
+ EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "body"));
+ EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "a"));
+
+ // B's nested properties in A
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "a.subject"));
+ EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "a.b"));
+
+ // B's nested properties in A's nested property in B
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "a.b.body"));
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "a.b.a"));
+}
+
+TEST_F(SchemaStoreTest, CyclePathsNonexistentPropertiesAreUndefined) {
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+ constexpr SchemaTypeId kTypeASchemaId = 0;
+ constexpr SchemaTypeId kTypeBSchemaId = 1;
+
+ // Undefined paths in A
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b.subject"));
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b.a.body"));
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b.a.a"));
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b.a.subject.b"));
+
+ // Undefined paths in B
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a.body"));
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a.b.subject"));
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a.b.b"));
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a.b.body.a"));
+}
+
+TEST_F(SchemaStoreTest, LoadsOverlaySchemaOnInit) {
+ // Create a schema that is rollback incompatible and will trigger us to create
+ // an overlay schema.
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("type_b")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ {
+ // Create a new of the schema store and check that the same schema is
+ // present.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+
+ // The overlay should exist
+ std::string overlay_schema_path = schema_store_dir_ + "/overlay_schema.pb";
+ ASSERT_TRUE(filesystem_.FileExists(overlay_schema_path.c_str()));
+
+ // The base schema should hold a compatible schema
+ SchemaTypeConfigProto modified_type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING))
+ .Build();
+ SchemaProto expected_base_schema =
+ SchemaBuilder().AddType(modified_type_a).AddType(type_b).Build();
+ std::string base_schema_path = schema_store_dir_ + "/schema.pb";
+ auto base_schema_file_ = std::make_unique<FileBackedProto<SchemaProto>>(
+ filesystem_, base_schema_path);
+ ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* base_schema,
+ base_schema_file_->Read());
+ EXPECT_THAT(*base_schema, EqualsProto(expected_base_schema));
+ }
+}
+
+TEST_F(SchemaStoreTest, LoadsBaseSchemaWithNoOverlayOnInit) {
+ // Create a normal schema that won't require an overlay.
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("type_b")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ {
+ // Create a new instance of the schema store and check that the same schema
+ // is present.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+
+ // Additionally, the overlay should not exist
+ std::string overlay_schema_path = schema_store_dir_ + "/overlay_schema.pb";
+ ASSERT_FALSE(filesystem_.FileExists(overlay_schema_path.c_str()));
+ }
+}
+
+TEST_F(SchemaStoreTest, LoadSchemaBackupSchemaMissing) {
+ // Create a schema that is rollback incompatible and will trigger us to create
+ // a backup schema.
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("type_b")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // Delete the backup schema.
+ std::string backup_schema_path = schema_store_dir_ + "/schema.pb";
+ ASSERT_TRUE(filesystem_.DeleteFile(backup_schema_path.c_str()));
+
+ {
+ // Create a new instance of the schema store and check that it fails because
+ // the backup schema is not available.
+ EXPECT_THAT(
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ }
+}
+
+TEST_F(SchemaStoreTest, LoadSchemaOverlaySchemaMissing) {
+ // Create a schema that is rollback incompatible and will trigger us to create
+ // a backup schema.
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("type_b")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // Delete the overlay schema.
+ std::string overlay_schema_path = schema_store_dir_ + "/overlay_schema.pb";
+ ASSERT_TRUE(filesystem_.DeleteFile(overlay_schema_path.c_str()));
+
+ {
+ // Create a new instance of the schema store and check that it fails because
+ // the overlay schema is not available when we expected it to be.
+ EXPECT_THAT(
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ }
+}
+
+TEST_F(SchemaStoreTest, LoadSchemaHeaderMissing) {
+ // Create a schema that is rollback incompatible and will trigger us to create
+ // a backup schema.
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("type_b")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // Delete the overlay schema.
+ std::string schema_header_path = schema_store_dir_ + "/schema_store_header";
+ ASSERT_TRUE(filesystem_.DeleteFile(schema_header_path.c_str()));
+
+ {
+ // Create a new of the schema store and check that the same schema is
+ // present.
+ EXPECT_THAT(
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ }
+}
+
+TEST_F(SchemaStoreTest, LoadSchemaNoOverlayHeaderMissing) {
+ // Create a normal schema that won't require a backup.
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("type_b")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // Delete the schema header.
+ std::string schema_header_path = schema_store_dir_ + "/schema_store_header";
+ ASSERT_TRUE(filesystem_.DeleteFile(schema_header_path.c_str()));
+
+ {
+ // Create a new instance of the schema store and check that it fails because
+ // the schema header (which is now a part of the ground truth) is not
+ // available.
+ EXPECT_THAT(
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ }
+}
+
+TEST_F(SchemaStoreTest, MigrateSchemaCompatibleNoChange) {
+ // Create a schema that is rollback incompatible and will trigger us to create
+ // a backup schema.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kCompatible,
+ version_util::kVersion));
+
+ {
+ // Create a new of the schema store and check that the same schema is
+ // present.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+}
+
+TEST_F(SchemaStoreTest, MigrateSchemaUpgradeNoChange) {
+ // Create a schema that is rollback incompatible and will trigger us to create
+ // a backup schema.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kUpgrade,
+ version_util::kVersion + 1));
+
+ {
+ // Create a new of the schema store and check that the same schema is
+ // present.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+}
+
+TEST_F(SchemaStoreTest, MigrateSchemaVersionZeroUpgradeNoChange) {
+ // Because we are upgrading from version zero, the schema must be compatible
+ // with version zero.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ ICING_EXPECT_OK(
+ SchemaStore::MigrateSchema(&filesystem_, schema_store_dir_,
+ version_util::StateChange::kVersionZeroUpgrade,
+ version_util::kVersion + 1));
+
+ {
+ // Create a new of the schema store and check that the same schema is
+ // present.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+}
+
+TEST_F(SchemaStoreTest,
+ MigrateSchemaRollbackDiscardsIncompatibleOverlaySchema) {
+ // Because we are upgrading from version zero, the schema must be compatible
+ // with version zero.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // Rollback to a version before kVersionOne. The schema header will declare
+ // that the overlay is compatible with any version starting with kVersionOne.
+ // So kVersionOne - 1 is incompatible and will throw out the schema.
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kRollBack,
+ version_util::kVersionOne - 1));
+
+ {
+ // Create a new of the schema store and check that we fell back to the
+ // base schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ SchemaTypeConfigProto other_type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING))
+ .Build();
+ SchemaProto base_schema = SchemaBuilder().AddType(other_type_a).Build();
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(base_schema))));
+ }
+}
+
+TEST_F(SchemaStoreTest, MigrateSchemaRollbackKeepsCompatibleOverlaySchema) {
+ // Because we are upgrading from version zero, the schema must be compatible
+ // with version zero.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // Rollback to kVersion. The schema header will declare that the overlay is
+ // compatible with any version starting with kVersion. So we will be
+ // compatible and retain the overlay schema.
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kRollBack,
+ version_util::kVersion));
+
+ {
+ // Create a new of the schema store and check that the same schema is
+ // present.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+}
+
+TEST_F(SchemaStoreTest, MigrateSchemaRollforwardRetainsBaseSchema) {
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // Rollback to a version before kVersionOne. The schema header will declare
+ // that the overlay is compatible with any version starting with kVersionOne.
+ // So kVersionOne - 1 is incompatible and will throw out the schema.
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kRollBack,
+ version_util::kVersionOne - 1));
+
+ SchemaTypeConfigProto other_type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING))
+ .Build();
+ SchemaProto base_schema = SchemaBuilder().AddType(other_type_a).Build();
+
+ {
+ // Create a new of the schema store and check that we fell back to the
+ // base schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(base_schema))));
+ }
+
+ // Now rollforward to a new version. This should accept whatever schema is
+ // present (currently base schema)
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kRollForward,
+ version_util::kVersion));
+ {
+ // Create a new of the schema store and check that we fell back to the
+ // base schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(base_schema))));
+ }
+}
+
+TEST_F(SchemaStoreTest, MigrateSchemaRollforwardRetainsOverlaySchema) {
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // Rollback to kVersion. The schema header will declare that the overlay is
+ // compatible with any version starting with kVersion. So we will be
+ // compatible and retain the overlay schema.
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kRollBack,
+ version_util::kVersion));
+
+ {
+ // Create a new of the schema store and check that the same schema is
+ // present.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // Now rollforward to a new version. This should accept whatever schema is
+ // present (currently overlay schema)
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kRollForward,
+ version_util::kVersion));
+ {
+ // Create a new of the schema store and check that the same schema is
+ // present.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+}
+
+TEST_F(SchemaStoreTest,
+ MigrateSchemaVersionZeroRollforwardDiscardsOverlaySchema) {
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // A VersionZeroRollforward will always discard the overlay schema because it
+ // could be stale.
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_,
+ version_util::StateChange::kVersionZeroRollForward,
+ version_util::kVersion));
+
+ SchemaTypeConfigProto other_type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING))
+ .Build();
+ SchemaProto base_schema = SchemaBuilder().AddType(other_type_a).Build();
+
+ {
+ // Create a new of the schema store and check that we fell back to the
+ // base schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(base_schema))));
+ }
+}
+
+TEST_F(SchemaStoreTest, MigrateSchemaVersionUndeterminedDiscardsOverlaySchema) {
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // An Undetermined will always discard the overlay schema because it doesn't
+ // know which state we're in and so it fallback to the base schema because
+ // it should always be valid.
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kUndetermined,
+ version_util::kVersion));
+
+ SchemaTypeConfigProto other_type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING))
+ .Build();
+ SchemaProto base_schema = SchemaBuilder().AddType(other_type_a).Build();
+
+ {
+ // Create a new of the schema store and check that we fell back to the
+ // base schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(base_schema))));
+ }
+}
+
} // namespace
} // namespace lib
diff --git a/icing/schema/schema-type-manager.cc b/icing/schema/schema-type-manager.cc
new file mode 100644
index 0000000..4a6b7f2
--- /dev/null
+++ b/icing/schema/schema-type-manager.cc
@@ -0,0 +1,108 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/schema-type-manager.h"
+
+#include <memory>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/schema/joinable-property-manager.h"
+#include "icing/schema/property-util.h"
+#include "icing/schema/schema-property-iterator.h"
+#include "icing/schema/schema-util.h"
+#include "icing/schema/section-manager.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/key-mapper.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<SchemaTypeManager>>
+SchemaTypeManager::Create(const SchemaUtil::TypeConfigMap& type_config_map,
+ const KeyMapper<SchemaTypeId>* schema_type_mapper) {
+ ICING_RETURN_ERROR_IF_NULL(schema_type_mapper);
+
+ SectionManager::Builder section_manager_builder(*schema_type_mapper);
+ JoinablePropertyManager::Builder joinable_property_manager_builder(
+ *schema_type_mapper);
+
+ for (const auto& [type_config_name, type_config] : type_config_map) {
+ ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
+ schema_type_mapper->Get(type_config_name));
+
+ // Use iterator to traverse all leaf properties of the schema.
+ SchemaPropertyIterator iterator(type_config, type_config_map);
+ while (true) {
+ libtextclassifier3::Status status = iterator.Advance();
+ if (!status.ok()) {
+ if (absl_ports::IsOutOfRange(status)) {
+ break;
+ }
+ return status;
+ }
+
+ // Process section (indexable property)
+ if (iterator.GetCurrentPropertyIndexable()) {
+ ICING_RETURN_IF_ERROR(
+ section_manager_builder.ProcessSchemaTypePropertyConfig(
+ schema_type_id, iterator.GetCurrentPropertyConfig(),
+ iterator.GetCurrentPropertyPath()));
+ }
+
+ // Process joinable property
+ ICING_RETURN_IF_ERROR(
+ joinable_property_manager_builder.ProcessSchemaTypePropertyConfig(
+ schema_type_id, iterator.GetCurrentPropertyConfig(),
+ iterator.GetCurrentPropertyPath()));
+ }
+
+ // Process unknown property paths in the indexable_nested_properties_list.
+ // These property paths should consume sectionIds but are currently
+ // not indexed.
+ //
+ // SectionId assignment order:
+ // - We assign section ids to known (existing) properties first in alphabet
+ // order.
+ // - After handling all known properties, we assign section ids to all
+ // unknown (non-existent) properties that are specified in the
+ // indexable_nested_properties_list.
+ // - As a result, assignment of the entire section set is not done
+ // alphabetically, but assignment is still deterministic and alphabetical
+ // order is preserved inside the known properties and unknown properties
+ // sets individually.
+ for (const auto& property_path :
+ iterator.unknown_indexable_nested_property_paths()) {
+ PropertyConfigProto unknown_property_config;
+ unknown_property_config.set_property_name(std::string(
+ property_util::SplitPropertyPathExpr(property_path).back()));
+ unknown_property_config.set_data_type(
+ PropertyConfigProto::DataType::UNKNOWN);
+
+ ICING_RETURN_IF_ERROR(
+ section_manager_builder.ProcessSchemaTypePropertyConfig(
+ schema_type_id, unknown_property_config,
+ std::string(property_path)));
+ }
+ }
+
+ return std::unique_ptr<SchemaTypeManager>(new SchemaTypeManager(
+ std::move(section_manager_builder).Build(),
+ std::move(joinable_property_manager_builder).Build()));
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/schema-type-manager.h b/icing/schema/schema-type-manager.h
new file mode 100644
index 0000000..f2adbd9
--- /dev/null
+++ b/icing/schema/schema-type-manager.h
@@ -0,0 +1,79 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCHEMA_SCHEMA_TYPE_MANAGER_H_
+#define ICING_SCHEMA_SCHEMA_TYPE_MANAGER_H_
+
+#include <memory>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/schema/joinable-property-manager.h"
+#include "icing/schema/schema-util.h"
+#include "icing/schema/section-manager.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/key-mapper.h"
+
+namespace icing {
+namespace lib {
+
+// This class is a wrapper of SectionManager and JoinablePropertyManager.
+class SchemaTypeManager {
+ public:
+ // Schema type ids are continuous, and so we use a vector instead of an
+ // unordered map for the mappings.
+ using SchemaTypeIdToPropertiesVector =
+ std::vector<std::unordered_set<std::string>>;
+ // Factory function to create a SchemaTypeManager which does not take
+ // ownership of any input components, and all pointers must refer to valid
+ // objects that outlive the created SchemaTypeManager instance.
+ //
+ // Returns:
+ // - A SchemaTypeManager on success
+ // - FAILED_PRECONDITION_ERROR on any null pointer input
+ // - OUT_OF_RANGE_ERROR if # of indexable properties in a single Schema
+ // exceeds the threshold (kTotalNumSections, kTotalNumJoinableProperties)
+ // - INVALID_ARGUMENT_ERROR if type_config_map contains incorrect
+ // information that causes errors (e.g. invalid schema type id, cycle
+ // dependency in nested schema)
+ // - NOT_FOUND_ERROR if any nested schema name is not found in
+ // type_config_map
+ static libtextclassifier3::StatusOr<std::unique_ptr<SchemaTypeManager>>
+ Create(const SchemaUtil::TypeConfigMap& type_config_map,
+ const KeyMapper<SchemaTypeId>* schema_type_mapper);
+
+ const SectionManager& section_manager() const { return *section_manager_; }
+
+ const JoinablePropertyManager& joinable_property_manager() const {
+ return *joinable_property_manager_;
+ }
+
+ private:
+ explicit SchemaTypeManager(
+ std::unique_ptr<SectionManager> section_manager,
+ std::unique_ptr<JoinablePropertyManager> joinable_property_manager)
+ : section_manager_(std::move(section_manager)),
+ joinable_property_manager_(std::move(joinable_property_manager)) {}
+
+ std::unique_ptr<SectionManager> section_manager_;
+
+ std::unique_ptr<JoinablePropertyManager> joinable_property_manager_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCHEMA_SCHEMA_TYPE_MANAGER_H_
diff --git a/icing/schema/schema-type-manager_test.cc b/icing/schema/schema-type-manager_test.cc
new file mode 100644
index 0000000..eafc612
--- /dev/null
+++ b/icing/schema/schema-type-manager_test.cc
@@ -0,0 +1,356 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/schema-type-manager.h"
+
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-util.h"
+#include "icing/schema/section.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
+#include "icing/store/key-mapper.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Pointee;
+
+// type and property names of EmailMessage
+static constexpr char kTypeEmail[] = "EmailMessage";
+static constexpr SchemaTypeId kTypeEmailSchemaId = 0;
+// indexable (in lexicographical order)
+static constexpr char kPropertyRecipientIds[] = "recipientIds";
+static constexpr char kPropertyRecipients[] = "recipients";
+static constexpr char kPropertySenderQualifiedId[] =
+ "senderQualifiedId"; // QUALIFIED_ID joinable
+static constexpr char kPropertySubject[] = "subject";
+static constexpr char kPropertyTimestamp[] = "timestamp";
+// non-indexable
+static constexpr char kPropertyAttachment[] = "attachment";
+static constexpr char kPropertyNonIndexableInteger[] = "nonIndexableInteger";
+static constexpr char kPropertyTagQualifiedId[] =
+ "tagQualifiedId"; // QUALIFIED_ID joinable
+static constexpr char kPropertyText[] = "text";
+
+// type and property names of Conversation
+static constexpr char kTypeConversation[] = "Conversation";
+static constexpr SchemaTypeId kTypeConversationSchemaId = 1;
+// indexable (in lexicographical order)
+static constexpr char kPropertyEmails[] = "emails";
+static constexpr char kPropertyGroupQualifiedId[] =
+ "groupQualifiedId"; // QUALIFIED_ID joinable
+static constexpr char kPropertyName[] = "name";
+// non-indexable
+static constexpr char kPropertyNestedNonIndexable[] = "nestedNonIndexable";
+static constexpr char kPropertySuperTagQualifiedId[] =
+ "superTagQualifiedId"; // QUALIFIED_ID joinable
+
+PropertyConfigProto CreateReceipientIdsPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyRecipientIds)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REPEATED)
+ .Build();
+}
+
+PropertyConfigProto CreateRecipientsPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyRecipients)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED)
+ .Build();
+}
+
+PropertyConfigProto CreateSenderQualifiedIdPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertySenderQualifiedId)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+}
+
+PropertyConfigProto CreateSubjectPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertySubject)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+}
+
+PropertyConfigProto CreateTimestampPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyTimestamp)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+}
+
+PropertyConfigProto CreateTagQualifiedIdPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyTagQualifiedId)
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+}
+
+PropertyConfigProto CreateGroupQualifiedIdPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyGroupQualifiedId)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+}
+
+PropertyConfigProto CreateSuperTagQualifiedIdPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertySuperTagQualifiedId)
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+}
+
+PropertyConfigProto CreateNamePropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+}
+
+SchemaTypeConfigProto CreateEmailTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType(kTypeEmail)
+ .AddProperty(CreateTagQualifiedIdPropertyConfig())
+ .AddProperty(CreateSubjectPropertyConfig())
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyText)
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyAttachment)
+ .SetDataType(TYPE_BYTES)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(CreateSenderQualifiedIdPropertyConfig())
+ .AddProperty(CreateRecipientsPropertyConfig())
+ .AddProperty(CreateReceipientIdsPropertyConfig())
+ .AddProperty(CreateTimestampPropertyConfig())
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyNonIndexableInteger)
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+}
+
+SchemaTypeConfigProto CreateConversationTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType(kTypeConversation)
+ .AddProperty(CreateSuperTagQualifiedIdPropertyConfig())
+ .AddProperty(CreateNamePropertyConfig())
+ .AddProperty(CreateGroupQualifiedIdPropertyConfig())
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyEmails)
+ .SetDataTypeDocument(kTypeEmail, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyNestedNonIndexable)
+ .SetDataTypeDocument(kTypeEmail,
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .Build();
+}
+
+class SchemaTypeManagerTest : public ::testing::Test {
+ protected:
+ void SetUp() override { test_dir_ = GetTestTempDir() + "/icing"; }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ std::string test_dir_;
+};
+
+TEST_F(SchemaTypeManagerTest, Create) {
+ SchemaUtil::TypeConfigMap type_config_map;
+ type_config_map.emplace(kTypeEmail, CreateEmailTypeConfig());
+ type_config_map.emplace(kTypeConversation, CreateConversationTypeConfig());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put(kTypeEmail, kTypeEmailSchemaId));
+ ICING_ASSERT_OK(
+ schema_type_mapper->Put(kTypeConversation, kTypeConversationSchemaId));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, schema_type_mapper.get()));
+
+ // Check SectionManager
+ // In the Email type, "recipientIds", "recipients", "senderQualifiedId",
+ // "subject" and "timestamp" are indexable properties. "attachment",
+ // "nonIndexableInteger", "tagQualifiedId" and "text" are non-indexable.
+ EXPECT_THAT(
+ schema_type_manager->section_manager().GetMetadataList(kTypeEmail),
+ IsOkAndHolds(Pointee(ElementsAre(
+ EqualsSectionMetadata(/*expected_id=*/0,
+ /*expected_property_path=*/"recipientIds",
+ CreateReceipientIdsPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/1,
+ /*expected_property_path=*/"recipients",
+ CreateRecipientsPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/2,
+ /*expected_property_path=*/"senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/3,
+ /*expected_property_path=*/"subject",
+ CreateSubjectPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/4,
+ /*expected_property_path=*/"timestamp",
+ CreateTimestampPropertyConfig())))));
+
+ // In the Conversation type, "groupQualifiedId" and "name" are indexable
+ // properties as are the indexable properties of the email in the "emails"
+ // property. All properties of the email in the "nestedNonIndexable" property
+ // are not indexable.
+ EXPECT_THAT(
+ schema_type_manager->section_manager().GetMetadataList(kTypeConversation),
+ IsOkAndHolds(Pointee(ElementsAre(
+ EqualsSectionMetadata(
+ /*expected_id=*/0,
+ /*expected_property_path=*/"emails.recipientIds",
+ CreateReceipientIdsPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/1,
+ /*expected_property_path=*/"emails.recipients",
+ CreateRecipientsPropertyConfig()),
+ EqualsSectionMetadata(
+ /*expected_id=*/2,
+ /*expected_property_path=*/"emails.senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/3,
+ /*expected_property_path=*/"emails.subject",
+ CreateSubjectPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/4,
+ /*expected_property_path=*/"emails.timestamp",
+ CreateTimestampPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/5,
+ /*expected_property_path=*/"groupQualifiedId",
+ CreateGroupQualifiedIdPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/6,
+ /*expected_property_path=*/"name",
+ CreateNamePropertyConfig())))));
+
+ // Check JoinablePropertyManager
+ // In the Email type, "senderQualifiedId" and "tagQualifiedId" are joinable
+ // properties.
+ EXPECT_THAT(
+ schema_type_manager->joinable_property_manager().GetMetadataList(
+ kTypeEmail),
+ IsOkAndHolds(Pointee(ElementsAre(
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0, /*expected_property_path=*/"senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()),
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/1, /*expected_property_path=*/"tagQualifiedId",
+ CreateTagQualifiedIdPropertyConfig())))));
+ // In the Conversation type, "groupQualifiedId" and "superTagQualifiedId" are
+ // joinable properties as are the joinable properties of the email in the
+ // "emails" and "nestedNonIndexable" property.
+ EXPECT_THAT(
+ schema_type_manager->joinable_property_manager().GetMetadataList(
+ kTypeConversation),
+ IsOkAndHolds(Pointee(ElementsAre(
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0,
+ /*expected_property_path=*/"emails.senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()),
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/1,
+ /*expected_property_path=*/"emails.tagQualifiedId",
+ CreateTagQualifiedIdPropertyConfig()),
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/2, /*expected_property_path=*/"groupQualifiedId",
+ CreateGroupQualifiedIdPropertyConfig()),
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/3,
+ /*expected_property_path=*/"nestedNonIndexable.senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()),
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/4,
+ /*expected_property_path=*/"nestedNonIndexable.tagQualifiedId",
+ CreateTagQualifiedIdPropertyConfig()),
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/5,
+ /*expected_property_path=*/"superTagQualifiedId",
+ CreateSuperTagQualifiedIdPropertyConfig())))));
+}
+
+TEST_F(SchemaTypeManagerTest, CreateWithNullPointerShouldFail) {
+ SchemaUtil::TypeConfigMap type_config_map;
+ EXPECT_THAT(SchemaTypeManager::Create(type_config_map,
+ /*schema_type_mapper=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+TEST_F(SchemaTypeManagerTest, CreateWithSchemaNotInSchemaTypeMapperShouldFail) {
+ SchemaTypeConfigProto type_config;
+ type_config.set_schema_type("type");
+
+ auto property = type_config.add_properties();
+ property->set_property_name("property");
+ property->set_data_type(TYPE_STRING);
+ property->set_cardinality(CARDINALITY_REQUIRED);
+ property->mutable_string_indexing_config()->set_term_match_type(
+ TERM_MATCH_EXACT);
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ type_config_map.emplace("type", type_config);
+
+ // Create an empty schema type mapper
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+
+ EXPECT_THAT(
+ SchemaTypeManager::Create(type_config_map, schema_type_mapper.get()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/schema-util.cc b/icing/schema/schema-util.cc
index 88b6946..af6feda 100644
--- a/icing/schema/schema-util.cc
+++ b/icing/schema/schema-util.cc
@@ -14,19 +14,21 @@
#include "icing/schema/schema-util.h"
+#include <algorithm>
#include <cstdint>
+#include <queue>
#include <string>
#include <string_view>
#include <unordered_map>
#include <unordered_set>
#include <utility>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/absl_ports/annotate.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/absl_ports/str_join.h"
-#include "icing/legacy/core/icing-string-util.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/util/logging.h"
@@ -107,148 +109,364 @@ bool IsTermMatchTypeCompatible(const StringIndexingConfig& old_indexed,
old_indexed.tokenizer_type() == new_indexed.tokenizer_type();
}
+bool IsIntegerNumericMatchTypeCompatible(
+ const IntegerIndexingConfig& old_indexed,
+ const IntegerIndexingConfig& new_indexed) {
+ return old_indexed.numeric_match_type() == new_indexed.numeric_match_type();
+}
+
+bool IsDocumentIndexingCompatible(const DocumentIndexingConfig& old_indexed,
+ const DocumentIndexingConfig& new_indexed) {
+ // TODO(b/265304217): This could mark the new schema as incompatible and
+ // generate some unnecessary index rebuilds if the two schemas have an
+ // equivalent set of indexed properties, but changed the way that it is
+ // declared.
+ if (old_indexed.index_nested_properties() !=
+ new_indexed.index_nested_properties()) {
+ return false;
+ }
+
+ if (old_indexed.indexable_nested_properties_list().size() !=
+ new_indexed.indexable_nested_properties_list().size()) {
+ return false;
+ }
+
+ std::unordered_set<std::string_view> old_indexable_nested_properies_set(
+ old_indexed.indexable_nested_properties_list().begin(),
+ old_indexed.indexable_nested_properties_list().end());
+ for (const auto& property : new_indexed.indexable_nested_properties_list()) {
+ if (old_indexable_nested_properies_set.find(property) ==
+ old_indexable_nested_properies_set.end()) {
+ return false;
+ }
+ }
+ return true;
+}
+
void AddIncompatibleChangeToDelta(
std::unordered_set<std::string>& incompatible_delta,
const SchemaTypeConfigProto& old_type_config,
- const SchemaUtil::DependencyMap& new_schema_dependency_map,
+ const SchemaUtil::DependentMap& new_schema_dependent_map,
const SchemaUtil::TypeConfigMap& old_type_config_map,
const SchemaUtil::TypeConfigMap& new_type_config_map) {
// If this type is incompatible, then every type that depends on it might
- // also be incompatible. Use the dependency map to mark those ones as
+ // also be incompatible. Use the dependent map to mark those ones as
// incompatible too.
incompatible_delta.insert(old_type_config.schema_type());
- auto parent_types_itr =
- new_schema_dependency_map.find(old_type_config.schema_type());
- if (parent_types_itr != new_schema_dependency_map.end()) {
- for (std::string_view parent_type : parent_types_itr->second) {
+ auto dependent_types_itr =
+ new_schema_dependent_map.find(old_type_config.schema_type());
+ if (dependent_types_itr != new_schema_dependent_map.end()) {
+ for (const auto& [dependent_type, _] : dependent_types_itr->second) {
// The types from new_schema that depend on the current
// old_type_config may not present in old_schema.
// Those types will be listed at schema_delta.schema_types_new
// instead.
- std::string parent_type_str(parent_type);
- if (old_type_config_map.find(parent_type_str) !=
+ std::string dependent_type_str(dependent_type);
+ if (old_type_config_map.find(dependent_type_str) !=
old_type_config_map.end()) {
- incompatible_delta.insert(std::move(parent_type_str));
+ incompatible_delta.insert(std::move(dependent_type_str));
}
}
}
}
+// Returns if C1 <= C2 based on the following rule, where C1 and C2 are
+// cardinalities that can be one of REPEATED, OPTIONAL, or REQUIRED.
+//
+// Rule: REQUIRED < OPTIONAL < REPEATED
+bool CardinalityLessThanEq(PropertyConfigProto::Cardinality::Code C1,
+ PropertyConfigProto::Cardinality::Code C2) {
+ if (C1 == C2) {
+ return true;
+ }
+ if (C1 == PropertyConfigProto::Cardinality::REQUIRED) {
+ return C2 == PropertyConfigProto::Cardinality::OPTIONAL ||
+ C2 == PropertyConfigProto::Cardinality::REPEATED;
+ }
+ if (C1 == PropertyConfigProto::Cardinality::OPTIONAL) {
+ return C2 == PropertyConfigProto::Cardinality::REPEATED;
+ }
+ return false;
+}
+
} // namespace
-libtextclassifier3::Status ExpandTranstiveDependencies(
- const SchemaUtil::DependencyMap& child_to_direct_parent_map,
+libtextclassifier3::Status CalculateTransitiveNestedTypeRelations(
+ const SchemaUtil::DependentMap& direct_nested_types_map,
+ const std::unordered_set<std::string_view>& joinable_types,
+ std::string_view type, bool path_contains_joinable_property,
+ SchemaUtil::DependentMap* expanded_nested_types_map,
+ std::unordered_map<std::string_view, bool>&&
+ pending_expansion_paths_indexable,
+ std::unordered_set<std::string_view>* sink_types) {
+ // TODO(b/280698121): Implement optimizations to this code to avoid reentering
+ // a node after it's already been expanded.
+
+ auto itr = direct_nested_types_map.find(type);
+ if (itr == direct_nested_types_map.end()) {
+ // It's a sink node. Just return.
+ sink_types->insert(type);
+ return libtextclassifier3::Status::OK;
+ }
+ std::unordered_map<std::string_view, std::vector<const PropertyConfigProto*>>
+ expanded_relations;
+
+ // Add all of the adjacent outgoing relations.
+ expanded_relations.reserve(itr->second.size());
+ expanded_relations.insert(itr->second.begin(), itr->second.end());
+
+ // Iterate through each adjacent outgoing relation and add their indirect
+ // outgoing relations.
+ for (const auto& [adjacent_type, adjacent_property_protos] : itr->second) {
+ // Make a copy of pending_expansion_paths_indexable for every iteration.
+ std::unordered_map<std::string_view, bool> pending_expansion_paths_copy(
+ pending_expansion_paths_indexable);
+
+ // 1. Check the nested indexable config of the edge (type -> adjacent_type),
+ // and the joinable config of the current path up to adjacent_type.
+ //
+ // The nested indexable config is true if any of the PropertyConfigProtos
+ // representing the connecting edge has index_nested_properties=true.
+ bool is_edge_nested_indexable = std::any_of(
+ adjacent_property_protos.begin(), adjacent_property_protos.end(),
+ [](const PropertyConfigProto* property_config) {
+ return property_config->document_indexing_config()
+ .index_nested_properties();
+ });
+ // TODO(b/265304217): change this once we add joinable_properties_list.
+ // Check if addition of the new edge (type->adjacent_type) makes the path
+ // joinable.
+ bool new_path_contains_joinable_property =
+ joinable_types.count(type) > 0 || path_contains_joinable_property;
+ // Set is_nested_indexable field for the current edge
+ pending_expansion_paths_copy[type] = is_edge_nested_indexable;
+
+ // If is_edge_nested_indexable=false, then all paths to adjacent_type
+ // currently in the pending_expansions map are also not nested indexable.
+ if (!is_edge_nested_indexable) {
+ for (auto& pending_expansion : pending_expansion_paths_copy) {
+ pending_expansion.second = false;
+ }
+ }
+
+ // 2. Check if we're in the middle of expanding this type - IOW
+ // there's a cycle!
+ //
+ // This cycle is not allowed if either:
+ // 1. The cycle starting at adjacent_type is nested indexable, OR
+ // 2. The current path contains a joinable property.
+ auto adjacent_itr = pending_expansion_paths_copy.find(adjacent_type);
+ if (adjacent_itr != pending_expansion_paths_copy.end()) {
+ if (adjacent_itr->second || new_path_contains_joinable_property) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Invalid cycle detected in type configs. '", type,
+ "' references itself and is nested-indexable or nested-joinable."));
+ }
+ // The cycle is allowed and there's no need to keep iterating the loop.
+ // Move on to the next adjacent value.
+ continue;
+ }
+
+ // 3. Expand this type as needed.
+ ICING_RETURN_IF_ERROR(CalculateTransitiveNestedTypeRelations(
+ direct_nested_types_map, joinable_types, adjacent_type,
+ new_path_contains_joinable_property, expanded_nested_types_map,
+ std::move(pending_expansion_paths_copy), sink_types));
+ if (sink_types->count(adjacent_type) > 0) {
+ // "adjacent" is a sink node. Just skip to the next.
+ continue;
+ }
+
+ // 4. "adjacent" has been fully expanded. Add all of its transitive
+ // outgoing relations to this type's transitive outgoing relations.
+ auto adjacent_expanded_itr = expanded_nested_types_map->find(adjacent_type);
+ for (const auto& [transitive_reachable, _] :
+ adjacent_expanded_itr->second) {
+ // Insert a transitive reachable node `transitive_reachable` for `type` if
+ // it wasn't previously reachable.
+ // Since there is no direct edge between `type` and `transitive_reachable`
+ // we insert an empty vector into the dependent map.
+ expanded_relations.insert({transitive_reachable, {}});
+ }
+ }
+ for (const auto& kvp : expanded_relations) {
+ expanded_nested_types_map->operator[](type).insert(kvp);
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename T>
+libtextclassifier3::Status CalculateAcyclicTransitiveRelations(
+ const SchemaUtil::TypeRelationMap<T>& direct_relation_map,
std::string_view type,
- SchemaUtil::DependencyMap* expanded_child_to_parent_map,
+ SchemaUtil::TypeRelationMap<T>* expanded_relation_map,
std::unordered_set<std::string_view>* pending_expansions,
- std::unordered_set<std::string_view>* orphaned_types) {
- auto expanded_itr = expanded_child_to_parent_map->find(type);
- if (expanded_itr != expanded_child_to_parent_map->end()) {
+ std::unordered_set<std::string_view>* sink_types) {
+ auto expanded_itr = expanded_relation_map->find(type);
+ if (expanded_itr != expanded_relation_map->end()) {
// We've already expanded this type. Just return.
return libtextclassifier3::Status::OK;
}
- auto itr = child_to_direct_parent_map.find(type);
- if (itr == child_to_direct_parent_map.end()) {
- // It's an orphan. Just return.
- orphaned_types->insert(type);
+ auto itr = direct_relation_map.find(type);
+ if (itr == direct_relation_map.end()) {
+ // It's a sink node. Just return.
+ sink_types->insert(type);
return libtextclassifier3::Status::OK;
}
pending_expansions->insert(type);
- std::unordered_set<std::string_view> expanded_dependencies;
+ std::unordered_map<std::string_view, T> expanded_relations;
- // Add all of the direct parent dependencies.
- expanded_dependencies.reserve(itr->second.size());
- expanded_dependencies.insert(itr->second.begin(), itr->second.end());
+ // Add all of the adjacent outgoing relations.
+ expanded_relations.reserve(itr->second.size());
+ expanded_relations.insert(itr->second.begin(), itr->second.end());
- // Iterate through each direct parent and add their indirect parents.
- for (std::string_view dep : itr->second) {
+ // Iterate through each adjacent outgoing relation and add their indirect
+ // outgoing relations.
+ for (const auto& [adjacent, _] : itr->second) {
// 1. Check if we're in the middle of expanding this type - IOW there's a
// cycle!
- if (pending_expansions->count(dep) > 0) {
+ if (pending_expansions->count(adjacent) > 0) {
return absl_ports::InvalidArgumentError(
- absl_ports::StrCat("Infinite loop detected in type configs. '", type,
- "' references itself."));
+ absl_ports::StrCat("Invalid cycle detected in type configs. '", type,
+ "' references or inherits from itself."));
}
// 2. Expand this type as needed.
- ICING_RETURN_IF_ERROR(ExpandTranstiveDependencies(
- child_to_direct_parent_map, dep, expanded_child_to_parent_map,
- pending_expansions, orphaned_types));
- if (orphaned_types->count(dep) > 0) {
- // Dep is an orphan. Just skip to the next dep.
+ ICING_RETURN_IF_ERROR(CalculateAcyclicTransitiveRelations(
+ direct_relation_map, adjacent, expanded_relation_map,
+ pending_expansions, sink_types));
+ if (sink_types->count(adjacent) > 0) {
+ // "adjacent" is a sink node. Just skip to the next.
continue;
}
- // 3. Dep has been fully expanded. Add all of its dependencies to this
- // type's dependencies.
- auto dep_expanded_itr = expanded_child_to_parent_map->find(dep);
- expanded_dependencies.reserve(expanded_dependencies.size() +
- dep_expanded_itr->second.size());
- expanded_dependencies.insert(dep_expanded_itr->second.begin(),
- dep_expanded_itr->second.end());
+ // 3. "adjacent" has been fully expanded. Add all of its transitive outgoing
+ // relations to this type's transitive outgoing relations.
+ auto adjacent_expanded_itr = expanded_relation_map->find(adjacent);
+ for (const auto& [transitive_reachable, _] :
+ adjacent_expanded_itr->second) {
+ // Insert a transitive reachable node `transitive_reachable` for `type`.
+ // Also since there is no direct edge between `type` and
+ // `transitive_reachable`, the direct edge is initialized by default.
+ expanded_relations.insert({transitive_reachable, T()});
+ }
}
- expanded_child_to_parent_map->insert(
- {type, std::move(expanded_dependencies)});
+ expanded_relation_map->insert({type, std::move(expanded_relations)});
pending_expansions->erase(type);
return libtextclassifier3::Status::OK;
}
-// Expands the dependencies represented by the child_to_direct_parent_map to
-// also include indirect parents.
+// Calculate and return the expanded nested-type map from
+// direct_nested_type_map. This expands the direct_nested_type_map to also
+// include indirect nested-type relations.
//
-// Ex. Suppose we have a schema with four types A, B, C, D. A has a property of
-// type B and B has a property of type C. C and D only have non-document
-// properties.
+// Ex. Suppose we have the following relations in direct_nested_type_map.
//
-// The child to direct parent dependency map for this schema would be:
-// C -> B
-// B -> A
+// C -> B (Schema type B has a document property of type C)
+// B -> A (Schema type A has a document property of type B)
//
-// This function would expand it so that A is also present as an indirect parent
-// of C.
-libtextclassifier3::StatusOr<SchemaUtil::DependencyMap>
-ExpandTranstiveDependencies(
- const SchemaUtil::DependencyMap& child_to_direct_parent_map) {
- SchemaUtil::DependencyMap expanded_child_to_parent_map;
+// Then, this function would expand the map by adding C -> A to the map.
+libtextclassifier3::StatusOr<SchemaUtil::DependentMap>
+CalculateTransitiveNestedTypeRelations(
+ const SchemaUtil::DependentMap& direct_nested_type_map,
+ const std::unordered_set<std::string_view>& joinable_types,
+ bool allow_circular_schema_definitions) {
+ SchemaUtil::DependentMap expanded_nested_type_map;
+ // Types that have no outgoing relations.
+ std::unordered_set<std::string_view> sink_types;
+
+ if (allow_circular_schema_definitions) {
+ // Map of nodes that are pending expansion -> whether the path from each key
+ // node to the 'current' node is nested_indexable.
+ // A copy of this map is made for each new node that we expand.
+ std::unordered_map<std::string_view, bool>
+ pending_expansion_paths_indexable;
+ for (const auto& kvp : direct_nested_type_map) {
+ ICING_RETURN_IF_ERROR(CalculateTransitiveNestedTypeRelations(
+ direct_nested_type_map, joinable_types, kvp.first,
+ /*path_contains_joinable_property=*/false, &expanded_nested_type_map,
+ std::unordered_map<std::string_view, bool>(
+ pending_expansion_paths_indexable),
+ &sink_types));
+ }
+ } else {
+ // If allow_circular_schema_definitions is false, then fallback to the old
+ // way of detecting cycles.
+ // Types that we are expanding.
+ std::unordered_set<std::string_view> pending_expansions;
+ for (const auto& kvp : direct_nested_type_map) {
+ ICING_RETURN_IF_ERROR(CalculateAcyclicTransitiveRelations(
+ direct_nested_type_map, kvp.first, &expanded_nested_type_map,
+ &pending_expansions, &sink_types));
+ }
+ }
+ return expanded_nested_type_map;
+}
+
+// Calculate and return the expanded inheritance map from
+// direct_nested_type_map. This expands the direct_inheritance_map to also
+// include indirect inheritance relations.
+//
+// Ex. Suppose we have the following relations in direct_inheritance_map.
+//
+// C -> B (Schema type C is B's parent_type )
+// B -> A (Schema type B is A's parent_type)
+//
+// Then, this function would expand the map by adding C -> A to the map.
+libtextclassifier3::StatusOr<SchemaUtil::InheritanceMap>
+CalculateTransitiveInheritanceRelations(
+ const SchemaUtil::InheritanceMap& direct_inheritance_map) {
+ SchemaUtil::InheritanceMap expanded_inheritance_map;
// Types that we are expanding.
std::unordered_set<std::string_view> pending_expansions;
- // Types that have no parents that depend on them.
- std::unordered_set<std::string_view> orphaned_types;
- for (const auto& kvp : child_to_direct_parent_map) {
- ICING_RETURN_IF_ERROR(ExpandTranstiveDependencies(
- child_to_direct_parent_map, kvp.first, &expanded_child_to_parent_map,
- &pending_expansions, &orphaned_types));
+ // Types that have no outgoing relation.
+ std::unordered_set<std::string_view> sink_types;
+ for (const auto& kvp : direct_inheritance_map) {
+ ICING_RETURN_IF_ERROR(CalculateAcyclicTransitiveRelations(
+ direct_inheritance_map, kvp.first, &expanded_inheritance_map,
+ &pending_expansions, &sink_types));
}
- return expanded_child_to_parent_map;
+ return expanded_inheritance_map;
}
-// Builds a transitive child-parent dependency map. 'Orphaned' types (types with
-// no parents) will not be present in the map.
+// Builds a transitive dependent map. Types with no dependents will not be
+// present in the map as keys.
//
// Ex. Suppose we have a schema with four types A, B, C, D. A has a property of
// type B and B has a property of type C. C and D only have non-document
// properties.
//
-// The transitive child-parent dependency map for this schema would be:
-// C -> A, B
-// B -> A
+// The transitive dependent map for this schema would be:
+// C -> A, B (both A and B depend on C)
+// B -> A (A depends on B)
//
-// A and D would be considered orphaned properties because no type refers to
+// A and D will not be present in the map as keys because no type depends on
// them.
//
// RETURNS:
-// On success, a transitive child-parent dependency map of all types in the
-// schema.
+// On success, a transitive dependent map of all types in the schema.
// INVALID_ARGUMENT if the schema contains a cycle or an undefined type.
// ALREADY_EXISTS if a schema type is specified more than once in the schema
-libtextclassifier3::StatusOr<SchemaUtil::DependencyMap>
-BuildTransitiveDependencyGraph(const SchemaProto& schema) {
- // Child to parent map.
- SchemaUtil::DependencyMap child_to_direct_parent_map;
-
- // Add all first-order dependencies.
+libtextclassifier3::StatusOr<SchemaUtil::DependentMap>
+BuildTransitiveDependentGraph(const SchemaProto& schema,
+ bool allow_circular_schema_definitions) {
+ // We expand the nested-type dependent map and inheritance map differently
+ // when calculating transitive relations. These two types of relations also
+ // should not be transitive so we keep these as separate maps.
+ //
+ // e.g. For schema type A, B and C, B depends on A through inheritance, and
+ // C depends on B by having a property with type B, we will have the two
+ // relations {A, B} and {B, C} in the dependent map, but will not have {A, C}
+ // in the map.
+ SchemaUtil::DependentMap direct_nested_type_map;
+ SchemaUtil::InheritanceMap direct_inheritance_map;
+
+ // Set of schema types that have at least one joinable property.
+ std::unordered_set<std::string_view> joinable_types;
+
+ // Add all first-order dependents.
std::unordered_set<std::string_view> known_types;
std::unordered_set<std::string_view> unknown_types;
for (const auto& type_config : schema.types()) {
@@ -259,27 +477,29 @@ BuildTransitiveDependencyGraph(const SchemaProto& schema) {
}
known_types.insert(schema_type);
unknown_types.erase(schema_type);
+ // Insert inheritance relations into the inheritance map.
+ for (std::string_view parent_schema_type : type_config.parent_types()) {
+ if (known_types.count(parent_schema_type) == 0) {
+ unknown_types.insert(parent_schema_type);
+ }
+ direct_inheritance_map[parent_schema_type][schema_type] = true;
+ }
for (const auto& property_config : type_config.properties()) {
+ if (property_config.joinable_config().value_type() !=
+ JoinableConfig::ValueType::NONE) {
+ joinable_types.insert(schema_type);
+ }
+ // Insert nested-type relations into the nested-type map.
if (property_config.data_type() ==
PropertyConfigProto::DataType::DOCUMENT) {
// Need to know what schema_type these Document properties should be
// validated against
std::string_view property_schema_type(property_config.schema_type());
- if (property_schema_type == schema_type) {
- return absl_ports::InvalidArgumentError(
- absl_ports::StrCat("Infinite loop detected in type configs. '",
- schema_type, "' references itself."));
- }
if (known_types.count(property_schema_type) == 0) {
unknown_types.insert(property_schema_type);
}
- auto itr = child_to_direct_parent_map.find(property_schema_type);
- if (itr == child_to_direct_parent_map.end()) {
- child_to_direct_parent_map.insert(
- {property_schema_type, std::unordered_set<std::string_view>()});
- itr = child_to_direct_parent_map.find(property_schema_type);
- }
- itr->second.insert(schema_type);
+ direct_nested_type_map[property_schema_type][schema_type].push_back(
+ &property_config);
}
}
}
@@ -287,20 +507,57 @@ BuildTransitiveDependencyGraph(const SchemaProto& schema) {
return absl_ports::InvalidArgumentError(absl_ports::StrCat(
"Undefined 'schema_type's: ", absl_ports::StrJoin(unknown_types, ",")));
}
- return ExpandTranstiveDependencies(child_to_direct_parent_map);
+
+ // Merge two expanded maps into a single dependent_map, without making
+ // inheritance and nested-type relations transitive.
+ ICING_ASSIGN_OR_RETURN(SchemaUtil::DependentMap merged_dependent_map,
+ CalculateTransitiveNestedTypeRelations(
+ direct_nested_type_map, joinable_types,
+ allow_circular_schema_definitions));
+ ICING_ASSIGN_OR_RETURN(
+ SchemaUtil::InheritanceMap expanded_inheritance_map,
+ CalculateTransitiveInheritanceRelations(direct_inheritance_map));
+ for (const auto& [parent_type, inheritance_relation] :
+ expanded_inheritance_map) {
+ // Insert the parent_type into the dependent map if it is not present
+ // already.
+ merged_dependent_map.insert({parent_type, {}});
+ for (const auto& [child_type, _] : inheritance_relation) {
+ // Insert the child_type into parent_type's dependent map if it's not
+ // present already, in which case the value will be an empty vector.
+ merged_dependent_map[parent_type].insert({child_type, {}});
+ }
+ }
+ return merged_dependent_map;
}
-libtextclassifier3::StatusOr<SchemaUtil::DependencyMap> SchemaUtil::Validate(
- const SchemaProto& schema) {
- // 1. Build the dependency map. This will detect any cycles, non-existent or
+libtextclassifier3::StatusOr<SchemaUtil::InheritanceMap>
+SchemaUtil::BuildTransitiveInheritanceGraph(const SchemaProto& schema) {
+ SchemaUtil::InheritanceMap direct_inheritance_map;
+ for (const auto& type_config : schema.types()) {
+ for (std::string_view parent_schema_type : type_config.parent_types()) {
+ direct_inheritance_map[parent_schema_type][type_config.schema_type()] =
+ true;
+ }
+ }
+ return CalculateTransitiveInheritanceRelations(direct_inheritance_map);
+}
+
+libtextclassifier3::StatusOr<SchemaUtil::DependentMap> SchemaUtil::Validate(
+ const SchemaProto& schema, bool allow_circular_schema_definitions) {
+ // 1. Build the dependent map. This will detect any cycles, non-existent or
// duplicate types in the schema.
- ICING_ASSIGN_OR_RETURN(SchemaUtil::DependencyMap dependency_map,
- BuildTransitiveDependencyGraph(schema));
+ ICING_ASSIGN_OR_RETURN(
+ SchemaUtil::DependentMap dependent_map,
+ BuildTransitiveDependentGraph(schema, allow_circular_schema_definitions));
// Tracks PropertyConfigs within a SchemaTypeConfig that we've validated
// already.
std::unordered_set<std::string_view> known_property_names;
+ // Tracks PropertyConfigs containing joinable properties.
+ std::unordered_set<std::string_view> schema_types_with_joinable_property;
+
// 2. Validate the properties of each type.
for (const auto& type_config : schema.types()) {
std::string_view schema_type(type_config.schema_type());
@@ -337,6 +594,10 @@ libtextclassifier3::StatusOr<SchemaUtil::DependencyMap> SchemaUtil::Validate(
"data_types in schema property '",
schema_type, ".", property_name, "'"));
}
+
+ ICING_RETURN_IF_ERROR(ValidateDocumentIndexingConfig(
+ property_config.document_indexing_config(), schema_type,
+ property_name));
}
ICING_RETURN_IF_ERROR(ValidateCardinality(property_config.cardinality(),
@@ -347,10 +608,62 @@ libtextclassifier3::StatusOr<SchemaUtil::DependencyMap> SchemaUtil::Validate(
property_config.string_indexing_config(), data_type, schema_type,
property_name));
}
+
+ ICING_RETURN_IF_ERROR(ValidateJoinableConfig(
+ property_config.joinable_config(), data_type,
+ property_config.cardinality(), schema_type, property_name));
+ if (property_config.joinable_config().value_type() !=
+ JoinableConfig::ValueType::NONE) {
+ schema_types_with_joinable_property.insert(schema_type);
+ }
}
}
- return dependency_map;
+ // BFS traverse the dependent graph to make sure that no nested levels
+ // (properties with DOCUMENT data type) have REPEATED cardinality while
+ // depending on schema types with joinable property.
+ std::queue<std::string_view> frontier;
+ for (const auto& schema_type : schema_types_with_joinable_property) {
+ frontier.push(schema_type);
+ }
+ std::unordered_set<std::string_view> traversed =
+ std::move(schema_types_with_joinable_property);
+ while (!frontier.empty()) {
+ std::string_view schema_type = frontier.front();
+ frontier.pop();
+
+ const auto it = dependent_map.find(schema_type);
+ if (it == dependent_map.end()) {
+ continue;
+ }
+
+ // Check every type that has a property of type schema_type.
+ for (const auto& [next_schema_type, property_configs] : it->second) {
+ // Check all properties in "next_schema_type" that are of type
+ // "schema_type".
+ for (const PropertyConfigProto* property_config : property_configs) {
+ if (property_config != nullptr &&
+ property_config->cardinality() ==
+ PropertyConfigProto::Cardinality::REPEATED) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Schema type '", next_schema_type,
+ "' cannot have REPEATED nested document property '",
+ property_config->property_name(),
+ "' while connecting to some joinable properties"));
+ }
+ }
+
+ if (traversed.count(next_schema_type) == 0) {
+ traversed.insert(next_schema_type);
+ frontier.push(next_schema_type);
+ }
+ }
+ }
+
+ // Verify that every child type's property set has included all compatible
+ // properties from parent types.
+ ICING_RETURN_IF_ERROR(ValidateInheritedProperties(schema));
+ return dependent_map;
}
libtextclassifier3::Status SchemaUtil::ValidateSchemaType(
@@ -436,6 +749,171 @@ libtextclassifier3::Status SchemaUtil::ValidateStringIndexingConfig(
return libtextclassifier3::Status::OK;
}
+libtextclassifier3::Status SchemaUtil::ValidateJoinableConfig(
+ const JoinableConfig& config, PropertyConfigProto::DataType::Code data_type,
+ PropertyConfigProto::Cardinality::Code cardinality,
+ std::string_view schema_type, std::string_view property_name) {
+ if (config.value_type() == JoinableConfig::ValueType::QUALIFIED_ID) {
+ if (data_type != PropertyConfigProto::DataType::STRING) {
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("Qualified id joinable property '", property_name,
+ "' is required to have STRING data type"));
+ }
+
+ if (cardinality == PropertyConfigProto::Cardinality::REPEATED) {
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("Qualified id joinable property '", property_name,
+ "' cannot have REPEATED cardinality"));
+ }
+ }
+
+ if (config.propagate_delete() &&
+ config.value_type() != JoinableConfig::ValueType::QUALIFIED_ID) {
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("Field 'property_name' '", property_name,
+ "' is required to have QUALIFIED_ID joinable "
+ "value type with delete propagation enabled"));
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status SchemaUtil::ValidateDocumentIndexingConfig(
+ const DocumentIndexingConfig& config, std::string_view schema_type,
+ std::string_view property_name) {
+ if (!config.indexable_nested_properties_list().empty() &&
+ config.index_nested_properties()) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "DocumentIndexingConfig.index_nested_properties is required to be "
+ "false when providing a non-empty indexable_nested_properties_list "
+ "for property '",
+ schema_type, ".", property_name, "'"));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+/* static */ bool SchemaUtil::IsIndexedProperty(
+ const PropertyConfigProto& property_config) {
+ switch (property_config.data_type()) {
+ case PropertyConfigProto::DataType::STRING:
+ return property_config.string_indexing_config().term_match_type() !=
+ TermMatchType::UNKNOWN &&
+ property_config.string_indexing_config().tokenizer_type() !=
+ StringIndexingConfig::TokenizerType::NONE;
+ case PropertyConfigProto::DataType::INT64:
+ return property_config.integer_indexing_config().numeric_match_type() !=
+ IntegerIndexingConfig::NumericMatchType::UNKNOWN;
+ case PropertyConfigProto::DataType::DOCUMENT:
+ // A document property is considered indexed if it has
+ // index_nested_properties=true, or a non-empty
+ // indexable_nested_properties_list.
+ return property_config.document_indexing_config()
+ .index_nested_properties() ||
+ !property_config.document_indexing_config()
+ .indexable_nested_properties_list()
+ .empty();
+ case PropertyConfigProto::DataType::UNKNOWN:
+ case PropertyConfigProto::DataType::DOUBLE:
+ case PropertyConfigProto::DataType::BOOLEAN:
+ case PropertyConfigProto::DataType::BYTES:
+ return false;
+ }
+}
+
+bool SchemaUtil::IsParent(const SchemaUtil::InheritanceMap& inheritance_map,
+ std::string_view parent_type,
+ std::string_view child_type) {
+ auto iter = inheritance_map.find(parent_type);
+ if (iter == inheritance_map.end()) {
+ return false;
+ }
+ return iter->second.count(child_type) > 0;
+}
+
+bool SchemaUtil::IsInheritedPropertyCompatible(
+ const SchemaUtil::InheritanceMap& inheritance_map,
+ const PropertyConfigProto& child_property_config,
+ const PropertyConfigProto& parent_property_config) {
+ // Check if child_property_config->cardinality() <=
+ // parent_property_config->cardinality().
+ // Subtype may require a stricter cardinality, but cannot loosen cardinality
+ // requirements.
+ if (!CardinalityLessThanEq(child_property_config.cardinality(),
+ parent_property_config.cardinality())) {
+ return false;
+ }
+
+ // Now we can assume T1 and T2 are not nullptr, and cardinality check passes.
+ if (child_property_config.data_type() !=
+ PropertyConfigProto::DataType::DOCUMENT ||
+ parent_property_config.data_type() !=
+ PropertyConfigProto::DataType::DOCUMENT) {
+ return child_property_config.data_type() ==
+ parent_property_config.data_type();
+ }
+
+ // Now we can assume T1 and T2 are both document type.
+ return child_property_config.schema_type() ==
+ parent_property_config.schema_type() ||
+ IsParent(inheritance_map, parent_property_config.schema_type(),
+ child_property_config.schema_type());
+}
+
+libtextclassifier3::Status SchemaUtil::ValidateInheritedProperties(
+ const SchemaProto& schema) {
+ // Create a inheritance map
+ ICING_ASSIGN_OR_RETURN(SchemaUtil::InheritanceMap inheritance_map,
+ BuildTransitiveInheritanceGraph(schema));
+
+ // Create a map that maps from type name to property names, and then from
+ // property names to PropertyConfigProto.
+ std::unordered_map<
+ std::string, std::unordered_map<std::string, const PropertyConfigProto*>>
+ property_map;
+ for (const SchemaTypeConfigProto& type_config : schema.types()) {
+ // Skipping building entries for types without any child or parent, since
+ // such entry will never be used.
+ if (type_config.parent_types().empty() &&
+ inheritance_map.count(type_config.schema_type()) == 0) {
+ continue;
+ }
+ auto& curr_property_map = property_map[type_config.schema_type()];
+ for (const PropertyConfigProto& property_config :
+ type_config.properties()) {
+ curr_property_map[property_config.property_name()] = &property_config;
+ }
+ }
+
+ // Validate child properties.
+ for (const SchemaTypeConfigProto& type_config : schema.types()) {
+ const std::string& child_type_name = type_config.schema_type();
+ auto& child_property_map = property_map[child_type_name];
+
+ for (const std::string& parent_type_name : type_config.parent_types()) {
+ auto& parent_property_map = property_map[parent_type_name];
+
+ for (const auto& [property_name, parent_property_config] :
+ parent_property_map) {
+ auto child_property_iter = child_property_map.find(property_name);
+ if (child_property_iter == child_property_map.end()) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Property ", property_name, " is not present in child type ",
+ child_type_name, ", but it is defined in the parent type ",
+ parent_type_name, "."));
+ }
+ if (!IsInheritedPropertyCompatible(inheritance_map,
+ *child_property_iter->second,
+ *parent_property_config)) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Property ", property_name, " from child type ", child_type_name,
+ " is not compatible to the parent type ", parent_type_name, "."));
+ }
+ }
+ }
+ }
+ return libtextclassifier3::Status::OK;
+}
+
void SchemaUtil::BuildTypeConfigMap(
const SchemaProto& schema, SchemaUtil::TypeConfigMap* type_config_map) {
type_config_map->clear();
@@ -455,14 +933,27 @@ SchemaUtil::ParsedPropertyConfigs SchemaUtil::ParsePropertyConfigs(
property_config.property_name(), &property_config);
if (property_config.cardinality() ==
PropertyConfigProto::Cardinality::REQUIRED) {
- parsed_property_configs.num_required_properties++;
+ ++parsed_property_configs.num_required_properties;
}
// A non-default term_match_type indicates that this property is meant to be
// indexed.
- if (property_config.string_indexing_config().term_match_type() !=
- TermMatchType::UNKNOWN) {
- parsed_property_configs.num_indexed_properties++;
+ if (IsIndexedProperty(property_config)) {
+ ++parsed_property_configs.num_indexed_properties;
+ }
+
+ // A non-default value_type indicates that this property is meant to be
+ // joinable.
+ if (property_config.joinable_config().value_type() !=
+ JoinableConfig::ValueType::NONE) {
+ ++parsed_property_configs.num_joinable_properties;
+ }
+
+ // Also keep track of how many nested document properties there are. Adding
+ // new nested document properties will result in join-index rebuild.
+ if (property_config.data_type() ==
+ PropertyConfigProto::DataType::DOCUMENT) {
+ ++parsed_property_configs.num_nested_document_properties;
}
}
@@ -471,7 +962,7 @@ SchemaUtil::ParsedPropertyConfigs SchemaUtil::ParsePropertyConfigs(
const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
const SchemaProto& old_schema, const SchemaProto& new_schema,
- const DependencyMap& new_schema_dependency_map) {
+ const DependentMap& new_schema_dependent_map) {
SchemaDelta schema_delta;
TypeConfigMap old_type_config_map, new_type_config_map;
@@ -501,6 +992,8 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
// be reindexed.
int32_t old_required_properties = 0;
int32_t old_indexed_properties = 0;
+ int32_t old_joinable_properties = 0;
+ int32_t old_nested_document_properties = 0;
// If there is a different number of properties, then there must have been a
// change.
@@ -509,6 +1002,7 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
new_schema_type_and_config->second.properties_size();
bool is_incompatible = false;
bool is_index_incompatible = false;
+ bool is_join_incompatible = false;
for (const auto& old_property_config : old_type_config.properties()) {
if (old_property_config.cardinality() ==
PropertyConfigProto::Cardinality::REQUIRED) {
@@ -517,13 +1011,26 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
// A non-default term_match_type indicates that this property is meant to
// be indexed.
- bool is_indexed_property =
- old_property_config.string_indexing_config().term_match_type() !=
- TermMatchType::UNKNOWN;
+ bool is_indexed_property = IsIndexedProperty(old_property_config);
if (is_indexed_property) {
++old_indexed_properties;
}
+ bool is_joinable_property =
+ old_property_config.joinable_config().value_type() !=
+ JoinableConfig::ValueType::NONE;
+ if (is_joinable_property) {
+ ++old_joinable_properties;
+ }
+
+ // A nested-document property is a property of DataType::DOCUMENT.
+ bool is_nested_document_property =
+ old_property_config.data_type() ==
+ PropertyConfigProto::DataType::DOCUMENT;
+ if (is_nested_document_property) {
+ ++old_nested_document_properties;
+ }
+
auto new_property_name_and_config =
new_parsed_property_configs.property_config_map.find(
old_property_config.property_name());
@@ -537,6 +1044,8 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
"' was not defined in new schema");
is_incompatible = true;
is_index_incompatible |= is_indexed_property;
+ is_join_incompatible |=
+ is_joinable_property || is_nested_document_property;
continue;
}
@@ -559,12 +1068,19 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
if (!IsTermMatchTypeCompatible(
old_property_config.string_indexing_config(),
new_property_config->string_indexing_config()) ||
- old_property_config.document_indexing_config()
- .index_nested_properties() !=
- new_property_config->document_indexing_config()
- .index_nested_properties()) {
+ !IsIntegerNumericMatchTypeCompatible(
+ old_property_config.integer_indexing_config(),
+ new_property_config->integer_indexing_config()) ||
+ !IsDocumentIndexingCompatible(
+ old_property_config.document_indexing_config(),
+ new_property_config->document_indexing_config())) {
is_index_incompatible = true;
}
+
+ if (old_property_config.joinable_config().value_type() !=
+ new_property_config->joinable_config().value_type()) {
+ is_join_incompatible = true;
+ }
}
// We can't have new properties that are REQUIRED since we won't know how
@@ -581,31 +1097,55 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
is_incompatible = true;
}
- // If we've gained any new indexed properties, then the section ids may
- // change. Since the section ids are stored in the index, we'll need to
+ // If we've gained any new indexed properties (this includes gaining new
+ // indexed nested document properties), then the section ids may change.
+ // Since the section ids are stored in the index, we'll need to
// reindex everything.
if (new_parsed_property_configs.num_indexed_properties >
old_indexed_properties) {
- ICING_VLOG(1) << absl_ports::StrCat(
- "Set of indexed properties in schema type '",
- old_type_config.schema_type(),
- "' has changed, required reindexing.");
+ ICING_VLOG(1) << "Set of indexed properties in schema type '"
+ << old_type_config.schema_type()
+ << "' has changed, required reindexing.";
is_index_incompatible = true;
}
+ // If we've gained any new joinable properties, then the joinable property
+ // ids may change. Since the joinable property ids are stored in the cache,
+ // we'll need to reconstruct join index.
+ // If we've gained any new nested document properties, we also rebuild the
+ // join index. This is because we index all nested joinable properties, so
+ // adding a nested document property will most probably result in having
+ // more joinable properties.
+ if (new_parsed_property_configs.num_joinable_properties >
+ old_joinable_properties ||
+ new_parsed_property_configs.num_nested_document_properties >
+ old_nested_document_properties) {
+ ICING_VLOG(1) << "Set of joinable properties in schema type '"
+ << old_type_config.schema_type()
+ << "' has changed, required reconstructing joinable cache.";
+ is_join_incompatible = true;
+ }
+
if (is_incompatible) {
AddIncompatibleChangeToDelta(schema_delta.schema_types_incompatible,
- old_type_config, new_schema_dependency_map,
+ old_type_config, new_schema_dependent_map,
old_type_config_map, new_type_config_map);
}
if (is_index_incompatible) {
AddIncompatibleChangeToDelta(schema_delta.schema_types_index_incompatible,
- old_type_config, new_schema_dependency_map,
+ old_type_config, new_schema_dependent_map,
+ old_type_config_map, new_type_config_map);
+ }
+
+ if (is_join_incompatible) {
+ AddIncompatibleChangeToDelta(schema_delta.schema_types_join_incompatible,
+ old_type_config, new_schema_dependent_map,
old_type_config_map, new_type_config_map);
}
- if (!is_incompatible && !is_index_incompatible && has_property_changed) {
+ if (!is_incompatible && !is_index_incompatible && !is_join_incompatible &&
+ has_property_changed) {
schema_delta.schema_types_changed_fully_compatible.insert(
old_type_config.schema_type());
}
diff --git a/icing/schema/schema-util.h b/icing/schema/schema-util.h
index fa80b15..6d0ff73 100644
--- a/icing/schema/schema-util.h
+++ b/icing/schema/schema-util.h
@@ -33,12 +33,39 @@ class SchemaUtil {
using TypeConfigMap =
std::unordered_map<std::string, const SchemaTypeConfigProto>;
- // Maps from a child type to the parent types that depend on it.
- // Ex. type A has a single property of type B
- // The dependency map will be { { "B", { "A" } } }
- using DependencyMap =
+ // A data structure that stores the relationships between schema types. The
+ // keys in TypeRelationMap are schema types, and the values are sets of schema
+ // types that are directly or indirectly related to the key.
+ template <typename T>
+ using TypeRelationMap =
std::unordered_map<std::string_view,
- std::unordered_set<std::string_view>>;
+ std::unordered_map<std::string_view, T>>;
+
+ // If A -> B is indicated in the map, then type A must be built before
+ // building type B, which implies one of the following situations.
+ //
+ // 1. B has a property of type A.
+ // 2. A is a parent type of B via polymorphism.
+ //
+ // For the first case, this map will also include all PropertyConfigProto
+ // (with DOCUMENT data_type) pointers which *directly* connects type A and B.
+ // IOW, this vector of PropertyConfigProto* are "direct edges" connecting A
+ // and B directly. It will be an empty vector if A and B are not "directly"
+ // connected, but instead via another intermediate level of schema type. For
+ // example, the actual dependency is A -> C -> B, so there will be A -> C and
+ // C -> B with valid PropertyConfigProto* respectively in this map, but we
+ // will also expand transitive dependents: add A -> B into dependent map with
+ // empty vector of "edges".
+ using DependentMap = TypeRelationMap<std::vector<const PropertyConfigProto*>>;
+
+ // If A -> B is indicated in the map, then type A is a parent type of B,
+ // directly or indirectly. If directly, the bool value in the map will be
+ // true, otherwise false.
+ //
+ // Note that all relationships contained in this map are also entries in the
+ // DependentMap, i.e. if B inherits from A, then there will be a mapping from
+ // A to B in both this map and the DependentMap.
+ using InheritanceMap = TypeRelationMap<bool>;
struct SchemaDelta {
// Which schema types were present in the old schema, but were deleted from
@@ -63,6 +90,11 @@ class SchemaUtil {
// SchemaTypeConfigProto.
std::unordered_set<std::string> schema_types_index_incompatible;
+ // Schema types that were changed in a way that was backwards compatible,
+ // but invalidated the joinable cache. Represented by the `schema_type`
+ // field in the SchemaTypeConfigProto.
+ std::unordered_set<std::string> schema_types_join_incompatible;
+
bool operator==(const SchemaDelta& other) const {
return schema_types_deleted == other.schema_types_deleted &&
schema_types_incompatible == other.schema_types_incompatible &&
@@ -70,7 +102,9 @@ class SchemaUtil {
schema_types_changed_fully_compatible ==
other.schema_types_changed_fully_compatible &&
schema_types_index_incompatible ==
- other.schema_types_index_incompatible;
+ other.schema_types_index_incompatible &&
+ schema_types_join_incompatible ==
+ other.schema_types_join_incompatible;
}
};
@@ -84,6 +118,12 @@ class SchemaUtil {
// Total number of properties that were REQUIRED
int32_t num_required_properties = 0;
+
+ // Total number of properties that have joinable config
+ int32_t num_joinable_properties = 0;
+
+ // Total number of properties that have DataType::DOCUMENT
+ int32_t num_nested_document_properties = 0;
};
// This function validates:
@@ -101,21 +141,57 @@ class SchemaUtil {
// SchemaTypeConfigProto.schema_type
// 10. Property names can only be alphanumeric.
// 11. Any STRING data types have a valid string_indexing_config
- // 12. A SchemaTypeConfigProto cannot have a property whose schema_type is
- // itself, thus creating an infinite loop.
- // 13. Two SchemaTypeConfigProtos cannot have properties that reference each
- // other's schema_type, thus creating an infinite loop.
- //
- // TODO(b/171996137): Clarify 12 and 13 are only for indexed properties, once
- // document properties can be opted out of indexing.
+ // 12. PropertyConfigProtos.joinable_config must be valid. See
+ // ValidateJoinableConfig for more details.
+ // 13. Any PropertyConfigProtos with nested DOCUMENT data type must not have
+ // REPEATED cardinality if they reference a schema type containing
+ // joinable property.
+ // 14. The schema definition cannot have invalid cycles. A cycle is invalid
+ // if:
+ // a. SchemaTypeConfigProto.parent_type definitions form an inheritance
+ // cycle.
+ // b. The schema's property definitions have schema_types that form a
+ // cycle, and all properties on the cycle declare
+ // DocumentIndexingConfig.index_nested_properties=true.
+ // c. The schema's property definitions have schema_types that form a
+ // cycle, and the cycle leads to an invalid joinable property config.
+ // This is the case if:
+ // i. Any type node in the cycle itself has a joinable proprty
+ // (property whose joinable config is not NONE), OR
+ // ii. Any type node in the cycle has a nested-type (direct or
+ // indirect) with a joinable property.
+ // 15. For DOCUMENT data types, if
+ // DocumentIndexingConfig.indexable_nested_properties_list is non-empty,
+ // DocumentIndexingConfig.index_nested_properties must be false.
//
// Returns:
- // On success, a dependency map from each child types to all parent types
+ // On success, a dependent map from each types to their dependent types
// that depend on it directly or indirectly.
// ALREADY_EXISTS for case 1 and 2
- // INVALID_ARGUMENT for 3-13
- static libtextclassifier3::StatusOr<DependencyMap> Validate(
- const SchemaProto& schema);
+ // INVALID_ARGUMENT for 3-15
+ static libtextclassifier3::StatusOr<DependentMap> Validate(
+ const SchemaProto& schema, bool allow_circular_schema_definitions);
+
+ // Builds a transitive inheritance map.
+ //
+ // Ex. Suppose we have a schema with four types A, B, C and D, and we have the
+ // following direct inheritance relation.
+ //
+ // A -> B (A is the parent type of B)
+ // B -> C (B is the parent type of C)
+ // C -> D (C is the parent type of D)
+ //
+ // Then, the transitive inheritance map for this schema would be:
+ //
+ // A -> B, C, D
+ // B -> C, D
+ // C -> D
+ //
+ // RETURNS:
+ // On success, a transitive inheritance map of all types in the schema.
+ // INVALID_ARGUMENT if the inheritance graph contains a cycle.
+ static libtextclassifier3::StatusOr<SchemaUtil::InheritanceMap>
+ BuildTransitiveInheritanceGraph(const SchemaProto& schema);
// Creates a mapping of schema type -> schema type config proto. The
// type_config_map is cleared, and then each schema-type_config_proto pair is
@@ -137,6 +213,8 @@ class SchemaUtil {
// `SchemaDelta.schema_types_deleted`
// 3. A schema type's new definition would mean any existing data of the old
// definition is now incompatible.
+ // 4. The derived join index would be incompatible. This is held in
+ // `SchemaDelta.join_incompatible`.
//
// For case 1, the two schemas would result in an incompatible index if:
// 1.1. The new SchemaProto has a different set of indexed properties than
@@ -159,13 +237,18 @@ class SchemaUtil {
// scale defined as:
// LEAST <REPEATED - OPTIONAL - REQUIRED> MOST
//
+ // For case 4, the two schemas would result in an incompatible join if:
+ // 4.1. A SchematypeConfig exists in the new SchemaProto that has a
+ // different set of joinable properties than it did in the old
+ // SchemaProto.
+ //
// A property is defined by the combination of the
// SchemaTypeConfig.schema_type and the PropertyConfigProto.property_name.
//
// Returns a SchemaDelta that captures the aforementioned differences.
static const SchemaDelta ComputeCompatibilityDelta(
const SchemaProto& old_schema, const SchemaProto& new_schema,
- const DependencyMap& new_schema_dependency_map);
+ const DependentMap& new_schema_dependent_map);
// Validates the 'property_name' field.
// 1. Can't be an empty string
@@ -181,6 +264,8 @@ class SchemaUtil {
static libtextclassifier3::Status ValidatePropertyName(
std::string_view property_name, std::string_view schema_type = "");
+ static bool IsIndexedProperty(const PropertyConfigProto& property_config);
+
private:
// Validates the 'schema_type' field
//
@@ -219,6 +304,79 @@ class SchemaUtil {
const StringIndexingConfig& config,
PropertyConfigProto::DataType::Code data_type,
std::string_view schema_type, std::string_view property_name);
+
+ // Checks that the 'joinable_config' satisfies the following rules:
+ // 1. If the data type matches joinable value type
+ // a. Only STRING data types can use QUALIFIED_ID joinable value type
+ // 2. Only QUALIFIED_ID joinable value type can have delete propagation
+ // enabled
+ // 3. Any joinable property should have non-REPEATED cardinality
+ //
+ // Returns:
+ // INVALID_ARGUMENT if any of the rules are not followed
+ // OK on success
+ static libtextclassifier3::Status ValidateJoinableConfig(
+ const JoinableConfig& config,
+ PropertyConfigProto::DataType::Code data_type,
+ PropertyConfigProto::Cardinality::Code cardinality,
+ std::string_view schema_type, std::string_view property_name);
+
+ // Checks that the 'document_indexing_config' satisfies the following rule:
+ // 1. If indexable_nested_properties is non-empty, index_nested_properties
+ // must be set to false.
+ //
+ // Returns:
+ // INVALID_ARGUMENT if any of the rules are not followed
+ // OK on success
+ static libtextclassifier3::Status ValidateDocumentIndexingConfig(
+ const DocumentIndexingConfig& config, std::string_view schema_type,
+ std::string_view property_name);
+
+ // Returns if 'parent_type' is a direct or indirect parent of 'child_type'.
+ static bool IsParent(const SchemaUtil::InheritanceMap& inheritance_map,
+ std::string_view parent_type,
+ std::string_view child_type);
+
+ // Returns if 'child_property_config' in a child type can override
+ // 'parent_property_config' in the parent type.
+ //
+ // Let's assign 'child_property_config' a type T1 and 'parent_property_config'
+ // a type T2 that captures information for their data_type, schema_type and
+ // cardinalities, so that 'child_property_config' can override
+ // 'parent_property_config' if and only if T1 <: T2, i.e. T1 is a subtype of
+ // T2.
+ //
+ // Below are the rules for inferring subtype relations.
+ // - T <: T for every type T.
+ // - If U extends T, then U <: T.
+ // - For every type T1, T2 and T3, if T1 <: T2 and T2 <: T3, then T1 <: T3.
+ // - Optional<T> <: Repeated<T> for every type T.
+ // - Required<T> <: Optional<T> for every type T.
+ // - If T1 <: T2, then
+ // - Required<T1> <: Required<T2>
+ // - Optional<T1> <: Optional<T2>
+ // - Repeated<T1> <: Repeated<T2>
+ //
+ // We assume the Closed World Assumption (CWA), i.e. if T1 <: T2 cannot be
+ // deduced from the above rules, then T1 is not a subtype of T2.
+ static bool IsInheritedPropertyCompatible(
+ const SchemaUtil::InheritanceMap& inheritance_map,
+ const PropertyConfigProto& child_property_config,
+ const PropertyConfigProto& parent_property_config);
+
+ // Verifies that every child type's property set has included all compatible
+ // properties from parent types, based on the following rule:
+ //
+ // - If a property "prop" of type T is in the parent, then the child type must
+ // also have "prop" that is of type U, such that U <: T, i.e. U is a subtype
+ // of T.
+ //
+ // RETURNS:
+ // Ok on validation success
+ // INVALID_ARGUMENT if an exception that violates the above validation rule
+ // is found.
+ static libtextclassifier3::Status ValidateInheritedProperties(
+ const SchemaProto& schema);
};
} // namespace lib
diff --git a/icing/schema/schema-util_test.cc b/icing/schema/schema-util_test.cc
index ffe1036..564bbc0 100644
--- a/icing/schema/schema-util_test.cc
+++ b/icing/schema/schema-util_test.cc
@@ -14,7 +14,7 @@
#include "icing/schema/schema-util.h"
-#include <cstdint>
+#include <initializer_list>
#include <string>
#include <string_view>
#include <unordered_set>
@@ -22,7 +22,6 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/proto/schema.pb.h"
-#include "icing/proto/term.pb.h"
#include "icing/schema-builder.h"
#include "icing/testing/common-matchers.h"
@@ -30,16 +29,26 @@ namespace icing {
namespace lib {
namespace {
+using portable_equals_proto::EqualsProto;
using ::testing::Eq;
using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
+using ::testing::Pair;
+using ::testing::Pointee;
+using ::testing::SizeIs;
+using ::testing::UnorderedElementsAre;
// Properties/fields in a schema type
constexpr char kEmailType[] = "EmailMessage";
constexpr char kMessageType[] = "Text";
constexpr char kPersonType[] = "Person";
-TEST(SchemaUtilTest, DependencyGraphAlphabeticalOrder) {
- // Create a schema with the following dependencies:
+class SchemaUtilTest : public ::testing::TestWithParam<bool> {};
+
+TEST_P(SchemaUtilTest, DependentGraphAlphabeticalOrder) {
+ // Create a schema with the following dependent relation:
// C
// / \
// A - B E - F
@@ -105,7 +114,7 @@ TEST(SchemaUtilTest, DependencyGraphAlphabeticalOrder) {
.SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
.Build();
- // Provide these in alphabetical (also parent-child) order: A, B, C, D, E, F
+ // Provide these in alphabetical order: A, B, C, D, E, F
SchemaProto schema = SchemaBuilder()
.AddType(type_a)
.AddType(type_b)
@@ -114,19 +123,39 @@ TEST(SchemaUtilTest, DependencyGraphAlphabeticalOrder) {
.AddType(type_e)
.AddType(type_f)
.Build();
- ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependencyMap d_map,
- SchemaUtil::Validate(schema));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
EXPECT_THAT(d_map, testing::SizeIs(5));
- EXPECT_THAT(d_map["F"],
- testing::UnorderedElementsAre("A", "B", "C", "D", "E"));
- EXPECT_THAT(d_map["E"], testing::UnorderedElementsAre("A", "B", "C", "D"));
- EXPECT_THAT(d_map["D"], testing::UnorderedElementsAre("A", "B"));
- EXPECT_THAT(d_map["C"], testing::UnorderedElementsAre("A", "B"));
- EXPECT_THAT(d_map["B"], testing::UnorderedElementsAre("A"));
+ EXPECT_THAT(
+ d_map["F"],
+ UnorderedElementsAre(Pair("A", IsEmpty()), Pair("B", IsEmpty()),
+ Pair("C", IsEmpty()), Pair("D", IsEmpty()),
+ Pair("E", UnorderedElementsAre(Pointee(
+ EqualsProto(type_e.properties(0)))))));
+ EXPECT_THAT(d_map["E"],
+ UnorderedElementsAre(
+ Pair("A", IsEmpty()), Pair("B", IsEmpty()),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(0))))),
+ Pair("D", UnorderedElementsAre(
+ Pointee(EqualsProto(type_d.properties(0)))))));
+ EXPECT_THAT(
+ d_map["D"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(1)))))));
+ EXPECT_THAT(
+ d_map["C"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(0)))))));
+ EXPECT_THAT(d_map["B"], UnorderedElementsAre(Pair(
+ "A", UnorderedElementsAre(Pointee(
+ EqualsProto(type_a.properties(0)))))));
}
-TEST(SchemaUtilTest, DependencyGraphReverseAlphabeticalOrder) {
- // Create a schema with the following dependencies:
+TEST_P(SchemaUtilTest, DependentGraphReverseAlphabeticalOrder) {
+ // Create a schema with the following dependent relation:
// C
// / \
// A - B E - F
@@ -192,7 +221,7 @@ TEST(SchemaUtilTest, DependencyGraphReverseAlphabeticalOrder) {
.SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
.Build();
- // Provide these in reverse alphabetical (also child-parent) order:
+ // Provide these in reverse alphabetical order:
// F, E, D, C, B, A
SchemaProto schema = SchemaBuilder()
.AddType(type_f)
@@ -202,19 +231,39 @@ TEST(SchemaUtilTest, DependencyGraphReverseAlphabeticalOrder) {
.AddType(type_b)
.AddType(type_a)
.Build();
- ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependencyMap d_map,
- SchemaUtil::Validate(schema));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
EXPECT_THAT(d_map, testing::SizeIs(5));
- EXPECT_THAT(d_map["F"],
- testing::UnorderedElementsAre("A", "B", "C", "D", "E"));
- EXPECT_THAT(d_map["E"], testing::UnorderedElementsAre("A", "B", "C", "D"));
- EXPECT_THAT(d_map["D"], testing::UnorderedElementsAre("A", "B"));
- EXPECT_THAT(d_map["C"], testing::UnorderedElementsAre("A", "B"));
- EXPECT_THAT(d_map["B"], testing::UnorderedElementsAre("A"));
+ EXPECT_THAT(
+ d_map["F"],
+ UnorderedElementsAre(Pair("A", IsEmpty()), Pair("B", IsEmpty()),
+ Pair("C", IsEmpty()), Pair("D", IsEmpty()),
+ Pair("E", UnorderedElementsAre(Pointee(
+ EqualsProto(type_e.properties(0)))))));
+ EXPECT_THAT(d_map["E"],
+ UnorderedElementsAre(
+ Pair("A", IsEmpty()), Pair("B", IsEmpty()),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(0))))),
+ Pair("D", UnorderedElementsAre(
+ Pointee(EqualsProto(type_d.properties(0)))))));
+ EXPECT_THAT(
+ d_map["D"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(1)))))));
+ EXPECT_THAT(
+ d_map["C"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(0)))))));
+ EXPECT_THAT(d_map["B"], UnorderedElementsAre(Pair(
+ "A", UnorderedElementsAre(Pointee(
+ EqualsProto(type_a.properties(0)))))));
}
-TEST(SchemaUtilTest, DependencyGraphMixedOrder) {
- // Create a schema with the following dependencies:
+TEST_P(SchemaUtilTest, DependentGraphMixedOrder) {
+ // Create a schema with the following dependent relation:
// C
// / \
// A - B E - F
@@ -289,20 +338,40 @@ TEST(SchemaUtilTest, DependencyGraphMixedOrder) {
.AddType(type_b)
.AddType(type_d)
.Build();
- ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependencyMap d_map,
- SchemaUtil::Validate(schema));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
EXPECT_THAT(d_map, testing::SizeIs(5));
- EXPECT_THAT(d_map["F"],
- testing::UnorderedElementsAre("A", "B", "C", "D", "E"));
- EXPECT_THAT(d_map["E"], testing::UnorderedElementsAre("A", "B", "C", "D"));
- EXPECT_THAT(d_map["D"], testing::UnorderedElementsAre("A", "B"));
- EXPECT_THAT(d_map["C"], testing::UnorderedElementsAre("A", "B"));
- EXPECT_THAT(d_map["B"], testing::UnorderedElementsAre("A"));
+ EXPECT_THAT(
+ d_map["F"],
+ UnorderedElementsAre(Pair("A", IsEmpty()), Pair("B", IsEmpty()),
+ Pair("C", IsEmpty()), Pair("D", IsEmpty()),
+ Pair("E", UnorderedElementsAre(Pointee(
+ EqualsProto(type_e.properties(0)))))));
+ EXPECT_THAT(d_map["E"],
+ UnorderedElementsAre(
+ Pair("A", IsEmpty()), Pair("B", IsEmpty()),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(0))))),
+ Pair("D", UnorderedElementsAre(
+ Pointee(EqualsProto(type_d.properties(0)))))));
+ EXPECT_THAT(
+ d_map["D"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(1)))))));
+ EXPECT_THAT(
+ d_map["C"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(0)))))));
+ EXPECT_THAT(d_map["B"], UnorderedElementsAre(Pair(
+ "A", UnorderedElementsAre(Pointee(
+ EqualsProto(type_a.properties(0)))))));
}
-TEST(SchemaUtilTest, TopLevelCycle) {
- // Create a schema with the following dependencies:
- // A - B - B - B - B....
+TEST_P(SchemaUtilTest, TopLevelCycleIndexableTrueInvalid) {
+ // Create a schema with the following nested-type relation:
+ // A - B - B - B - B.... where all edges declare index_nested_properties=true
SchemaTypeConfigProto type_a =
SchemaTypeConfigBuilder()
.SetType("A")
@@ -323,14 +392,57 @@ TEST(SchemaUtilTest, TopLevelCycle) {
.Build();
SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
- HasSubstr("Infinite loop")));
+ HasSubstr("Invalid cycle")));
}
-TEST(SchemaUtilTest, MultiLevelCycle) {
- // Create a schema with the following dependencies:
+TEST_P(SchemaUtilTest, TopLevelCycleIndexableFalseNotJoinableOK) {
+ if (GetParam() != true) {
+ GTEST_SKIP() << "This is an invalid cycle if circular schema definitions "
+ "are not allowed.";
+ }
+
+ // Create a schema with the following nested-type relation and
+ // index_nested_properties definition:
+ // A -(true)-> B -(false)-> B -(false)-> B....
+ // Edge B -(false)-> B breaks the invalid cycle, so this is allowed.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ // Assert Validate status is OK and check dependent map
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(1));
+ EXPECT_THAT(d_map["B"],
+ UnorderedElementsAre(
+ Pair("A", UnorderedElementsAre(
+ Pointee(EqualsProto(type_a.properties(0))))),
+ Pair("B", UnorderedElementsAre(
+ Pointee(EqualsProto(type_b.properties(0)))))));
+}
+
+TEST_P(SchemaUtilTest, MultiLevelCycleIndexableTrueInvalid) {
+ // Create a schema with the following dependent relation:
// A - B - C - A - B - C - A ...
+ // where all edges declare index_nested_properties=true
SchemaTypeConfigProto type_a =
SchemaTypeConfigBuilder()
.SetType("A")
@@ -361,12 +473,1223 @@ TEST(SchemaUtilTest, MultiLevelCycle) {
SchemaProto schema =
SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs((libtextclassifier3::StatusCode::INVALID_ARGUMENT),
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, MultiLevelCycleIndexableFalseNotJoinableOK) {
+ if (GetParam() != true) {
+ GTEST_SKIP() << "This is an invalid cycle if circular schema definitions "
+ "are not allowed.";
+ }
+
+ // Create a schema with the following nested-type relation:
+ // A -(true)-> B -(false)-> C -(true)-> A -(true)-> B -(false)-> C ...
+ // B -(false)-> C breaking the infinite cycle.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::OK));
+}
+
+TEST_P(SchemaUtilTest, MultiLevelCycleDependentMapOk) {
+ if (GetParam() != true) {
+ GTEST_SKIP() << "This is an invalid cycle if circular schema definitions "
+ "are not allowed.";
+ }
+
+ // Create a schema with the following nested-type dependent relation:
+ // A -(false)-> B -(false)-> C -(false)-> A --> B --> C ...
+ // i.e. A is a property of B
+ // B is a property of C
+ // C is a property of A
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ // Assert Validate status is OK and check dependent map
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(3));
+ EXPECT_THAT(
+ d_map["A"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(0))))),
+ Pair("C", IsEmpty())));
+ EXPECT_THAT(
+ d_map["B"],
+ UnorderedElementsAre(Pair("A", IsEmpty()), Pair("B", IsEmpty()),
+ Pair("C", UnorderedElementsAre(Pointee(
+ EqualsProto(type_c.properties(0)))))));
+ EXPECT_THAT(
+ d_map["C"],
+ UnorderedElementsAre(Pair("A", UnorderedElementsAre(Pointee(
+ EqualsProto(type_a.properties(0))))),
+ Pair("B", IsEmpty()), Pair("C", IsEmpty())));
+}
+
+TEST_P(SchemaUtilTest, NestedCycleIndexableTrueInvalid) {
+ // Create a schema with the following dependent relation:
+ // A -(false)-> B <-(true)-> C -(false)-> D.
+ // B <-(true)-> C creates an invalid cycle.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, NestedCycleIndexableFalseNotJoinableOK) {
+ if (GetParam() != true) {
+ GTEST_SKIP() << "This is an invalid cycle if circular schema definitions "
+ "are not allowed.";
+ }
+
+ // Create a schema with the following nested-type relation:
+ // A -(true)-> B -(true)-> C -(false)-> B -(true)-> D.
+ // C -(false)-> B breaks the invalid cycle in B - C - B.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .Build();
+ // Assert Validate status is OK and check dependent map
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(3));
+ EXPECT_THAT(d_map["B"],
+ UnorderedElementsAre(
+ Pair("A", UnorderedElementsAre(
+ Pointee(EqualsProto(type_a.properties(0))))),
+ Pair("B", IsEmpty()),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(0)))))));
+ EXPECT_THAT(
+ d_map["C"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(0))))),
+ Pair("C", IsEmpty())));
+ EXPECT_THAT(d_map["D"],
+ UnorderedElementsAre(
+ Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(
+ Pointee(EqualsProto(type_b.properties(1))))),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(1)))))));
+}
+
+TEST_P(SchemaUtilTest, MultiplePathsAnyPathContainsCycleIsInvalid) {
+ // Create a schema with the following nested-type relation:
+ // C -(false)-> B -(true)-> A
+ // ^ /
+ // (true)\ /(true)
+ // \ v
+ // D
+ // There is a cycle in B-A-D-B... so this is not allowed
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_d)
+ .AddType(type_c)
+ .AddType(type_b)
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, MultipleCycles_anyCycleIndexableTrueInvalid) {
+ // Create a schema with the following nested-type dependent relation:
+ // Note that the arrows in this graph shows the direction of the dependent
+ // relation, rather than nested-type relations.
+ // A -(F)-> B
+ // ^ \ |
+ // (T)| (T)\ |(T)
+ // | v v
+ // D <-(T)- C
+ // There are two cycles: A-B-C-D and A-C-D. The first cycle is allowed because
+ // A-B has nested-indexable=false, but A-C-D
+ //
+ // Schema nested-type property relation graph:
+ // A <-- B
+ // | ^ ^
+ // v \ |
+ // D --> C
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_d)
+ .AddType(type_c)
+ .AddType(type_b)
+ .AddType(type_a)
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, NonExistentType) {
- // Create a schema with the following dependencies:
+TEST_P(SchemaUtilTest, CycleWithSameTypedProps_allPropsIndexableFalseIsOK) {
+ if (GetParam() != true) {
+ GTEST_SKIP() << "This is an invalid cycle if circular schema definitions "
+ "are not allowed.";
+ }
+
+ // Create a schema with the following nested-type relation and
+ // index_nested_properties definition:
+ // A <-(true)- B <-(false)- A -(false)-> B -(true)-> A
+ // A has 2 properties with type B. A - B breaks the invalid cycle only when
+ // both properties declare index_nested_properties=false.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b1")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b2")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("A")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ // Assert Validate status is OK and check dependent map
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(2));
+ EXPECT_THAT(
+ d_map["A"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(0)))))));
+ EXPECT_THAT(d_map["B"],
+ UnorderedElementsAre(
+ Pair("A", UnorderedElementsAre(
+ Pointee(EqualsProto(type_a.properties(0))),
+ Pointee(EqualsProto(type_a.properties(1))))),
+ Pair("B", IsEmpty())));
+}
+
+TEST_P(SchemaUtilTest, CycleWithSameTypedProps_anyPropIndexableTrueIsInvalid) {
+ // Create a schema with the following nested-type relation and
+ // index_nested_properties definition:
+ // A <-(true)- B <-(true)- A -(false)-> B -(true)-> A
+ // A has 2 properties with type B. Prop 'b2' declares
+ // index_nested_properties=true, so there is an invalid cycle.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b1")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b2")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("A")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, CycleWithJoinablePropertyNotAllowed) {
+ // Create a schema with the following dependent relation:
+ // A
+ // / ^
+ // v \
+ // (joinable) B ---> C
+ // B also has a string property that is joinable on QUALIFIED_ID
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("joinableProp")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, NonNestedJoinablePropOutsideCycleOK) {
+ if (GetParam() != true) {
+ GTEST_SKIP() << "This is an invalid cycle if circular schema definitions "
+ "are not allowed.";
+ }
+
+ // Create a schema with the following dependent relation:
+ // A -(false)-> B <-(false)-> C...
+ // A has a string property that is joinable on QUALIFIED_ID, but the cycle is
+ // B-C-B, and none of B or C depends on A, so this is fine.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("joinableProp")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ // Assert Validate status is OK and check dependent map
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(2));
+ EXPECT_THAT(d_map["B"],
+ UnorderedElementsAre(
+ Pair("A", UnorderedElementsAre(
+ Pointee(EqualsProto(type_a.properties(0))))),
+ Pair("B", IsEmpty()),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(0)))))));
+ EXPECT_THAT(
+ d_map["C"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(0))))),
+ Pair("C", IsEmpty())));
+}
+
+TEST_P(SchemaUtilTest, DirectNestedJoinablePropOutsideCycleNotAllowed) {
+ // Create a schema with the following dependent relation:
+ // A
+ // / ^
+ // v \
+ // B ---> C ---> D(joinable)
+ // All edges have index_nested_properties=false and only D has a joinable
+ // property. The cycle A-B-C... is not allowed since there is a type in the
+ // cycle (C) which has a direct nested-type (D) with a joinable property.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("joinableProp")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, TransitiveNestedJoinablePropOutsideCycleNotAllowed) {
+ // Create a schema with the following dependent relation:
+ // A
+ // / ^
+ // v \
+ // B ---> C ---> D ---> E (joinable)
+ // All edges have index_nested_properties=false and only D has a joinable
+ // property. The cycle A-B-C... is not allowed since there is a type in the
+ // cycle (C) which has a transitive nested-type (E) with a joinable property.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder()
+ .SetType("E")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("joinableProp")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .AddType(type_e)
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest,
+ NestedJoinablePropOutsideCycleNotAllowed_reverseIterationOrder) {
+ // Create a schema with the following dependent relation:
+ // E
+ // / ^
+ // v \
+ // D ---> C ---> B ---> A (joinable)
+ // All edges have index_nested_properties=false and only D has a joinable
+ // property. The cycle A-B-C... is not allowed since there is a type in the
+ // cycle (C) which has a transitive nested-type (E) with a joinable property.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("joinableProp")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder()
+ .SetType("E")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/false))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .AddType(type_e)
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, ComplexCycleWithJoinablePropertyNotAllowed) {
+ // Create a schema with the following dependent relation:
+ // A
+ // / ^
+ // v \
+ // B ---> E
+ // / \ ^
+ // v v \
+ // C D --> F
+ //
+ // Cycles: A-B-E-A, A-B-D-F-E-A.
+ // All edges have index_nested_properties=false, but D has a joinable property
+ // so the second cycle is not allowed.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("joinableProp")
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("f")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("F", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("joinableProp")
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder()
+ .SetType("E")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_f =
+ SchemaTypeConfigBuilder()
+ .SetType("F")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/false))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .AddType(type_e)
+ .AddType(type_f)
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, ComplexCycleWithIndexableTrueNotAllowed) {
+ // Create a schema with the following dependent relation:
+ // A
+ // / ^
+ // v \
+ // B ---> E
+ // / \ ^
+ // v v \
+ // C D --> F
+ //
+ // Cycles: A-B-E-A, A-B-D-F-E-A.
+ // B->E has index_nested_properties=false, so the first cycle is allowed.
+ // All edges on the second cycle are nested_indexable, so the second cycle is
+ // not allowed
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("joinableProp")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("f")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("F", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder()
+ .SetType("E")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_f =
+ SchemaTypeConfigBuilder()
+ .SetType("F")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .AddType(type_e)
+ .AddType(type_f)
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, InheritanceAndNestedTypeRelations_noCycle) {
+ if (GetParam() != true) {
+ GTEST_SKIP() << "This is an invalid cycle if circular schema definitions "
+ "are not allowed.";
+ }
+
+ // Create a schema with the following relations:
+ // index_nested_properties definition:
+ // 1. Nested-type relations:
+ // A -(true)-> B -(true)-> C
+ // (false)| (false)/ \(false)
+ // B B C
+ // The properties in the second row are required for B and C to be
+ // compatible with their parents. index_nested_properties must be false in
+ // these properties so that no invalid cycle can be formed because of these
+ // self reference.
+ //
+ // 2. Inheritance relations:
+ // C -> B -> A (A is a parent of B, which is a parent of C)
+ //
+ // These two relations are separate and do not affect each other. In this
+ // case there is no cycle.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddParentType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddParentType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(3));
+ // Both A-B and A-C are inheritance relations.
+ EXPECT_THAT(d_map["A"],
+ UnorderedElementsAre(Pair("B", IsEmpty()), Pair("C", IsEmpty())));
+ // B-A and B-B are nested-type relations, B-C is both a nested-type and an
+ // inheritance relation.
+ EXPECT_THAT(d_map["B"],
+ UnorderedElementsAre(
+ Pair("A", UnorderedElementsAre(
+ Pointee(EqualsProto(type_a.properties(0))))),
+ Pair("B", UnorderedElementsAre(
+ Pointee(EqualsProto(type_b.properties(0))))),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(0)))))));
+ // C-C, C-B and C-A are all nested-type relations.
+ EXPECT_THAT(d_map["C"],
+ UnorderedElementsAre(
+ Pair("B", UnorderedElementsAre(
+ Pointee(EqualsProto(type_b.properties(1))))),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(1))))),
+ Pair("A", IsEmpty())));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaUtil::InheritanceMap i_map,
+ SchemaUtil::BuildTransitiveInheritanceGraph(schema));
+ EXPECT_THAT(i_map, SizeIs(2));
+ EXPECT_THAT(i_map["A"],
+ UnorderedElementsAre(Pair("B", IsTrue()), Pair("C", IsFalse())));
+ EXPECT_THAT(i_map["B"], UnorderedElementsAre(Pair("C", IsTrue())));
+}
+
+TEST_P(SchemaUtilTest, InheritanceAndNestedTypeRelations_nestedTypeCycle) {
+ // Create a schema with the following relations:
+ // index_nested_properties definition:
+ // 1. Nested-type relations:
+ // A -(true)-> B -(true)-> C
+ // (true)| (false)/ \(false)
+ // B B C
+ //
+ // 2. Inheritance relations:
+ // C -> B -> A (A is a parent of B, which is a parent of C)
+ //
+ // These two relations are separate and do not affect each other, but there is
+ // a cycle in nested-type relations: B - B
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddParentType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddParentType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, InheritanceAndNestedTypeRelations_inheritanceCycle) {
+ // Create a schema with the following relations:
+ // index_nested_properties definition:
+ // 1. Nested-type relations:
+ // A -(true)-> B -(true)-> C
+ // (false)| (false)/ \(false)
+ // B B C
+ //
+ // 2. Inheritance relations:
+ // C -> B -> A -> B (A is a parent of B, which is a parent of C and A)
+ //
+ // These two relations are separate and do not affect each other, but there is
+ // a cycle in inheritance relation: B - A - B
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddParentType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddParentType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddParentType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("inherits from itself")));
+}
+
+TEST_P(SchemaUtilTest, NonExistentType) {
+ // Create a schema with the following dependent relation:
// A - B - C - X (does not exist)
SchemaTypeConfigProto type_a =
SchemaTypeConfigBuilder()
@@ -398,16 +1721,360 @@ TEST(SchemaUtilTest, NonExistentType) {
SchemaProto schema =
SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(SchemaUtilTest, SingleTypeIsBothDirectAndIndirectDependent) {
+ // Create a schema with the following dependent relation, all of which are via
+ // nested document. In this case, C is both a direct dependent and an indirect
+ // dependent of A.
+ // A
+ // | \
+ // | B
+ // | /
+ // C
+ SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(2));
+ EXPECT_THAT(d_map["A"],
+ UnorderedElementsAre(
+ Pair("B", UnorderedElementsAre(
+ Pointee(EqualsProto(type_b.properties(0))))),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(0)))))));
+ EXPECT_THAT(d_map["B"], UnorderedElementsAre(Pair(
+ "C", UnorderedElementsAre(Pointee(
+ EqualsProto(type_c.properties(1)))))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaUtil::InheritanceMap i_map,
+ SchemaUtil::BuildTransitiveInheritanceGraph(schema));
+ EXPECT_THAT(i_map, IsEmpty());
+}
+
+TEST_P(SchemaUtilTest, SimpleInheritance) {
+ // Create a schema with the following inheritance relation:
+ // A <- B
+ SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(1));
+ EXPECT_THAT(d_map["A"], UnorderedElementsAre(Pair("B", IsEmpty())));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaUtil::InheritanceMap i_map,
+ SchemaUtil::BuildTransitiveInheritanceGraph(schema));
+ EXPECT_THAT(i_map, SizeIs(1));
+ EXPECT_THAT(i_map["A"], UnorderedElementsAre(Pair("B", IsTrue())));
+}
+
+TEST_P(SchemaUtilTest, SingleInheritanceTypeIsBothDirectAndIndirectChild) {
+ // Create a schema with the following inheritance relation. In this case, C is
+ // both a direct and an indirect child of A.
+ // A
+ // | \
+ // | B
+ // | /
+ // C
+ SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
+ SchemaTypeConfigProto type_c = SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddParentType("A")
+ .AddParentType("B")
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(2));
+ EXPECT_THAT(d_map["A"],
+ UnorderedElementsAre(Pair("B", IsEmpty()), Pair("C", IsEmpty())));
+ EXPECT_THAT(d_map["B"], UnorderedElementsAre(Pair("C", IsEmpty())));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaUtil::InheritanceMap i_map,
+ SchemaUtil::BuildTransitiveInheritanceGraph(schema));
+ EXPECT_THAT(i_map, SizeIs(2));
+ EXPECT_THAT(i_map["A"],
+ UnorderedElementsAre(Pair("B", IsTrue()), Pair("C", IsTrue())));
+ EXPECT_THAT(i_map["B"], UnorderedElementsAre(Pair("C", IsTrue())));
+}
+
+TEST_P(SchemaUtilTest, ComplexInheritance) {
+ // Create a schema with the following inheritance relation:
+ // A
+ // / \
+ // B E
+ // / \
+ // C D
+ // |
+ // F
+ SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder().SetType("C").AddParentType("B").Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder().SetType("D").AddParentType("B").Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder().SetType("E").AddParentType("A").Build();
+ SchemaTypeConfigProto type_f =
+ SchemaTypeConfigBuilder().SetType("F").AddParentType("D").Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .AddType(type_e)
+ .AddType(type_f)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(3));
+ EXPECT_THAT(d_map["A"],
+ UnorderedElementsAre(Pair("B", IsEmpty()), Pair("C", IsEmpty()),
+ Pair("D", IsEmpty()), Pair("E", IsEmpty()),
+ Pair("F", IsEmpty())));
+ EXPECT_THAT(d_map["B"],
+ UnorderedElementsAre(Pair("C", IsEmpty()), Pair("D", IsEmpty()),
+ Pair("F", IsEmpty())));
+ EXPECT_THAT(d_map["D"], UnorderedElementsAre(Pair("F", IsEmpty())));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaUtil::InheritanceMap i_map,
+ SchemaUtil::BuildTransitiveInheritanceGraph(schema));
+ EXPECT_THAT(i_map, SizeIs(3));
+ EXPECT_THAT(i_map["A"],
+ UnorderedElementsAre(Pair("B", IsTrue()), Pair("C", IsFalse()),
+ Pair("D", IsFalse()), Pair("E", IsTrue()),
+ Pair("F", IsFalse())));
+ EXPECT_THAT(i_map["B"],
+ UnorderedElementsAre(Pair("C", IsTrue()), Pair("D", IsTrue()),
+ Pair("F", IsFalse())));
+ EXPECT_THAT(i_map["D"], UnorderedElementsAre(Pair("F", IsTrue())));
+}
+
+TEST_P(SchemaUtilTest, InheritanceCycle) {
+ // Create a schema with the following inheritance relation:
+ // C <- A <- B <- C
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder().SetType("A").AddParentType("C").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder().SetType("C").AddParentType("B").Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(SchemaUtilTest, SelfInheritance) {
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder().SetType("A").AddParentType("A").Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(SchemaUtilTest, NonExistentParentType) {
+ // Create a schema with the following inheritance relation:
+ // (does not exist) X <- A <- B <- C
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder().SetType("A").AddParentType("X").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder().SetType("C").AddParentType("B").Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, EmptySchemaProtoIsValid) {
+TEST_P(SchemaUtilTest, SimpleInheritanceWithNestedType) {
+ // Create a schema with the following dependent relation:
+ // A - B (via inheritance)
+ // B - C (via nested document)
+ SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(2));
+ // Nested-type dependency and inheritance dependencies are not transitive.
+ EXPECT_THAT(d_map["A"], UnorderedElementsAre(Pair("B", IsEmpty())));
+ EXPECT_THAT(d_map["B"], UnorderedElementsAre(Pair(
+ "C", UnorderedElementsAre(Pointee(
+ EqualsProto(type_c.properties(0)))))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaUtil::InheritanceMap i_map,
+ SchemaUtil::BuildTransitiveInheritanceGraph(schema));
+ EXPECT_THAT(i_map, SizeIs(1));
+ EXPECT_THAT(i_map["A"], UnorderedElementsAre(Pair("B", IsTrue())));
+}
+
+TEST_P(SchemaUtilTest, ComplexInheritanceWithNestedType) {
+ // Create a schema with the following dependent relation:
+ // A
+ // / \
+ // B E
+ // / \
+ // C D
+ // |
+ // F
+ // Approach:
+ // B extends A
+ // C extends B
+ // D has a nested document of type B
+ // E has a nested document of type A
+ // F has a nested document of type D
+ SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder().SetType("C").AddParentType("B").Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder()
+ .SetType("E")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_f =
+ SchemaTypeConfigBuilder()
+ .SetType("F")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .AddType(type_e)
+ .AddType(type_f)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(3));
+ EXPECT_THAT(
+ d_map["A"],
+ UnorderedElementsAre(Pair("B", IsEmpty()), Pair("C", IsEmpty()),
+ Pair("E", UnorderedElementsAre(Pointee(
+ EqualsProto(type_e.properties(0)))))));
+ EXPECT_THAT(
+ d_map["B"],
+ UnorderedElementsAre(Pair("C", IsEmpty()),
+ Pair("D", UnorderedElementsAre(Pointee(
+ EqualsProto(type_d.properties(0))))),
+ Pair("F", IsEmpty())));
+ EXPECT_THAT(d_map["D"], UnorderedElementsAre(Pair(
+ "F", UnorderedElementsAre(Pointee(
+ EqualsProto(type_f.properties(0)))))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaUtil::InheritanceMap i_map,
+ SchemaUtil::BuildTransitiveInheritanceGraph(schema));
+ EXPECT_THAT(i_map, SizeIs(2));
+ EXPECT_THAT(i_map["A"],
+ UnorderedElementsAre(Pair("B", IsTrue()), Pair("C", IsFalse())));
+ EXPECT_THAT(i_map["B"], UnorderedElementsAre(Pair("C", IsTrue())));
+}
+
+TEST_P(SchemaUtilTest, InheritanceWithNestedTypeCycle) {
+ // Create a schema that A and B depend on each other, in the sense that B
+ // extends A but A has a nested document of type B.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(SchemaUtilTest, EmptySchemaProtoIsValid) {
SchemaProto schema;
- ICING_ASSERT_OK(SchemaUtil::Validate(schema));
+ ICING_ASSERT_OK(SchemaUtil::Validate(schema, GetParam()));
}
-TEST(SchemaUtilTest, Valid_Nested) {
+TEST_P(SchemaUtilTest, Valid_Nested) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -430,43 +2097,43 @@ TEST(SchemaUtilTest, Valid_Nested) {
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- ICING_ASSERT_OK(SchemaUtil::Validate(schema));
+ ICING_ASSERT_OK(SchemaUtil::Validate(schema, GetParam()));
}
-TEST(SchemaUtilTest, ClearedPropertyConfigsIsValid) {
+TEST_P(SchemaUtilTest, ClearedPropertyConfigsIsValid) {
// No property fields is technically ok, but probably not realistic.
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType(kEmailType))
.Build();
- ICING_ASSERT_OK(SchemaUtil::Validate(schema));
+ ICING_ASSERT_OK(SchemaUtil::Validate(schema, GetParam()));
}
-TEST(SchemaUtilTest, ClearedSchemaTypeIsInvalid) {
+TEST_P(SchemaUtilTest, ClearedSchemaTypeIsInvalid) {
SchemaProto schema =
SchemaBuilder().AddType(SchemaTypeConfigBuilder()).Build();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, EmptySchemaTypeIsInvalid) {
+TEST_P(SchemaUtilTest, EmptySchemaTypeIsInvalid) {
SchemaProto schema =
SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("")).Build();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, AnySchemaTypeOk) {
+TEST_P(SchemaUtilTest, AnySchemaTypeOk) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType(
"abc123!@#$%^&*()_-+=[{]}|\\;:'\",<.>?你好"))
.Build();
- ICING_ASSERT_OK(SchemaUtil::Validate(schema));
+ ICING_ASSERT_OK(SchemaUtil::Validate(schema, GetParam()));
}
-TEST(SchemaUtilTest, ClearedPropertyNameIsInvalid) {
+TEST_P(SchemaUtilTest, ClearedPropertyNameIsInvalid) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -477,11 +2144,11 @@ TEST(SchemaUtilTest, ClearedPropertyNameIsInvalid) {
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
schema.mutable_types(0)->mutable_properties(0)->clear_property_name();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, EmptyPropertyNameIsInvalid) {
+TEST_P(SchemaUtilTest, EmptyPropertyNameIsInvalid) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -492,11 +2159,11 @@ TEST(SchemaUtilTest, EmptyPropertyNameIsInvalid) {
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, NonAlphanumericPropertyNameIsInvalid) {
+TEST_P(SchemaUtilTest, NonAlphanumericPropertyNameIsInvalid) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -507,11 +2174,11 @@ TEST(SchemaUtilTest, NonAlphanumericPropertyNameIsInvalid) {
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, AlphanumericPropertyNameOk) {
+TEST_P(SchemaUtilTest, AlphanumericPropertyNameOk) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -522,10 +2189,10 @@ TEST(SchemaUtilTest, AlphanumericPropertyNameOk) {
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- ICING_ASSERT_OK(SchemaUtil::Validate(schema));
+ ICING_ASSERT_OK(SchemaUtil::Validate(schema, GetParam()));
}
-TEST(SchemaUtilTest, DuplicatePropertyNameIsInvalid) {
+TEST_P(SchemaUtilTest, DuplicatePropertyNameIsInvalid) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -539,11 +2206,11 @@ TEST(SchemaUtilTest, DuplicatePropertyNameIsInvalid) {
.SetDataType(TYPE_STRING)
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::ALREADY_EXISTS));
}
-TEST(SchemaUtilTest, ClearedDataTypeIsInvalid) {
+TEST_P(SchemaUtilTest, ClearedDataTypeIsInvalid) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -554,11 +2221,11 @@ TEST(SchemaUtilTest, ClearedDataTypeIsInvalid) {
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
schema.mutable_types(0)->mutable_properties(0)->clear_data_type();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, UnknownDataTypeIsInvalid) {
+TEST_P(SchemaUtilTest, UnknownDataTypeIsInvalid) {
SchemaProto schema =
SchemaBuilder()
.AddType(
@@ -570,11 +2237,11 @@ TEST(SchemaUtilTest, UnknownDataTypeIsInvalid) {
.SetDataType(PropertyConfigProto::DataType::UNKNOWN)
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, ClearedCardinalityIsInvalid) {
+TEST_P(SchemaUtilTest, ClearedCardinalityIsInvalid) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -585,11 +2252,11 @@ TEST(SchemaUtilTest, ClearedCardinalityIsInvalid) {
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
schema.mutable_types(0)->mutable_properties(0)->clear_cardinality();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, UnknownCardinalityIsInvalid) {
+TEST_P(SchemaUtilTest, UnknownCardinalityIsInvalid) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -599,11 +2266,11 @@ TEST(SchemaUtilTest, UnknownCardinalityIsInvalid) {
.SetDataType(TYPE_STRING)
.SetCardinality(CARDINALITY_UNKNOWN)))
.Build();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, ClearedPropertySchemaTypeIsInvalid) {
+TEST_P(SchemaUtilTest, ClearedPropertySchemaTypeIsInvalid) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -613,11 +2280,11 @@ TEST(SchemaUtilTest, ClearedPropertySchemaTypeIsInvalid) {
.SetDataType(TYPE_DOCUMENT)
.SetCardinality(CARDINALITY_REPEATED)))
.Build();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, Invalid_EmptyPropertySchemaType) {
+TEST_P(SchemaUtilTest, Invalid_EmptyPropertySchemaType) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -630,11 +2297,11 @@ TEST(SchemaUtilTest, Invalid_EmptyPropertySchemaType) {
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, NoMatchingSchemaTypeIsInvalid) {
+TEST_P(SchemaUtilTest, NoMatchingSchemaTypeIsInvalid) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -647,12 +2314,12 @@ TEST(SchemaUtilTest, NoMatchingSchemaTypeIsInvalid) {
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
HasSubstr("Undefined 'schema_type'")));
}
-TEST(SchemaUtilTest, NewOptionalPropertyIsCompatible) {
+TEST_P(SchemaUtilTest, NewOptionalPropertyIsCompatible) {
// Configure old schema
SchemaProto old_schema =
SchemaBuilder()
@@ -682,13 +2349,13 @@ TEST(SchemaUtilTest, NewOptionalPropertyIsCompatible) {
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_changed_fully_compatible.insert(kEmailType);
- SchemaUtil::DependencyMap no_dependencies_map;
+ SchemaUtil::DependentMap no_dependents_map;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
- old_schema, new_schema_with_optional, no_dependencies_map),
+ old_schema, new_schema_with_optional, no_dependents_map),
Eq(schema_delta));
}
-TEST(SchemaUtilTest, NewRequiredPropertyIsIncompatible) {
+TEST_P(SchemaUtilTest, NewRequiredPropertyIsIncompatible) {
// Configure old schema
SchemaProto old_schema =
SchemaBuilder()
@@ -718,13 +2385,13 @@ TEST(SchemaUtilTest, NewRequiredPropertyIsIncompatible) {
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_incompatible.emplace(kEmailType);
- SchemaUtil::DependencyMap no_dependencies_map;
+ SchemaUtil::DependentMap no_dependents_map;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
- old_schema, new_schema_with_required, no_dependencies_map),
+ old_schema, new_schema_with_required, no_dependents_map),
Eq(schema_delta));
}
-TEST(SchemaUtilTest, NewSchemaMissingPropertyIsIncompatible) {
+TEST_P(SchemaUtilTest, NewSchemaMissingPropertyIsIncompatible) {
// Configure old schema
SchemaProto old_schema =
SchemaBuilder()
@@ -754,13 +2421,13 @@ TEST(SchemaUtilTest, NewSchemaMissingPropertyIsIncompatible) {
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_incompatible.emplace(kEmailType);
- SchemaUtil::DependencyMap no_dependencies_map;
+ SchemaUtil::DependentMap no_dependents_map;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
- no_dependencies_map),
+ no_dependents_map),
Eq(schema_delta));
}
-TEST(SchemaUtilTest, CompatibilityOfDifferentCardinalityOk) {
+TEST_P(SchemaUtilTest, CompatibilityOfDifferentCardinalityOk) {
// Configure less restrictive schema based on cardinality
SchemaProto less_restrictive_schema =
SchemaBuilder()
@@ -786,10 +2453,10 @@ TEST(SchemaUtilTest, CompatibilityOfDifferentCardinalityOk) {
// We can't have a new schema be more restrictive, REPEATED->OPTIONAL
SchemaUtil::SchemaDelta incompatible_schema_delta;
incompatible_schema_delta.schema_types_incompatible.emplace(kEmailType);
- SchemaUtil::DependencyMap no_dependencies_map;
+ SchemaUtil::DependentMap no_dependents_map;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
/*old_schema=*/less_restrictive_schema,
- /*new_schema=*/more_restrictive_schema, no_dependencies_map),
+ /*new_schema=*/more_restrictive_schema, no_dependents_map),
Eq(incompatible_schema_delta));
// We can have the new schema be less restrictive, OPTIONAL->REPEATED;
@@ -798,11 +2465,11 @@ TEST(SchemaUtilTest, CompatibilityOfDifferentCardinalityOk) {
kEmailType);
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
/*old_schema=*/more_restrictive_schema,
- /*new_schema=*/less_restrictive_schema, no_dependencies_map),
+ /*new_schema=*/less_restrictive_schema, no_dependents_map),
Eq(compatible_schema_delta));
}
-TEST(SchemaUtilTest, DifferentDataTypeIsIncompatible) {
+TEST_P(SchemaUtilTest, DifferentDataTypeIsIncompatible) {
// Configure old schema, with an int64_t property
SchemaProto old_schema =
SchemaBuilder()
@@ -827,13 +2494,13 @@ TEST(SchemaUtilTest, DifferentDataTypeIsIncompatible) {
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_incompatible.emplace(kEmailType);
- SchemaUtil::DependencyMap no_dependencies_map;
+ SchemaUtil::DependentMap no_dependents_map;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
- no_dependencies_map),
+ no_dependents_map),
Eq(schema_delta));
}
-TEST(SchemaUtilTest, DifferentSchemaTypeIsIncompatible) {
+TEST_P(SchemaUtilTest, DifferentSchemaTypeIsIncompatible) {
// Configure old schema, where Property is supposed to be a Person type
SchemaProto old_schema =
SchemaBuilder()
@@ -887,16 +2554,17 @@ TEST(SchemaUtilTest, DifferentSchemaTypeIsIncompatible) {
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_incompatible.emplace(kEmailType);
// kEmailType depends on kMessageType
- SchemaUtil::DependencyMap dependencies_map = {{kMessageType, {kEmailType}}};
+ SchemaUtil::DependentMap dependents_map = {
+ {kMessageType, {{kEmailType, {}}}}};
SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta(
- old_schema, new_schema, dependencies_map);
+ old_schema, new_schema, dependents_map);
EXPECT_THAT(actual, Eq(schema_delta));
EXPECT_THAT(actual.schema_types_incompatible,
testing::ElementsAre(kEmailType));
EXPECT_THAT(actual.schema_types_deleted, testing::IsEmpty());
}
-TEST(SchemaUtilTest, ChangingIndexedPropertiesMakesIndexIncompatible) {
+TEST_P(SchemaUtilTest, ChangingIndexedStringPropertiesMakesIndexIncompatible) {
// Configure old schema
SchemaProto schema_with_indexed_property =
SchemaBuilder()
@@ -924,21 +2592,21 @@ TEST(SchemaUtilTest, ChangingIndexedPropertiesMakesIndexIncompatible) {
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_index_incompatible.insert(kPersonType);
- // New schema gained a new indexed property.
- SchemaUtil::DependencyMap no_dependencies_map;
+ // New schema gained a new indexed string property.
+ SchemaUtil::DependentMap no_dependents_map;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
- schema_with_indexed_property, schema_with_unindexed_property,
- no_dependencies_map),
+ schema_with_unindexed_property, schema_with_indexed_property,
+ no_dependents_map),
Eq(schema_delta));
- // New schema lost an indexed property.
+ // New schema lost an indexed string property.
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
schema_with_indexed_property, schema_with_unindexed_property,
- no_dependencies_map),
+ no_dependents_map),
Eq(schema_delta));
}
-TEST(SchemaUtilTest, AddingNewIndexedPropertyMakesIndexIncompatible) {
+TEST_P(SchemaUtilTest, AddingNewIndexedStringPropertyMakesIndexIncompatible) {
// Configure old schema
SchemaProto old_schema =
SchemaBuilder()
@@ -970,13 +2638,710 @@ TEST(SchemaUtilTest, AddingNewIndexedPropertyMakesIndexIncompatible) {
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_index_incompatible.insert(kPersonType);
- SchemaUtil::DependencyMap no_dependencies_map;
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ no_dependents_map),
+ Eq(schema_delta));
+}
+
+TEST_P(SchemaUtilTest,
+ AddingNewNonIndexedStringPropertyShouldRemainIndexCompatible) {
+ // Configure old schema
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewProperty")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN,
+ TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ no_dependents_map)
+ .schema_types_index_incompatible,
+ IsEmpty());
+}
+
+TEST_P(SchemaUtilTest, ChangingIndexedIntegerPropertiesMakesIndexIncompatible) {
+ // Configure old schema
+ SchemaProto schema_with_indexed_property =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema
+ SchemaProto schema_with_unindexed_property =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_index_incompatible.insert(kPersonType);
+
+ // New schema gained a new indexed integer property.
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
+ schema_with_unindexed_property, schema_with_indexed_property,
+ no_dependents_map),
+ Eq(schema_delta));
+
+ // New schema lost an indexed integer property.
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
+ schema_with_indexed_property, schema_with_unindexed_property,
+ no_dependents_map),
+ Eq(schema_delta));
+}
+
+TEST_P(SchemaUtilTest, AddingNewIndexedIntegerPropertyMakesIndexIncompatible) {
+ // Configure old schema
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewIndexedProperty")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_index_incompatible.insert(kPersonType);
+ SchemaUtil::DependentMap no_dependents_map;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
- no_dependencies_map),
+ no_dependents_map),
Eq(schema_delta));
}
-TEST(SchemaUtilTest, AddingTypeIsCompatible) {
+TEST_P(SchemaUtilTest,
+ AddingNewNonIndexedIntegerPropertyShouldRemainIndexCompatible) {
+ // Configure old schema
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewProperty")
+ .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ no_dependents_map)
+ .schema_types_index_incompatible,
+ IsEmpty());
+}
+
+TEST_P(SchemaUtilTest,
+ AddingNewIndexedDocumentPropertyMakesIndexAndJoinIncompatible) {
+ SchemaTypeConfigProto nested_schema =
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ // Configure old schema
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("NewEmailProperty")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_index_incompatible.insert(kPersonType);
+ schema_delta.schema_types_join_incompatible.insert(kPersonType);
+
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {{kPersonType, {}}}}};
+ SchemaUtil::SchemaDelta result_schema_delta =
+ SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ dependents_map);
+ EXPECT_THAT(result_schema_delta, Eq(schema_delta));
+}
+
+TEST_P(
+ SchemaUtilTest,
+ AddingNewIndexedDocumentPropertyWithIndexableListMakesIndexAndJoinIncompatible) {
+ SchemaTypeConfigProto nested_schema =
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ // Configure old schema
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema. The added nested document property is indexed, so
+ // this is both index and join incompatible
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("NewEmailProperty")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*indexable_nested_properties_list=*/
+ std::initializer_list<std::string>{"subject"})
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_index_incompatible.insert(kPersonType);
+ schema_delta.schema_types_join_incompatible.insert(kPersonType);
+
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {{kPersonType, {}}}}};
+ SchemaUtil::SchemaDelta result_schema_delta =
+ SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ dependents_map);
+ EXPECT_THAT(result_schema_delta, Eq(schema_delta));
+}
+
+TEST_P(SchemaUtilTest,
+ AddingNewNonIndexedDocumentPropertyMakesJoinIncompatible) {
+ SchemaTypeConfigProto nested_schema =
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ // Configure old schema
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema. The added nested document property is not indexed, so
+ // this is index compatible, but join incompatible
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewEmailProperty")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_join_incompatible.insert(kPersonType);
+
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {{kPersonType, {}}}}};
+ SchemaUtil::SchemaDelta result_schema_delta =
+ SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ dependents_map);
+ EXPECT_THAT(result_schema_delta, Eq(schema_delta));
+}
+
+TEST_P(SchemaUtilTest, DeletingIndexedDocumentPropertyIsIncompatible) {
+ SchemaTypeConfigProto nested_schema =
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ // Configure old schemam with two nested document properties of the same type
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("EmailProperty")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("AnotherEmailProperty")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema and drop one of the nested document properties
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("EmailProperty")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_incompatible.insert(kPersonType);
+ schema_delta.schema_types_index_incompatible.insert(kPersonType);
+ schema_delta.schema_types_join_incompatible.insert(kPersonType);
+
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {{kPersonType, {}}}}};
+ SchemaUtil::SchemaDelta result_schema_delta =
+ SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ dependents_map);
+ EXPECT_THAT(result_schema_delta, Eq(schema_delta));
+}
+
+TEST_P(SchemaUtilTest,
+ DeletingNonIndexedDocumentPropertyIsIncompatible) {
+ SchemaTypeConfigProto nested_schema =
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ // Configure old schemam with two nested document properties of the same type
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("EmailProperty")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("AnotherEmailProperty")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema and drop the non-indexed nested document property
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("EmailProperty")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_incompatible.insert(kPersonType);
+ schema_delta.schema_types_join_incompatible.insert(kPersonType);
+
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {{kPersonType, {}}}}};
+ SchemaUtil::SchemaDelta result_schema_delta =
+ SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ dependents_map);
+ EXPECT_THAT(result_schema_delta, Eq(schema_delta));
+}
+
+TEST_P(SchemaUtilTest, ChangingIndexedDocumentPropertyIsIncompatible) {
+ SchemaTypeConfigProto nested_schema =
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ // Configure old schemam with two nested document properties of the same type
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("EmailProperty")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("AnotherEmailProperty")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema and change one of the nested document properties
+ // to a different name (this is the same as deleting a property and adding
+ // another)
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("EmailProperty")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("DifferentEmailProperty")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_incompatible.insert(kPersonType);
+ schema_delta.schema_types_index_incompatible.insert(kPersonType);
+ schema_delta.schema_types_join_incompatible.insert(kPersonType);
+
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {{kPersonType, {}}}}};
+ SchemaUtil::SchemaDelta result_schema_delta =
+ SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ dependents_map);
+ EXPECT_THAT(result_schema_delta, Eq(schema_delta));
+}
+
+TEST_P(SchemaUtilTest, ChangingNonIndexedDocumentPropertyIsIncompatible) {
+ SchemaTypeConfigProto nested_schema =
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ // Configure old schemam with two nested document properties of the same type
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("EmailProperty")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("AnotherEmailProperty")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema and change the non-indexed nested document property to
+ // a different name (this is the same as deleting a property and adding
+ // another)
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("EmailProperty")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("DifferentEmailProperty")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_incompatible.insert(kPersonType);
+ schema_delta.schema_types_join_incompatible.insert(kPersonType);
+
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {{kPersonType, {}}}}};
+ SchemaUtil::SchemaDelta result_schema_delta =
+ SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ dependents_map);
+ EXPECT_THAT(result_schema_delta, Eq(schema_delta));
+}
+
+TEST_P(SchemaUtilTest, ChangingJoinablePropertiesMakesJoinIncompatible) {
+ // Configure old schema
+ SchemaProto schema_with_joinable_property =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema
+ SchemaProto schema_with_non_joinable_property =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta expected_schema_delta;
+ expected_schema_delta.schema_types_join_incompatible.insert(kPersonType);
+
+ // New schema gained a new joinable property.
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
+ schema_with_non_joinable_property,
+ schema_with_joinable_property, no_dependents_map),
+ Eq(expected_schema_delta));
+
+ // New schema lost a joinable property.
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
+ schema_with_joinable_property,
+ schema_with_non_joinable_property, no_dependents_map),
+ Eq(expected_schema_delta));
+}
+
+TEST_P(SchemaUtilTest, AddingNewJoinablePropertyMakesJoinIncompatible) {
+ // Configure old schema
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewJoinableProperty")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta expected_schema_delta;
+ expected_schema_delta.schema_types_join_incompatible.insert(kPersonType);
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ no_dependents_map),
+ Eq(expected_schema_delta));
+}
+
+TEST_P(SchemaUtilTest, AddingNewNonJoinablePropertyShouldRemainJoinCompatible) {
+ // Configure old schema
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("JoinableProperty")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("JoinableProperty")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewProperty")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ no_dependents_map)
+ .schema_types_join_incompatible,
+ IsEmpty());
+}
+
+TEST_P(SchemaUtilTest, AddingTypeIsCompatible) {
// Can add a new type, existing data isn't incompatible, since none of them
// are of this new schema type
SchemaProto old_schema =
@@ -1010,13 +3375,13 @@ TEST(SchemaUtilTest, AddingTypeIsCompatible) {
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_new.insert(kEmailType);
- SchemaUtil::DependencyMap no_dependencies_map;
+ SchemaUtil::DependentMap no_dependents_map;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
- no_dependencies_map),
+ no_dependents_map),
Eq(schema_delta));
}
-TEST(SchemaUtilTest, DeletingTypeIsNoted) {
+TEST_P(SchemaUtilTest, DeletingTypeIsNoted) {
// Can't remove an old type, new schema needs to at least have all the
// previously defined schema otherwise the Documents of the missing schema
// are invalid
@@ -1051,13 +3416,13 @@ TEST(SchemaUtilTest, DeletingTypeIsNoted) {
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_deleted.emplace(kPersonType);
- SchemaUtil::DependencyMap no_dependencies_map;
+ SchemaUtil::DependentMap no_dependents_map;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
- no_dependencies_map),
+ no_dependents_map),
Eq(schema_delta));
}
-TEST(SchemaUtilTest, DeletingPropertyAndChangingProperty) {
+TEST_P(SchemaUtilTest, DeletingPropertyAndChangingProperty) {
SchemaProto old_schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -1089,13 +3454,13 @@ TEST(SchemaUtilTest, DeletingPropertyAndChangingProperty) {
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_incompatible.emplace(kEmailType);
schema_delta.schema_types_index_incompatible.emplace(kEmailType);
- SchemaUtil::DependencyMap no_dependencies_map;
+ SchemaUtil::DependentMap no_dependents_map;
SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta(
- old_schema, new_schema, no_dependencies_map);
+ old_schema, new_schema, no_dependents_map);
EXPECT_THAT(actual, Eq(schema_delta));
}
-TEST(SchemaUtilTest, IndexNestedDocumentsIndexIncompatible) {
+TEST_P(SchemaUtilTest, IndexNestedDocumentsIndexIncompatible) {
// Make two schemas. One that sets index_nested_properties to false and one
// that sets it to true.
SchemaTypeConfigProto email_type_config =
@@ -1137,20 +3502,253 @@ TEST(SchemaUtilTest, IndexNestedDocumentsIndexIncompatible) {
// unaffected.
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_index_incompatible.emplace(kPersonType);
- SchemaUtil::DependencyMap dependencies_map = {{kEmailType, {kPersonType}}};
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {{kPersonType, {}}}}};
SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta(
- no_nested_index_schema, nested_index_schema, dependencies_map);
+ no_nested_index_schema, nested_index_schema, dependents_map);
EXPECT_THAT(actual, Eq(schema_delta));
// Going from index_nested_properties=true to index_nested_properties=false
// should also make kPersonType index_incompatible. kEmailType should be
// unaffected.
actual = SchemaUtil::ComputeCompatibilityDelta(
- nested_index_schema, no_nested_index_schema, dependencies_map);
+ nested_index_schema, no_nested_index_schema, dependents_map);
+ EXPECT_THAT(actual, Eq(schema_delta));
+}
+
+TEST_P(SchemaUtilTest, AddOrDropIndexableNestedProperties_IndexIncompatible) {
+ SchemaTypeConfigProto email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema_1 =
+ SchemaBuilder()
+ .AddType(email_type_config)
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*indexable_nested_properties_list=*/
+ {"recipient", "subject", "body"})
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ SchemaProto schema_2 =
+ SchemaBuilder()
+ .AddType(email_type_config)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*indexable_nested_properties=*/
+ {"recipient", "subject"})
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ // Dropping some indexable_nested_properties should make kPersonType
+ // index_incompatible. kEmailType should be unaffected.
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_index_incompatible.emplace(kPersonType);
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {{kPersonType, {}}}}};
+ SchemaUtil::SchemaDelta actual =
+ SchemaUtil::ComputeCompatibilityDelta(schema_1, schema_2, dependents_map);
+ EXPECT_THAT(actual, Eq(schema_delta));
+
+ // Adding some indexable_nested_properties should also make kPersonType
+ // index_incompatible. kEmailType should be unaffected.
+ actual =
+ SchemaUtil::ComputeCompatibilityDelta(schema_2, schema_1, dependents_map);
+ EXPECT_THAT(actual, Eq(schema_delta));
+}
+
+TEST_P(SchemaUtilTest, ChangingIndexableNestedProperties_IndexIncompatible) {
+ SchemaTypeConfigProto email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema_1 =
+ SchemaBuilder()
+ .AddType(email_type_config)
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*indexable_nested_properties_list=*/
+ {"recipient", "subject"})
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ SchemaProto schema_2 =
+ SchemaBuilder()
+ .AddType(email_type_config)
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*indexable_nested_properties_list=*/
+ {"recipient", "body"})
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ // Changing 'subject' to 'body' for indexable_nested_properties_list should
+ // make kPersonType index_incompatible. kEmailType should be unaffected.
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_index_incompatible.emplace(kPersonType);
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {{kPersonType, {}}}}};
+ SchemaUtil::SchemaDelta actual =
+ SchemaUtil::ComputeCompatibilityDelta(schema_1, schema_2, dependents_map);
+ EXPECT_THAT(actual, Eq(schema_delta));
+}
+
+TEST_P(SchemaUtilTest, IndexableNestedPropertiesFullSet_IndexIncompatible) {
+ SchemaTypeConfigProto email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema_1 =
+ SchemaBuilder()
+ .AddType(email_type_config)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ SchemaProto schema_2 =
+ SchemaBuilder()
+ .AddType(email_type_config)
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*indexable_nested_properties_list=*/
+ {"recipient", "body", "subject"})
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ // This scenario also invalidates kPersonType and triggers an index rebuild at
+ // the moment, even though the set of indexable_nested_properties from
+ // schema_1 to schema_2 should be the same.
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_index_incompatible.emplace(kPersonType);
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {{kPersonType, {}}}}};
+ SchemaUtil::SchemaDelta actual =
+ SchemaUtil::ComputeCompatibilityDelta(schema_1, schema_2, dependents_map);
+ EXPECT_THAT(actual, Eq(schema_delta));
+}
+
+TEST_P(SchemaUtilTest,
+ ChangingIndexableNestedPropertiesOrder_IndexIsCompatible) {
+ SchemaTypeConfigProto email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema_1 =
+ SchemaBuilder()
+ .AddType(email_type_config)
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*indexable_nested_properties_list=*/
+ {"recipient", "subject", "body"})
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ SchemaProto schema_2 =
+ SchemaBuilder()
+ .AddType(email_type_config)
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*indexable_nested_properties_list=*/
+ {"subject", "body", "recipient"})
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ // Changing order of elements in indexable_nested_properties_list should have
+ // no effect on schema compatibility.
+ SchemaUtil::SchemaDelta schema_delta;
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {{kPersonType, {}}}}};
+ SchemaUtil::SchemaDelta actual =
+ SchemaUtil::ComputeCompatibilityDelta(schema_1, schema_2, dependents_map);
EXPECT_THAT(actual, Eq(schema_delta));
+ EXPECT_THAT(actual.schema_types_index_incompatible, IsEmpty());
}
-TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTermMatchType) {
+TEST_P(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTermMatchType) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
@@ -1161,7 +3759,7 @@ TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTermMatchType) {
.Build();
// Error if we don't set a term match type
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
// Passes once we set a term match type
@@ -1172,10 +3770,10 @@ TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTermMatchType) {
.SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
}
-TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTokenizer) {
+TEST_P(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTokenizer) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
@@ -1186,7 +3784,7 @@ TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTokenizer) {
.Build();
// Error if we don't set a tokenizer type
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
// Passes once we set a tokenizer type
@@ -1197,33 +3795,706 @@ TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTokenizer) {
.SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+}
+
+TEST_P(SchemaUtilTest,
+ ValidateJoinablePropertyTypeQualifiedIdShouldHaveStringDataType) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_INT64)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ // Error if data type is not STRING for qualified id joinable value type.
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Passes once we set STRING as the data type.
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+}
+
+TEST_P(SchemaUtilTest,
+ ValidateJoinablePropertyShouldNotHaveRepeatedCardinality) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ // Error if using REPEATED cardinality for joinable property.
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Passes once we use OPTIONAL cardinality with joinable property.
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+
+ // Passes once we use REQUIRED cardinality with joinable property.
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+
+ // Passes once we use REPEATED cardinality with non-joinable property.
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_NONE,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+}
+
+TEST_P(SchemaUtilTest,
+ ValidateJoinablePropertyWithDeletePropagationShouldHaveTypeQualifiedId) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_NONE,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ // Error if enabling delete propagation with non qualified id joinable value
+ // type.
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Passes once we set qualified id joinable value type with delete propagation
+ // enabled.
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+
+ // Passes once we disable delete propagation.
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_NONE,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+}
+
+TEST_P(SchemaUtilTest,
+ ValidateNestedJoinablePropertyShouldNotHaveNestedRepeatedCardinality) {
+ // Dependency and nested document property cardinality:
+ // "C" --(REPEATED)--> "B" --(OPTIONAL)--> "A"
+ // where "A" contains joinable property. This should not be allowed.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("C").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument("B",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Passes once we use non-REPEATED cardinality for "C.b", i.e. the dependency
+ // and nested document property cardinality becomes:
+ // "C" --(OPTIONAL)--> "B" --(OPTIONAL)--> "A"
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("C").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument("B",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+}
+
+TEST_P(
+ SchemaUtilTest,
+ ValidateNestedJoinablePropertyShouldAllowRepeatedCardinalityIfNoJoinableProperty) {
+ // Dependency and nested document property cardinality:
+ // "C" --(OPTIONAL)--> "B" --(REPEATED)--> "A"
+ // where only "B" contains joinable property. This should be allowed.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_NONE,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument(
+ "A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Bar")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("C").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument("B",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Passes since nested schema type with REPEATED cardinality doesn't have
+ // joinable property.
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+}
+
+TEST_P(SchemaUtilTest,
+ ValidateNestedJoinablePropertyMultiplePropertiesWithSameSchema) {
+ // Dependency and nested document property cardinality:
+ // --(a1: OPTIONAL)--
+ // / \
+ // B -- --> A
+ // \ /
+ // --(a2: REPEATED)--
+ // where "A" contains joinable property. This should not be allowed.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("a1")
+ .SetDataTypeDocument(
+ "A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("a2")
+ .SetDataTypeDocument(
+ "A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Passes once we use non-REPEATED cardinality for "B.a2", i.e. the dependency
+ // and nested document property cardinality becomes:
+ // --(a1: OPTIONAL)--
+ // / \
+ // B -- --> A
+ // \ /
+ // --(a2: OPTIONAL)--
+ schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("a1")
+ .SetDataTypeDocument(
+ "A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("a2")
+ .SetDataTypeDocument(
+ "A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+}
+
+TEST_P(SchemaUtilTest, ValidateNestedJoinablePropertyDiamondRelationship) {
+ // Dependency and nested document property cardinality:
+ // B
+ // / \
+ // (OPTIONAL) (OPTIONAL)
+ // / \
+ // D --- --> A
+ // \ /
+ // (OPTIONAL) (OPTIONAL)
+ // \ /
+ // C
+ // where "A" contains joinable property. This should be allowed.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("C").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument(
+ "B",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("c")
+ .SetDataTypeDocument(
+ "C",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+
+ // Fails once we change any of edge to REPEATED cardinality.
+ // B
+ // / \
+ // (REPEATED) (OPTIONAL)
+ // / \
+ // D --- --> A
+ // \ /
+ // (OPTIONAL) (OPTIONAL)
+ // \ /
+ // C
+ schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("C").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument(
+ "B",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("c")
+ .SetDataTypeDocument(
+ "C",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // B
+ // / \
+ // (OPTIONAL) (REPEATED)
+ // / \
+ // D --- --> A
+ // \ /
+ // (OPTIONAL) (OPTIONAL)
+ // \ /
+ // C
+ schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder().SetType("C").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument(
+ "B",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("c")
+ .SetDataTypeDocument(
+ "C",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // B
+ // / \
+ // (OPTIONAL) (OPTIONAL)
+ // / \
+ // D --- --> A
+ // \ /
+ // (REPEATED) (OPTIONAL)
+ // \ /
+ // C
+ schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("C").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument(
+ "B",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("c")
+ .SetDataTypeDocument(
+ "C",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // B
+ // / \
+ // (OPTIONAL) (OPTIONAL)
+ // / \
+ // D --- --> A
+ // \ /
+ // (OPTIONAL) (REPEATED)
+ // \ /
+ // C
+ schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("C").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument(
+ "B",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("c")
+ .SetDataTypeDocument(
+ "C",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(SchemaUtilTest,
+ ValidDocumentIndexingConfigFields_emptyIndexableListBooleanTrue) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("InnerSchema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN,
+ TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("OuterSchema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("InnerProperty")
+ .SetDataTypeDocument(
+ "InnerSchema",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ SchemaTypeConfigProto* outerSchemaType = schema.mutable_types(1);
+ outerSchemaType->mutable_properties(0)
+ ->mutable_document_indexing_config()
+ ->clear_indexable_nested_properties_list();
+
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+}
+
+TEST_P(SchemaUtilTest,
+ ValidDocumentIndexingConfigFields_emptyIndexableListBooleanFalse) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("InnerSchema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN,
+ TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("OuterSchema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("InnerProperty")
+ .SetDataTypeDocument(
+ "InnerSchema",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ SchemaTypeConfigProto* outerSchemaType = schema.mutable_types(1);
+ outerSchemaType->mutable_properties(0)
+ ->mutable_document_indexing_config()
+ ->clear_indexable_nested_properties_list();
+
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+}
+
+TEST_P(SchemaUtilTest,
+ ValidDocumentIndexingConfigFields_nonEmptyIndexableList) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("InnerSchema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("OuterSchema")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("InnerProperty")
+ .SetDataTypeDocument(
+ "InnerSchema",
+ /*indexable_nested_properties_list=*/
+ std::initializer_list<std::string>{"prop1"})
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ SchemaTypeConfigProto* outerSchemaType = schema.mutable_types(1);
+ outerSchemaType->mutable_properties(0)
+ ->mutable_document_indexing_config()
+ ->set_index_nested_properties(false);
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+}
+
+TEST_P(SchemaUtilTest, InvalidDocumentIndexingConfigFields) {
+ // If indexable_nested_properties is non-empty, index_nested_properties is
+ // required to be false.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("InnerSchema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("OuterSchema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("InnerProperty")
+ .SetDataTypeDocument(
+ "InnerSchema",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ // Setting a non-empty indexable_nested_properties_list while
+ // index_nested_properties=true is invalid.
+ SchemaTypeConfigProto* outerSchemaType = schema.mutable_types(1);
+ outerSchemaType->mutable_properties(0)
+ ->mutable_document_indexing_config()
+ ->add_indexable_nested_properties_list("prop");
+
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, MultipleReferencesToSameNestedSchemaOk) {
+TEST_P(SchemaUtilTest, MultipleReferencesToSameNestedSchemaOk) {
SchemaProto schema =
SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("ChildSchema"))
+ .AddType(SchemaTypeConfigBuilder().SetType("InnerSchema"))
.AddType(SchemaTypeConfigBuilder()
- .SetType("ParentSchema")
+ .SetType("OuterSchema")
.AddProperty(PropertyConfigBuilder()
- .SetName("ChildProperty1")
+ .SetName("InnerProperty1")
.SetDataTypeDocument(
- "ChildSchema",
+ "InnerSchema",
/*index_nested_properties=*/true)
.SetCardinality(CARDINALITY_REPEATED))
.AddProperty(PropertyConfigBuilder()
- .SetName("ChildProperty2")
+ .SetName("InnerProperty2")
.SetDataTypeDocument(
- "ChildSchema",
+ "InnerSchema",
/*index_nested_properties=*/true)
.SetCardinality(CARDINALITY_REPEATED)))
.Build();
- EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
}
-TEST(SchemaUtilTest, InvalidSelfReference) {
+TEST_P(SchemaUtilTest, InvalidSelfReference) {
// Create a schema with a self-reference cycle in it: OwnSchema -> OwnSchema
SchemaProto schema =
SchemaBuilder()
@@ -1237,12 +4508,12 @@ TEST(SchemaUtilTest, InvalidSelfReference) {
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
- HasSubstr("Infinite loop")));
+ HasSubstr("Invalid cycle")));
}
-TEST(SchemaUtilTest, InvalidSelfReferenceEvenWithOtherProperties) {
+TEST_P(SchemaUtilTest, InvalidSelfReferenceEvenWithOtherProperties) {
// Create a schema with a self-reference cycle in it: OwnSchema -> OwnSchema
SchemaProto schema =
SchemaBuilder()
@@ -1261,13 +4532,13 @@ TEST(SchemaUtilTest, InvalidSelfReferenceEvenWithOtherProperties) {
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
- HasSubstr("Infinite loop")));
+ HasSubstr("Invalid cycle")));
}
-TEST(SchemaUtilTest, InvalidInfiniteLoopTwoDegrees) {
- // Create a schema for the parent schema
+TEST_P(SchemaUtilTest, InvalidInfiniteLoopTwoDegrees) {
+ // Create a schema for the outer schema
SchemaProto schema =
SchemaBuilder()
.AddType(
@@ -1279,11 +4550,11 @@ TEST(SchemaUtilTest, InvalidInfiniteLoopTwoDegrees) {
.SetDataTypeDocument(
"B", /*index_nested_properties=*/true)
.SetCardinality(CARDINALITY_OPTIONAL)))
- // Create the child schema
+ // Create the inner schema
.AddType(
SchemaTypeConfigBuilder()
.SetType("B")
- // Reference the schema A, causing an infinite loop of
+ // Reference the schema A, causing an invalid cycle of
// references.
.AddProperty(PropertyConfigBuilder()
.SetName("NestedDocument")
@@ -1293,15 +4564,15 @@ TEST(SchemaUtilTest, InvalidInfiniteLoopTwoDegrees) {
.Build();
// Two degrees of referencing: A -> B -> A
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
- HasSubstr("Infinite loop")));
+ HasSubstr("Invalid cycle")));
}
-TEST(SchemaUtilTest, InvalidInfiniteLoopThreeDegrees) {
+TEST_P(SchemaUtilTest, InvalidInfiniteLoopThreeDegrees) {
SchemaProto schema =
SchemaBuilder()
- // Create a schema for the parent schema
+ // Create a schema for the outer schema
.AddType(
SchemaTypeConfigBuilder()
.SetType("A")
@@ -1311,7 +4582,7 @@ TEST(SchemaUtilTest, InvalidInfiniteLoopThreeDegrees) {
.SetDataTypeDocument(
"B", /*index_nested_properties=*/true)
.SetCardinality(CARDINALITY_OPTIONAL)))
- // Create the child schema
+ // Create the inner schema
.AddType(
SchemaTypeConfigBuilder()
.SetType("B")
@@ -1321,7 +4592,7 @@ TEST(SchemaUtilTest, InvalidInfiniteLoopThreeDegrees) {
.SetDataTypeDocument(
"C", /*index_nested_properties=*/true)
.SetCardinality(CARDINALITY_REPEATED)))
- // Create the child schema
+ // Create the inner schema
.AddType(
SchemaTypeConfigBuilder()
.SetType("C")
@@ -1334,11 +4605,588 @@ TEST(SchemaUtilTest, InvalidInfiniteLoopThreeDegrees) {
.Build();
// Three degrees of referencing: A -> B -> C -> A
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
- HasSubstr("Infinite loop")));
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, ChildMissingOptionalAndRepeatedPropertiesNotOk) {
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ EXPECT_THAT(
+ SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Property text is not present in child type")));
+}
+
+TEST_P(SchemaUtilTest, ChildMissingRequiredPropertyNotOk) {
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ EXPECT_THAT(
+ SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Property text is not present in child type")));
}
+TEST_P(SchemaUtilTest, ChildCompatiblePropertyOk) {
+ SchemaTypeConfigProto message_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("person")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto artist_message_type =
+ SchemaTypeConfigBuilder()
+ .SetType("ArtistMessage")
+ .AddParentType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ // OPTIONAL is compatible with REPEATED.
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ // An extra text is compatible.
+ PropertyConfigBuilder()
+ .SetName("extraText")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ // An extra double is compatible
+ PropertyConfigBuilder()
+ .SetName("extraDouble")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_DOUBLE))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("person")
+ // REQUIRED is compatible with OPTIONAL.
+ .SetCardinality(CARDINALITY_REQUIRED)
+ // Artist is compatible with Person.
+ .SetDataTypeDocument(
+ "Artist", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaTypeConfigProto person_type =
+ SchemaTypeConfigBuilder().SetType("Person").Build();
+ SchemaTypeConfigProto artist_type = SchemaTypeConfigBuilder()
+ .SetType("Artist")
+ .AddParentType("Person")
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(message_type)
+ .AddType(artist_message_type)
+ .AddType(person_type)
+ .AddType(artist_type)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(3));
+ EXPECT_THAT(d_map["Message"],
+ UnorderedElementsAre(Pair("ArtistMessage", IsEmpty())));
+ EXPECT_THAT(d_map["Person"],
+ UnorderedElementsAre(
+ Pair("Message", UnorderedElementsAre(Pointee(EqualsProto(
+ message_type.properties(1))))),
+ Pair("Artist", IsEmpty())));
+ EXPECT_THAT(d_map["Artist"],
+ UnorderedElementsAre(Pair(
+ "ArtistMessage", UnorderedElementsAre(Pointee(EqualsProto(
+ artist_message_type.properties(3)))))));
+}
+
+TEST_P(SchemaUtilTest, ChildIncompatibleCardinalityPropertyNotOk) {
+ SchemaTypeConfigProto message_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("person")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto artist_message_type =
+ SchemaTypeConfigBuilder()
+ .SetType("ArtistMessage")
+ .AddParentType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("extraText")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("person")
+ // Overwrite OPTIONAL to REPEATED is not ok.
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeDocument(
+ "Artist", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaTypeConfigProto person_type =
+ SchemaTypeConfigBuilder().SetType("Person").Build();
+ SchemaTypeConfigProto artist_type = SchemaTypeConfigBuilder()
+ .SetType("Artist")
+ .AddParentType("Person")
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(message_type)
+ .AddType(artist_message_type)
+ .AddType(person_type)
+ .AddType(artist_type)
+ .Build();
+ EXPECT_THAT(
+ SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Property person from child type ArtistMessage is not "
+ "compatible to the parent type Message.")));
+}
+
+TEST_P(SchemaUtilTest, ChildIncompatibleDataTypePropertyNotOk) {
+ SchemaTypeConfigProto message_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("person")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto artist_message_type =
+ SchemaTypeConfigBuilder()
+ .SetType("ArtistMessage")
+ .AddParentType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("text")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ // Double is not compatible to string.
+ .SetDataType(TYPE_DOUBLE))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("extraText")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("person")
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .SetDataTypeDocument(
+ "Artist", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaTypeConfigProto person_type =
+ SchemaTypeConfigBuilder().SetType("Person").Build();
+ SchemaTypeConfigProto artist_type = SchemaTypeConfigBuilder()
+ .SetType("Artist")
+ .AddParentType("Person")
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(message_type)
+ .AddType(artist_message_type)
+ .AddType(person_type)
+ .AddType(artist_type)
+ .Build();
+ EXPECT_THAT(
+ SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Property text from child type ArtistMessage is not "
+ "compatible to the parent type Message.")));
+}
+
+TEST_P(SchemaUtilTest, ChildIncompatibleDocumentTypePropertyNotOk) {
+ SchemaTypeConfigProto message_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("person")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto artist_message_type =
+ SchemaTypeConfigBuilder()
+ .SetType("ArtistMessage")
+ .AddParentType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("extraText")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("person")
+ .SetCardinality(CARDINALITY_REQUIRED)
+ // Artist is not a subtype of Person, thus incompatible
+ .SetDataTypeDocument("Artist",
+ /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaTypeConfigProto person_type =
+ SchemaTypeConfigBuilder().SetType("Person").Build();
+ // In this test, Artist is not a subtype of Person.
+ SchemaTypeConfigProto artist_type =
+ SchemaTypeConfigBuilder().SetType("Artist").Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(message_type)
+ .AddType(artist_message_type)
+ .AddType(person_type)
+ .AddType(artist_type)
+ .Build();
+ EXPECT_THAT(
+ SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Property person from child type ArtistMessage is not "
+ "compatible to the parent type Message.")));
+}
+
+TEST_P(SchemaUtilTest, ChildCompatibleMultipleParentPropertyOk) {
+ SchemaTypeConfigProto email_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaTypeConfigProto message_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("content")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaTypeConfigProto email_message_type =
+ SchemaTypeConfigBuilder()
+ .SetType("EmailMessage")
+ .AddParentType("Email")
+ .AddParentType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("content")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(email_type)
+ .AddType(message_type)
+ .AddType(email_message_type)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(2));
+ EXPECT_THAT(d_map["Email"],
+ UnorderedElementsAre(Pair("EmailMessage", IsEmpty())));
+ EXPECT_THAT(d_map["Message"],
+ UnorderedElementsAre(Pair("EmailMessage", IsEmpty())));
+}
+
+TEST_P(SchemaUtilTest, ChildIncompatibleMultipleParentPropertyNotOk) {
+ SchemaTypeConfigProto email_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaTypeConfigProto message_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("content")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ // Missing the "sender" field from parent "Email", thus incompatible.
+ SchemaTypeConfigProto email_message_type1 =
+ SchemaTypeConfigBuilder()
+ .SetType("EmailMessage")
+ .AddParentType("Email")
+ .AddParentType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("content")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema1 = SchemaBuilder()
+ .AddType(email_type)
+ .AddType(message_type)
+ .AddType(email_message_type1)
+ .Build();
+ EXPECT_THAT(
+ SchemaUtil::Validate(schema1, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr(
+ "Property sender is not present in child type EmailMessage, "
+ "but it is defined in the parent type Email.")));
+
+ // Missing the "content" field from parent "Message", thus incompatible.
+ SchemaTypeConfigProto email_message_type2 =
+ SchemaTypeConfigBuilder()
+ .SetType("EmailMessage")
+ .AddParentType("Email")
+ .AddParentType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema2 = SchemaBuilder()
+ .AddType(email_type)
+ .AddType(message_type)
+ .AddType(email_message_type2)
+ .Build();
+ EXPECT_THAT(
+ SchemaUtil::Validate(schema2, GetParam()),
+ StatusIs(
+ libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr(
+ "Property content is not present in child type EmailMessage, "
+ "but it is defined in the parent type Message.")));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ SchemaUtilTest, SchemaUtilTest,
+ testing::Values(/*allow_circular_schema_definitions=*/true, false));
+
+struct IsIndexedPropertyTestParam {
+ PropertyConfigProto property_config;
+ bool expected_result;
+
+ explicit IsIndexedPropertyTestParam(PropertyConfigProto property_config_in,
+ bool expected_result_in)
+ : property_config(std::move(property_config_in)),
+ expected_result(expected_result_in) {}
+};
+
+class SchemaUtilIsIndexedPropertyTest
+ : public ::testing::TestWithParam<IsIndexedPropertyTestParam> {};
+
+TEST_P(SchemaUtilIsIndexedPropertyTest, IsIndexedProperty) {
+ const IsIndexedPropertyTestParam& test_param = GetParam();
+ EXPECT_THAT(SchemaUtil::IsIndexedProperty(test_param.property_config),
+ Eq(test_param.expected_result));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ SchemaUtilIsIndexedPropertyTest, SchemaUtilIsIndexedPropertyTest,
+ testing::Values(
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN,
+ TOKENIZER_NONE)
+ .Build(),
+ false),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN,
+ TOKENIZER_PLAIN)
+ .Build(),
+ false),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN,
+ TOKENIZER_VERBATIM)
+ .Build(),
+ false),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN,
+ TOKENIZER_RFC822)
+ .Build(),
+ false),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN,
+ TOKENIZER_URL)
+ .Build(),
+ false),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_NONE)
+ .Build(),
+ false),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .Build(),
+ true),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_VERBATIM)
+ .Build(),
+ true),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_RFC822)
+ .Build(),
+ true),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_URL)
+ .Build(),
+ true),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_NONE)
+ .Build(),
+ false),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .Build(),
+ true),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_VERBATIM)
+ .Build(),
+ true),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_RFC822)
+ .Build(),
+ true),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_URL)
+ .Build(),
+ true),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN)
+ .Build(),
+ false),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .Build(),
+ true),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_DOUBLE)
+ .Build(),
+ false),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_BOOLEAN)
+ .Build(),
+ false),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_BYTES)
+ .Build(),
+ false),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_DOCUMENT)
+ .Build(),
+ false)));
+
} // namespace
} // namespace lib
diff --git a/icing/schema/section-manager-builder_test.cc b/icing/schema/section-manager-builder_test.cc
new file mode 100644
index 0000000..1d452d5
--- /dev/null
+++ b/icing/schema/section-manager-builder_test.cc
@@ -0,0 +1,341 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/section-manager.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
+#include "icing/store/key-mapper.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::Pointee;
+
+class SectionManagerBuilderTest : public ::testing::Test {
+ protected:
+ void SetUp() override { test_dir_ = GetTestTempDir() + "/icing"; }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ std::string test_dir_;
+};
+
+TEST_F(SectionManagerBuilderTest, Build) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("typeOne", 0));
+ ICING_ASSERT_OK(schema_type_mapper->Put("typeTwo", 1));
+
+ PropertyConfigProto prop_foo =
+ PropertyConfigBuilder()
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+ PropertyConfigProto prop_bar =
+ PropertyConfigBuilder()
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+ PropertyConfigProto prop_baz =
+ PropertyConfigBuilder()
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+
+ SectionManager::Builder builder(*schema_type_mapper);
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, prop_foo, /*property_path=*/"foo"));
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, prop_bar, /*property_path=*/"bar"));
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/1, prop_baz, /*property_path=*/"baz"));
+
+ std::unique_ptr<SectionManager> section_manager = std::move(builder).Build();
+ // Check "typeOne"
+ EXPECT_THAT(
+ section_manager->GetMetadataList("typeOne"),
+ IsOkAndHolds(Pointee(ElementsAre(
+ EqualsSectionMetadata(/*expected_id=*/0,
+ /*expected_property_path=*/"foo", prop_foo),
+ EqualsSectionMetadata(/*expected_id=*/1,
+ /*expected_property_path=*/"bar", prop_bar)))));
+ // Check "typeTwo"
+ EXPECT_THAT(section_manager->GetMetadataList("typeTwo"),
+ IsOkAndHolds(Pointee(ElementsAre(EqualsSectionMetadata(
+ /*expected_id=*/0,
+ /*expected_property_path=*/"baz", prop_baz)))));
+}
+
+TEST_F(SectionManagerBuilderTest, TooManyPropertiesShouldFail) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("type", 0));
+
+ SectionManager::Builder builder(*schema_type_mapper);
+ // Add kTotalNumSections indexable properties
+ for (int i = 0; i < kTotalNumSections; i++) {
+ PropertyConfigProto property_config =
+ PropertyConfigBuilder()
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config,
+ /*property_path=*/"property" + std::to_string(i)));
+ }
+
+ // Add another indexable property. This should fail.
+ PropertyConfigProto property_config =
+ PropertyConfigBuilder()
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+ EXPECT_THAT(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config,
+ /*property_path=*/"propertyExceed"),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE,
+ HasSubstr("Too many properties")));
+}
+
+TEST_F(SectionManagerBuilderTest, InvalidSchemaTypeIdShouldFail) {
+ // Create a schema type mapper with invalid schema type id.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("type", 0));
+
+ PropertyConfigProto property_config =
+ PropertyConfigBuilder()
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+
+ SectionManager::Builder builder(*schema_type_mapper);
+ EXPECT_THAT(
+ builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/-1, property_config, /*property_path=*/"property"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(SectionManagerBuilderTest,
+ SchemaTypeIdInconsistentWithSchemaTypeMapperSizeShouldFail) {
+ // Create a schema type mapper with schema type id = 2, but size of mapper is
+ // 2.
+ // Since SectionManagerBuilder expects 2 schema type ids = [0, 1], building
+ // with schema type id = 2 should fail even though id = 2 is in schema type
+ // mapper.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("typeOne", 0));
+ ICING_ASSERT_OK(schema_type_mapper->Put("typeTwo", 2));
+
+ PropertyConfigProto property_config =
+ PropertyConfigBuilder()
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+
+ SectionManager::Builder builder(*schema_type_mapper);
+ EXPECT_THAT(
+ builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/2, property_config, /*property_path=*/"property"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+class IndexableSectionManagerBuilderTest
+ : public SectionManagerBuilderTest,
+ public ::testing::WithParamInterface<PropertyConfigProto> {};
+
+TEST_P(IndexableSectionManagerBuilderTest, Build) {
+ static constexpr std::string_view kSchemaType = "type";
+ static constexpr std::string_view kPropertyPath = "foo.bar";
+ const PropertyConfigProto& property_config = GetParam();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put(kSchemaType, 0));
+
+ SectionManager::Builder builder(*schema_type_mapper);
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config, std::string(kPropertyPath)));
+
+ std::unique_ptr<SectionManager> section_manager = std::move(builder).Build();
+ EXPECT_THAT(section_manager->GetMetadataList(std::string(kSchemaType)),
+ IsOkAndHolds(Pointee(ElementsAre(EqualsSectionMetadata(
+ /*expected_id=*/0, kPropertyPath, property_config)))));
+}
+
+// The following types are considered indexable:
+// - String with valid TermMatchType and TokenizerType
+// - Int64 with valid NumericMatchType
+INSTANTIATE_TEST_SUITE_P(
+ IndexableSectionManagerBuilderTest, IndexableSectionManagerBuilderTest,
+ testing::Values(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_VERBATIM)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_RFC822)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_VERBATIM)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build()));
+
+class NonIndexableSectionManagerBuilderTest
+ : public SectionManagerBuilderTest,
+ public ::testing::WithParamInterface<PropertyConfigProto> {};
+
+TEST_P(NonIndexableSectionManagerBuilderTest, Build) {
+ static constexpr std::string_view kSchemaType = "type";
+ static constexpr std::string_view kPropertyPath = "foo.bar";
+ const PropertyConfigProto& property_config = GetParam();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put(kSchemaType, 0));
+
+ SectionManager::Builder builder(*schema_type_mapper);
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config, std::string(kPropertyPath)));
+
+ // NonIndexable sections will still consume a sectionId.
+ std::unique_ptr<SectionManager> section_manager = std::move(builder).Build();
+ EXPECT_THAT(section_manager->GetMetadataList(std::string(kSchemaType)),
+ IsOkAndHolds(Pointee(ElementsAre(EqualsSectionMetadata(
+ /*expected_id=*/0, kPropertyPath, property_config)))));
+}
+
+// The following types are considered non-indexable:
+// - String with TERM_MATCH_UNKNOWN or TOKENIZER_NONE
+// - Int64 with NUMERIC_MATCH_UNKNOWN
+// - Double
+// - Boolean
+// - Bytes
+// - Document
+INSTANTIATE_TEST_SUITE_P(
+ NonIndexableSectionManagerBuilderTest,
+ NonIndexableSectionManagerBuilderTest,
+ testing::Values(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_DOUBLE)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_BOOLEAN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_BYTES)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeDocument("anotherSchema",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeDocument("anotherSchema",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build()));
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/section-manager.cc b/icing/schema/section-manager.cc
index 2ca534e..3d540d6 100644
--- a/icing/schema/section-manager.cc
+++ b/icing/schema/section-manager.cc
@@ -15,27 +15,20 @@
#include "icing/schema/section-manager.h"
#include <algorithm>
-#include <cinttypes>
-#include <cstddef>
#include <cstdint>
-#include <iterator>
-#include <memory>
#include <string>
#include <string_view>
-#include <unordered_map>
-#include <unordered_set>
#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
-#include "icing/absl_ports/str_cat.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/term.pb.h"
-#include "icing/schema/schema-util.h"
+#include "icing/schema/property-util.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/key-mapper.h"
@@ -43,28 +36,14 @@
namespace icing {
namespace lib {
-namespace {
-
-using TypeSectionMap =
- std::unordered_map<std::string, const std::vector<SectionMetadata>>;
-// Helper function to concatenate a path and a property name
-std::string ConcatenatePath(const std::string& path,
- const std::string& next_property_name) {
- if (path.empty()) {
- return next_property_name;
- }
- return absl_ports::StrCat(path, kPropertySeparator, next_property_name);
-}
+namespace {
// Helper function to append a new section metadata
libtextclassifier3::Status AppendNewSectionMetadata(
std::vector<SectionMetadata>* metadata_list,
std::string&& concatenated_path,
- PropertyConfigProto::DataType::Code data_type,
- StringIndexingConfig::TokenizerType::Code string_tokenizer_type,
- TermMatchType::Code term_match_type,
- IntegerIndexingConfig::NumericMatchType::Code numeric_match_type) {
+ const PropertyConfigProto& property_config) {
// Validates next section id, makes sure that section id is the same as the
// list index so that we could find any section metadata by id in O(1) later.
SectionId new_section_id = static_cast<SectionId>(metadata_list->size());
@@ -78,270 +57,64 @@ libtextclassifier3::Status AppendNewSectionMetadata(
// Creates section metadata
metadata_list->push_back(SectionMetadata(
- new_section_id, data_type, string_tokenizer_type, term_match_type,
- numeric_match_type, std::move(concatenated_path)));
+ new_section_id, property_config.data_type(),
+ property_config.string_indexing_config().tokenizer_type(),
+ property_config.string_indexing_config().term_match_type(),
+ property_config.integer_indexing_config().numeric_match_type(),
+ std::move(concatenated_path)));
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status AssignSections(
- const SchemaTypeConfigProto& current_type_config,
- const std::string& current_section_path,
- const SchemaUtil::TypeConfigMap& type_config_map,
- std::vector<SectionMetadata>* metadata_list) {
- // Sorts properties by name's alphabetical order so that order doesn't affect
- // section assigning.
- auto sorted_properties = current_type_config.properties();
- std::sort(sorted_properties.pointer_begin(), sorted_properties.pointer_end(),
- [](const PropertyConfigProto* p1, const PropertyConfigProto* p2) {
- return p1->property_name() < p2->property_name();
- });
- for (const auto& property_config : sorted_properties) {
- // Creates section metadata according to data type
- switch (property_config.data_type()) {
- case PropertyConfigProto::DataType::DOCUMENT: {
- auto nested_type_config_iter =
- type_config_map.find(property_config.schema_type());
- if (nested_type_config_iter == type_config_map.end()) {
- // This should never happen because our schema should already be
- // validated by this point.
- return absl_ports::NotFoundError(absl_ports::StrCat(
- "Type config not found: ", property_config.schema_type()));
- }
-
- if (property_config.document_indexing_config()
- .index_nested_properties()) {
- // Assign any indexed sections recursively
- const SchemaTypeConfigProto& nested_type_config =
- nested_type_config_iter->second;
- ICING_RETURN_IF_ERROR(
- AssignSections(nested_type_config,
- ConcatenatePath(current_section_path,
- property_config.property_name()),
- type_config_map, metadata_list));
- }
- break;
- }
- case PropertyConfigProto::DataType::STRING: {
- if (property_config.string_indexing_config().term_match_type() !=
- TermMatchType::UNKNOWN) {
- ICING_RETURN_IF_ERROR(AppendNewSectionMetadata(
- metadata_list,
- ConcatenatePath(current_section_path,
- property_config.property_name()),
- PropertyConfigProto::DataType::STRING,
- property_config.string_indexing_config().tokenizer_type(),
- property_config.string_indexing_config().term_match_type(),
- IntegerIndexingConfig::NumericMatchType::UNKNOWN));
- }
- break;
- }
- case PropertyConfigProto::DataType::INT64: {
- if (property_config.integer_indexing_config().numeric_match_type() !=
- IntegerIndexingConfig::NumericMatchType::UNKNOWN) {
- ICING_RETURN_IF_ERROR(AppendNewSectionMetadata(
- metadata_list,
- ConcatenatePath(current_section_path,
- property_config.property_name()),
- PropertyConfigProto::DataType::INT64,
- StringIndexingConfig::TokenizerType::NONE, TermMatchType::UNKNOWN,
- property_config.integer_indexing_config().numeric_match_type()));
- }
- break;
- }
- default: {
- // Skip other data types.
- break;
- }
- }
- }
- return libtextclassifier3::Status::OK;
-}
-
-// Builds a vector of vectors that holds SectionMetadatas for all the schema
-// types. The outer vector's index corresponds with a type's SchemaTypeId. The
-// inner vector's index corresponds to the section's SectionId.
-libtextclassifier3::StatusOr<std::vector<std::vector<SectionMetadata>>>
-BuildSectionMetadataCache(const SchemaUtil::TypeConfigMap& type_config_map,
- const KeyMapper<SchemaTypeId>& schema_type_mapper) {
- // Create our vector and reserve the number of schema types we have
- std::vector<std::vector<SectionMetadata>> section_metadata_cache(
- schema_type_mapper.num_keys());
-
- for (const auto& name_and_type : type_config_map) {
- // Assigns sections for each type config
- const std::string& type_config_name = name_and_type.first;
- const SchemaTypeConfigProto& type_config = name_and_type.second;
- std::vector<SectionMetadata> metadata_list;
- ICING_RETURN_IF_ERROR(AssignSections(type_config,
- /*current_section_path*/ "",
- type_config_map, &metadata_list));
-
- // Insert the section metadata list at the index of the type's SchemaTypeId
- ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
- schema_type_mapper.Get(type_config_name));
- section_metadata_cache[schema_type_id] = std::move(metadata_list);
- }
- return section_metadata_cache;
-}
-
-// Helper function to get content from a property according to the template type
-// T. We only care about STRING and INT64, which are the only 2 indexable data
-// types.
-template <typename T>
-libtextclassifier3::StatusOr<std::vector<T>> GetPropertyContent(
- const PropertyProto& property) {
- return absl_ports::UnimplementedError(
- "Unimplemented template type for GetPropertyContent");
-}
-
-template <>
-libtextclassifier3::StatusOr<std::vector<std::string_view>>
-GetPropertyContent<std::string_view>(const PropertyProto& property) {
- return std::vector<std::string_view>(property.string_values().begin(),
- property.string_values().end());
-}
-
-template <>
-libtextclassifier3::StatusOr<std::vector<int64_t>> GetPropertyContent<int64_t>(
- const PropertyProto& property) {
- return std::vector<int64_t>(property.int64_values().begin(),
- property.int64_values().end());
-}
-
template <typename T>
void AppendSection(
SectionMetadata section_metadata,
libtextclassifier3::StatusOr<std::vector<T>>&& section_content_or,
std::vector<Section<T>>& sections_out) {
- if (section_content_or.ok()) {
+ if (!section_content_or.ok()) {
+ return;
+ }
+
+ std::vector<T> section_content = std::move(section_content_or).ValueOrDie();
+ if (!section_content.empty()) {
// Adds to result vector if section is found in document
sections_out.emplace_back(std::move(section_metadata),
- std::move(section_content_or).ValueOrDie());
+ std::move(section_content));
}
}
} // namespace
-SectionManager::SectionManager(
- const KeyMapper<SchemaTypeId>* schema_type_mapper,
- std::vector<std::vector<SectionMetadata>>&& section_metadata_cache)
- : schema_type_mapper_(*schema_type_mapper),
- section_metadata_cache_(std::move(section_metadata_cache)) {}
-
-libtextclassifier3::StatusOr<std::unique_ptr<SectionManager>>
-SectionManager::Create(const SchemaUtil::TypeConfigMap& type_config_map,
- const KeyMapper<SchemaTypeId>* schema_type_mapper) {
- ICING_RETURN_ERROR_IF_NULL(schema_type_mapper);
-
- ICING_ASSIGN_OR_RETURN(
- std::vector<std::vector<SectionMetadata>> section_metadata_cache,
- BuildSectionMetadataCache(type_config_map, *schema_type_mapper));
- return std::unique_ptr<SectionManager>(new SectionManager(
- schema_type_mapper, std::move(section_metadata_cache)));
-}
-
-template <typename T>
-libtextclassifier3::StatusOr<std::vector<T>> SectionManager::GetSectionContent(
- const DocumentProto& document, std::string_view section_path) const {
- // Finds the first property name in section_path
- size_t separator_position = section_path.find(kPropertySeparator);
- std::string_view current_property_name =
- (separator_position == std::string::npos)
- ? section_path
- : section_path.substr(0, separator_position);
-
- // Tries to match the property name with the ones in document
- auto property_iterator =
- std::find_if(document.properties().begin(), document.properties().end(),
- [current_property_name](const PropertyProto& property) {
- return property.name() == current_property_name;
- });
-
- if (property_iterator == document.properties().end()) {
- // Property name not found, it could be one of the following 2 cases:
- // 1. The property is optional and it's not in the document
- // 2. The property name is invalid
- return absl_ports::NotFoundError(absl_ports::StrCat(
- "Section path '", section_path, "' not found in document."));
+libtextclassifier3::Status
+SectionManager::Builder::ProcessSchemaTypePropertyConfig(
+ SchemaTypeId schema_type_id, const PropertyConfigProto& property_config,
+ std::string&& property_path) {
+ if (schema_type_id < 0 || schema_type_id >= section_metadata_cache_.size()) {
+ return absl_ports::InvalidArgumentError("Invalid schema type id");
}
- if (separator_position == std::string::npos) {
- // Current property name is the last one in section path
- ICING_ASSIGN_OR_RETURN(std::vector<T> content,
- GetPropertyContent<T>(*property_iterator));
- if (content.empty()) {
- // The content of property is explicitly set to empty, we'll treat it as
- // NOT_FOUND because the index doesn't care about empty contents.
- return absl_ports::NotFoundError(absl_ports::StrCat(
- "Section path '", section_path, "' content was empty"));
- }
- return content;
- }
-
- // Gets section content recursively
- std::string_view sub_section_path =
- section_path.substr(separator_position + 1);
- std::vector<T> nested_document_content;
- for (const auto& nested_document : property_iterator->document_values()) {
- auto content_or = GetSectionContent<T>(nested_document, sub_section_path);
- if (content_or.ok()) {
- std::vector<T> content = std::move(content_or).ValueOrDie();
- std::move(content.begin(), content.end(),
- std::back_inserter(nested_document_content));
- }
- }
- if (nested_document_content.empty()) {
- return absl_ports::NotFoundError(
- absl_ports::StrCat("Section path ", section_path,
- " not found in type config ", document.schema()));
- }
- return nested_document_content;
-}
-
-// Explicit template instantiation
-template libtextclassifier3::StatusOr<std::vector<std::string_view>>
-SectionManager::GetSectionContent<std::string_view>(
- const DocumentProto& document, std::string_view section_path) const;
-template libtextclassifier3::StatusOr<std::vector<int64_t>>
-SectionManager::GetSectionContent<int64_t>(const DocumentProto& document,
- std::string_view section_path) const;
-
-template <typename T>
-libtextclassifier3::StatusOr<std::vector<T>> SectionManager::GetSectionContent(
- const DocumentProto& document, SectionId section_id) const {
- if (!IsSectionIdValid(section_id)) {
- return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
- "Section id %d is greater than the max value %d", section_id,
- kMaxSectionId));
- }
- ICING_ASSIGN_OR_RETURN(const std::vector<SectionMetadata>* metadata_list,
- GetMetadataList(document.schema()));
- if (section_id >= metadata_list->size()) {
- return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
- "Section with id %d doesn't exist in type config %s", section_id,
- document.schema().c_str()));
- }
- // The index of metadata list is the same as the section id, so we can use
- // section id as the index.
- return GetSectionContent<T>(document, metadata_list->at(section_id).path);
+ // We don't need to check if the property is indexable. This method will
+ // only be called properties that should consume sectionIds, even if the
+ // property's indexing configuration itself is not indexable.
+ // This would be the case for unknown and non-indexable property paths that
+ // are defined in the indexable_nested_properties_list.
+ ICING_RETURN_IF_ERROR(
+ AppendNewSectionMetadata(&section_metadata_cache_[schema_type_id],
+ std::move(property_path), property_config));
+ return libtextclassifier3::Status::OK;
}
-// Explicit template instantiation
-template libtextclassifier3::StatusOr<std::vector<std::string_view>>
-SectionManager::GetSectionContent<std::string_view>(
- const DocumentProto& document, SectionId section_id) const;
-template libtextclassifier3::StatusOr<std::vector<int64_t>>
-SectionManager::GetSectionContent<int64_t>(const DocumentProto& document,
- SectionId section_id) const;
-
libtextclassifier3::StatusOr<const SectionMetadata*>
SectionManager::GetSectionMetadata(SchemaTypeId schema_type_id,
SectionId section_id) const {
+ if (schema_type_id < 0 || schema_type_id >= section_metadata_cache_.size()) {
+ return absl_ports::InvalidArgumentError("Invalid schema type id");
+ }
if (!IsSectionIdValid(section_id)) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
"Section id %d is greater than the max value %d", section_id,
kMaxSectionId));
}
+
const std::vector<SectionMetadata>& section_metadatas =
section_metadata_cache_[schema_type_id];
if (section_id >= section_metadatas.size()) {
@@ -363,17 +136,30 @@ libtextclassifier3::StatusOr<SectionGroup> SectionManager::ExtractSections(
for (const SectionMetadata& section_metadata : *metadata_list) {
switch (section_metadata.data_type) {
case PropertyConfigProto::DataType::STRING: {
- AppendSection(section_metadata,
- GetSectionContent<std::string_view>(
- document, section_metadata.path),
- section_group.string_sections);
+ if (section_metadata.term_match_type == TermMatchType::UNKNOWN ||
+ section_metadata.tokenizer ==
+ StringIndexingConfig::TokenizerType::NONE) {
+ // Skip if term-match type is UNKNOWN, or if the tokenizer-type is
+ // NONE.
+ break;
+ }
+ AppendSection(
+ section_metadata,
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ document, section_metadata.path),
+ section_group.string_sections);
break;
}
case PropertyConfigProto::DataType::INT64: {
- AppendSection(
- section_metadata,
- GetSectionContent<int64_t>(document, section_metadata.path),
- section_group.integer_sections);
+ if (section_metadata.numeric_match_type ==
+ IntegerIndexingConfig::NumericMatchType::UNKNOWN) {
+ // Skip if numeric-match type is UNKNOWN.
+ break;
+ }
+ AppendSection(section_metadata,
+ property_util::ExtractPropertyValuesFromDocument<int64_t>(
+ document, section_metadata.path),
+ section_group.integer_sections);
break;
}
default: {
diff --git a/icing/schema/section-manager.h b/icing/schema/section-manager.h
index 78a5acb..6241dc0 100644
--- a/icing/schema/section-manager.h
+++ b/icing/schema/section-manager.h
@@ -22,7 +22,6 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/proto/document.pb.h"
-#include "icing/schema/schema-util.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/key-mapper.h"
@@ -30,62 +29,55 @@
namespace icing {
namespace lib {
-inline constexpr std::string_view kPropertySeparator = ".";
-inline constexpr std::string_view kLBracket = "[";
-inline constexpr std::string_view kRBracket = "]";
-
// This class provides section-related operations. It assigns sections according
// to type configs and extracts section / sections from documents.
+// The actual instance is created together with JoinablePropertyManager and both
+// of them are wrapped into SchemaTypeManager.
+//
+// Note: SectionManager assumes schema type ids are consecutive integers
+// starting from 0, so it maintains a vector with size
+// schema_type_mapper_->num_keys() that maps schema type id to a list (2nd level
+// vector) of SectionMetadatas. Therefore, all schema type ids stored in
+// schema_type_mapper_ must be in range [0, schema_type_mapper_->num_keys() - 1]
+// and unique.
class SectionManager {
public:
+ // Builder class to create a SectionManager which does not take ownership of
+ // any input components, and all pointers must refer to valid objects that
+ // outlive the created SectionManager instance.
+ class Builder {
+ public:
+ explicit Builder(const KeyMapper<SchemaTypeId>& schema_type_mapper)
+ : schema_type_mapper_(schema_type_mapper),
+ section_metadata_cache_(schema_type_mapper.num_keys()) {}
+
+ // Checks and appends a new SectionMetadata for the schema type id if the
+ // given property config is indexable.
+ //
+ // Returns:
+ // - OK on success
+ // - INVALID_ARGUMENT_ERROR if schema type id is invalid (not in range [0,
+ // schema_type_mapper_.num_keys() - 1])
+ // - OUT_OF_RANGE_ERROR if # of indexable properties in a single Schema
+ // exceeds the threshold (kTotalNumSections)
+ libtextclassifier3::Status ProcessSchemaTypePropertyConfig(
+ SchemaTypeId schema_type_id, const PropertyConfigProto& property_config,
+ std::string&& property_path);
+
+ // Builds and returns a SectionManager instance.
+ std::unique_ptr<SectionManager> Build() && {
+ return std::unique_ptr<SectionManager>(new SectionManager(
+ schema_type_mapper_, std::move(section_metadata_cache_)));
+ }
+
+ private:
+ const KeyMapper<SchemaTypeId>& schema_type_mapper_; // Does not own.
+ std::vector<std::vector<SectionMetadata>> section_metadata_cache_;
+ };
+
SectionManager(const SectionManager&) = delete;
SectionManager& operator=(const SectionManager&) = delete;
- // Factory function to create a SectionManager which does not take ownership
- // of any input components, and all pointers must refer to valid objects that
- // outlive the created SectionManager instance.
- //
- // Returns:
- // A SectionManager on success
- // FAILED_PRECONDITION on any null pointer input
- // INVALID_ARGUMENT if infinite loop detected in the type configs
- // OUT_OF_RANGE if number of properties need indexing exceeds the max number
- // NOT_FOUND if any type config name not found in the map
- static libtextclassifier3::StatusOr<std::unique_ptr<SectionManager>> Create(
- const SchemaUtil::TypeConfigMap& type_config_map,
- const KeyMapper<SchemaTypeId>* schema_type_mapper);
-
- // Finds contents of a section by section path (e.g. property1.property2)
- // according to the template type T.
- //
- // Types of supported T:
- // - std::string, std::string_view: return property.string_values()
- // - int64_t : return property.int64_values()
- //
- // Returns:
- // A vector of contents with the specified type on success
- // NOT_FOUND if:
- // 1. Property is optional and not found in the document
- // 2. section_path is invalid
- // 3. Content is empty (could be caused by incorrect type T)
- template <typename T>
- libtextclassifier3::StatusOr<std::vector<T>> GetSectionContent(
- const DocumentProto& document, std::string_view section_path) const;
-
- // Finds contents of a section by id according to the template type T.
- //
- // Types of supported T:
- // - std::string, std::string_view: return property.string_values()
- // - int64_t : return property.int64_values()
- //
- // Returns:
- // A vector of contents on success
- // INVALID_ARGUMENT if section id is invalid
- // NOT_FOUND if type config name of document not found
- template <typename T>
- libtextclassifier3::StatusOr<std::vector<T>> GetSectionContent(
- const DocumentProto& document, SectionId section_id) const;
-
// Returns the SectionMetadata associated with the SectionId that's in the
// SchemaTypeId.
//
@@ -103,24 +95,26 @@ class SectionManager {
//
// Returns:
// A SectionGroup instance on success
- // NOT_FOUND if type config name of document not found
+ // NOT_FOUND if the type config name of document is not present in
+ // schema_type_mapper_
libtextclassifier3::StatusOr<SectionGroup> ExtractSections(
const DocumentProto& document) const;
// Returns:
// - On success, the section metadatas for the specified type
- // - NOT_FOUND if the type config name is not present in the schema
+ // - NOT_FOUND if the type config name is not present in schema_type_mapper_
libtextclassifier3::StatusOr<const std::vector<SectionMetadata>*>
GetMetadataList(const std::string& type_config_name) const;
private:
- // Use SectionManager::Create() to instantiate
explicit SectionManager(
- const KeyMapper<SchemaTypeId>* schema_type_mapper,
- std::vector<std::vector<SectionMetadata>>&& section_metadata_cache);
+ const KeyMapper<SchemaTypeId>& schema_type_mapper,
+ std::vector<std::vector<SectionMetadata>>&& section_metadata_cache)
+ : schema_type_mapper_(schema_type_mapper),
+ section_metadata_cache_(std::move(section_metadata_cache)) {}
// Maps schema types to a densely-assigned unique id.
- const KeyMapper<SchemaTypeId>& schema_type_mapper_;
+ const KeyMapper<SchemaTypeId>& schema_type_mapper_; // Does not own
// The index of section_metadata_cache_ corresponds to a schema type's
// SchemaTypeId. At that SchemaTypeId index, we store an inner vector. The
diff --git a/icing/schema/section-manager_test.cc b/icing/schema/section-manager_test.cc
index 4e8fbbd..eee78e9 100644
--- a/icing/schema/section-manager_test.cc
+++ b/icing/schema/section-manager_test.cc
@@ -14,7 +14,9 @@
#include "icing/schema/section-manager.h"
-#include <limits>
+#include <memory>
+#include <string>
+#include <string_view>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
@@ -22,8 +24,8 @@
#include "icing/file/filesystem.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
-#include "icing/proto/term.pb.h"
#include "icing/schema-builder.h"
+#include "icing/schema/schema-type-manager.h"
#include "icing/schema/schema-util.h"
#include "icing/store/dynamic-trie-key-mapper.h"
#include "icing/store/key-mapper.h"
@@ -36,379 +38,263 @@ namespace lib {
namespace {
using ::testing::ElementsAre;
-using ::testing::Eq;
-using ::testing::HasSubstr;
using ::testing::IsEmpty;
+using ::testing::Pointee;
using ::testing::SizeIs;
-// type and property names of EmailMessage
-constexpr char kTypeEmail[] = "EmailMessage";
-constexpr char kPropertySubject[] = "subject";
-constexpr char kPropertyText[] = "text";
-constexpr char kPropertyAttachment[] = "attachment";
-constexpr char kPropertyRecipients[] = "recipients";
-constexpr char kPropertyRecipientIds[] = "recipientIds";
-constexpr char kPropertyTimestamp[] = "timestamp";
-constexpr char kPropertyNonIndexableInteger[] = "non_indexable_integer";
+// type and property names of Email
+static constexpr std::string_view kTypeEmail = "Email";
+// indexable
+static constexpr std::string_view kPropertyRecipientIds = "recipientIds";
+static constexpr std::string_view kPropertyRecipients = "recipients";
+static constexpr std::string_view kPropertySubject = "subject";
+static constexpr std::string_view kPropertyTimestamp = "timestamp";
+// non-indexable
+static constexpr std::string_view kPropertyAttachment = "attachment";
+static constexpr std::string_view kPropertyNonIndexableInteger =
+ "nonIndexableInteger";
+static constexpr std::string_view kPropertyText = "text";
+
// type and property names of Conversation
-constexpr char kTypeConversation[] = "Conversation";
-constexpr char kPropertyName[] = "name";
-constexpr char kPropertyEmails[] = "emails";
+static constexpr std::string_view kTypeConversation = "Conversation";
+// indexable
+static constexpr std::string_view kPropertyEmails = "emails";
+static constexpr std::string_view kPropertyName = "name";
+
+// type and property names of Group
+static constexpr std::string_view kTypeGroup = "Group";
+// indexable
+static constexpr std::string_view kPropertyConversation = "conversation";
+static constexpr std::string_view kPropertyGroupName = "groupName";
+// nested indexable
+static constexpr std::string_view kPropertyNestedConversationName = "name";
+static constexpr std::string_view kPropertyNestedConversationEmailRecipientIds =
+ "emails.recipientIds";
+static constexpr std::string_view kPropertyNestedConversationEmailRecipient =
+ "emails.recipients";
+static constexpr std::string_view kPropertyNestedConversationEmailSubject =
+ "emails.subject";
+// nested non-indexable
+static constexpr std::string_view kPropertyNestedConversationEmailAttachment =
+ "emails.attachment";
+// non-existent property path
+static constexpr std::string_view kPropertyNestedNonExistent =
+ "emails.nonExistentNestedProperty";
+static constexpr std::string_view kPropertyNestedNonExistent2 =
+ "emails.nonExistentNestedProperty2";
constexpr int64_t kDefaultTimestamp = 1663274901;
+PropertyConfigProto CreateRecipientIdsPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyRecipientIds)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REPEATED)
+ .Build();
+}
+
+PropertyConfigProto CreateRecipientsPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyRecipients)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED)
+ .Build();
+}
+
+PropertyConfigProto CreateSubjectPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertySubject)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+}
+
+PropertyConfigProto CreateTimestampPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyTimestamp)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+}
+
+PropertyConfigProto CreateNamePropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+}
+
+PropertyConfigProto CreateAttachmentPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyAttachment)
+ .SetDataType(TYPE_BYTES)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+}
+
+PropertyConfigProto CreateGroupNamePropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyGroupName)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+}
+
+SchemaTypeConfigProto CreateEmailTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType(kTypeEmail)
+ .AddProperty(CreateSubjectPropertyConfig())
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyText)
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyAttachment)
+ .SetDataType(TYPE_BYTES)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(CreateRecipientsPropertyConfig())
+ .AddProperty(CreateRecipientIdsPropertyConfig())
+ .AddProperty(CreateTimestampPropertyConfig())
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyNonIndexableInteger)
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+}
+
+SchemaTypeConfigProto CreateConversationTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType(kTypeConversation)
+ .AddProperty(CreateNamePropertyConfig())
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyEmails)
+ .SetDataTypeDocument(kTypeEmail,
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .Build();
+}
+
+SchemaTypeConfigProto CreateGroupTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType(kTypeGroup)
+ .AddProperty(CreateGroupNamePropertyConfig())
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyConversation)
+ .SetDataTypeDocument(
+ kTypeConversation,
+ /*indexable_nested_properties_list=*/
+ {std::string(kPropertyNestedConversationName),
+ std::string(kPropertyNestedConversationEmailRecipientIds),
+ std::string(kPropertyNestedConversationEmailSubject),
+ std::string(kPropertyNestedConversationEmailRecipient),
+ std::string(kPropertyNestedConversationEmailAttachment),
+ std::string(kPropertyNestedNonExistent2),
+ std::string(kPropertyNestedNonExistent),
+ std::string(kPropertyNestedNonExistent)})
+ .SetCardinality(CARDINALITY_REPEATED))
+ .Build();
+}
+
class SectionManagerTest : public ::testing::Test {
protected:
- SectionManagerTest() : test_dir_(GetTestTempDir() + "/icing") {
+ void SetUp() override {
+ test_dir_ = GetTestTempDir() + "/icing";
+
auto email_type = CreateEmailTypeConfig();
auto conversation_type = CreateConversationTypeConfig();
+ auto group_type = CreateGroupTypeConfig();
type_config_map_.emplace(email_type.schema_type(), email_type);
type_config_map_.emplace(conversation_type.schema_type(),
conversation_type);
+ type_config_map_.emplace(group_type.schema_type(), group_type);
+
+ // DynamicTrieKeyMapper uses 3 internal arrays for bookkeeping. Give each
+ // one 128KiB so the total DynamicTrieKeyMapper should get 384KiB
+ int key_mapper_size = 3 * 128 * 1024;
+ ICING_ASSERT_OK_AND_ASSIGN(schema_type_mapper_,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_, key_mapper_size));
+ ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeEmail, 0));
+ ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeConversation, 1));
+ ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeGroup, 2));
email_document_ =
DocumentBuilder()
.SetKey("icing", "email/1")
- .SetSchema(kTypeEmail)
- .AddStringProperty(kPropertySubject, "the subject")
- .AddStringProperty(kPropertyText, "the text")
- .AddBytesProperty(kPropertyAttachment, "attachment bytes")
- .AddStringProperty(kPropertyRecipients, "recipient1", "recipient2",
- "recipient3")
- .AddInt64Property(kPropertyRecipientIds, 1, 2, 3)
- .AddInt64Property(kPropertyTimestamp, kDefaultTimestamp)
- .AddInt64Property(kPropertyNonIndexableInteger, 100)
+ .SetSchema(std::string(kTypeEmail))
+ .AddStringProperty(std::string(kPropertySubject), "the subject")
+ .AddStringProperty(std::string(kPropertyText), "the text")
+ .AddBytesProperty(std::string(kPropertyAttachment),
+ "attachment bytes")
+ .AddStringProperty(std::string(kPropertyRecipients), "recipient1",
+ "recipient2", "recipient3")
+ .AddInt64Property(std::string(kPropertyRecipientIds), 1, 2, 3)
+ .AddInt64Property(std::string(kPropertyTimestamp),
+ kDefaultTimestamp)
+ .AddInt64Property(std::string(kPropertyNonIndexableInteger), 100)
.Build();
conversation_document_ =
DocumentBuilder()
.SetKey("icing", "conversation/1")
- .SetSchema(kTypeConversation)
- .AddDocumentProperty(kPropertyEmails,
+ .SetSchema(std::string(kTypeConversation))
+ .AddDocumentProperty(std::string(kPropertyEmails),
DocumentProto(email_document_),
DocumentProto(email_document_))
.Build();
- }
-
- void SetUp() override {
- // DynamicTrieKeyMapper uses 3 internal arrays for bookkeeping. Give each
- // one 128KiB so the total DynamicTrieKeyMapper should get 384KiB
- int key_mapper_size = 3 * 128 * 1024;
- ICING_ASSERT_OK_AND_ASSIGN(schema_type_mapper_,
- DynamicTrieKeyMapper<SchemaTypeId>::Create(
- filesystem_, test_dir_, key_mapper_size));
- ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeEmail, 0));
- ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeConversation, 1));
- }
- static SchemaTypeConfigProto CreateEmailTypeConfig() {
- SchemaTypeConfigProto type =
- SchemaTypeConfigBuilder()
- .SetType(kTypeEmail)
- .AddProperty(
- PropertyConfigBuilder()
- .SetName(kPropertySubject)
- .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED))
- .AddProperty(
- PropertyConfigBuilder()
- .SetName(kPropertyText)
- .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName(kPropertyAttachment)
- .SetDataType(TYPE_BYTES)
- .SetCardinality(CARDINALITY_REQUIRED))
- .AddProperty(
- PropertyConfigBuilder()
- .SetName(kPropertyRecipients)
- .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REPEATED))
- .AddProperty(PropertyConfigBuilder()
- .SetName(kPropertyRecipientIds)
- .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
- .SetCardinality(CARDINALITY_REPEATED))
- .AddProperty(PropertyConfigBuilder()
- .SetName(kPropertyTimestamp)
- .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
- .SetCardinality(CARDINALITY_REQUIRED))
- .AddProperty(PropertyConfigBuilder()
- .SetName(kPropertyNonIndexableInteger)
- .SetDataType(TYPE_INT64)
- .SetCardinality(CARDINALITY_REQUIRED))
+ group_document_ =
+ DocumentBuilder()
+ .SetKey("icing", "group/1")
+ .SetSchema(std::string(kTypeGroup))
+ .AddDocumentProperty(std::string(kPropertyConversation),
+ DocumentProto(conversation_document_))
+ .AddStringProperty(std::string(kPropertyGroupName), "group_name_1")
.Build();
- return type;
}
- static SchemaTypeConfigProto CreateConversationTypeConfig() {
- SchemaTypeConfigProto type;
- type.set_schema_type(kTypeConversation);
-
- auto name = type.add_properties();
- name->set_property_name(kPropertyName);
- name->set_data_type(TYPE_STRING);
- name->set_cardinality(CARDINALITY_OPTIONAL);
- name->mutable_string_indexing_config()->set_term_match_type(
- TERM_MATCH_EXACT);
-
- auto emails = type.add_properties();
- emails->set_property_name(kPropertyEmails);
- emails->set_data_type(TYPE_DOCUMENT);
- emails->set_cardinality(CARDINALITY_REPEATED);
- emails->set_schema_type(kTypeEmail);
- emails->mutable_document_indexing_config()->set_index_nested_properties(
- true);
-
- return type;
+ void TearDown() override {
+ schema_type_mapper_.reset();
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
}
Filesystem filesystem_;
- const std::string test_dir_;
+ std::string test_dir_;
SchemaUtil::TypeConfigMap type_config_map_;
std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper_;
DocumentProto email_document_;
DocumentProto conversation_document_;
+ DocumentProto group_document_;
};
-TEST_F(SectionManagerTest, CreationWithNullPointerShouldFail) {
- EXPECT_THAT(
- SectionManager::Create(type_config_map_, /*schema_type_mapper=*/nullptr),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
-}
-
-TEST_F(SectionManagerTest, CreationWithTooManyPropertiesShouldFail) {
- SchemaTypeConfigProto type_config;
- type_config.set_schema_type("type");
- // Adds more properties than allowed
- int max_num_sections_allowed = kMaxSectionId - kMinSectionId + 1;
- for (int i = 0; i < max_num_sections_allowed + 1; i++) {
- auto property = type_config.add_properties();
- property->set_property_name("property" + std::to_string(i));
- property->set_data_type(TYPE_STRING);
- property->set_cardinality(CARDINALITY_REQUIRED);
- property->mutable_string_indexing_config()->set_term_match_type(
- TERM_MATCH_EXACT);
- }
-
- SchemaUtil::TypeConfigMap type_config_map;
- type_config_map.emplace("type", type_config);
-
- EXPECT_THAT(
- SectionManager::Create(type_config_map, schema_type_mapper_.get()),
- StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE,
- HasSubstr("Too many properties")));
-}
-
-TEST_F(SectionManagerTest, GetSectionContent) {
- ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
-
- // Test simple section paths
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- email_document_,
- /*section_path=*/"subject"),
- IsOkAndHolds(ElementsAre("the subject")));
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- email_document_,
- /*section_path=*/"text"),
- IsOkAndHolds(ElementsAre("the text")));
- EXPECT_THAT(
- section_manager->GetSectionContent<int64_t>(email_document_,
- /*section_path=*/"timestamp"),
- IsOkAndHolds(ElementsAre(kDefaultTimestamp)));
-}
-
-TEST_F(SectionManagerTest, GetSectionContentRepeatedValues) {
- ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
-
- // Test repeated values
- EXPECT_THAT(
- section_manager->GetSectionContent<std::string_view>(
- email_document_,
- /*section_path=*/"recipients"),
- IsOkAndHolds(ElementsAre("recipient1", "recipient2", "recipient3")));
- EXPECT_THAT(section_manager->GetSectionContent<int64_t>(
- email_document_,
- /*section_path=*/"recipientIds"),
- IsOkAndHolds(ElementsAre(1, 2, 3)));
-}
-
-TEST_F(SectionManagerTest, GetSectionContentConcatenatedSectionPaths) {
- ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
-
- // Test concatenated section paths: "property1.property2"
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- conversation_document_,
- /*section_path=*/"emails.subject"),
- IsOkAndHolds(ElementsAre("the subject", "the subject")));
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- conversation_document_,
- /*section_path=*/"emails.text"),
- IsOkAndHolds(ElementsAre("the text", "the text")));
- EXPECT_THAT(section_manager->GetSectionContent<int64_t>(
- conversation_document_,
- /*section_path=*/"emails.timestamp"),
- IsOkAndHolds(ElementsAre(kDefaultTimestamp, kDefaultTimestamp)));
- EXPECT_THAT(
- section_manager->GetSectionContent<std::string_view>(
- conversation_document_,
- /*section_path=*/"emails.recipients"),
- IsOkAndHolds(ElementsAre("recipient1", "recipient2", "recipient3",
- "recipient1", "recipient2", "recipient3")));
- EXPECT_THAT(section_manager->GetSectionContent<int64_t>(
- conversation_document_,
- /*section_path=*/"emails.recipientIds"),
- IsOkAndHolds(ElementsAre(1, 2, 3, 1, 2, 3)));
-}
-
-TEST_F(SectionManagerTest, GetSectionContentNonExistingPaths) {
- ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
-
- // Test non-existing paths
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- email_document_,
- /*section_path=*/"name"),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- email_document_,
- /*section_path=*/"invalid"),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- conversation_document_,
- /*section_path=*/"emails.invalid"),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
-}
-
-TEST_F(SectionManagerTest, GetSectionContentNonIndexableTypes) {
- ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
-
- // Test other data types
- // BYTES type can't be indexed, so content won't be returned
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- email_document_,
- /*section_path=*/"attachment"),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
-}
-
-TEST_F(SectionManagerTest, GetSectionContentMismatchedType) {
- ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
-
- // Use the wrong template type to get the indexable content. GetSectionContent
- // should get empty content from the corresponding proto (repeated) field and
- // return NOT_FOUND error.
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- email_document_,
- /*section_path=*/"recipientIds"),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(section_manager->GetSectionContent<int64_t>(
- email_document_,
- /*section_path=*/"recipients"),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
-}
-
-// The following tests are similar to the ones above but use section ids
-// instead of section paths
-TEST_F(SectionManagerTest, GetSectionContentBySectionId) {
- ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
-
- // EmailMessage (section id -> section path):
- SectionId recipient_ids_section_id = 0;
- SectionId recipients_section_id = 1;
- SectionId subject_section_id = 2;
- SectionId timestamp_section_id = 3;
- SectionId invalid_email_section_id = 4;
- EXPECT_THAT(section_manager->GetSectionContent<int64_t>(
- email_document_, recipient_ids_section_id),
- IsOkAndHolds(ElementsAre(1, 2, 3)));
- EXPECT_THAT(
- section_manager->GetSectionContent<std::string_view>(
- email_document_, recipients_section_id),
- IsOkAndHolds(ElementsAre("recipient1", "recipient2", "recipient3")));
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- email_document_, subject_section_id),
- IsOkAndHolds(ElementsAre("the subject")));
- EXPECT_THAT(section_manager->GetSectionContent<int64_t>(email_document_,
- timestamp_section_id),
- IsOkAndHolds(ElementsAre(kDefaultTimestamp)));
-
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- email_document_, invalid_email_section_id),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-
- // Conversation (section id -> section path):
- // 0 -> emails.recipientIds
- // 1 -> emails.recipients
- // 2 -> emails.subject
- // 3 -> emails.timestamp
- // 4 -> name
- SectionId emails_recipient_ids_section_id = 0;
- SectionId emails_recipients_section_id = 1;
- SectionId emails_subject_section_id = 2;
- SectionId emails_timestamp_section_id = 3;
- SectionId name_section_id = 4;
- SectionId invalid_conversation_section_id = 5;
- EXPECT_THAT(section_manager->GetSectionContent<int64_t>(
- conversation_document_, emails_recipient_ids_section_id),
- IsOkAndHolds(ElementsAre(1, 2, 3, 1, 2, 3)));
- EXPECT_THAT(
- section_manager->GetSectionContent<std::string_view>(
- conversation_document_, emails_recipients_section_id),
- IsOkAndHolds(ElementsAre("recipient1", "recipient2", "recipient3",
- "recipient1", "recipient2", "recipient3")));
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- conversation_document_, emails_subject_section_id),
- IsOkAndHolds(ElementsAre("the subject", "the subject")));
- EXPECT_THAT(section_manager->GetSectionContent<int64_t>(
- conversation_document_, emails_timestamp_section_id),
- IsOkAndHolds(ElementsAre(kDefaultTimestamp, kDefaultTimestamp)));
-
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- conversation_document_, name_section_id),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- conversation_document_, invalid_conversation_section_id),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-}
-
TEST_F(SectionManagerTest, ExtractSections) {
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
- // Extracts all sections from 'EmailMessage' document
- ICING_ASSERT_OK_AND_ASSIGN(SectionGroup section_group,
- section_manager->ExtractSections(email_document_));
+ // Extracts all sections from 'Email' document
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SectionGroup section_group,
+ schema_type_manager->section_manager().ExtractSections(email_document_));
// String sections
EXPECT_THAT(section_group.string_sections, SizeIs(2));
EXPECT_THAT(section_group.string_sections[0].metadata,
- Eq(SectionMetadata(
- /*id_in=*/1, TYPE_STRING, TOKENIZER_PLAIN, TERM_MATCH_EXACT,
- NUMERIC_MATCH_UNKNOWN,
- /*path_in=*/"recipients")));
+ EqualsSectionMetadata(/*expected_id=*/1,
+ /*expected_property_path=*/"recipients",
+ CreateRecipientsPropertyConfig()));
EXPECT_THAT(section_group.string_sections[0].content,
ElementsAre("recipient1", "recipient2", "recipient3"));
EXPECT_THAT(section_group.string_sections[1].metadata,
- Eq(SectionMetadata(
- /*id_in=*/2, TYPE_STRING, TOKENIZER_PLAIN, TERM_MATCH_EXACT,
- NUMERIC_MATCH_UNKNOWN,
- /*path_in=*/"subject")));
+ EqualsSectionMetadata(/*expected_id=*/2,
+ /*expected_property_path=*/"subject",
+ CreateSubjectPropertyConfig()));
EXPECT_THAT(section_group.string_sections[1].content,
ElementsAre("the subject"));
@@ -416,67 +302,341 @@ TEST_F(SectionManagerTest, ExtractSections) {
EXPECT_THAT(section_group.integer_sections, SizeIs(2));
EXPECT_THAT(section_group.integer_sections[0].metadata,
- Eq(SectionMetadata(/*id_in=*/0, TYPE_INT64, TOKENIZER_NONE,
- TERM_MATCH_UNKNOWN, NUMERIC_MATCH_RANGE,
- /*path_in=*/"recipientIds")));
+ EqualsSectionMetadata(/*expected_id=*/0,
+ /*expected_property_path=*/"recipientIds",
+ CreateRecipientIdsPropertyConfig()));
EXPECT_THAT(section_group.integer_sections[0].content, ElementsAre(1, 2, 3));
EXPECT_THAT(section_group.integer_sections[1].metadata,
- Eq(SectionMetadata(/*id_in=*/3, TYPE_INT64, TOKENIZER_NONE,
- TERM_MATCH_UNKNOWN, NUMERIC_MATCH_RANGE,
- /*path_in=*/"timestamp")));
+ EqualsSectionMetadata(/*expected_id=*/3,
+ /*expected_property_path=*/"timestamp",
+ CreateTimestampPropertyConfig()));
EXPECT_THAT(section_group.integer_sections[1].content,
ElementsAre(kDefaultTimestamp));
}
TEST_F(SectionManagerTest, ExtractSectionsNested) {
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
// Extracts all sections from 'Conversation' document
ICING_ASSERT_OK_AND_ASSIGN(
SectionGroup section_group,
- section_manager->ExtractSections(conversation_document_));
+ schema_type_manager->section_manager().ExtractSections(
+ conversation_document_));
// String sections
EXPECT_THAT(section_group.string_sections, SizeIs(2));
- EXPECT_THAT(section_group.string_sections[0].metadata,
- Eq(SectionMetadata(
- /*id_in=*/1, TYPE_STRING, TOKENIZER_PLAIN, TERM_MATCH_EXACT,
- NUMERIC_MATCH_UNKNOWN,
- /*path_in=*/"emails.recipients")));
+ EXPECT_THAT(
+ section_group.string_sections[0].metadata,
+ EqualsSectionMetadata(/*expected_id=*/1,
+ /*expected_property_path=*/"emails.recipients",
+ CreateRecipientsPropertyConfig()));
EXPECT_THAT(section_group.string_sections[0].content,
ElementsAre("recipient1", "recipient2", "recipient3",
"recipient1", "recipient2", "recipient3"));
EXPECT_THAT(section_group.string_sections[1].metadata,
- Eq(SectionMetadata(
- /*id_in=*/2, TYPE_STRING, TOKENIZER_PLAIN, TERM_MATCH_EXACT,
- NUMERIC_MATCH_UNKNOWN,
- /*path_in=*/"emails.subject")));
+ EqualsSectionMetadata(/*expected_id=*/2,
+ /*expected_property_path=*/"emails.subject",
+ CreateSubjectPropertyConfig()));
EXPECT_THAT(section_group.string_sections[1].content,
ElementsAre("the subject", "the subject"));
// Integer sections
EXPECT_THAT(section_group.integer_sections, SizeIs(2));
- EXPECT_THAT(section_group.integer_sections[0].metadata,
- Eq(SectionMetadata(/*id_in=*/0, TYPE_INT64, TOKENIZER_NONE,
- TERM_MATCH_UNKNOWN, NUMERIC_MATCH_RANGE,
- /*path_in=*/"emails.recipientIds")));
+ EXPECT_THAT(
+ section_group.integer_sections[0].metadata,
+ EqualsSectionMetadata(/*expected_id=*/0,
+ /*expected_property_path=*/"emails.recipientIds",
+ CreateRecipientIdsPropertyConfig()));
EXPECT_THAT(section_group.integer_sections[0].content,
ElementsAre(1, 2, 3, 1, 2, 3));
- EXPECT_THAT(section_group.integer_sections[1].metadata,
- Eq(SectionMetadata(/*id_in=*/3, TYPE_INT64, TOKENIZER_NONE,
- TERM_MATCH_UNKNOWN, NUMERIC_MATCH_RANGE,
- /*path_in=*/"emails.timestamp")));
+ EXPECT_THAT(
+ section_group.integer_sections[1].metadata,
+ EqualsSectionMetadata(/*expected_id=*/3,
+ /*expected_property_path=*/"emails.timestamp",
+ CreateTimestampPropertyConfig()));
EXPECT_THAT(section_group.integer_sections[1].content,
ElementsAre(kDefaultTimestamp, kDefaultTimestamp));
}
+TEST_F(SectionManagerTest, ExtractSectionsIndexableNestedPropertiesList) {
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ // Extracts all sections from 'Group' document
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SectionGroup section_group,
+ schema_type_manager->section_manager().ExtractSections(group_document_));
+
+ // SectionId assignments:
+ // 0 -> conversation.emails.attachment (bytes, non-indexable)
+ // 1 -> conversation.emails.recipientIds (int64)
+ // 2 -> conversation.emails.recipients (string)
+ // 3 -> conversation.emails.subject (string)
+ // 4 -> conversation.name
+ // (string, but no entry for this in conversation_document_)
+ // 5 -> groupName (string)
+ // 6 -> conversation.emails.nonExistentNestedProperty
+ // (unknown, non-indexable)
+ // 7 -> conversation.emails.nonExistentNestedProperty2
+ // (unknown, non-indexable)
+ //
+ // SectionId assignment order:
+ // - We assign section ids to known (existing) properties first in alphabet
+ // order.
+ // - After handling all known properties, we assign section ids to all unknown
+ // (non-existent) properties that are specified in the
+ // indexable_nested_properties_list.
+ // - As a result, assignment of the entire section set is not done
+ // alphabetically, but assignment is still deterministic and alphabetical
+ // order is preserved inside the known properties and unknown properties
+ // sets individually.
+ //
+ // 'conversation.emails.attachment',
+ // 'conversation.emails.nonExistentNestedProperty' and
+ // 'conversation.emails.nonExistentNestedProperty2' are assigned sectionIds
+ // even though they are non-indexable because they appear in 'Group' schema
+ // type's indexable_nested_props_list.
+ // However 'conversation.emails.attachment' does not exist in section_group
+ // (even though the property exists and has a sectionId assignment) as
+ // SectionManager::ExtractSections only extracts indexable string and integer
+ // section data from a document.
+
+ // String sections
+ EXPECT_THAT(section_group.string_sections, SizeIs(3));
+
+ EXPECT_THAT(section_group.string_sections[0].metadata,
+ EqualsSectionMetadata(
+ /*expected_id=*/2,
+ /*expected_property_path=*/"conversation.emails.recipients",
+ CreateRecipientsPropertyConfig()));
+ EXPECT_THAT(section_group.string_sections[0].content,
+ ElementsAre("recipient1", "recipient2", "recipient3",
+ "recipient1", "recipient2", "recipient3"));
+
+ EXPECT_THAT(section_group.string_sections[1].metadata,
+ EqualsSectionMetadata(
+ /*expected_id=*/3,
+ /*expected_property_path=*/"conversation.emails.subject",
+ CreateSubjectPropertyConfig()));
+ EXPECT_THAT(section_group.string_sections[1].content,
+ ElementsAre("the subject", "the subject"));
+
+ EXPECT_THAT(section_group.string_sections[2].metadata,
+ EqualsSectionMetadata(
+ /*expected_id=*/5,
+ /*expected_property_path=*/"groupName",
+ CreateGroupNamePropertyConfig()));
+ EXPECT_THAT(section_group.string_sections[2].content,
+ ElementsAre("group_name_1"));
+
+ // Integer sections
+ EXPECT_THAT(section_group.integer_sections, SizeIs(1));
+
+ EXPECT_THAT(section_group.integer_sections[0].metadata,
+ EqualsSectionMetadata(
+ /*expected_id=*/1,
+ /*expected_property_path=*/"conversation.emails.recipientIds",
+ CreateRecipientIdsPropertyConfig()));
+ EXPECT_THAT(section_group.integer_sections[0].content,
+ ElementsAre(1, 2, 3, 1, 2, 3));
+}
+
+TEST_F(SectionManagerTest, GetSectionMetadata) {
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ // Email (section id -> section property path):
+ // 0 -> recipientIds
+ // 1 -> recipients
+ // 2 -> subject
+ // 3 -> timestamp
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/0, /*section_id=*/0),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/0, /*expected_property_path=*/"recipientIds",
+ CreateRecipientIdsPropertyConfig()))));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/0, /*section_id=*/1),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/1, /*expected_property_path=*/"recipients",
+ CreateRecipientsPropertyConfig()))));
+
+ // Conversation (section id -> section property path):
+ // 0 -> emails.recipientIds
+ // 1 -> emails.recipients
+ // 2 -> emails.subject
+ // 3 -> emails.timestamp
+ // 4 -> name
+ EXPECT_THAT(
+ schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/1, /*section_id=*/0),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/0, /*expected_property_path=*/"emails.recipientIds",
+ CreateRecipientIdsPropertyConfig()))));
+ EXPECT_THAT(
+ schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/1, /*section_id=*/1),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/1, /*expected_property_path=*/"emails.recipients",
+ CreateRecipientsPropertyConfig()))));
+ EXPECT_THAT(
+ schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/1, /*section_id=*/2),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/2, /*expected_property_path=*/"emails.subject",
+ CreateSubjectPropertyConfig()))));
+ EXPECT_THAT(
+ schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/1, /*section_id=*/3),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/3, /*expected_property_path=*/"emails.timestamp",
+ CreateTimestampPropertyConfig()))));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/1, /*section_id=*/4),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/4, /*expected_property_path=*/"name",
+ CreateNamePropertyConfig()))));
+
+ // Group (section id -> section property path):
+ // 0 -> conversation.emails.attachment (non-indexable)
+ // 1 -> conversation.emails.recipientIds
+ // 2 -> conversation.emails.recipients
+ // 3 -> conversation.emails.subject
+ // 4 -> conversation.name
+ // 5 -> groupName
+ // 6 -> conversation.emails.nonExistentNestedProperty (non-indexable)
+ // 7 -> conversation.emails.nonExistentNestedProperty2 (non-indexable)
+ //
+ // SectionId assignment order:
+ // - We assign section ids to known (existing) properties first in alphabet
+ // order.
+ // - After handling all known properties, we assign section ids to all unknown
+ // (non-existent) properties that are specified in the
+ // indexable_nested_properties_list.
+ // - As a result, assignment of the entire section set is not done
+ // alphabetically, but assignment is still deterministic and alphabetical
+ // order is preserved inside the known properties and unknown properties
+ // sets individually.
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/2, /*section_id=*/0),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/0,
+ /*expected_property_path=*/"conversation.emails.attachment",
+ CreateAttachmentPropertyConfig()))));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/2, /*section_id=*/1),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/1,
+ /*expected_property_path=*/"conversation.emails.recipientIds",
+ CreateRecipientIdsPropertyConfig()))));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/2, /*section_id=*/2),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/2,
+ /*expected_property_path=*/"conversation.emails.recipients",
+ CreateRecipientsPropertyConfig()))));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/2, /*section_id=*/3),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/3,
+ /*expected_property_path=*/"conversation.emails.subject",
+ CreateSubjectPropertyConfig()))));
+ EXPECT_THAT(
+ schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/2, /*section_id=*/4),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/4, /*expected_property_path=*/"conversation.name",
+ CreateNamePropertyConfig()))));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/2, /*section_id=*/5),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/5, /*expected_property_path=*/"groupName",
+ CreateGroupNamePropertyConfig()))));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/2, /*section_id=*/6),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/6,
+ /*expected_property_path=*/
+ "conversation.emails.nonExistentNestedProperty",
+ PropertyConfigBuilder()
+ .SetName("nonExistentNestedProperty")
+ .SetDataType(TYPE_UNKNOWN)
+ .Build()))));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/2, /*section_id=*/7),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/7,
+ /*expected_property_path=*/
+ "conversation.emails.nonExistentNestedProperty2",
+ PropertyConfigBuilder()
+ .SetName("nonExistentNestedProperty2")
+ .SetDataType(TYPE_UNKNOWN)
+ .Build()))));
+ // Check that no more properties are indexed
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/2, /*section_id=*/8),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(SectionManagerTest, GetSectionMetadataInvalidSchemaTypeId) {
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+ ASSERT_THAT(type_config_map_, SizeIs(3));
+
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/-1, /*section_id=*/0),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/3, /*section_id=*/0),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(SectionManagerTest, GetSectionMetadataInvalidSectionId) {
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ // Email (section id -> section property path):
+ // 0 -> recipientIds
+ // 1 -> recipients
+ // 2 -> subject
+ // 3 -> timestamp
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/0, /*section_id=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/0, /*section_id=*/4),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Conversation (section id -> section property path):
+ // 0 -> emails.recipientIds
+ // 1 -> emails.recipients
+ // 2 -> emails.subject
+ // 3 -> emails.timestamp
+ // 4 -> name
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/1, /*section_id=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/1, /*section_id=*/5),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
TEST_F(SectionManagerTest,
NonStringFieldsWithStringIndexingConfigDontCreateSections) {
// Create a schema for an empty document.
@@ -557,9 +717,10 @@ TEST_F(SectionManagerTest,
ICING_ASSERT_OK(schema_type_mapper->Put(empty_type.schema_type(),
/*schema_type_id=*/1));
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map, schema_type_mapper.get()));
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, schema_type_mapper.get()));
// Create an empty document to be nested
DocumentProto empty_document = DocumentBuilder()
@@ -580,8 +741,9 @@ TEST_F(SectionManagerTest,
.Build();
// Extracts sections from 'Schema' document
- ICING_ASSERT_OK_AND_ASSIGN(SectionGroup section_group,
- section_manager->ExtractSections(document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SectionGroup section_group,
+ schema_type_manager->section_manager().ExtractSections(document));
EXPECT_THAT(section_group.string_sections, IsEmpty());
EXPECT_THAT(section_group.integer_sections, IsEmpty());
}
@@ -656,9 +818,10 @@ TEST_F(SectionManagerTest,
ICING_ASSERT_OK(schema_type_mapper->Put(empty_type.schema_type(),
/*schema_type_id=*/1));
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map, schema_type_mapper.get()));
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, schema_type_mapper.get()));
// Create an empty document to be nested
DocumentProto empty_document = DocumentBuilder()
@@ -679,8 +842,9 @@ TEST_F(SectionManagerTest,
.Build();
// Extracts sections from 'Schema' document
- ICING_ASSERT_OK_AND_ASSIGN(SectionGroup section_group,
- section_manager->ExtractSections(document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SectionGroup section_group,
+ schema_type_manager->section_manager().ExtractSections(document));
EXPECT_THAT(section_group.string_sections, IsEmpty());
EXPECT_THAT(section_group.integer_sections, IsEmpty());
}
@@ -756,14 +920,16 @@ TEST_F(SectionManagerTest, AssignSectionsRecursivelyForDocumentFields) {
ICING_ASSERT_OK(schema_type_mapper->Put(document_type.schema_type(),
document_type_schema_type_id));
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map, schema_type_mapper.get()));
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, schema_type_mapper.get()));
// Extracts sections from 'Schema' document; there should be the 1 string
// property and 1 integer property inside the document.
- ICING_ASSERT_OK_AND_ASSIGN(SectionGroup section_group,
- section_manager->ExtractSections(outer_document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SectionGroup section_group,
+ schema_type_manager->section_manager().ExtractSections(outer_document));
EXPECT_THAT(section_group.string_sections, SizeIs(1));
EXPECT_THAT(section_group.integer_sections, SizeIs(1));
}
@@ -839,14 +1005,16 @@ TEST_F(SectionManagerTest, DontAssignSectionsRecursivelyForDocumentFields) {
ICING_ASSERT_OK(schema_type_mapper->Put(document_type.schema_type(),
document_type_schema_type_id));
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map, schema_type_mapper.get()));
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, schema_type_mapper.get()));
// Extracts sections from 'Schema' document; there won't be any since we
// didn't recurse into the document to see the inner string property
- ICING_ASSERT_OK_AND_ASSIGN(SectionGroup section_group,
- section_manager->ExtractSections(outer_document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SectionGroup section_group,
+ schema_type_manager->section_manager().ExtractSections(outer_document));
EXPECT_THAT(section_group.string_sections, IsEmpty());
EXPECT_THAT(section_group.integer_sections, IsEmpty());
}
diff --git a/icing/schema/section.h b/icing/schema/section.h
index 241095b..3685a29 100644
--- a/icing/schema/section.h
+++ b/icing/schema/section.h
@@ -33,6 +33,8 @@ inline constexpr int kSectionIdBits = 6;
inline constexpr SectionId kTotalNumSections = (1 << kSectionIdBits);
inline constexpr SectionId kInvalidSectionId = kTotalNumSections;
inline constexpr SectionId kMaxSectionId = kTotalNumSections - 1;
+// Prior versions of Icing only supported 16 indexed properties.
+inline constexpr SectionId kOldTotalNumSections = 16;
inline constexpr SectionId kMinSectionId = 0;
constexpr bool IsSectionIdValid(SectionId section_id) {
return section_id >= kMinSectionId && section_id <= kMaxSectionId;
@@ -42,6 +44,10 @@ using SectionIdMask = int64_t;
inline constexpr SectionIdMask kSectionIdMaskAll = ~SectionIdMask{0};
inline constexpr SectionIdMask kSectionIdMaskNone = SectionIdMask{0};
+static_assert(kSectionIdBits < 8 * sizeof(SectionId),
+ "Cannot exhaust all bits of SectionId since it is a signed "
+ "integer and the most significant bit should be preserved.");
+
static_assert(
kMaxSectionId < 8 * sizeof(SectionIdMask),
"SectionIdMask is not large enough to represent all section values!");
@@ -132,6 +138,9 @@ struct Section {
// Groups of different type sections. Callers can access sections with types
// they want and avoid going through non-desired ones.
+//
+// REQUIRES: lifecycle of the property must be longer than this object, since we
+// use std::string_view for extracting its string_values.
struct SectionGroup {
std::vector<Section<std::string_view>> string_sections;
std::vector<Section<int64_t>> integer_sections;
diff --git a/icing/scoring/advanced_scoring/advanced-scorer.cc b/icing/scoring/advanced_scoring/advanced-scorer.cc
index 212a476..83c1519 100644
--- a/icing/scoring/advanced_scoring/advanced-scorer.cc
+++ b/icing/scoring/advanced_scoring/advanced-scorer.cc
@@ -30,7 +30,8 @@ libtextclassifier3::StatusOr<std::unique_ptr<AdvancedScorer>>
AdvancedScorer::Create(const ScoringSpecProto& scoring_spec,
double default_score,
const DocumentStore* document_store,
- const SchemaStore* schema_store) {
+ const SchemaStore* schema_store, int64_t current_time_ms,
+ const JoinChildrenFetcher* join_children_fetcher) {
ICING_RETURN_ERROR_IF_NULL(document_store);
ICING_RETURN_ERROR_IF_NULL(schema_store);
@@ -45,21 +46,22 @@ AdvancedScorer::Create(const ScoringSpecProto& scoring_spec,
ICING_ASSIGN_OR_RETURN(std::unique_ptr<SectionWeights> section_weights,
SectionWeights::Create(schema_store, scoring_spec));
std::unique_ptr<Bm25fCalculator> bm25f_calculator =
- std::make_unique<Bm25fCalculator>(document_store,
- std::move(section_weights));
+ std::make_unique<Bm25fCalculator>(document_store, section_weights.get(),
+ current_time_ms);
ScoringVisitor visitor(default_score, document_store, schema_store,
- bm25f_calculator.get());
+ section_weights.get(), bm25f_calculator.get(),
+ join_children_fetcher, current_time_ms);
tree_root->Accept(&visitor);
ICING_ASSIGN_OR_RETURN(std::unique_ptr<ScoreExpression> expression,
std::move(visitor).Expression());
- if (expression->is_document_type()) {
+ if (expression->type() != ScoreExpressionType::kDouble) {
return absl_ports::InvalidArgumentError(
- "The root scoring expression will always be evaluated to a document, "
- "but a number is expected.");
+ "The root scoring expression is not of double type.");
}
- return std::unique_ptr<AdvancedScorer>(new AdvancedScorer(
- std::move(expression), std::move(bm25f_calculator), default_score));
+ return std::unique_ptr<AdvancedScorer>(
+ new AdvancedScorer(std::move(expression), std::move(section_weights),
+ std::move(bm25f_calculator), default_score));
}
} // namespace lib
diff --git a/icing/scoring/advanced_scoring/advanced-scorer.h b/icing/scoring/advanced_scoring/advanced-scorer.h
index 763499b..d69abad 100644
--- a/icing/scoring/advanced_scoring/advanced-scorer.h
+++ b/icing/scoring/advanced_scoring/advanced-scorer.h
@@ -20,6 +20,7 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/join/join-children-fetcher.h"
#include "icing/schema/schema-store.h"
#include "icing/scoring/advanced_scoring/score-expression.h"
#include "icing/scoring/bm25f-calculator.h"
@@ -37,7 +38,9 @@ class AdvancedScorer : public Scorer {
// INVALID_ARGUMENT if fails to create an instance
static libtextclassifier3::StatusOr<std::unique_ptr<AdvancedScorer>> Create(
const ScoringSpecProto& scoring_spec, double default_score,
- const DocumentStore* document_store, const SchemaStore* schema_store);
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ int64_t current_time_ms,
+ const JoinChildrenFetcher* join_children_fetcher = nullptr);
double GetScore(const DocHitInfo& hit_info,
const DocHitInfoIterator* query_it) override {
@@ -64,9 +67,11 @@ class AdvancedScorer : public Scorer {
private:
explicit AdvancedScorer(std::unique_ptr<ScoreExpression> score_expression,
+ std::unique_ptr<SectionWeights> section_weights,
std::unique_ptr<Bm25fCalculator> bm25f_calculator,
double default_score)
: score_expression_(std::move(score_expression)),
+ section_weights_(std::move(section_weights)),
bm25f_calculator_(std::move(bm25f_calculator)),
default_score_(default_score) {
if (is_constant()) {
@@ -76,6 +81,7 @@ class AdvancedScorer : public Scorer {
}
std::unique_ptr<ScoreExpression> score_expression_;
+ std::unique_ptr<SectionWeights> section_weights_;
std::unique_ptr<Bm25fCalculator> bm25f_calculator_;
double default_score_;
};
diff --git a/icing/scoring/advanced_scoring/advanced-scorer_fuzz_test.cc b/icing/scoring/advanced_scoring/advanced-scorer_fuzz_test.cc
new file mode 100644
index 0000000..3612359
--- /dev/null
+++ b/icing/scoring/advanced_scoring/advanced-scorer_fuzz_test.cc
@@ -0,0 +1,70 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdint>
+#include <memory>
+#include <string_view>
+
+#include "icing/scoring/advanced_scoring/advanced-scorer.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+ FakeClock fake_clock;
+ Filesystem filesystem;
+ const std::string test_dir = GetTestTempDir() + "/icing";
+ const std::string doc_store_dir = test_dir + "/doc_store";
+ const std::string schema_store_dir = test_dir + "/schema_store";
+ filesystem.DeleteDirectoryRecursively(test_dir.c_str());
+ filesystem.CreateDirectoryRecursively(doc_store_dir.c_str());
+ filesystem.CreateDirectoryRecursively(schema_store_dir.c_str());
+
+ std::unique_ptr<SchemaStore> schema_store =
+ SchemaStore::Create(&filesystem, schema_store_dir, &fake_clock)
+ .ValueOrDie();
+ std::unique_ptr<DocumentStore> document_store =
+ DocumentStore::Create(
+ &filesystem, doc_store_dir, &fake_clock, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr)
+ .ValueOrDie()
+ .document_store;
+
+ std::string_view text(reinterpret_cast<const char*>(data), size);
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::ADVANCED_SCORING_EXPRESSION);
+ scoring_spec.set_advanced_scoring_expression(text);
+
+ AdvancedScorer::Create(scoring_spec,
+ /*default_score=*/10, document_store.get(),
+ schema_store.get(),
+ fake_clock.GetSystemTimeMilliseconds());
+
+ // Not able to test the GetScore method of AdvancedScorer, since it will only
+ // be available after AdvancedScorer is successfully created. However, the
+ // text provided by the fuzz test is very random, which means that in most
+ // cases, there will be syntax errors or type errors that cause
+ // AdvancedScorer::Create to fail.
+ return 0;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/scoring/advanced_scoring/advanced-scorer_test.cc b/icing/scoring/advanced_scoring/advanced-scorer_test.cc
index b0b32e9..cc1d413 100644
--- a/icing/scoring/advanced_scoring/advanced-scorer_test.cc
+++ b/icing/scoring/advanced_scoring/advanced-scorer_test.cc
@@ -24,6 +24,7 @@
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/index/hit/doc-hit-info.h"
+#include "icing/join/join-children-fetcher.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
@@ -63,8 +64,14 @@ class AdvancedScorerTest : public testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::Create(
+ &filesystem_, doc_store_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
// Creates a simple email schema
@@ -76,11 +83,36 @@ class AdvancedScorerTest : public testing::Test {
.SetDataTypeString(
TermMatchType::PREFIX,
StringIndexingConfig::TokenizerType::PLAIN)
- .SetDataType(TYPE_STRING)
.SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("person")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(
+ TermMatchType::PREFIX,
+ StringIndexingConfig::TokenizerType::PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(
+ TermMatchType::PREFIX,
+ StringIndexingConfig::TokenizerType::PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("phoneNumber")
+ .SetDataTypeString(
+ TermMatchType::PREFIX,
+ StringIndexingConfig::TokenizerType::PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- ICING_ASSERT_OK(schema_store_->SetSchema(test_email_schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ test_email_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
}
void TearDown() override {
@@ -115,7 +147,7 @@ DocumentProto CreateDocument(
}
UsageReport CreateUsageReport(std::string name_space, std::string uri,
- int64 timestamp_ms,
+ int64_t timestamp_ms,
UsageReport::UsageType usage_type) {
UsageReport usage_report;
usage_report.set_document_namespace(name_space);
@@ -134,24 +166,45 @@ ScoringSpecProto CreateAdvancedScoringSpec(
return scoring_spec;
}
+PropertyWeight CreatePropertyWeight(std::string path, double weight) {
+ PropertyWeight property_weight;
+ property_weight.set_path(std::move(path));
+ property_weight.set_weight(weight);
+ return property_weight;
+}
+
+TypePropertyWeights CreateTypePropertyWeights(
+ std::string schema_type, std::vector<PropertyWeight>&& property_weights) {
+ TypePropertyWeights type_property_weights;
+ type_property_weights.set_schema_type(std::move(schema_type));
+ type_property_weights.mutable_property_weights()->Reserve(
+ property_weights.size());
+
+ for (PropertyWeight& property_weight : property_weights) {
+ *type_property_weights.add_property_weights() = std::move(property_weight);
+ }
+
+ return type_property_weights;
+}
+
TEST_F(AdvancedScorerTest, InvalidAdvancedScoringSpec) {
// Empty scoring expression for advanced scoring
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(
ScoringSpecProto::RankingStrategy::ADVANCED_SCORING_EXPRESSION);
- EXPECT_THAT(
- scorer_factory::Create(scoring_spec, /*default_score=*/10,
- document_store_.get(), schema_store_.get()),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(scorer_factory::Create(scoring_spec, /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
// Non-empty scoring expression for normal scoring
scoring_spec = ScoringSpecProto::default_instance();
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
scoring_spec.set_advanced_scoring_expression("1");
- EXPECT_THAT(
- scorer_factory::Create(scoring_spec, /*default_score=*/10,
- document_store_.get(), schema_store_.get()),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(scorer_factory::Create(scoring_spec, /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST_F(AdvancedScorerTest, SimpleExpression) {
@@ -163,7 +216,8 @@ TEST_F(AdvancedScorerTest, SimpleExpression) {
std::unique_ptr<Scorer> scorer,
AdvancedScorer::Create(CreateAdvancedScoringSpec("123"),
/*default_score=*/10, document_store_.get(),
- schema_store_.get()));
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
DocHitInfo docHitInfo = DocHitInfo(document_id);
@@ -180,42 +234,43 @@ TEST_F(AdvancedScorerTest, BasicPureArithmeticExpression) {
std::unique_ptr<Scorer> scorer,
AdvancedScorer::Create(CreateAdvancedScoringSpec("1 + 2"),
/*default_score=*/10, document_store_.get(),
- schema_store_.get()));
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(3));
ICING_ASSERT_OK_AND_ASSIGN(
- scorer,
- AdvancedScorer::Create(CreateAdvancedScoringSpec("-1 + 2"),
- /*default_score=*/10, document_store_.get(),
- schema_store_.get()));
+ scorer, AdvancedScorer::Create(CreateAdvancedScoringSpec("-1 + 2"),
+ /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(1));
ICING_ASSERT_OK_AND_ASSIGN(
- scorer,
- AdvancedScorer::Create(CreateAdvancedScoringSpec("1 + -2"),
- /*default_score=*/10, document_store_.get(),
- schema_store_.get()));
+ scorer, AdvancedScorer::Create(CreateAdvancedScoringSpec("1 + -2"),
+ /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(-1));
ICING_ASSERT_OK_AND_ASSIGN(
- scorer,
- AdvancedScorer::Create(CreateAdvancedScoringSpec("1 - 2"),
- /*default_score=*/10, document_store_.get(),
- schema_store_.get()));
+ scorer, AdvancedScorer::Create(CreateAdvancedScoringSpec("1 - 2"),
+ /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(-1));
ICING_ASSERT_OK_AND_ASSIGN(
- scorer,
- AdvancedScorer::Create(CreateAdvancedScoringSpec("1 * 2"),
- /*default_score=*/10, document_store_.get(),
- schema_store_.get()));
+ scorer, AdvancedScorer::Create(CreateAdvancedScoringSpec("1 * 2"),
+ /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(2));
ICING_ASSERT_OK_AND_ASSIGN(
- scorer,
- AdvancedScorer::Create(CreateAdvancedScoringSpec("1 / 2"),
- /*default_score=*/10, document_store_.get(),
- schema_store_.get()));
+ scorer, AdvancedScorer::Create(CreateAdvancedScoringSpec("1 / 2"),
+ /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(0.5));
}
@@ -229,70 +284,102 @@ TEST_F(AdvancedScorerTest, BasicMathFunctionExpression) {
std::unique_ptr<Scorer> scorer,
AdvancedScorer::Create(CreateAdvancedScoringSpec("log(10, 1000)"),
/*default_score=*/10, document_store_.get(),
- schema_store_.get()));
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), DoubleNear(3, kEps));
ICING_ASSERT_OK_AND_ASSIGN(
scorer,
AdvancedScorer::Create(
CreateAdvancedScoringSpec("log(2.718281828459045)"),
- /*default_score=*/10, document_store_.get(), schema_store_.get()));
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), DoubleNear(1, kEps));
ICING_ASSERT_OK_AND_ASSIGN(
- scorer,
- AdvancedScorer::Create(CreateAdvancedScoringSpec("pow(2, 10)"),
- /*default_score=*/10, document_store_.get(),
- schema_store_.get()));
+ scorer, AdvancedScorer::Create(CreateAdvancedScoringSpec("pow(2, 10)"),
+ /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(1024));
ICING_ASSERT_OK_AND_ASSIGN(
scorer,
AdvancedScorer::Create(
CreateAdvancedScoringSpec("max(10, 11, 12, 13, 14)"),
- /*default_score=*/10, document_store_.get(), schema_store_.get()));
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(14));
ICING_ASSERT_OK_AND_ASSIGN(
scorer,
AdvancedScorer::Create(
CreateAdvancedScoringSpec("min(10, 11, 12, 13, 14)"),
- /*default_score=*/10, document_store_.get(), schema_store_.get()));
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(10));
ICING_ASSERT_OK_AND_ASSIGN(
scorer,
- AdvancedScorer::Create(CreateAdvancedScoringSpec("sqrt(2)"),
- /*default_score=*/10, document_store_.get(),
- schema_store_.get()));
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("len(10, 11, 12, 13, 14)"),
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(5));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("sum(10, 11, 12, 13, 14)"),
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(10 + 11 + 12 + 13 + 14));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("avg(10, 11, 12, 13, 14)"),
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq((10 + 11 + 12 + 13 + 14) / 5.));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer, AdvancedScorer::Create(CreateAdvancedScoringSpec("sqrt(2)"),
+ /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), DoubleNear(sqrt(2), kEps));
ICING_ASSERT_OK_AND_ASSIGN(
scorer,
AdvancedScorer::Create(CreateAdvancedScoringSpec("abs(-2) + abs(2)"),
/*default_score=*/10, document_store_.get(),
- schema_store_.get()));
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(4));
ICING_ASSERT_OK_AND_ASSIGN(
scorer,
AdvancedScorer::Create(
CreateAdvancedScoringSpec("sin(3.141592653589793)"),
- /*default_score=*/10, document_store_.get(), schema_store_.get()));
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), DoubleNear(0, kEps));
ICING_ASSERT_OK_AND_ASSIGN(
scorer,
AdvancedScorer::Create(
CreateAdvancedScoringSpec("cos(3.141592653589793)"),
- /*default_score=*/10, document_store_.get(), schema_store_.get()));
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), DoubleNear(-1, kEps));
ICING_ASSERT_OK_AND_ASSIGN(
scorer,
AdvancedScorer::Create(
CreateAdvancedScoringSpec("tan(3.141592653589793 / 4)"),
- /*default_score=*/10, document_store_.get(), schema_store_.get()));
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), DoubleNear(1, kEps));
}
@@ -308,14 +395,16 @@ TEST_F(AdvancedScorerTest, DocumentScoreCreationTimestampFunctionExpression) {
std::unique_ptr<Scorer> scorer,
AdvancedScorer::Create(CreateAdvancedScoringSpec("this.documentScore()"),
/*default_score=*/10, document_store_.get(),
- schema_store_.get()));
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(123));
ICING_ASSERT_OK_AND_ASSIGN(
scorer,
AdvancedScorer::Create(
CreateAdvancedScoringSpec("this.creationTimestamp()"),
- /*default_score=*/10, document_store_.get(), schema_store_.get()));
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(kDefaultCreationTimestampMs));
ICING_ASSERT_OK_AND_ASSIGN(
@@ -323,7 +412,8 @@ TEST_F(AdvancedScorerTest, DocumentScoreCreationTimestampFunctionExpression) {
AdvancedScorer::Create(
CreateAdvancedScoringSpec(
"this.documentScore() + this.creationTimestamp()"),
- /*default_score=*/10, document_store_.get(), schema_store_.get()));
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo),
Eq(123 + kDefaultCreationTimestampMs));
}
@@ -339,7 +429,8 @@ TEST_F(AdvancedScorerTest, DocumentUsageFunctionExpression) {
AdvancedScorer::Create(
CreateAdvancedScoringSpec("this.usageCount(1) + this.usageCount(2) "
"+ this.usageLastUsedTimestamp(3)"),
- /*default_score=*/10, document_store_.get(), schema_store_.get()));
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(0));
ICING_ASSERT_OK(document_store_->ReportUsage(
CreateUsageReport("namespace", "uri", 100000, UsageReport::USAGE_TYPE1)));
@@ -355,19 +446,22 @@ TEST_F(AdvancedScorerTest, DocumentUsageFunctionExpression) {
scorer,
AdvancedScorer::Create(
CreateAdvancedScoringSpec("this.usageLastUsedTimestamp(1)"),
- /*default_score=*/10, document_store_.get(), schema_store_.get()));
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(100000));
ICING_ASSERT_OK_AND_ASSIGN(
scorer,
AdvancedScorer::Create(
CreateAdvancedScoringSpec("this.usageLastUsedTimestamp(2)"),
- /*default_score=*/10, document_store_.get(), schema_store_.get()));
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(200000));
ICING_ASSERT_OK_AND_ASSIGN(
scorer,
AdvancedScorer::Create(
CreateAdvancedScoringSpec("this.usageLastUsedTimestamp(3)"),
- /*default_score=*/10, document_store_.get(), schema_store_.get()));
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(300000));
}
@@ -386,19 +480,22 @@ TEST_F(AdvancedScorerTest, DocumentUsageFunctionOutOfRange) {
std::unique_ptr<Scorer> scorer,
AdvancedScorer::Create(CreateAdvancedScoringSpec("this.usageCount(4)"),
default_score, document_store_.get(),
- schema_store_.get()));
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(default_score));
ICING_ASSERT_OK_AND_ASSIGN(
scorer, AdvancedScorer::Create(
CreateAdvancedScoringSpec("this.usageCount(0)"),
- default_score, document_store_.get(), schema_store_.get()));
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(default_score));
ICING_ASSERT_OK_AND_ASSIGN(
scorer, AdvancedScorer::Create(
CreateAdvancedScoringSpec("this.usageCount(1.5)"),
- default_score, document_store_.get(), schema_store_.get()));
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(default_score));
}
@@ -420,7 +517,8 @@ TEST_F(AdvancedScorerTest, RelevanceScoreFunctionScoreExpression) {
std::unique_ptr<AdvancedScorer> scorer,
AdvancedScorer::Create(CreateAdvancedScoringSpec("this.relevanceScore()"),
/*default_score=*/10, document_store_.get(),
- schema_store_.get()));
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
scorer->PrepareToScore(/*query_term_iterators=*/{});
// Should get the default score.
@@ -428,6 +526,282 @@ TEST_F(AdvancedScorerTest, RelevanceScoreFunctionScoreExpression) {
EXPECT_THAT(scorer->GetScore(docHitInfo, /*query_it=*/nullptr), Eq(10));
}
+TEST_F(AdvancedScorerTest, ChildrenScoresFunctionScoreExpression) {
+ const double default_score = 123;
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id_1,
+ document_store_->Put(CreateDocument("namespace", "uri1")));
+ DocHitInfo docHitInfo1 = DocHitInfo(document_id_1);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id_2,
+ document_store_->Put(CreateDocument("namespace", "uri2")));
+ DocHitInfo docHitInfo2 = DocHitInfo(document_id_2);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id_3,
+ document_store_->Put(CreateDocument("namespace", "uri3")));
+ DocHitInfo docHitInfo3 = DocHitInfo(document_id_3);
+
+ // Create a JoinChildrenFetcher that matches:
+ // document_id_1 to fake_child1 with score 1 and fake_child2 with score 2.
+ // document_id_2 to fake_child3 with score 4.
+ // document_id_3 has no child.
+ JoinSpecProto join_spec;
+ join_spec.set_parent_property_expression("this.qualifiedId()");
+ join_spec.set_child_property_expression("sender");
+ std::unordered_map<DocumentId, std::vector<ScoredDocumentHit>>
+ map_joinable_qualified_id;
+ ScoredDocumentHit fake_child1(/*document_id=*/10, kSectionIdMaskNone,
+ /*score=*/1.0);
+ ScoredDocumentHit fake_child2(/*document_id=*/11, kSectionIdMaskNone,
+ /*score=*/2.0);
+ ScoredDocumentHit fake_child3(/*document_id=*/12, kSectionIdMaskNone,
+ /*score=*/4.0);
+ map_joinable_qualified_id[document_id_1].push_back(fake_child1);
+ map_joinable_qualified_id[document_id_1].push_back(fake_child2);
+ map_joinable_qualified_id[document_id_2].push_back(fake_child3);
+ JoinChildrenFetcher fetcher(join_spec, std::move(map_joinable_qualified_id));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<AdvancedScorer> scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("len(this.childrenRankingSignals())"),
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds(), &fetcher));
+ // document_id_1 has two children.
+ EXPECT_THAT(scorer->GetScore(docHitInfo1, /*query_it=*/nullptr), Eq(2));
+ // document_id_2 has one child.
+ EXPECT_THAT(scorer->GetScore(docHitInfo2, /*query_it=*/nullptr), Eq(1));
+ // document_id_3 has no child.
+ EXPECT_THAT(scorer->GetScore(docHitInfo3, /*query_it=*/nullptr), Eq(0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("sum(this.childrenRankingSignals())"),
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds(), &fetcher));
+ // document_id_1 has two children with scores 1 and 2.
+ EXPECT_THAT(scorer->GetScore(docHitInfo1, /*query_it=*/nullptr), Eq(3));
+ // document_id_2 has one child with score 4.
+ EXPECT_THAT(scorer->GetScore(docHitInfo2, /*query_it=*/nullptr), Eq(4));
+ // document_id_3 has no child.
+ EXPECT_THAT(scorer->GetScore(docHitInfo3, /*query_it=*/nullptr), Eq(0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("avg(this.childrenRankingSignals())"),
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds(), &fetcher));
+ // document_id_1 has two children with scores 1 and 2.
+ EXPECT_THAT(scorer->GetScore(docHitInfo1, /*query_it=*/nullptr), Eq(3 / 2.));
+ // document_id_2 has one child with score 4.
+ EXPECT_THAT(scorer->GetScore(docHitInfo2, /*query_it=*/nullptr), Eq(4 / 1.));
+ // document_id_3 has no child.
+ // This is an evaluation error, so default_score will be returned.
+ EXPECT_THAT(scorer->GetScore(docHitInfo3, /*query_it=*/nullptr),
+ Eq(default_score));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer, AdvancedScorer::Create(
+ CreateAdvancedScoringSpec(
+ // Equivalent to "avg(this.childrenRankingSignals())"
+ "sum(this.childrenRankingSignals()) / "
+ "len(this.childrenRankingSignals())"),
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds(), &fetcher));
+ // document_id_1 has two children with scores 1 and 2.
+ EXPECT_THAT(scorer->GetScore(docHitInfo1, /*query_it=*/nullptr), Eq(3 / 2.));
+ // document_id_2 has one child with score 4.
+ EXPECT_THAT(scorer->GetScore(docHitInfo2, /*query_it=*/nullptr), Eq(4 / 1.));
+ // document_id_3 has no child.
+ // This is an evaluation error, so default_score will be returned.
+ EXPECT_THAT(scorer->GetScore(docHitInfo3, /*query_it=*/nullptr),
+ Eq(default_score));
+}
+
+TEST_F(AdvancedScorerTest, PropertyWeightsFunctionScoreExpression) {
+ DocumentProto test_document_1 =
+ DocumentBuilder().SetKey("namespace", "uri1").SetSchema("email").Build();
+ DocumentProto test_document_2 =
+ DocumentBuilder().SetKey("namespace", "uri2").SetSchema("person").Build();
+ DocumentProto test_document_3 =
+ DocumentBuilder().SetKey("namespace", "uri3").SetSchema("person").Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id_1,
+ document_store_->Put(test_document_1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id_2,
+ document_store_->Put(test_document_2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id_3,
+ document_store_->Put(test_document_3));
+
+ ScoringSpecProto spec_proto = CreateAdvancedScoringSpec("");
+
+ *spec_proto.add_type_property_weights() = CreateTypePropertyWeights(
+ /*schema_type=*/"email",
+ {CreatePropertyWeight(/*path=*/"subject", /*weight=*/1.0)});
+ *spec_proto.add_type_property_weights() = CreateTypePropertyWeights(
+ /*schema_type=*/"person",
+ {CreatePropertyWeight(/*path=*/"emailAddress", /*weight=*/0.5),
+ CreatePropertyWeight(/*path=*/"name", /*weight=*/0.8),
+ CreatePropertyWeight(/*path=*/"phoneNumber", /*weight=*/1.0)});
+
+ // Let the hit for test_document_1 match property "subject".
+ // So this.propertyWeights() for test_document_1 will return [1].
+ DocHitInfo doc_hit_info_1 = DocHitInfo(document_id_1);
+ doc_hit_info_1.UpdateSection(0);
+
+ // Let the hit for test_document_2 match properties "emailAddress" and "name".
+ // So this.propertyWeights() for test_document_2 will return [0.5, 0.8].
+ DocHitInfo doc_hit_info_2 = DocHitInfo(document_id_2);
+ doc_hit_info_2.UpdateSection(0);
+ doc_hit_info_2.UpdateSection(1);
+
+ // Let the hit for test_document_3 match properties "emailAddress", "name" and
+ // "phoneNumber". So this.propertyWeights() for test_document_3 will return
+ // [0.5, 0.8, 1].
+ DocHitInfo doc_hit_info_3 = DocHitInfo(document_id_3);
+ doc_hit_info_3.UpdateSection(0);
+ doc_hit_info_3.UpdateSection(1);
+ doc_hit_info_3.UpdateSection(2);
+
+ spec_proto.set_advanced_scoring_expression("min(this.propertyWeights())");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<AdvancedScorer> scorer,
+ AdvancedScorer::Create(spec_proto,
+ /*default_score=*/10, document_store_.get(),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ // min([1]) = 1
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_1, /*query_it=*/nullptr), Eq(1));
+ // min([0.5, 0.8]) = 0.5
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_2, /*query_it=*/nullptr), Eq(0.5));
+ // min([0.5, 0.8, 1.0]) = 0.5
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_3, /*query_it=*/nullptr), Eq(0.5));
+
+ spec_proto.set_advanced_scoring_expression("max(this.propertyWeights())");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer, AdvancedScorer::Create(spec_proto,
+ /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ // max([1]) = 1
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_1, /*query_it=*/nullptr), Eq(1));
+ // max([0.5, 0.8]) = 0.8
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_2, /*query_it=*/nullptr), Eq(0.8));
+ // max([0.5, 0.8, 1.0]) = 1
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_3, /*query_it=*/nullptr), Eq(1));
+
+ spec_proto.set_advanced_scoring_expression("sum(this.propertyWeights())");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer, AdvancedScorer::Create(spec_proto,
+ /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ // sum([1]) = 1
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_1, /*query_it=*/nullptr), Eq(1));
+ // sum([0.5, 0.8]) = 1.3
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_2, /*query_it=*/nullptr), Eq(1.3));
+ // sum([0.5, 0.8, 1.0]) = 2.3
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_3, /*query_it=*/nullptr), Eq(2.3));
+}
+
+TEST_F(AdvancedScorerTest,
+ PropertyWeightsFunctionScoreExpressionUnspecifiedWeights) {
+ DocumentProto test_document_1 =
+ DocumentBuilder().SetKey("namespace", "uri1").SetSchema("email").Build();
+ DocumentProto test_document_2 =
+ DocumentBuilder().SetKey("namespace", "uri2").SetSchema("person").Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id_1,
+ document_store_->Put(test_document_1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id_2,
+ document_store_->Put(test_document_2));
+
+ ScoringSpecProto spec_proto = CreateAdvancedScoringSpec("");
+
+ // The entry for type "email" is missing, so every properties in "email"
+ // should get weight 1.0.
+ // The weight of "phoneNumber" in "person" type is unspecified, which should
+ // default to 1/2 = 0.5
+ *spec_proto.add_type_property_weights() = CreateTypePropertyWeights(
+ /*schema_type=*/"person",
+ {CreatePropertyWeight(/*path=*/"emailAddress", /*weight=*/1.0),
+ CreatePropertyWeight(/*path=*/"name", /*weight=*/2)});
+
+ // Let the hit for test_document_1 match property "subject".
+ // So this.propertyWeights() for test_document_1 will return [1].
+ DocHitInfo doc_hit_info_1 = DocHitInfo(document_id_1);
+ doc_hit_info_1.UpdateSection(0);
+
+ // Let the hit for test_document_2 match properties "emailAddress", "name" and
+ // "phoneNumber". So this.propertyWeights() for test_document_3 will return
+ // [0.5, 1, 0.5].
+ DocHitInfo doc_hit_info_2 = DocHitInfo(document_id_2);
+ doc_hit_info_2.UpdateSection(0);
+ doc_hit_info_2.UpdateSection(1);
+ doc_hit_info_2.UpdateSection(2);
+
+ spec_proto.set_advanced_scoring_expression("min(this.propertyWeights())");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<AdvancedScorer> scorer,
+ AdvancedScorer::Create(spec_proto,
+ /*default_score=*/10, document_store_.get(),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ // min([1]) = 1
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_1, /*query_it=*/nullptr), Eq(1));
+ // min([0.5, 1, 0.5]) = 0.5
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_2, /*query_it=*/nullptr), Eq(0.5));
+
+ spec_proto.set_advanced_scoring_expression("max(this.propertyWeights())");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer, AdvancedScorer::Create(spec_proto,
+ /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ // max([1]) = 1
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_1, /*query_it=*/nullptr), Eq(1));
+ // max([0.5, 1, 0.5]) = 1
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_2, /*query_it=*/nullptr), Eq(1));
+
+ spec_proto.set_advanced_scoring_expression("sum(this.propertyWeights())");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer, AdvancedScorer::Create(spec_proto,
+ /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ // sum([1]) = 1
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_1, /*query_it=*/nullptr), Eq(1));
+ // sum([0.5, 1, 0.5]) = 2
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_2, /*query_it=*/nullptr), Eq(2));
+}
+
+TEST_F(AdvancedScorerTest, InvalidChildrenScoresFunctionScoreExpression) {
+ const double default_score = 123;
+
+ // Without join_children_fetcher provided,
+ // "len(this.childrenRankingSignals())" cannot be created.
+ EXPECT_THAT(
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("len(this.childrenRankingSignals())"),
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds(),
+ /*join_children_fetcher=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // The root expression can only be of double type, but here it is of list
+ // type.
+ JoinChildrenFetcher fake_fetcher(JoinSpecProto::default_instance(),
+ /*map_joinable_qualified_id=*/{});
+ EXPECT_THAT(AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("this.childrenRankingSignals()"),
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds(), &fake_fetcher),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
TEST_F(AdvancedScorerTest, ComplexExpression) {
const int64_t creation_timestamp_ms = 123;
ICING_ASSERT_OK_AND_ASSIGN(
@@ -448,7 +822,8 @@ TEST_F(AdvancedScorerTest, ComplexExpression) {
// This should evaluate to default score.
"+ this.relevanceScore()"),
/*default_score=*/10, document_store_.get(),
- schema_store_.get()));
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_FALSE(scorer->is_constant());
scorer->PrepareToScore(/*query_term_iterators=*/{});
@@ -473,17 +848,18 @@ TEST_F(AdvancedScorerTest, ConstantExpression) {
"+ log(2, 122) / 12.34"
"* (10 * pow(2 * 1, sin(2)) + 10 * (2 + 10))"),
/*default_score=*/10, document_store_.get(),
- schema_store_.get()));
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_TRUE(scorer->is_constant());
}
// Should be a parsing Error
TEST_F(AdvancedScorerTest, EmptyExpression) {
- EXPECT_THAT(
- AdvancedScorer::Create(CreateAdvancedScoringSpec(""),
- /*default_score=*/10, document_store_.get(),
- schema_store_.get()),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(AdvancedScorer::Create(CreateAdvancedScoringSpec(""),
+ /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST_F(AdvancedScorerTest, EvaluationErrorShouldReturnDefaultScore) {
@@ -497,25 +873,29 @@ TEST_F(AdvancedScorerTest, EvaluationErrorShouldReturnDefaultScore) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer,
AdvancedScorer::Create(CreateAdvancedScoringSpec("log(0)"), default_score,
- document_store_.get(), schema_store_.get()));
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), DoubleNear(default_score, kEps));
ICING_ASSERT_OK_AND_ASSIGN(
scorer,
AdvancedScorer::Create(CreateAdvancedScoringSpec("1 / 0"), default_score,
- document_store_.get(), schema_store_.get()));
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), DoubleNear(default_score, kEps));
ICING_ASSERT_OK_AND_ASSIGN(
scorer, AdvancedScorer::Create(CreateAdvancedScoringSpec("sqrt(-1)"),
default_score, document_store_.get(),
- schema_store_.get()));
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), DoubleNear(default_score, kEps));
ICING_ASSERT_OK_AND_ASSIGN(
scorer, AdvancedScorer::Create(CreateAdvancedScoringSpec("pow(-1, 0.5)"),
default_score, document_store_.get(),
- schema_store_.get()));
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo), DoubleNear(default_score, kEps));
}
@@ -526,67 +906,80 @@ TEST_F(AdvancedScorerTest, MathTypeError) {
EXPECT_THAT(
AdvancedScorer::Create(CreateAdvancedScoringSpec("test"), default_score,
- document_store_.get(), schema_store_.get()),
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(
AdvancedScorer::Create(CreateAdvancedScoringSpec("log()"), default_score,
- document_store_.get(), schema_store_.get()),
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(AdvancedScorer::Create(CreateAdvancedScoringSpec("log(1, 2, 3)"),
default_score, document_store_.get(),
- schema_store_.get()),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(AdvancedScorer::Create(CreateAdvancedScoringSpec("log(1, this)"),
default_score, document_store_.get(),
- schema_store_.get()),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(
AdvancedScorer::Create(CreateAdvancedScoringSpec("pow(1)"), default_score,
- document_store_.get(), schema_store_.get()),
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(AdvancedScorer::Create(CreateAdvancedScoringSpec("sqrt(1, 2)"),
default_score, document_store_.get(),
- schema_store_.get()),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(AdvancedScorer::Create(CreateAdvancedScoringSpec("abs(1, 2)"),
default_score, document_store_.get(),
- schema_store_.get()),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(AdvancedScorer::Create(CreateAdvancedScoringSpec("sin(1, 2)"),
default_score, document_store_.get(),
- schema_store_.get()),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(AdvancedScorer::Create(CreateAdvancedScoringSpec("cos(1, 2)"),
default_score, document_store_.get(),
- schema_store_.get()),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(AdvancedScorer::Create(CreateAdvancedScoringSpec("tan(1, 2)"),
default_score, document_store_.get(),
- schema_store_.get()),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(
AdvancedScorer::Create(CreateAdvancedScoringSpec("this"), default_score,
- document_store_.get(), schema_store_.get()),
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(
AdvancedScorer::Create(CreateAdvancedScoringSpec("-this"), default_score,
- document_store_.get(), schema_store_.get()),
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(AdvancedScorer::Create(CreateAdvancedScoringSpec("1 + this"),
default_score, document_store_.get(),
- schema_store_.get()),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
@@ -595,39 +988,48 @@ TEST_F(AdvancedScorerTest, DocumentFunctionTypeError) {
EXPECT_THAT(AdvancedScorer::Create(
CreateAdvancedScoringSpec("documentScore(1)"), default_score,
- document_store_.get(), schema_store_.get()),
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(AdvancedScorer::Create(
CreateAdvancedScoringSpec("this.creationTimestamp(1)"),
- default_score, document_store_.get(), schema_store_.get()),
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(AdvancedScorer::Create(
CreateAdvancedScoringSpec("this.usageCount()"), default_score,
- document_store_.get(), schema_store_.get()),
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(AdvancedScorer::Create(
CreateAdvancedScoringSpec("usageLastUsedTimestamp(1, 1)"),
- default_score, document_store_.get(), schema_store_.get()),
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(AdvancedScorer::Create(
CreateAdvancedScoringSpec("relevanceScore(1)"), default_score,
- document_store_.get(), schema_store_.get()),
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(AdvancedScorer::Create(
CreateAdvancedScoringSpec("documentScore(this)"),
- default_score, document_store_.get(), schema_store_.get()),
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(AdvancedScorer::Create(
CreateAdvancedScoringSpec("that.documentScore()"),
- default_score, document_store_.get(), schema_store_.get()),
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(AdvancedScorer::Create(
CreateAdvancedScoringSpec("this.this.creationTimestamp()"),
- default_score, document_store_.get(), schema_store_.get()),
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(AdvancedScorer::Create(CreateAdvancedScoringSpec("this.log(2)"),
default_score, document_store_.get(),
- schema_store_.get()),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
diff --git a/icing/scoring/advanced_scoring/score-expression.cc b/icing/scoring/advanced_scoring/score-expression.cc
index a8749df..e8a2a89 100644
--- a/icing/scoring/advanced_scoring/score-expression.cc
+++ b/icing/scoring/advanced_scoring/score-expression.cc
@@ -14,9 +14,26 @@
#include "icing/scoring/advanced_scoring/score-expression.h"
+#include <numeric>
+#include <vector>
+
+#include "icing/absl_ports/canonical_errors.h"
+
namespace icing {
namespace lib {
+namespace {
+
+libtextclassifier3::Status CheckChildrenNotNull(
+ const std::vector<std::unique_ptr<ScoreExpression>>& children) {
+ for (const auto& child : children) {
+ ICING_RETURN_ERROR_IF_NULL(child);
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+} // namespace
+
libtextclassifier3::StatusOr<std::unique_ptr<ScoreExpression>>
OperatorScoreExpression::Create(
OperatorType op, std::vector<std::unique_ptr<ScoreExpression>> children) {
@@ -24,12 +41,13 @@ OperatorScoreExpression::Create(
return absl_ports::InvalidArgumentError(
"OperatorScoreExpression must have at least one argument.");
}
+ ICING_RETURN_IF_ERROR(CheckChildrenNotNull(children));
+
bool children_all_constant_double = true;
for (const auto& child : children) {
- ICING_RETURN_ERROR_IF_NULL(child);
- if (child->is_document_type()) {
+ if (child->type() != ScoreExpressionType::kDouble) {
return absl_ports::InvalidArgumentError(
- "Operators are not supported for document type.");
+ "Operators are only supported for double type.");
}
if (!child->is_constant_double()) {
children_all_constant_double = false;
@@ -54,7 +72,7 @@ OperatorScoreExpression::Create(
}
libtextclassifier3::StatusOr<double> OperatorScoreExpression::eval(
- const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) {
+ const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const {
// The Create factory guarantees that an operator will have at least one
// child.
ICING_ASSIGN_OR_RETURN(double res, children_.at(0)->eval(hit_info, query_it));
@@ -91,79 +109,104 @@ libtextclassifier3::StatusOr<double> OperatorScoreExpression::eval(
const std::unordered_map<std::string, MathFunctionScoreExpression::FunctionType>
MathFunctionScoreExpression::kFunctionNames = {
- {"log", FunctionType::kLog}, {"pow", FunctionType::kPow},
- {"max", FunctionType::kMax}, {"min", FunctionType::kMin},
- {"sqrt", FunctionType::kSqrt}, {"abs", FunctionType::kAbs},
- {"sin", FunctionType::kSin}, {"cos", FunctionType::kCos},
- {"tan", FunctionType::kTan}};
+ {"log", FunctionType::kLog}, {"pow", FunctionType::kPow},
+ {"max", FunctionType::kMax}, {"min", FunctionType::kMin},
+ {"len", FunctionType::kLen}, {"sum", FunctionType::kSum},
+ {"avg", FunctionType::kAvg}, {"sqrt", FunctionType::kSqrt},
+ {"abs", FunctionType::kAbs}, {"sin", FunctionType::kSin},
+ {"cos", FunctionType::kCos}, {"tan", FunctionType::kTan}};
+
+const std::unordered_set<MathFunctionScoreExpression::FunctionType>
+ MathFunctionScoreExpression::kVariableArgumentsFunctions = {
+ FunctionType::kMax, FunctionType::kMin, FunctionType::kLen,
+ FunctionType::kSum, FunctionType::kAvg};
libtextclassifier3::StatusOr<std::unique_ptr<ScoreExpression>>
MathFunctionScoreExpression::Create(
FunctionType function_type,
- std::vector<std::unique_ptr<ScoreExpression>> children) {
- if (children.empty()) {
+ std::vector<std::unique_ptr<ScoreExpression>> args) {
+ if (args.empty()) {
return absl_ports::InvalidArgumentError(
"Math functions must have at least one argument.");
}
- bool children_all_constant_double = true;
- for (const auto& child : children) {
- ICING_RETURN_ERROR_IF_NULL(child);
- if (child->is_document_type()) {
+ ICING_RETURN_IF_ERROR(CheckChildrenNotNull(args));
+
+ // Received a list type in the function argument.
+ if (args.size() == 1 && args[0]->type() == ScoreExpressionType::kDoubleList) {
+ // Only certain functions support list type.
+ if (kVariableArgumentsFunctions.count(function_type) > 0) {
+ return std::unique_ptr<MathFunctionScoreExpression>(
+ new MathFunctionScoreExpression(function_type, std::move(args)));
+ }
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Received an unsupported list type argument in the math function."));
+ }
+
+ bool args_all_constant_double = true;
+ for (const auto& child : args) {
+ if (child->type() != ScoreExpressionType::kDouble) {
return absl_ports::InvalidArgumentError(
- "Math functions are not supported for document type.");
+ "Got an invalid type for the math function. Should expect a double "
+ "type argument.");
}
if (!child->is_constant_double()) {
- children_all_constant_double = false;
+ args_all_constant_double = false;
}
}
switch (function_type) {
case FunctionType::kLog:
- if (children.size() != 1 && children.size() != 2) {
+ if (args.size() != 1 && args.size() != 2) {
return absl_ports::InvalidArgumentError(
"log must have 1 or 2 arguments.");
}
break;
case FunctionType::kPow:
- if (children.size() != 2) {
+ if (args.size() != 2) {
return absl_ports::InvalidArgumentError("pow must have 2 arguments.");
}
break;
case FunctionType::kSqrt:
- if (children.size() != 1) {
+ if (args.size() != 1) {
return absl_ports::InvalidArgumentError("sqrt must have 1 argument.");
}
break;
case FunctionType::kAbs:
- if (children.size() != 1) {
+ if (args.size() != 1) {
return absl_ports::InvalidArgumentError("abs must have 1 argument.");
}
break;
case FunctionType::kSin:
- if (children.size() != 1) {
+ if (args.size() != 1) {
return absl_ports::InvalidArgumentError("sin must have 1 argument.");
}
break;
case FunctionType::kCos:
- if (children.size() != 1) {
+ if (args.size() != 1) {
return absl_ports::InvalidArgumentError("cos must have 1 argument.");
}
break;
case FunctionType::kTan:
- if (children.size() != 1) {
+ if (args.size() != 1) {
return absl_ports::InvalidArgumentError("tan must have 1 argument.");
}
break;
- // max and min support variable length arguments
+ // Functions that support variable length arguments
case FunctionType::kMax:
[[fallthrough]];
case FunctionType::kMin:
+ [[fallthrough]];
+ case FunctionType::kLen:
+ [[fallthrough]];
+ case FunctionType::kSum:
+ [[fallthrough]];
+ case FunctionType::kAvg:
break;
}
std::unique_ptr<ScoreExpression> expression =
std::unique_ptr<MathFunctionScoreExpression>(
- new MathFunctionScoreExpression(function_type, std::move(children)));
- if (children_all_constant_double) {
- // Because all of the children are constants, this expression does not
+ new MathFunctionScoreExpression(function_type, std::move(args)));
+ if (args_all_constant_double) {
+ // Because all of the arguments are constants, this expression does not
// depend on the DocHitInto or query_it that are passed into it.
return ConstantScoreExpression::Create(
expression->eval(DocHitInfo(), /*query_it=*/nullptr));
@@ -172,11 +215,15 @@ MathFunctionScoreExpression::Create(
}
libtextclassifier3::StatusOr<double> MathFunctionScoreExpression::eval(
- const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) {
+ const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const {
std::vector<double> values;
- for (const auto& child : children_) {
- ICING_ASSIGN_OR_RETURN(double v, child->eval(hit_info, query_it));
- values.push_back(v);
+ if (args_.at(0)->type() == ScoreExpressionType::kDoubleList) {
+ ICING_ASSIGN_OR_RETURN(values, args_.at(0)->eval_list(hit_info, query_it));
+ } else {
+ for (const auto& child : args_) {
+ ICING_ASSIGN_OR_RETURN(double v, child->eval(hit_info, query_it));
+ values.push_back(v);
+ }
}
double res = 0;
@@ -194,11 +241,32 @@ libtextclassifier3::StatusOr<double> MathFunctionScoreExpression::eval(
res = pow(values[0], values[1]);
break;
case FunctionType::kMax:
+ if (values.empty()) {
+ return absl_ports::InvalidArgumentError(
+ "Got an empty parameter set in max function");
+ }
res = *std::max_element(values.begin(), values.end());
break;
case FunctionType::kMin:
+ if (values.empty()) {
+ return absl_ports::InvalidArgumentError(
+ "Got an empty parameter set in min function");
+ }
res = *std::min_element(values.begin(), values.end());
break;
+ case FunctionType::kLen:
+ res = values.size();
+ break;
+ case FunctionType::kSum:
+ res = std::reduce(values.begin(), values.end());
+ break;
+ case FunctionType::kAvg:
+ if (values.empty()) {
+ return absl_ports::InvalidArgumentError(
+ "Got an empty parameter set in avg function.");
+ }
+ res = std::reduce(values.begin(), values.end()) / values.size();
+ break;
case FunctionType::kSqrt:
res = sqrt(values[0]);
break;
@@ -234,16 +302,16 @@ const std::unordered_map<std::string,
libtextclassifier3::StatusOr<std::unique_ptr<DocumentFunctionScoreExpression>>
DocumentFunctionScoreExpression::Create(
FunctionType function_type,
- std::vector<std::unique_ptr<ScoreExpression>> children,
- const DocumentStore* document_store, double default_score) {
- if (children.empty()) {
+ std::vector<std::unique_ptr<ScoreExpression>> args,
+ const DocumentStore* document_store, double default_score,
+ int64_t current_time_ms) {
+ if (args.empty()) {
return absl_ports::InvalidArgumentError(
"Document-based functions must have at least one argument.");
}
- for (const auto& child : children) {
- ICING_RETURN_ERROR_IF_NULL(child);
- }
- if (!children[0]->is_document_type()) {
+ ICING_RETURN_IF_ERROR(CheckChildrenNotNull(args));
+
+ if (args[0]->type() != ScoreExpressionType::kDocument) {
return absl_ports::InvalidArgumentError(
"The first parameter of document-based functions must be \"this\".");
}
@@ -251,7 +319,7 @@ DocumentFunctionScoreExpression::Create(
case FunctionType::kDocumentScore:
[[fallthrough]];
case FunctionType::kCreationTimestamp:
- if (children.size() != 1) {
+ if (args.size() != 1) {
return absl_ports::InvalidArgumentError(
"DocumentScore/CreationTimestamp must have 1 argument.");
}
@@ -259,7 +327,7 @@ DocumentFunctionScoreExpression::Create(
case FunctionType::kUsageCount:
[[fallthrough]];
case FunctionType::kUsageLastUsedTimestamp:
- if (children.size() != 2 || children[1]->is_document_type()) {
+ if (args.size() != 2 || args[1]->type() != ScoreExpressionType::kDouble) {
return absl_ports::InvalidArgumentError(
"UsageCount/UsageLastUsedTimestamp must have 2 arguments. The "
"first argument should be \"this\", and the second argument "
@@ -268,12 +336,13 @@ DocumentFunctionScoreExpression::Create(
break;
}
return std::unique_ptr<DocumentFunctionScoreExpression>(
- new DocumentFunctionScoreExpression(function_type, std::move(children),
- document_store, default_score));
+ new DocumentFunctionScoreExpression(function_type, std::move(args),
+ document_store, default_score,
+ current_time_ms));
}
libtextclassifier3::StatusOr<double> DocumentFunctionScoreExpression::eval(
- const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) {
+ const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const {
switch (function_type_) {
case FunctionType::kDocumentScore:
[[fallthrough]];
@@ -291,31 +360,35 @@ libtextclassifier3::StatusOr<double> DocumentFunctionScoreExpression::eval(
[[fallthrough]];
case FunctionType::kUsageLastUsedTimestamp: {
ICING_ASSIGN_OR_RETURN(double raw_usage_type,
- children_[1]->eval(hit_info, query_it));
+ args_[1]->eval(hit_info, query_it));
int usage_type = (int)raw_usage_type;
if (usage_type < 1 || usage_type > 3 || raw_usage_type != usage_type) {
return absl_ports::InvalidArgumentError(
"Usage type must be an integer from 1 to 3");
}
- ICING_ASSIGN_OR_RETURN(
- UsageStore::UsageScores usage_scores,
- document_store_.GetUsageScores(hit_info.document_id()),
- default_score_);
+ std::optional<UsageStore::UsageScores> usage_scores =
+ document_store_.GetUsageScores(hit_info.document_id(),
+ current_time_ms_);
+ if (!usage_scores) {
+ // If there's no UsageScores entry present for this doc, then just
+ // treat it as a default instance.
+ usage_scores = UsageStore::UsageScores();
+ }
if (function_type_ == FunctionType::kUsageCount) {
if (usage_type == 1) {
- return usage_scores.usage_type1_count;
+ return usage_scores->usage_type1_count;
} else if (usage_type == 2) {
- return usage_scores.usage_type2_count;
+ return usage_scores->usage_type2_count;
} else {
- return usage_scores.usage_type3_count;
+ return usage_scores->usage_type3_count;
}
}
if (usage_type == 1) {
- return usage_scores.usage_type1_last_used_timestamp_s * 1000.0;
+ return usage_scores->usage_type1_last_used_timestamp_s * 1000.0;
} else if (usage_type == 2) {
- return usage_scores.usage_type2_last_used_timestamp_s * 1000.0;
+ return usage_scores->usage_type2_last_used_timestamp_s * 1000.0;
} else {
- return usage_scores.usage_type3_last_used_timestamp_s * 1000.0;
+ return usage_scores->usage_type3_last_used_timestamp_s * 1000.0;
}
}
}
@@ -324,25 +397,26 @@ libtextclassifier3::StatusOr<double> DocumentFunctionScoreExpression::eval(
libtextclassifier3::StatusOr<
std::unique_ptr<RelevanceScoreFunctionScoreExpression>>
RelevanceScoreFunctionScoreExpression::Create(
- std::vector<std::unique_ptr<ScoreExpression>> children,
+ std::vector<std::unique_ptr<ScoreExpression>> args,
Bm25fCalculator* bm25f_calculator, double default_score) {
- if (children.size() != 1) {
+ if (args.size() != 1) {
return absl_ports::InvalidArgumentError(
"relevanceScore must have 1 argument.");
}
- ICING_RETURN_ERROR_IF_NULL(children[0]);
- if (!children[0]->is_document_type()) {
+ ICING_RETURN_IF_ERROR(CheckChildrenNotNull(args));
+
+ if (args[0]->type() != ScoreExpressionType::kDocument) {
return absl_ports::InvalidArgumentError(
"relevanceScore must take \"this\" as its argument.");
}
return std::unique_ptr<RelevanceScoreFunctionScoreExpression>(
- new RelevanceScoreFunctionScoreExpression(
- std::move(children), bm25f_calculator, default_score));
+ new RelevanceScoreFunctionScoreExpression(bm25f_calculator,
+ default_score));
}
libtextclassifier3::StatusOr<double>
RelevanceScoreFunctionScoreExpression::eval(
- const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) {
+ const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const {
if (query_it == nullptr) {
return default_score_;
}
@@ -350,5 +424,98 @@ RelevanceScoreFunctionScoreExpression::eval(
bm25f_calculator_.ComputeScore(query_it, hit_info, default_score_));
}
+libtextclassifier3::StatusOr<
+ std::unique_ptr<ChildrenRankingSignalsFunctionScoreExpression>>
+ChildrenRankingSignalsFunctionScoreExpression::Create(
+ std::vector<std::unique_ptr<ScoreExpression>> args,
+ const JoinChildrenFetcher* join_children_fetcher) {
+ if (args.size() != 1) {
+ return absl_ports::InvalidArgumentError(
+ "childrenRankingSignals must have 1 argument.");
+ }
+ ICING_RETURN_IF_ERROR(CheckChildrenNotNull(args));
+
+ if (args[0]->type() != ScoreExpressionType::kDocument) {
+ return absl_ports::InvalidArgumentError(
+ "childrenRankingSignals must take \"this\" as its argument.");
+ }
+ if (join_children_fetcher == nullptr) {
+ return absl_ports::InvalidArgumentError(
+ "childrenRankingSignals must only be used with join, but "
+ "JoinChildrenFetcher "
+ "is not provided.");
+ }
+ return std::unique_ptr<ChildrenRankingSignalsFunctionScoreExpression>(
+ new ChildrenRankingSignalsFunctionScoreExpression(
+ *join_children_fetcher));
+}
+
+libtextclassifier3::StatusOr<std::vector<double>>
+ChildrenRankingSignalsFunctionScoreExpression::eval_list(
+ const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const {
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<ScoredDocumentHit> children_hits,
+ join_children_fetcher_.GetChildren(hit_info.document_id()));
+ std::vector<double> children_scores;
+ children_scores.reserve(children_hits.size());
+ for (const ScoredDocumentHit& child_hit : children_hits) {
+ children_scores.push_back(child_hit.score());
+ }
+ return std::move(children_scores);
+}
+
+libtextclassifier3::StatusOr<
+ std::unique_ptr<PropertyWeightsFunctionScoreExpression>>
+PropertyWeightsFunctionScoreExpression::Create(
+ std::vector<std::unique_ptr<ScoreExpression>> args,
+ const DocumentStore* document_store, const SectionWeights* section_weights,
+ int64_t current_time_ms) {
+ if (args.size() != 1) {
+ return absl_ports::InvalidArgumentError(
+ "propertyWeights must have 1 argument.");
+ }
+ ICING_RETURN_IF_ERROR(CheckChildrenNotNull(args));
+
+ if (args[0]->type() != ScoreExpressionType::kDocument) {
+ return absl_ports::InvalidArgumentError(
+ "propertyWeights must take \"this\" as its argument.");
+ }
+ return std::unique_ptr<PropertyWeightsFunctionScoreExpression>(
+ new PropertyWeightsFunctionScoreExpression(
+ document_store, section_weights, current_time_ms));
+}
+
+libtextclassifier3::StatusOr<std::vector<double>>
+PropertyWeightsFunctionScoreExpression::eval_list(
+ const DocHitInfo& hit_info, const DocHitInfoIterator*) const {
+ std::vector<double> weights;
+ SectionIdMask sections = hit_info.hit_section_ids_mask();
+ SchemaTypeId schema_type_id = GetSchemaTypeId(hit_info.document_id());
+
+ while (sections != 0) {
+ SectionId section_id = __builtin_ctzll(sections);
+ sections &= ~(UINT64_C(1) << section_id);
+ weights.push_back(section_weights_.GetNormalizedSectionWeight(
+ schema_type_id, section_id));
+ }
+ return weights;
+}
+
+SchemaTypeId PropertyWeightsFunctionScoreExpression::GetSchemaTypeId(
+ DocumentId document_id) const {
+ auto filter_data_optional =
+ document_store_.GetAliveDocumentFilterData(document_id, current_time_ms_);
+ if (!filter_data_optional) {
+ // This should never happen. The only failure case for
+ // GetAliveDocumentFilterData is if the document_id is outside of the range
+ // of allocated document_ids, which shouldn't be possible since we're
+ // getting this document_id from the posting lists.
+ ICING_LOG(WARNING) << "No document filter data for document ["
+ << document_id << "]";
+ return kInvalidSchemaTypeId;
+ }
+ return filter_data_optional.value().schema_type_id();
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/scoring/advanced_scoring/score-expression.h b/icing/scoring/advanced_scoring/score-expression.h
index f80da33..08d7997 100644
--- a/icing/scoring/advanced_scoring/score-expression.h
+++ b/icing/scoring/advanced_scoring/score-expression.h
@@ -19,11 +19,13 @@
#include <cmath>
#include <memory>
#include <unordered_map>
+#include <unordered_set>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/join/join-children-fetcher.h"
#include "icing/scoring/bm25f-calculator.h"
#include "icing/store/document-store.h"
#include "icing/util/status-macros.h"
@@ -31,6 +33,12 @@
namespace icing {
namespace lib {
+enum class ScoreExpressionType {
+ kDouble,
+ kDoubleList,
+ kDocument // Only "this" is considered as document type.
+};
+
class ScoreExpression {
public:
virtual ~ScoreExpression() = default;
@@ -43,10 +51,32 @@ class ScoreExpression {
// expression.
// - INTERNAL if there are inconsistencies.
virtual libtextclassifier3::StatusOr<double> eval(
- const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) = 0;
+ const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const {
+ if (type() == ScoreExpressionType::kDouble) {
+ return absl_ports::UnimplementedError(
+ "All ScoreExpressions of type Double must provide their own "
+ "implementation of eval!");
+ }
+ return absl_ports::InternalError(
+ "Runtime type error: the expression should never be evaluated to a "
+ "double. There must be inconsistencies in the static type checking.");
+ }
+
+ virtual libtextclassifier3::StatusOr<std::vector<double>> eval_list(
+ const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const {
+ if (type() == ScoreExpressionType::kDoubleList) {
+ return absl_ports::UnimplementedError(
+ "All ScoreExpressions of type Double List must provide their own "
+ "implementation of eval_list!");
+ }
+ return absl_ports::InternalError(
+ "Runtime type error: the expression should never be evaluated to a "
+ "double list. There must be inconsistencies in the static type "
+ "checking.");
+ }
- // Indicate whether the current expression is of document type
- virtual bool is_document_type() const { return false; }
+ // Indicate the type to which the current expression will be evaluated.
+ virtual ScoreExpressionType type() const = 0;
// Indicate whether the current expression is a constant double.
// Returns true if and only if the object is of ConstantScoreExpression type.
@@ -59,15 +89,10 @@ class ThisExpression : public ScoreExpression {
return std::unique_ptr<ThisExpression>(new ThisExpression());
}
- libtextclassifier3::StatusOr<double> eval(
- const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) override {
- return absl_ports::InternalError(
- "Should never reach here to evaluate a document type as double. "
- "There must be inconsistencies.");
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDocument;
}
- bool is_document_type() const override { return true; }
-
private:
ThisExpression() = default;
};
@@ -81,10 +106,14 @@ class ConstantScoreExpression : public ScoreExpression {
}
libtextclassifier3::StatusOr<double> eval(
- const DocHitInfo&, const DocHitInfoIterator*) override {
+ const DocHitInfo&, const DocHitInfoIterator*) const override {
return c_;
}
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDouble;
+ }
+
bool is_constant_double() const override { return true; }
private:
@@ -107,7 +136,12 @@ class OperatorScoreExpression : public ScoreExpression {
OperatorType op, std::vector<std::unique_ptr<ScoreExpression>> children);
libtextclassifier3::StatusOr<double> eval(
- const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) override;
+ const DocHitInfo& hit_info,
+ const DocHitInfoIterator* query_it) const override;
+
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDouble;
+ }
private:
explicit OperatorScoreExpression(
@@ -125,6 +159,9 @@ class MathFunctionScoreExpression : public ScoreExpression {
kPow,
kMax,
kMin,
+ kLen,
+ kSum,
+ kAvg,
kSqrt,
kAbs,
kSin,
@@ -134,26 +171,33 @@ class MathFunctionScoreExpression : public ScoreExpression {
static const std::unordered_map<std::string, FunctionType> kFunctionNames;
+ static const std::unordered_set<FunctionType> kVariableArgumentsFunctions;
+
// RETURNS:
// - A MathFunctionScoreExpression instance on success if not simplifiable.
// - A ConstantScoreExpression instance on success if simplifiable.
- // - FAILED_PRECONDITION on any null pointer in children.
+ // - FAILED_PRECONDITION on any null pointer in args.
// - INVALID_ARGUMENT on type errors.
static libtextclassifier3::StatusOr<std::unique_ptr<ScoreExpression>> Create(
FunctionType function_type,
- std::vector<std::unique_ptr<ScoreExpression>> children);
+ std::vector<std::unique_ptr<ScoreExpression>> args);
libtextclassifier3::StatusOr<double> eval(
- const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) override;
+ const DocHitInfo& hit_info,
+ const DocHitInfoIterator* query_it) const override;
+
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDouble;
+ }
private:
explicit MathFunctionScoreExpression(
FunctionType function_type,
- std::vector<std::unique_ptr<ScoreExpression>> children)
- : function_type_(function_type), children_(std::move(children)) {}
+ std::vector<std::unique_ptr<ScoreExpression>> args)
+ : function_type_(function_type), args_(std::move(args)) {}
FunctionType function_type_;
- std::vector<std::unique_ptr<ScoreExpression>> children_;
+ std::vector<std::unique_ptr<ScoreExpression>> args_;
};
class DocumentFunctionScoreExpression : public ScoreExpression {
@@ -169,31 +213,40 @@ class DocumentFunctionScoreExpression : public ScoreExpression {
// RETURNS:
// - A DocumentFunctionScoreExpression instance on success.
- // - FAILED_PRECONDITION on any null pointer in children.
+ // - FAILED_PRECONDITION on any null pointer in args.
// - INVALID_ARGUMENT on type errors.
static libtextclassifier3::StatusOr<
std::unique_ptr<DocumentFunctionScoreExpression>>
Create(FunctionType function_type,
- std::vector<std::unique_ptr<ScoreExpression>> children,
- const DocumentStore* document_store, double default_score);
+ std::vector<std::unique_ptr<ScoreExpression>> args,
+ const DocumentStore* document_store, double default_score,
+ int64_t current_time_ms);
libtextclassifier3::StatusOr<double> eval(
- const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) override;
+ const DocHitInfo& hit_info,
+ const DocHitInfoIterator* query_it) const override;
+
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDouble;
+ }
private:
explicit DocumentFunctionScoreExpression(
FunctionType function_type,
- std::vector<std::unique_ptr<ScoreExpression>> children,
- const DocumentStore* document_store, double default_score)
- : children_(std::move(children)),
+ std::vector<std::unique_ptr<ScoreExpression>> args,
+ const DocumentStore* document_store, double default_score,
+ int64_t current_time_ms)
+ : args_(std::move(args)),
document_store_(*document_store),
default_score_(default_score),
- function_type_(function_type) {}
+ function_type_(function_type),
+ current_time_ms_(current_time_ms) {}
- std::vector<std::unique_ptr<ScoreExpression>> children_;
+ std::vector<std::unique_ptr<ScoreExpression>> args_;
const DocumentStore& document_store_;
double default_score_;
FunctionType function_type_;
+ int64_t current_time_ms_;
};
class RelevanceScoreFunctionScoreExpression : public ScoreExpression {
@@ -202,29 +255,93 @@ class RelevanceScoreFunctionScoreExpression : public ScoreExpression {
// RETURNS:
// - A RelevanceScoreFunctionScoreExpression instance on success.
- // - FAILED_PRECONDITION on any null pointer in children.
+ // - FAILED_PRECONDITION on any null pointer in args.
// - INVALID_ARGUMENT on type errors.
static libtextclassifier3::StatusOr<
std::unique_ptr<RelevanceScoreFunctionScoreExpression>>
- Create(std::vector<std::unique_ptr<ScoreExpression>> children,
+ Create(std::vector<std::unique_ptr<ScoreExpression>> args,
Bm25fCalculator* bm25f_calculator, double default_score);
libtextclassifier3::StatusOr<double> eval(
- const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) override;
+ const DocHitInfo& hit_info,
+ const DocHitInfoIterator* query_it) const override;
+
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDouble;
+ }
private:
explicit RelevanceScoreFunctionScoreExpression(
- std::vector<std::unique_ptr<ScoreExpression>> children,
Bm25fCalculator* bm25f_calculator, double default_score)
- : children_(std::move(children)),
- bm25f_calculator_(*bm25f_calculator),
- default_score_(default_score) {}
+ : bm25f_calculator_(*bm25f_calculator), default_score_(default_score) {}
- std::vector<std::unique_ptr<ScoreExpression>> children_;
Bm25fCalculator& bm25f_calculator_;
double default_score_;
};
+class ChildrenRankingSignalsFunctionScoreExpression : public ScoreExpression {
+ public:
+ static constexpr std::string_view kFunctionName = "childrenRankingSignals";
+
+ // RETURNS:
+ // - A ChildrenRankingSignalsFunctionScoreExpression instance on success.
+ // - FAILED_PRECONDITION on any null pointer in children.
+ // - INVALID_ARGUMENT on type errors.
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<ChildrenRankingSignalsFunctionScoreExpression>>
+ Create(std::vector<std::unique_ptr<ScoreExpression>> args,
+ const JoinChildrenFetcher* join_children_fetcher);
+
+ libtextclassifier3::StatusOr<std::vector<double>> eval_list(
+ const DocHitInfo& hit_info,
+ const DocHitInfoIterator* query_it) const override;
+
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDoubleList;
+ }
+
+ private:
+ explicit ChildrenRankingSignalsFunctionScoreExpression(
+ const JoinChildrenFetcher& join_children_fetcher)
+ : join_children_fetcher_(join_children_fetcher) {}
+ const JoinChildrenFetcher& join_children_fetcher_;
+};
+
+class PropertyWeightsFunctionScoreExpression : public ScoreExpression {
+ public:
+ static constexpr std::string_view kFunctionName = "propertyWeights";
+
+ // RETURNS:
+ // - A PropertyWeightsFunctionScoreExpression instance on success.
+ // - FAILED_PRECONDITION on any null pointer in children.
+ // - INVALID_ARGUMENT on type errors.
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<PropertyWeightsFunctionScoreExpression>>
+ Create(std::vector<std::unique_ptr<ScoreExpression>> args,
+ const DocumentStore* document_store,
+ const SectionWeights* section_weights, int64_t current_time_ms);
+
+ libtextclassifier3::StatusOr<std::vector<double>> eval_list(
+ const DocHitInfo& hit_info, const DocHitInfoIterator*) const override;
+
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDoubleList;
+ }
+
+ SchemaTypeId GetSchemaTypeId(DocumentId document_id) const;
+
+ private:
+ explicit PropertyWeightsFunctionScoreExpression(
+ const DocumentStore* document_store,
+ const SectionWeights* section_weights, int64_t current_time_ms)
+ : document_store_(*document_store),
+ section_weights_(*section_weights),
+ current_time_ms_(current_time_ms) {}
+ const DocumentStore& document_store_;
+ const SectionWeights& section_weights_;
+ int64_t current_time_ms_;
+};
+
} // namespace lib
} // namespace icing
diff --git a/icing/scoring/advanced_scoring/score-expression_test.cc b/icing/scoring/advanced_scoring/score-expression_test.cc
index b49b658..588090d 100644
--- a/icing/scoring/advanced_scoring/score-expression_test.cc
+++ b/icing/scoring/advanced_scoring/score-expression_test.cc
@@ -39,13 +39,39 @@ class NonConstantScoreExpression : public ScoreExpression {
}
libtextclassifier3::StatusOr<double> eval(
- const DocHitInfo&, const DocHitInfoIterator*) override {
+ const DocHitInfo &, const DocHitInfoIterator *) const override {
return 0;
}
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDouble;
+ }
+
bool is_constant_double() const override { return false; }
};
+class ListScoreExpression : public ScoreExpression {
+ public:
+ static std::unique_ptr<ListScoreExpression> Create(
+ const std::vector<double> &values) {
+ std::unique_ptr<ListScoreExpression> res =
+ std::make_unique<ListScoreExpression>();
+ res->values = values;
+ return res;
+ }
+
+ libtextclassifier3::StatusOr<std::vector<double>> eval_list(
+ const DocHitInfo &, const DocHitInfoIterator *) const override {
+ return values;
+ }
+
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDoubleList;
+ }
+
+ std::vector<double> values;
+};
+
template <typename... Args>
std::vector<std::unique_ptr<ScoreExpression>> MakeChildren(Args... args) {
std::vector<std::unique_ptr<ScoreExpression>> children;
@@ -180,6 +206,147 @@ TEST(ScoreExpressionTest, CannotSimplifyNonConstant) {
ASSERT_FALSE(expression->is_constant_double());
}
+TEST(ScoreExpressionTest, MathFunctionsWithListTypeArgument) {
+ // max({1, 2, 3}) = 3
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoreExpression> expression,
+ MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kMax,
+ MakeChildren(ListScoreExpression::Create({1, 2, 3}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(3)));
+
+ // min({1, 2, 3}) = 1
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kMin,
+ MakeChildren(ListScoreExpression::Create({1, 2, 3}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(1)));
+
+ // len({1, 2, 3}) = 3
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kLen,
+ MakeChildren(ListScoreExpression::Create({1, 2, 3}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(3)));
+
+ // sum({1, 2, 3}) = 6
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kSum,
+ MakeChildren(ListScoreExpression::Create({1, 2, 3}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(6)));
+
+ // avg({1, 2, 3}) = 2
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kAvg,
+ MakeChildren(ListScoreExpression::Create({1, 2, 3}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(2)));
+
+ // max({4}) = 4
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kMax,
+ MakeChildren(ListScoreExpression::Create({4}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(4)));
+
+ // min({5}) = 5
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kMin,
+ MakeChildren(ListScoreExpression::Create({5}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(5)));
+
+ // len({6}) = 1
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kLen,
+ MakeChildren(ListScoreExpression::Create({6}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(1)));
+
+ // sum({7}) = 7
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kSum,
+ MakeChildren(ListScoreExpression::Create({7}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(7)));
+
+ // avg({7}) = 7
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kAvg,
+ MakeChildren(ListScoreExpression::Create({7}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(7)));
+
+ // len({}) = 0
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kLen,
+ MakeChildren(ListScoreExpression::Create({}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(0)));
+
+ // sum({}) = 0
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kSum,
+ MakeChildren(ListScoreExpression::Create({}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(0)));
+}
+
+TEST(ScoreExpressionTest, MathFunctionsWithListTypeArgumentError) {
+ // max({}) = evaluation error, since max on empty list does not produce a
+ // valid result.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoreExpression> expression,
+ MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kMax,
+ MakeChildren(ListScoreExpression::Create({}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // avg({}) = evaluation error, since avg on empty list does not produce a
+ // valid result.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kAvg,
+ MakeChildren(ListScoreExpression::Create({}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // max(1, {2}) = type error, since max must take either n > 0 parameters of
+ // type double, or a single parameter of type list.
+ EXPECT_THAT(MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kMax,
+ MakeChildren(ConstantScoreExpression::Create(1),
+ ListScoreExpression::Create({2}))),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // min({1}, {2}) = type error, since min must take either n > 0 parameters of
+ // type double, or a single parameter of type list.
+ EXPECT_THAT(MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kMin,
+ MakeChildren(ListScoreExpression::Create({1}),
+ ListScoreExpression::Create({2}))),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // sin({1}) = type error, since sin does not support list type parameters.
+ EXPECT_THAT(MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kSin,
+ MakeChildren(ListScoreExpression::Create({1}))),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ScoreExpressionTest, ChildrenCannotBeNull) {
+ EXPECT_THAT(OperatorScoreExpression::Create(
+ OperatorScoreExpression::OperatorType::kPlus,
+ MakeChildren(ConstantScoreExpression::Create(1), nullptr)),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kPow,
+ MakeChildren(ConstantScoreExpression::Create(2), nullptr)),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/scoring/advanced_scoring/scoring-visitor.cc b/icing/scoring/advanced_scoring/scoring-visitor.cc
index ea2e190..e2b24a2 100644
--- a/icing/scoring/advanced_scoring/scoring-visitor.cc
+++ b/icing/scoring/advanced_scoring/scoring-visitor.cc
@@ -72,21 +72,21 @@ void ScoringVisitor::VisitMember(const MemberNode* node) {
absl_ports::StrCat("Expect a numeric literal, but got ", value));
return;
}
- stack.push_back(ConstantScoreExpression::Create(number));
+ stack_.push_back(ConstantScoreExpression::Create(number));
}
void ScoringVisitor::VisitFunctionHelper(const FunctionNode* node,
bool is_member_function) {
- std::vector<std::unique_ptr<ScoreExpression>> children;
+ std::vector<std::unique_ptr<ScoreExpression>> args;
if (is_member_function) {
- children.push_back(ThisExpression::Create());
+ args.push_back(ThisExpression::Create());
}
for (const auto& arg : node->args()) {
arg->Accept(this);
if (has_pending_error()) {
return;
}
- children.push_back(pop_stack());
+ args.push_back(pop_stack());
}
const std::string& function_name = node->function_name()->value();
libtextclassifier3::StatusOr<std::unique_ptr<ScoreExpression>> expression =
@@ -98,25 +98,35 @@ void ScoringVisitor::VisitFunctionHelper(const FunctionNode* node,
// Document-based function
expression = DocumentFunctionScoreExpression::Create(
DocumentFunctionScoreExpression::kFunctionNames.at(function_name),
- std::move(children), &document_store_, default_score_);
+ std::move(args), &document_store_, default_score_, current_time_ms_);
} else if (function_name ==
RelevanceScoreFunctionScoreExpression::kFunctionName) {
// relevanceScore function
expression = RelevanceScoreFunctionScoreExpression::Create(
- std::move(children), &bm25f_calculator_, default_score_);
+ std::move(args), &bm25f_calculator_, default_score_);
+ } else if (function_name ==
+ ChildrenRankingSignalsFunctionScoreExpression::kFunctionName) {
+ // childrenRankingSignals function
+ expression = ChildrenRankingSignalsFunctionScoreExpression::Create(
+ std::move(args), join_children_fetcher_);
+ } else if (function_name ==
+ PropertyWeightsFunctionScoreExpression::kFunctionName) {
+ // propertyWeights function
+ expression = PropertyWeightsFunctionScoreExpression::Create(
+ std::move(args), &document_store_, &section_weights_, current_time_ms_);
} else if (MathFunctionScoreExpression::kFunctionNames.find(function_name) !=
MathFunctionScoreExpression::kFunctionNames.end()) {
// Math functions
expression = MathFunctionScoreExpression::Create(
MathFunctionScoreExpression::kFunctionNames.at(function_name),
- std::move(children));
+ std::move(args));
}
if (!expression.ok()) {
pending_error_ = expression.status();
return;
}
- stack.push_back(std::move(expression).ValueOrDie());
+ stack_.push_back(std::move(expression).ValueOrDie());
}
void ScoringVisitor::VisitUnaryOperator(const UnaryOperatorNode* node) {
@@ -140,7 +150,7 @@ void ScoringVisitor::VisitUnaryOperator(const UnaryOperatorNode* node) {
pending_error_ = expression.status();
return;
}
- stack.push_back(std::move(expression).ValueOrDie());
+ stack_.push_back(std::move(expression).ValueOrDie());
}
void ScoringVisitor::VisitNaryOperator(const NaryOperatorNode* node) {
@@ -174,7 +184,7 @@ void ScoringVisitor::VisitNaryOperator(const NaryOperatorNode* node) {
pending_error_ = expression.status();
return;
}
- stack.push_back(std::move(expression).ValueOrDie());
+ stack_.push_back(std::move(expression).ValueOrDie());
}
} // namespace lib
diff --git a/icing/scoring/advanced_scoring/scoring-visitor.h b/icing/scoring/advanced_scoring/scoring-visitor.h
index 539af2d..cfee25b 100644
--- a/icing/scoring/advanced_scoring/scoring-visitor.h
+++ b/icing/scoring/advanced_scoring/scoring-visitor.h
@@ -17,6 +17,7 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/join/join-children-fetcher.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/proto/scoring.pb.h"
#include "icing/query/advanced_query_parser/abstract-syntax-tree.h"
@@ -32,11 +33,17 @@ class ScoringVisitor : public AbstractSyntaxTreeVisitor {
explicit ScoringVisitor(double default_score,
const DocumentStore* document_store,
const SchemaStore* schema_store,
- Bm25fCalculator* bm25f_calculator)
+ SectionWeights* section_weights,
+ Bm25fCalculator* bm25f_calculator,
+ const JoinChildrenFetcher* join_children_fetcher,
+ int64_t current_time_ms)
: default_score_(default_score),
document_store_(*document_store),
schema_store_(*schema_store),
- bm25f_calculator_(*bm25f_calculator) {}
+ section_weights_(*section_weights),
+ bm25f_calculator_(*bm25f_calculator),
+ join_children_fetcher_(join_children_fetcher),
+ current_time_ms_(current_time_ms) {}
void VisitFunctionName(const FunctionNameNode* node) override;
void VisitString(const StringNode* node) override;
@@ -60,13 +67,13 @@ class ScoringVisitor : public AbstractSyntaxTreeVisitor {
if (has_pending_error()) {
return pending_error_;
}
- if (stack.size() != 1) {
+ if (stack_.size() != 1) {
return absl_ports::InternalError(IcingStringUtil::StringPrintf(
"Expect to get only one result from "
"ScoringVisitor, but got %zu. There must be inconsistencies.",
- stack.size()));
+ stack_.size()));
}
- return std::move(stack[0]);
+ return std::move(stack_[0]);
}
private:
@@ -77,18 +84,22 @@ class ScoringVisitor : public AbstractSyntaxTreeVisitor {
bool has_pending_error() const { return !pending_error_.ok(); }
std::unique_ptr<ScoreExpression> pop_stack() {
- std::unique_ptr<ScoreExpression> result = std::move(stack.back());
- stack.pop_back();
+ std::unique_ptr<ScoreExpression> result = std::move(stack_.back());
+ stack_.pop_back();
return result;
}
double default_score_;
const DocumentStore& document_store_;
const SchemaStore& schema_store_;
+ SectionWeights& section_weights_;
Bm25fCalculator& bm25f_calculator_;
+ // A non-null join_children_fetcher_ indicates scoring in a join.
+ const JoinChildrenFetcher* join_children_fetcher_; // Does not own.
libtextclassifier3::Status pending_error_;
- std::vector<std::unique_ptr<ScoreExpression>> stack;
+ std::vector<std::unique_ptr<ScoreExpression>> stack_;
+ int64_t current_time_ms_;
};
} // namespace lib
diff --git a/icing/scoring/bm25f-calculator.cc b/icing/scoring/bm25f-calculator.cc
index f169cda..a80ef34 100644
--- a/icing/scoring/bm25f-calculator.cc
+++ b/icing/scoring/bm25f-calculator.cc
@@ -42,11 +42,12 @@ constexpr float k1_ = 1.2f;
constexpr float b_ = 0.7f;
// TODO(b/158603900): add tests for Bm25fCalculator
-Bm25fCalculator::Bm25fCalculator(
- const DocumentStore* document_store,
- std::unique_ptr<SectionWeights> section_weights)
+Bm25fCalculator::Bm25fCalculator(const DocumentStore* document_store,
+ SectionWeights* section_weights,
+ int64_t current_time_ms)
: document_store_(document_store),
- section_weights_(std::move(section_weights)) {}
+ section_weights_(*section_weights),
+ current_time_ms_(current_time_ms) {}
// During initialization, Bm25fCalculator iterates through
// hit-iterators for each query term to pre-compute n(q_i) for each corpus under
@@ -219,7 +220,7 @@ float Bm25fCalculator::ComputeTermFrequencyForMatchedSections(
sections &= ~(UINT64_C(1) << section_id);
Hit::TermFrequency tf = term_match_info.term_frequencies[section_id];
- double weighted_tf = tf * section_weights_->GetNormalizedSectionWeight(
+ double weighted_tf = tf * section_weights_.GetNormalizedSectionWeight(
schema_type_id, section_id);
if (tf != Hit::kNoTermFrequency) {
sum += weighted_tf;
@@ -229,13 +230,13 @@ float Bm25fCalculator::ComputeTermFrequencyForMatchedSections(
}
SchemaTypeId Bm25fCalculator::GetSchemaTypeId(DocumentId document_id) const {
- auto filter_data_optional =
- document_store_->GetAliveDocumentFilterData(document_id);
+ auto filter_data_optional = document_store_->GetAliveDocumentFilterData(
+ document_id, current_time_ms_);
if (!filter_data_optional) {
// This should never happen. The only failure case for
- // GetDocumentFilterData is if the document_id is outside of the range of
- // allocated document_ids, which shouldn't be possible since we're getting
- // this document_id from the posting lists.
+ // GetAliveDocumentFilterData is if the document_id is outside of the range
+ // of allocated document_ids, which shouldn't be possible since we're
+ // getting this document_id from the posting lists.
ICING_LOG(WARNING) << "No document filter data for document ["
<< document_id << "]";
return kInvalidSchemaTypeId;
diff --git a/icing/scoring/bm25f-calculator.h b/icing/scoring/bm25f-calculator.h
index 05009d8..36f9c68 100644
--- a/icing/scoring/bm25f-calculator.h
+++ b/icing/scoring/bm25f-calculator.h
@@ -63,8 +63,9 @@ namespace lib {
// see: glossary/bm25
class Bm25fCalculator {
public:
- explicit Bm25fCalculator(const DocumentStore *document_store_,
- std::unique_ptr<SectionWeights> section_weights_);
+ explicit Bm25fCalculator(const DocumentStore *document_store,
+ SectionWeights *section_weights,
+ int64_t current_time_ms);
// Precompute and cache statistics relevant to BM25F.
// Populates term_id_map_ and corpus_nqi_map_ for use while scoring other
@@ -145,7 +146,7 @@ class Bm25fCalculator {
// Used for accessing normalized section weights when computing the weighted
// term frequency.
- std::unique_ptr<SectionWeights> section_weights_;
+ SectionWeights &section_weights_;
// Map from query term to compact term ID.
// Necessary as a key to the other maps.
@@ -166,6 +167,8 @@ class Bm25fCalculator {
// Map from <corpus ID, term ID> to IDF(q_i) (inverse document frequency).
std::unordered_map<CorpusTermInfo::Value, float> corpus_idf_map_;
+
+ int64_t current_time_ms_;
};
} // namespace lib
diff --git a/icing/scoring/score-and-rank_benchmark.cc b/icing/scoring/score-and-rank_benchmark.cc
index bf12f96..7cb5a95 100644
--- a/icing/scoring/score-and-rank_benchmark.cc
+++ b/icing/scoring/score-and-rank_benchmark.cc
@@ -89,6 +89,18 @@ DocumentProto CreateEmailDocument(int id, int document_score,
.Build();
}
+libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, const SchemaStore* schema_store) {
+ return DocumentStore::Create(
+ filesystem, base_dir, clock, schema_store,
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr);
+}
+
void BM_ScoreAndRankDocumentHitsByDocumentScore(benchmark::State& state) {
const std::string base_dir = GetTestTempDir() + "/score_and_rank_benchmark";
const std::string document_store_dir = base_dir + "/document_store";
@@ -97,30 +109,33 @@ void BM_ScoreAndRankDocumentHitsByDocumentScore(benchmark::State& state) {
// Creates file directories
Filesystem filesystem;
filesystem.DeleteDirectoryRecursively(base_dir.c_str());
- filesystem.CreateDirectoryRecursively(document_store_dir.c_str());
- filesystem.CreateDirectoryRecursively(schema_store_dir.c_str());
+ ASSERT_TRUE(
+ filesystem.CreateDirectoryRecursively(document_store_dir.c_str()));
+ ASSERT_TRUE(filesystem.CreateDirectoryRecursively(schema_store_dir.c_str()));
Clock clock;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem, base_dir, &clock));
+ SchemaStore::Create(&filesystem, schema_store_dir, &clock));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem, document_store_dir, &clock,
- schema_store.get()));
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
- ICING_ASSERT_OK(schema_store->SetSchema(CreateSchemaWithEmailType()));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ CreateSchemaWithEmailType(), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
ScoringProcessor::Create(scoring_spec, document_store.get(),
- schema_store.get()));
-
+ schema_store.get(),
+ clock.GetSystemTimeMilliseconds()));
int num_to_score = state.range(0);
int num_of_documents = state.range(1);
@@ -160,7 +175,6 @@ void BM_ScoreAndRankDocumentHitsByDocumentScore(benchmark::State& state) {
PopTopResultsFromHeap(&scored_document_hits, /*num_results=*/20,
scored_document_hit_comparator);
}
-
// Clean up
document_store.reset();
schema_store.reset();
@@ -200,22 +214,25 @@ void BM_ScoreAndRankDocumentHitsByCreationTime(benchmark::State& state) {
// Creates file directories
Filesystem filesystem;
filesystem.DeleteDirectoryRecursively(base_dir.c_str());
- filesystem.CreateDirectoryRecursively(document_store_dir.c_str());
- filesystem.CreateDirectoryRecursively(schema_store_dir.c_str());
+ ASSERT_TRUE(
+ filesystem.CreateDirectoryRecursively(document_store_dir.c_str()));
+ ASSERT_TRUE(filesystem.CreateDirectoryRecursively(schema_store_dir.c_str()));
Clock clock;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem, base_dir, &clock));
+ SchemaStore::Create(&filesystem, schema_store_dir, &clock));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem, document_store_dir, &clock,
- schema_store.get()));
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
- ICING_ASSERT_OK(schema_store->SetSchema(CreateSchemaWithEmailType()));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ CreateSchemaWithEmailType(), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(
@@ -223,7 +240,8 @@ void BM_ScoreAndRankDocumentHitsByCreationTime(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
ScoringProcessor::Create(scoring_spec, document_store.get(),
- schema_store.get()));
+ schema_store.get(),
+ clock.GetSystemTimeMilliseconds()));
int num_to_score = state.range(0);
int num_of_documents = state.range(1);
@@ -304,29 +322,33 @@ void BM_ScoreAndRankDocumentHitsNoScoring(benchmark::State& state) {
// Creates file directories
Filesystem filesystem;
filesystem.DeleteDirectoryRecursively(base_dir.c_str());
- filesystem.CreateDirectoryRecursively(document_store_dir.c_str());
- filesystem.CreateDirectoryRecursively(schema_store_dir.c_str());
+ ASSERT_TRUE(
+ filesystem.CreateDirectoryRecursively(document_store_dir.c_str()));
+ ASSERT_TRUE(filesystem.CreateDirectoryRecursively(schema_store_dir.c_str()));
Clock clock;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem, base_dir, &clock));
+ SchemaStore::Create(&filesystem, schema_store_dir, &clock));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem, document_store_dir, &clock,
- schema_store.get()));
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
- ICING_ASSERT_OK(schema_store->SetSchema(CreateSchemaWithEmailType()));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ CreateSchemaWithEmailType(), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
ScoringProcessor::Create(scoring_spec, document_store.get(),
- schema_store.get()));
+ schema_store.get(),
+ clock.GetSystemTimeMilliseconds()));
int num_to_score = state.range(0);
int num_of_documents = state.range(1);
@@ -402,29 +424,33 @@ void BM_ScoreAndRankDocumentHitsByRelevanceScoring(benchmark::State& state) {
// Creates file directories
Filesystem filesystem;
filesystem.DeleteDirectoryRecursively(base_dir.c_str());
- filesystem.CreateDirectoryRecursively(document_store_dir.c_str());
- filesystem.CreateDirectoryRecursively(schema_store_dir.c_str());
+ ASSERT_TRUE(
+ filesystem.CreateDirectoryRecursively(document_store_dir.c_str()));
+ ASSERT_TRUE(filesystem.CreateDirectoryRecursively(schema_store_dir.c_str()));
Clock clock;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem, base_dir, &clock));
+ SchemaStore::Create(&filesystem, schema_store_dir, &clock));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem, document_store_dir, &clock,
- schema_store.get()));
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
- ICING_ASSERT_OK(schema_store->SetSchema(CreateSchemaWithEmailType()));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ CreateSchemaWithEmailType(), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
ScoringProcessor::Create(scoring_spec, document_store.get(),
- schema_store.get()));
+ schema_store.get(),
+ clock.GetSystemTimeMilliseconds()));
int num_to_score = state.range(0);
int num_of_documents = state.range(1);
diff --git a/icing/scoring/scorer-factory.cc b/icing/scoring/scorer-factory.cc
index f75b564..e56f10c 100644
--- a/icing/scoring/scorer-factory.cc
+++ b/icing/scoring/scorer-factory.cc
@@ -78,8 +78,10 @@ class DocumentCreationTimestampScorer : public Scorer {
class RelevanceScoreScorer : public Scorer {
public:
explicit RelevanceScoreScorer(
+ std::unique_ptr<SectionWeights> section_weights,
std::unique_ptr<Bm25fCalculator> bm25f_calculator, double default_score)
- : bm25f_calculator_(std::move(bm25f_calculator)),
+ : section_weights_(std::move(section_weights)),
+ bm25f_calculator_(std::move(bm25f_calculator)),
default_score_(default_score) {}
void PrepareToScore(
@@ -99,6 +101,7 @@ class RelevanceScoreScorer : public Scorer {
}
private:
+ std::unique_ptr<SectionWeights> section_weights_;
std::unique_ptr<Bm25fCalculator> bm25f_calculator_;
double default_score_;
};
@@ -108,30 +111,36 @@ class UsageScorer : public Scorer {
public:
UsageScorer(const DocumentStore* document_store,
ScoringSpecProto::RankingStrategy::Code ranking_strategy,
- double default_score)
+ double default_score, int64_t current_time_ms)
: document_store_(*document_store),
ranking_strategy_(ranking_strategy),
- default_score_(default_score) {}
+ default_score_(default_score),
+ current_time_ms_(current_time_ms) {}
double GetScore(const DocHitInfo& hit_info,
const DocHitInfoIterator*) override {
- ICING_ASSIGN_OR_RETURN(
- UsageStore::UsageScores usage_scores,
- document_store_.GetUsageScores(hit_info.document_id()), default_score_);
+ std::optional<UsageStore::UsageScores> usage_scores =
+ document_store_.GetUsageScores(hit_info.document_id(),
+ current_time_ms_);
+ if (!usage_scores) {
+ // If there's no UsageScores entry present for this doc, then just
+ // treat it as a default instance.
+ usage_scores = UsageStore::UsageScores();
+ }
switch (ranking_strategy_) {
case ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT:
- return usage_scores.usage_type1_count;
+ return usage_scores->usage_type1_count;
case ScoringSpecProto::RankingStrategy::USAGE_TYPE2_COUNT:
- return usage_scores.usage_type2_count;
+ return usage_scores->usage_type2_count;
case ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT:
- return usage_scores.usage_type3_count;
+ return usage_scores->usage_type3_count;
case ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP:
- return usage_scores.usage_type1_last_used_timestamp_s * 1000.0;
+ return usage_scores->usage_type1_last_used_timestamp_s * 1000.0;
case ScoringSpecProto::RankingStrategy::USAGE_TYPE2_LAST_USED_TIMESTAMP:
- return usage_scores.usage_type2_last_used_timestamp_s * 1000.0;
+ return usage_scores->usage_type2_last_used_timestamp_s * 1000.0;
case ScoringSpecProto::RankingStrategy::USAGE_TYPE3_LAST_USED_TIMESTAMP:
- return usage_scores.usage_type3_last_used_timestamp_s * 1000.0;
+ return usage_scores->usage_type3_last_used_timestamp_s * 1000.0;
default:
// This shouldn't happen if this scorer is used correctly.
return default_score_;
@@ -142,6 +151,7 @@ class UsageScorer : public Scorer {
const DocumentStore& document_store_;
ScoringSpecProto::RankingStrategy::Code ranking_strategy_;
double default_score_;
+ int64_t current_time_ms_;
};
// A special scorer which does nothing but assigns the default score to each
@@ -163,7 +173,8 @@ namespace scorer_factory {
libtextclassifier3::StatusOr<std::unique_ptr<Scorer>> Create(
const ScoringSpecProto& scoring_spec, double default_score,
- const DocumentStore* document_store, const SchemaStore* schema_store) {
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ int64_t current_time_ms, const JoinChildrenFetcher* join_children_fetcher) {
ICING_RETURN_ERROR_IF_NULL(document_store);
ICING_RETURN_ERROR_IF_NULL(schema_store);
@@ -188,8 +199,9 @@ libtextclassifier3::StatusOr<std::unique_ptr<Scorer>> Create(
SectionWeights::Create(schema_store, scoring_spec));
auto bm25f_calculator = std::make_unique<Bm25fCalculator>(
- document_store, std::move(section_weights));
- return std::make_unique<RelevanceScoreScorer>(std::move(bm25f_calculator),
+ document_store, section_weights.get(), current_time_ms);
+ return std::make_unique<RelevanceScoreScorer>(std::move(section_weights),
+ std::move(bm25f_calculator),
default_score);
}
case ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT:
@@ -203,15 +215,17 @@ libtextclassifier3::StatusOr<std::unique_ptr<Scorer>> Create(
case ScoringSpecProto::RankingStrategy::USAGE_TYPE2_LAST_USED_TIMESTAMP:
[[fallthrough]];
case ScoringSpecProto::RankingStrategy::USAGE_TYPE3_LAST_USED_TIMESTAMP:
- return std::make_unique<UsageScorer>(
- document_store, scoring_spec.rank_by(), default_score);
+ return std::make_unique<UsageScorer>(document_store,
+ scoring_spec.rank_by(),
+ default_score, current_time_ms);
case ScoringSpecProto::RankingStrategy::ADVANCED_SCORING_EXPRESSION:
if (scoring_spec.advanced_scoring_expression().empty()) {
return absl_ports::InvalidArgumentError(
"Advanced scoring is enabled, but the expression is empty!");
}
return AdvancedScorer::Create(scoring_spec, default_score, document_store,
- schema_store);
+ schema_store, current_time_ms,
+ join_children_fetcher);
case ScoringSpecProto::RankingStrategy::JOIN_AGGREGATE_SCORE:
// Use join aggregate score to rank. Since the aggregation score is
// calculated by child documents after joining (in JoinProcessor), we can
diff --git a/icing/scoring/scorer-factory.h b/icing/scoring/scorer-factory.h
index 8c19c75..659bebd 100644
--- a/icing/scoring/scorer-factory.h
+++ b/icing/scoring/scorer-factory.h
@@ -16,6 +16,7 @@
#define ICING_SCORING_SCORER_FACTORY_H_
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/join/join-children-fetcher.h"
#include "icing/scoring/scorer.h"
#include "icing/store/document-store.h"
@@ -36,7 +37,9 @@ namespace scorer_factory {
// INVALID_ARGUMENT if fails to create an instance
libtextclassifier3::StatusOr<std::unique_ptr<Scorer>> Create(
const ScoringSpecProto& scoring_spec, double default_score,
- const DocumentStore* document_store, const SchemaStore* schema_store);
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ int64_t current_time_ms,
+ const JoinChildrenFetcher* join_children_fetcher = nullptr);
} // namespace scorer_factory
diff --git a/icing/scoring/scorer_test.cc b/icing/scoring/scorer_test.cc
index b13d54a..5194c7f 100644
--- a/icing/scoring/scorer_test.cc
+++ b/icing/scoring/scorer_test.cc
@@ -64,8 +64,14 @@ class ScorerTest : public ::testing::TestWithParam<ScorerTestingMode> {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock1_,
- schema_store_.get()));
+ DocumentStore::Create(
+ &filesystem_, doc_store_dir_, &fake_clock1_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
// Creates a simple email schema
@@ -78,7 +84,9 @@ class ScorerTest : public ::testing::TestWithParam<ScorerTestingMode> {
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- ICING_ASSERT_OK(schema_store_->SetSchema(test_email_schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ test_email_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
}
void TearDown() override {
@@ -111,7 +119,7 @@ class ScorerTest : public ::testing::TestWithParam<ScorerTestingMode> {
};
UsageReport CreateUsageReport(std::string name_space, std::string uri,
- int64 timestamp_ms,
+ int64_t timestamp_ms,
UsageReport::UsageType usage_type) {
UsageReport usage_report;
usage_report.set_document_namespace(name_space);
@@ -126,7 +134,8 @@ TEST_P(ScorerTest, CreationWithNullDocumentStoreShouldFail) {
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam()),
- /*default_score=*/0, /*document_store=*/nullptr, schema_store()),
+ /*default_score=*/0, /*document_store=*/nullptr, schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
@@ -136,7 +145,7 @@ TEST_P(ScorerTest, CreationWithNullSchemaStoreShouldFail) {
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam()),
/*default_score=*/0, document_store(),
- /*schema_store=*/nullptr),
+ /*schema_store=*/nullptr, fake_clock1().GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
@@ -146,7 +155,8 @@ TEST_P(ScorerTest, ShouldGetDefaultScoreIfDocumentDoesntExist) {
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam()),
- /*default_score=*/10, document_store(), schema_store()));
+ /*default_score=*/10, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
// Non existent document id
DocHitInfo docHitInfo = DocHitInfo(/*document_id_in=*/1);
@@ -154,70 +164,6 @@ TEST_P(ScorerTest, ShouldGetDefaultScoreIfDocumentDoesntExist) {
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(10));
}
-TEST_P(ScorerTest, ShouldGetDefaultScoreIfDocumentIsDeleted) {
- // Creates a test document with a provided score
- DocumentProto test_document = DocumentBuilder()
- .SetKey("icing", "email/1")
- .SetSchema("email")
- .AddStringProperty("subject", "subject foo")
- .SetScore(42)
- .Build();
-
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- document_store()->Put(test_document));
-
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<Scorer> scorer,
- scorer_factory::Create(
- CreateScoringSpecForRankingStrategy(
- ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam()),
- /*default_score=*/10, document_store(), schema_store()));
-
- DocHitInfo docHitInfo = DocHitInfo(document_id);
-
- // The document's score is returned
- EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(42));
-
- // Delete the document and check that the caller-provided default score is
- // returned
- EXPECT_THAT(document_store()->Delete(document_id), IsOk());
- EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(10));
-}
-
-TEST_P(ScorerTest, ShouldGetDefaultScoreIfDocumentIsExpired) {
- // Creates a test document with a provided score
- int64_t creation_time = fake_clock1().GetSystemTimeMilliseconds();
- int64_t ttl = 100;
- DocumentProto test_document = DocumentBuilder()
- .SetKey("icing", "email/1")
- .SetSchema("email")
- .AddStringProperty("subject", "subject foo")
- .SetScore(42)
- .SetCreationTimestampMs(creation_time)
- .SetTtlMs(ttl)
- .Build();
-
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- document_store()->Put(test_document));
-
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<Scorer> scorer,
- scorer_factory::Create(
- CreateScoringSpecForRankingStrategy(
- ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam()),
- /*default_score=*/10, document_store(), schema_store()));
-
- DocHitInfo docHitInfo = DocHitInfo(document_id);
-
- // The document's score is returned since the document hasn't expired yet.
- EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(42));
-
- // Expire the document and check that the caller-provided default score is
- // returned
- SetFakeClock1Time(creation_time + ttl + 10);
- EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(10));
-}
-
TEST_P(ScorerTest, ShouldGetDefaultDocumentScore) {
// Creates a test document with the default document score 0
DocumentProto test_document =
@@ -235,7 +181,8 @@ TEST_P(ScorerTest, ShouldGetDefaultDocumentScore) {
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam()),
- /*default_score=*/10, document_store(), schema_store()));
+ /*default_score=*/10, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
DocHitInfo docHitInfo = DocHitInfo(document_id);
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(0));
@@ -259,7 +206,8 @@ TEST_P(ScorerTest, ShouldGetCorrectDocumentScore) {
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam()),
- /*default_score=*/0, document_store(), schema_store()));
+ /*default_score=*/0, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
DocHitInfo docHitInfo = DocHitInfo(document_id);
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(5));
@@ -285,7 +233,8 @@ TEST_P(ScorerTest, QueryIteratorNullRelevanceScoreShouldReturnDefaultScore) {
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam()),
- /*default_score=*/10, document_store(), schema_store()));
+ /*default_score=*/10, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
DocHitInfo docHitInfo = DocHitInfo(document_id);
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(10));
@@ -319,7 +268,8 @@ TEST_P(ScorerTest, ShouldGetCorrectCreationTimestampScore) {
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP,
GetParam()),
- /*default_score=*/0, document_store(), schema_store()));
+ /*default_score=*/0, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
DocHitInfo docHitInfo1 = DocHitInfo(document_id1);
DocHitInfo docHitInfo2 = DocHitInfo(document_id2);
@@ -347,19 +297,22 @@ TEST_P(ScorerTest, ShouldGetCorrectUsageCountScoreForType1) {
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT, GetParam()),
- /*default_score=*/0, document_store(), schema_store()));
+ /*default_score=*/0, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer2,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::USAGE_TYPE2_COUNT, GetParam()),
- /*default_score=*/0, document_store(), schema_store()));
+ /*default_score=*/0, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer3,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT, GetParam()),
- /*default_score=*/0, document_store(), schema_store()));
+ /*default_score=*/0, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
DocHitInfo docHitInfo = DocHitInfo(document_id);
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
@@ -394,19 +347,22 @@ TEST_P(ScorerTest, ShouldGetCorrectUsageCountScoreForType2) {
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT, GetParam()),
- /*default_score=*/0, document_store(), schema_store()));
+ /*default_score=*/0, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer2,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::USAGE_TYPE2_COUNT, GetParam()),
- /*default_score=*/0, document_store(), schema_store()));
+ /*default_score=*/0, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer3,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT, GetParam()),
- /*default_score=*/0, document_store(), schema_store()));
+ /*default_score=*/0, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
DocHitInfo docHitInfo = DocHitInfo(document_id);
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
@@ -441,19 +397,22 @@ TEST_P(ScorerTest, ShouldGetCorrectUsageCountScoreForType3) {
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT, GetParam()),
- /*default_score=*/0, document_store(), schema_store()));
+ /*default_score=*/0, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer2,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::USAGE_TYPE2_COUNT, GetParam()),
- /*default_score=*/0, document_store(), schema_store()));
+ /*default_score=*/0, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer3,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT, GetParam()),
- /*default_score=*/0, document_store(), schema_store()));
+ /*default_score=*/0, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
DocHitInfo docHitInfo = DocHitInfo(document_id);
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
@@ -490,7 +449,8 @@ TEST_P(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType1) {
USAGE_TYPE1_LAST_USED_TIMESTAMP,
GetParam()),
/*default_score=*/0, document_store(),
- schema_store()));
+ schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer2,
scorer_factory::Create(CreateScoringSpecForRankingStrategy(
@@ -498,7 +458,8 @@ TEST_P(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType1) {
USAGE_TYPE2_LAST_USED_TIMESTAMP,
GetParam()),
/*default_score=*/0, document_store(),
- schema_store()));
+ schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer3,
scorer_factory::Create(CreateScoringSpecForRankingStrategy(
@@ -506,7 +467,8 @@ TEST_P(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType1) {
USAGE_TYPE3_LAST_USED_TIMESTAMP,
GetParam()),
/*default_score=*/0, document_store(),
- schema_store()));
+ schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
DocHitInfo docHitInfo = DocHitInfo(document_id);
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
@@ -559,7 +521,8 @@ TEST_P(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType2) {
USAGE_TYPE1_LAST_USED_TIMESTAMP,
GetParam()),
/*default_score=*/0, document_store(),
- schema_store()));
+ schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer2,
scorer_factory::Create(CreateScoringSpecForRankingStrategy(
@@ -567,7 +530,8 @@ TEST_P(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType2) {
USAGE_TYPE2_LAST_USED_TIMESTAMP,
GetParam()),
/*default_score=*/0, document_store(),
- schema_store()));
+ schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer3,
scorer_factory::Create(CreateScoringSpecForRankingStrategy(
@@ -575,7 +539,8 @@ TEST_P(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType2) {
USAGE_TYPE3_LAST_USED_TIMESTAMP,
GetParam()),
/*default_score=*/0, document_store(),
- schema_store()));
+ schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
DocHitInfo docHitInfo = DocHitInfo(document_id);
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
@@ -628,7 +593,8 @@ TEST_P(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType3) {
USAGE_TYPE1_LAST_USED_TIMESTAMP,
GetParam()),
/*default_score=*/0, document_store(),
- schema_store()));
+ schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer2,
scorer_factory::Create(CreateScoringSpecForRankingStrategy(
@@ -636,7 +602,8 @@ TEST_P(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType3) {
USAGE_TYPE2_LAST_USED_TIMESTAMP,
GetParam()),
/*default_score=*/0, document_store(),
- schema_store()));
+ schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer3,
scorer_factory::Create(CreateScoringSpecForRankingStrategy(
@@ -644,7 +611,8 @@ TEST_P(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType3) {
USAGE_TYPE3_LAST_USED_TIMESTAMP,
GetParam()),
/*default_score=*/0, document_store(),
- schema_store()));
+ schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
DocHitInfo docHitInfo = DocHitInfo(document_id);
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
@@ -683,7 +651,8 @@ TEST_P(ScorerTest, NoScorerShouldAlwaysReturnDefaultScore) {
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::NONE, GetParam()),
- /*default_score=*/3, document_store(), schema_store()));
+ /*default_score=*/3, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
DocHitInfo docHitInfo1 = DocHitInfo(/*document_id_in=*/0);
DocHitInfo docHitInfo2 = DocHitInfo(/*document_id_in=*/1);
@@ -696,7 +665,8 @@ TEST_P(ScorerTest, NoScorerShouldAlwaysReturnDefaultScore) {
scorer, scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::NONE, GetParam()),
- /*default_score=*/111, document_store(), schema_store()));
+ /*default_score=*/111, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
docHitInfo1 = DocHitInfo(/*document_id_in=*/4);
docHitInfo2 = DocHitInfo(/*document_id_in=*/5);
@@ -725,7 +695,8 @@ TEST_P(ScorerTest, ShouldScaleUsageTimestampScoreForMaxTimestamp) {
USAGE_TYPE1_LAST_USED_TIMESTAMP,
GetParam()),
/*default_score=*/0, document_store(),
- schema_store()));
+ schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
DocHitInfo docHitInfo = DocHitInfo(document_id);
// Create usage report for the maximum allowable timestamp.
diff --git a/icing/scoring/scoring-processor.cc b/icing/scoring/scoring-processor.cc
index 571a112..8284426 100644
--- a/icing/scoring/scoring-processor.cc
+++ b/icing/scoring/scoring-processor.cc
@@ -43,7 +43,9 @@ constexpr double kDefaultScoreInAscendingOrder =
libtextclassifier3::StatusOr<std::unique_ptr<ScoringProcessor>>
ScoringProcessor::Create(const ScoringSpecProto& scoring_spec,
const DocumentStore* document_store,
- const SchemaStore* schema_store) {
+ const SchemaStore* schema_store,
+ int64_t current_time_ms,
+ const JoinChildrenFetcher* join_children_fetcher) {
ICING_RETURN_ERROR_IF_NULL(document_store);
ICING_RETURN_ERROR_IF_NULL(schema_store);
@@ -56,7 +58,8 @@ ScoringProcessor::Create(const ScoringSpecProto& scoring_spec,
is_descending_order
? kDefaultScoreInDescendingOrder
: kDefaultScoreInAscendingOrder,
- document_store, schema_store));
+ document_store, schema_store, current_time_ms,
+ join_children_fetcher));
// Using `new` to access a non-public constructor.
return std::unique_ptr<ScoringProcessor>(
new ScoringProcessor(std::move(scorer)));
diff --git a/icing/scoring/scoring-processor.h b/icing/scoring/scoring-processor.h
index e7d09b1..e9efda7 100644
--- a/icing/scoring/scoring-processor.h
+++ b/icing/scoring/scoring-processor.h
@@ -21,6 +21,7 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/join/join-children-fetcher.h"
#include "icing/proto/scoring.pb.h"
#include "icing/scoring/scored-document-hit.h"
#include "icing/scoring/scorer.h"
@@ -41,7 +42,8 @@ class ScoringProcessor {
// FAILED_PRECONDITION on any null pointer input
static libtextclassifier3::StatusOr<std::unique_ptr<ScoringProcessor>> Create(
const ScoringSpecProto& scoring_spec, const DocumentStore* document_store,
- const SchemaStore* schema_store);
+ const SchemaStore* schema_store, int64_t current_time_ms,
+ const JoinChildrenFetcher* join_children_fetcher = nullptr);
// Assigns scores to DocHitInfos from the given DocHitInfoIterator and returns
// a vector of ScoredDocumentHits. The size of results is no more than
diff --git a/icing/scoring/scoring-processor_test.cc b/icing/scoring/scoring-processor_test.cc
index 7e4ca1d..deddff8 100644
--- a/icing/scoring/scoring-processor_test.cc
+++ b/icing/scoring/scoring-processor_test.cc
@@ -62,8 +62,14 @@ class ScoringProcessorTest
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::Create(
+ &filesystem_, doc_store_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
// Creates a simple email schema
@@ -88,7 +94,9 @@ class ScoringProcessorTest
.SetDataType(TYPE_STRING)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- ICING_ASSERT_OK(schema_store_->SetSchema(test_email_schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ test_email_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
}
void TearDown() override {
@@ -101,6 +109,8 @@ class ScoringProcessorTest
SchemaStore* schema_store() { return schema_store_.get(); }
+ const FakeClock& fake_clock() const { return fake_clock_; }
+
private:
const std::string test_dir_;
const std::string doc_store_dir_;
@@ -144,7 +154,7 @@ CreateAndInsertsDocumentsWithScores(DocumentStore* document_store,
}
UsageReport CreateUsageReport(std::string name_space, std::string uri,
- int64 timestamp_ms,
+ int64_t timestamp_ms,
UsageReport::UsageType usage_type) {
UsageReport usage_report;
usage_report.set_document_namespace(name_space);
@@ -177,23 +187,27 @@ PropertyWeight CreatePropertyWeight(std::string path, double weight) {
TEST_F(ScoringProcessorTest, CreationWithNullDocumentStoreShouldFail) {
ScoringSpecProto spec_proto;
- EXPECT_THAT(ScoringProcessor::Create(spec_proto, /*document_store=*/nullptr,
- schema_store()),
+ EXPECT_THAT(ScoringProcessor::Create(
+ spec_proto, /*document_store=*/nullptr, schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
TEST_F(ScoringProcessorTest, CreationWithNullSchemaStoreShouldFail) {
ScoringSpecProto spec_proto;
- EXPECT_THAT(ScoringProcessor::Create(spec_proto, document_store(),
- /*schema_store=*/nullptr),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(
+ ScoringProcessor::Create(spec_proto, document_store(),
+ /*schema_store=*/nullptr,
+ fake_clock().GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
TEST_P(ScoringProcessorTest, ShouldCreateInstance) {
ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam());
ICING_EXPECT_OK(
- ScoringProcessor::Create(spec_proto, document_store(), schema_store()));
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
}
TEST_P(ScoringProcessorTest, ShouldHandleEmptyDocHitIterator) {
@@ -208,7 +222,8 @@ TEST_P(ScoringProcessorTest, ShouldHandleEmptyDocHitIterator) {
// Creates a ScoringProcessor
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(spec_proto, document_store(), schema_store()));
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/5),
@@ -234,7 +249,8 @@ TEST_P(ScoringProcessorTest, ShouldHandleNonPositiveNumToScore) {
// Creates a ScoringProcessor
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(spec_proto, document_store(), schema_store()));
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/-1),
@@ -264,7 +280,8 @@ TEST_P(ScoringProcessorTest, ShouldRespectNumToScore) {
// Creates a ScoringProcessor
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(spec_proto, document_store(), schema_store()));
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/2),
@@ -296,7 +313,8 @@ TEST_P(ScoringProcessorTest, ShouldScoreByDocumentScore) {
// Creates a ScoringProcessor
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(spec_proto, document_store(), schema_store()));
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/3),
@@ -351,7 +369,8 @@ TEST_P(ScoringProcessorTest,
// Creates a ScoringProcessor
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(spec_proto, document_store(), schema_store()));
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
query_term_iterators;
@@ -420,7 +439,8 @@ TEST_P(ScoringProcessorTest,
// Creates a ScoringProcessor
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(spec_proto, document_store(), schema_store()));
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
query_term_iterators;
@@ -493,7 +513,8 @@ TEST_P(ScoringProcessorTest,
// Creates a ScoringProcessor
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(spec_proto, document_store(), schema_store()));
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
query_term_iterators;
@@ -542,7 +563,8 @@ TEST_P(ScoringProcessorTest,
// Creates a ScoringProcessor
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(spec_proto, document_store(), schema_store()));
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
query_term_iterators;
@@ -607,7 +629,8 @@ TEST_P(ScoringProcessorTest,
// Creates a ScoringProcessor
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(spec_proto, document_store(), schema_store()));
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
query_term_iterators;
@@ -677,7 +700,8 @@ TEST_P(ScoringProcessorTest,
// Creates a ScoringProcessor
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(spec_proto, document_store(), schema_store()));
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
query_term_iterators;
@@ -738,7 +762,8 @@ TEST_P(ScoringProcessorTest,
// Creates a ScoringProcessor with no explicit weights set.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(spec_proto, document_store(), schema_store()));
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
ScoringSpecProto spec_proto_with_weights =
CreateScoringSpecForRankingStrategy(
@@ -754,7 +779,8 @@ TEST_P(ScoringProcessorTest,
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor_with_weights,
ScoringProcessor::Create(spec_proto_with_weights, document_store(),
- schema_store()));
+ schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
query_term_iterators;
@@ -840,7 +866,8 @@ TEST_P(ScoringProcessorTest,
// Creates a ScoringProcessor
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(spec_proto, document_store(), schema_store()));
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
query_term_iterators;
@@ -902,7 +929,8 @@ TEST_P(ScoringProcessorTest, ShouldScoreByCreationTimestamp) {
// Creates a ScoringProcessor which ranks in descending order
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(spec_proto, document_store(), schema_store()));
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/3),
@@ -962,7 +990,8 @@ TEST_P(ScoringProcessorTest, ShouldScoreByUsageCount) {
// Creates a ScoringProcessor which ranks in descending order
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(spec_proto, document_store(), schema_store()));
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/3),
@@ -1022,7 +1051,8 @@ TEST_P(ScoringProcessorTest, ShouldScoreByUsageTimestamp) {
// Creates a ScoringProcessor which ranks in descending order
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(spec_proto, document_store(), schema_store()));
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/3),
@@ -1058,7 +1088,8 @@ TEST_P(ScoringProcessorTest, ShouldHandleNoScores) {
// Creates a ScoringProcessor which ranks in descending order
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(spec_proto, document_store(), schema_store()));
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/4),
ElementsAre(EqualsScoredDocumentHit(scored_document_hit_default),
@@ -1107,7 +1138,8 @@ TEST_P(ScoringProcessorTest, ShouldWrapResultsWhenNoScoring) {
// Creates a ScoringProcessor which ranks in descending order
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(spec_proto, document_store(), schema_store()));
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/3),
diff --git a/icing/scoring/section-weights_test.cc b/icing/scoring/section-weights_test.cc
index 02205f5..28b1797 100644
--- a/icing/scoring/section-weights_test.cc
+++ b/icing/scoring/section-weights_test.cc
@@ -87,7 +87,9 @@ class SectionWeightsTest : public testing::Test {
SchemaProto schema =
SchemaBuilder().AddType(sender_schema).AddType(email_schema).Build();
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
}
void TearDown() override {
diff --git a/icing/store/document-log-creator.cc b/icing/store/document-log-creator.cc
index c9769f2..2abd315 100644
--- a/icing/store/document-log-creator.cc
+++ b/icing/store/document-log-creator.cc
@@ -66,7 +66,8 @@ std::string DocumentLogCreator::GetDocumentLogFilename() {
libtextclassifier3::StatusOr<DocumentLogCreator::CreateResult>
DocumentLogCreator::Create(const Filesystem* filesystem,
- const std::string& base_dir) {
+ const std::string& base_dir,
+ int32_t compression_level) {
bool v0_exists =
filesystem->FileExists(MakeDocumentLogFilenameV0(base_dir).c_str());
bool v1_exists =
@@ -75,7 +76,8 @@ DocumentLogCreator::Create(const Filesystem* filesystem,
bool new_file = false;
int preexisting_file_version = kCurrentVersion;
if (v0_exists && !v1_exists) {
- ICING_RETURN_IF_ERROR(MigrateFromV0ToV1(filesystem, base_dir));
+ ICING_RETURN_IF_ERROR(
+ MigrateFromV0ToV1(filesystem, base_dir, compression_level));
// Need to regenerate derived files since documents may be written to a
// different file offset in the log.
@@ -94,7 +96,9 @@ DocumentLogCreator::Create(const Filesystem* filesystem,
PortableFileBackedProtoLog<DocumentWrapper>::Create(
filesystem, MakeDocumentLogFilenameV1(base_dir),
PortableFileBackedProtoLog<DocumentWrapper>::Options(
- /*compress_in=*/true)));
+ /*compress_in=*/true,
+ PortableFileBackedProtoLog<DocumentWrapper>::kMaxProtoSize,
+ compression_level)));
CreateResult create_result = {std::move(log_create_result),
preexisting_file_version, new_file};
@@ -102,15 +106,15 @@ DocumentLogCreator::Create(const Filesystem* filesystem,
}
libtextclassifier3::Status DocumentLogCreator::MigrateFromV0ToV1(
- const Filesystem* filesystem, const std::string& base_dir) {
+ const Filesystem* filesystem, const std::string& base_dir,
+ int32_t compression_level) {
ICING_VLOG(1) << "Migrating from v0 to v1 document log.";
// Our v0 proto log was non-portable, create it so we can read protos out from
// it.
auto v0_create_result_or = FileBackedProtoLog<DocumentWrapper>::Create(
filesystem, MakeDocumentLogFilenameV0(base_dir),
- FileBackedProtoLog<DocumentWrapper>::Options(
- /*compress_in=*/true));
+ FileBackedProtoLog<DocumentWrapper>::Options(/*compress_in=*/true));
if (!v0_create_result_or.ok()) {
return absl_ports::Annotate(
v0_create_result_or.status(),
@@ -127,7 +131,10 @@ libtextclassifier3::Status DocumentLogCreator::MigrateFromV0ToV1(
PortableFileBackedProtoLog<DocumentWrapper>::Create(
filesystem, MakeDocumentLogFilenameV1(base_dir),
PortableFileBackedProtoLog<DocumentWrapper>::Options(
- /*compress_in=*/true));
+ /*compress_in=*/true,
+ /*max_proto_size_in=*/
+ PortableFileBackedProtoLog<DocumentWrapper>::kMaxProtoSize,
+ /*compression_level_in=*/compression_level));
if (!v1_create_result_or.ok()) {
return absl_ports::Annotate(
v1_create_result_or.status(),
diff --git a/icing/store/document-log-creator.h b/icing/store/document-log-creator.h
index be8feed..0c2794a 100644
--- a/icing/store/document-log-creator.h
+++ b/icing/store/document-log-creator.h
@@ -57,7 +57,8 @@ class DocumentLogCreator {
// CreateResult on success.
// INTERNAL on any I/O error.
static libtextclassifier3::StatusOr<DocumentLogCreator::CreateResult> Create(
- const Filesystem* filesystem, const std::string& base_dir);
+ const Filesystem* filesystem, const std::string& base_dir,
+ int32_t compression_level);
// Returns the filename of the document log, without any directory prefixes.
// Used mainly for testing purposes.
@@ -74,7 +75,8 @@ class DocumentLogCreator {
// INVALID_ARGUMENT if some invalid option was passed to the document log.
// INTERNAL on I/O error.
static libtextclassifier3::Status MigrateFromV0ToV1(
- const Filesystem* filesystem, const std::string& base_dir);
+ const Filesystem* filesystem, const std::string& base_dir,
+ int32_t compression_level);
};
} // namespace lib
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index 9e79790..30de410 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -17,6 +17,7 @@
#include <cstdint>
#include <limits>
#include <memory>
+#include <optional>
#include <string>
#include <string_view>
#include <unordered_map>
@@ -53,11 +54,13 @@
#include "icing/store/document-log-creator.h"
#include "icing/store/dynamic-trie-key-mapper.h"
#include "icing/store/namespace-id.h"
+#include "icing/store/persistent-hash-map-key-mapper.h"
#include "icing/store/usage-store.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/util/clock.h"
#include "icing/util/crc32.h"
#include "icing/util/data-loss.h"
+#include "icing/util/encode-util.h"
#include "icing/util/fingerprint-util.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
@@ -71,6 +74,7 @@ namespace {
// Used in DocumentId mapper to mark a document as deleted
constexpr int64_t kDocDeletedFlag = -1;
constexpr char kDocumentIdMapperFilename[] = "document_id_mapper";
+constexpr char kUriHashMapperWorkingPath[] = "uri_mapper";
constexpr char kDocumentStoreHeaderFilename[] = "document_store_header";
constexpr char kScoreCacheFilename[] = "score_cache";
constexpr char kCorpusScoreCache[] = "corpus_score_cache";
@@ -79,9 +83,17 @@ constexpr char kNamespaceMapperFilename[] = "namespace_mapper";
constexpr char kUsageStoreDirectoryName[] = "usage_store";
constexpr char kCorpusIdMapperFilename[] = "corpus_mapper";
-// Determined through manual testing to allow for 1 million uris. 1 million
-// because we allow up to 1 million DocumentIds.
-constexpr int32_t kUriMapperMaxSize = 36 * 1024 * 1024; // 36 MiB
+// Determined through manual testing to allow for 4 million uris. 4 million
+// because we allow up to 4 million DocumentIds.
+constexpr int32_t kUriDynamicTrieKeyMapperMaxSize =
+ 144 * 1024 * 1024; // 144 MiB
+
+constexpr int32_t kUriHashKeyMapperMaxNumEntries =
+ kMaxDocumentId + 1; // 1 << 22, 4M
+// - Key: namespace_id_str (3 bytes) + fingerprinted_uri (10 bytes) + '\0' (1
+// byte)
+// - Value: DocumentId (4 bytes)
+constexpr int32_t kUriHashKeyMapperKVByteSize = 13 + 1 + sizeof(DocumentId);
// 384 KiB for a DynamicTrieKeyMapper would allow each internal array to have a
// max of 128 KiB for storage.
@@ -98,6 +110,10 @@ std::string MakeHeaderFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kDocumentStoreHeaderFilename);
}
+std::string MakeUriHashMapperWorkingPath(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", kUriHashMapperWorkingPath);
+}
+
std::string MakeDocumentIdMapperFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kDocumentIdMapperFilename);
}
@@ -126,18 +142,23 @@ std::string MakeCorpusMapperFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kCorpusIdMapperFilename);
}
-// TODO(adorokhine): This class internally uses an 8-byte fingerprint of the
-// Key and stores the key/value in a file-backed-trie that adds an ~80 byte
-// overhead per key. As we know that these fingerprints are always 8-bytes in
-// length and that they're random, we might be able to store them more
-// compactly.
-std::string MakeFingerprint(std::string_view field1, std::string_view field2) {
- // Using a 64-bit fingerprint to represent the key could lead to collisions.
- // But, even with 200K unique keys, the probability of collision is about
- // one-in-a-billion (https://en.wikipedia.org/wiki/Birthday_attack).
- uint64_t fprint =
- tc3farmhash::Fingerprint64(absl_ports::StrCat(field1, field2));
- return fingerprint_util::GetFingerprintString(fprint);
+// This function will encode a namespace id into a fixed 3 bytes string.
+std::string EncodeNamespaceId(NamespaceId namespace_id) {
+ // encoding should be 1 to 3 bytes based on the value of namespace_id.
+ std::string encoding = encode_util::EncodeIntToCString(namespace_id);
+ // Make encoding to fixed 3 bytes.
+ while (encoding.size() < 3) {
+ // DynamicTrie cannot handle keys with 0 as bytes, so we append it using 1,
+ // just like what we do in encode_util::EncodeIntToCString.
+ //
+ // The reason that this works is because DecodeIntToString decodes a byte
+ // value of 0x01 as 0x00. When EncodeIntToCString returns a namespaceid
+ // encoding that is less than 3 bytes, it means that the id contains
+ // unencoded leading 0x00. So here we're explicitly encoding those bytes as
+ // 0x01.
+ encoding.push_back(1);
+ }
+ return encoding;
}
int64_t CalculateExpirationTimestampMs(int64_t creation_timestamp_ms,
@@ -200,17 +221,76 @@ std::unordered_map<NamespaceId, std::string> GetNamespaceIdsToNamespaces(
return namespace_ids_to_namespaces;
}
+libtextclassifier3::StatusOr<std::unique_ptr<
+ KeyMapper<DocumentId, fingerprint_util::FingerprintStringFormatter>>>
+CreateUriMapper(const Filesystem& filesystem, const std::string& base_dir,
+ bool pre_mapping_fbv, bool use_persistent_hash_map) {
+ std::string uri_hash_mapper_working_path =
+ MakeUriHashMapperWorkingPath(base_dir);
+ // Due to historic issue, we use document store's base_dir directly as
+ // DynamicTrieKeyMapper's working directory for uri mapper.
+ // DynamicTrieKeyMapper also creates a subdirectory "key_mapper_dir", so the
+ // actual files will be put under "<base_dir>/key_mapper_dir/".
+ bool dynamic_trie_key_mapper_dir_exists = filesystem.DirectoryExists(
+ absl_ports::StrCat(base_dir, "/key_mapper_dir").c_str());
+ bool persistent_hash_map_dir_exists =
+ filesystem.DirectoryExists(uri_hash_mapper_working_path.c_str());
+ if ((use_persistent_hash_map && dynamic_trie_key_mapper_dir_exists) ||
+ (!use_persistent_hash_map && persistent_hash_map_dir_exists)) {
+ // Return a failure here so that the caller can properly delete and rebuild
+ // this component.
+ return absl_ports::FailedPreconditionError("Key mapper type mismatch");
+ }
+
+ if (use_persistent_hash_map) {
+ return PersistentHashMapKeyMapper<
+ DocumentId, fingerprint_util::FingerprintStringFormatter>::
+ Create(filesystem, std::move(uri_hash_mapper_working_path),
+ pre_mapping_fbv,
+ /*max_num_entries=*/kUriHashKeyMapperMaxNumEntries,
+ /*average_kv_byte_size=*/kUriHashKeyMapperKVByteSize);
+ } else {
+ return DynamicTrieKeyMapper<DocumentId,
+ fingerprint_util::FingerprintStringFormatter>::
+ Create(filesystem, base_dir, kUriDynamicTrieKeyMapperMaxSize);
+ }
+}
+
} // namespace
+std::string DocumentStore::MakeFingerprint(
+ NamespaceId namespace_id, std::string_view namespace_,
+ std::string_view uri_or_schema) const {
+ if (!namespace_id_fingerprint_) {
+ // Using a 64-bit fingerprint to represent the key could lead to collisions.
+ // But, even with 200K unique keys, the probability of collision is about
+ // one-in-a-billion (https://en.wikipedia.org/wiki/Birthday_attack).
+ uint64_t fprint = tc3farmhash::Fingerprint64(
+ absl_ports::StrCat(namespace_, uri_or_schema));
+ return fingerprint_util::GetFingerprintString(fprint);
+ } else {
+ return absl_ports::StrCat(EncodeNamespaceId(namespace_id),
+ encode_util::EncodeIntToCString(
+ tc3farmhash::Fingerprint64(uri_or_schema)));
+ }
+}
+
DocumentStore::DocumentStore(const Filesystem* filesystem,
const std::string_view base_dir,
const Clock* clock,
- const SchemaStore* schema_store)
+ const SchemaStore* schema_store,
+ bool namespace_id_fingerprint,
+ bool pre_mapping_fbv, bool use_persistent_hash_map,
+ int32_t compression_level)
: filesystem_(filesystem),
base_dir_(base_dir),
clock_(*clock),
schema_store_(schema_store),
- document_validator_(schema_store) {}
+ document_validator_(schema_store),
+ namespace_id_fingerprint_(namespace_id_fingerprint),
+ pre_mapping_fbv_(pre_mapping_fbv),
+ use_persistent_hash_map_(use_persistent_hash_map),
+ compression_level_(compression_level) {}
libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
const DocumentProto& document, int32_t num_tokens,
@@ -237,14 +317,16 @@ DocumentStore::~DocumentStore() {
libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create(
const Filesystem* filesystem, const std::string& base_dir,
const Clock* clock, const SchemaStore* schema_store,
- bool force_recovery_and_revalidate_documents,
- InitializeStatsProto* initialize_stats) {
+ bool force_recovery_and_revalidate_documents, bool namespace_id_fingerprint,
+ bool pre_mapping_fbv, bool use_persistent_hash_map,
+ int32_t compression_level, InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(filesystem);
ICING_RETURN_ERROR_IF_NULL(clock);
ICING_RETURN_ERROR_IF_NULL(schema_store);
- auto document_store = std::unique_ptr<DocumentStore>(
- new DocumentStore(filesystem, base_dir, clock, schema_store));
+ auto document_store = std::unique_ptr<DocumentStore>(new DocumentStore(
+ filesystem, base_dir, clock, schema_store, namespace_id_fingerprint,
+ pre_mapping_fbv, use_persistent_hash_map, compression_level));
ICING_ASSIGN_OR_RETURN(
DataLoss data_loss,
document_store->Initialize(force_recovery_and_revalidate_documents,
@@ -256,10 +338,53 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create(
return create_result;
}
+/* static */ libtextclassifier3::Status DocumentStore::DiscardDerivedFiles(
+ const Filesystem* filesystem, const std::string& base_dir) {
+ // Header
+ const std::string header_filename = MakeHeaderFilename(base_dir);
+ if (!filesystem->DeleteFile(MakeHeaderFilename(base_dir).c_str())) {
+ return absl_ports::InternalError("Couldn't delete header file");
+ }
+
+ // Document key mapper. Doesn't hurt to delete both dynamic trie and
+ // persistent hash map without checking.
+ ICING_RETURN_IF_ERROR(
+ DynamicTrieKeyMapper<DocumentId>::Delete(*filesystem, base_dir));
+ ICING_RETURN_IF_ERROR(PersistentHashMapKeyMapper<DocumentId>::Delete(
+ *filesystem, MakeUriHashMapperWorkingPath(base_dir)));
+
+ // Document id mapper
+ ICING_RETURN_IF_ERROR(FileBackedVector<int64_t>::Delete(
+ *filesystem, MakeDocumentIdMapperFilename(base_dir)));
+
+ // Document associated score cache
+ ICING_RETURN_IF_ERROR(FileBackedVector<DocumentAssociatedScoreData>::Delete(
+ *filesystem, MakeScoreCacheFilename(base_dir)));
+
+ // Filter cache
+ ICING_RETURN_IF_ERROR(FileBackedVector<DocumentFilterData>::Delete(
+ *filesystem, MakeFilterCacheFilename(base_dir)));
+
+ // Namespace mapper
+ ICING_RETURN_IF_ERROR(DynamicTrieKeyMapper<NamespaceId>::Delete(
+ *filesystem, MakeNamespaceMapperFilename(base_dir)));
+
+ // Corpus mapper
+ ICING_RETURN_IF_ERROR(DynamicTrieKeyMapper<CorpusId>::Delete(
+ *filesystem, MakeCorpusMapperFilename(base_dir)));
+
+ // Corpus associated score cache
+ ICING_RETURN_IF_ERROR(FileBackedVector<CorpusAssociatedScoreData>::Delete(
+ *filesystem, MakeCorpusScoreCache(base_dir)));
+
+ return libtextclassifier3::Status::OK;
+}
+
libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
bool force_recovery_and_revalidate_documents,
InitializeStatsProto* initialize_stats) {
- auto create_result_or = DocumentLogCreator::Create(filesystem_, base_dir_);
+ auto create_result_or =
+ DocumentLogCreator::Create(filesystem_, base_dir_, compression_level_);
// TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
// that can support error logging.
@@ -352,18 +477,16 @@ libtextclassifier3::Status DocumentStore::InitializeExistingDerivedFiles() {
absl_ports::StrCat("Couldn't read: ", MakeHeaderFilename(base_dir_)));
}
- if (header.magic != DocumentStore::Header::kMagic) {
+ if (header.magic !=
+ DocumentStore::Header::GetCurrentMagic(namespace_id_fingerprint_)) {
return absl_ports::InternalError(absl_ports::StrCat(
"Invalid header kMagic for file: ", MakeHeaderFilename(base_dir_)));
}
// TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
// that can support error logging.
- auto document_key_mapper_or = DynamicTrieKeyMapper<
- DocumentId,
- fingerprint_util::FingerprintStringFormatter>::Create(*filesystem_,
- base_dir_,
- kUriMapperMaxSize);
+ auto document_key_mapper_or = CreateUriMapper(
+ *filesystem_, base_dir_, pre_mapping_fbv_, use_persistent_hash_map_);
if (!document_key_mapper_or.ok()) {
ICING_LOG(ERROR) << document_key_mapper_or.status().error_message()
<< "Failed to initialize KeyMapper";
@@ -489,10 +612,16 @@ libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles(
continue;
}
}
+
+ ICING_ASSIGN_OR_RETURN(
+ NamespaceId namespace_id,
+ namespace_mapper_->GetOrPut(document_wrapper.document().namespace_(),
+ namespace_mapper_->num_keys()));
+
// Updates key mapper and document_id mapper with the new document
DocumentId new_document_id = document_id_mapper_->num_elements();
ICING_RETURN_IF_ERROR(document_key_mapper_->Put(
- MakeFingerprint(document_wrapper.document().namespace_(),
+ MakeFingerprint(namespace_id, document_wrapper.document().namespace_(),
document_wrapper.document().uri()),
new_document_id));
ICING_RETURN_IF_ERROR(
@@ -517,14 +646,9 @@ libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles(
schema_type_id = schema_type_id_or.ValueOrDie();
}
- ICING_ASSIGN_OR_RETURN(
- NamespaceId namespace_id,
- namespace_mapper_->GetOrPut(document_wrapper.document().namespace_(),
- namespace_mapper_->num_keys()));
-
// Update corpus maps
std::string corpus =
- MakeFingerprint(document_wrapper.document().namespace_(),
+ MakeFingerprint(namespace_id, document_wrapper.document().namespace_(),
document_wrapper.document().schema());
ICING_ASSIGN_OR_RETURN(
CorpusId corpusId,
@@ -575,6 +699,10 @@ libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles(
}
libtextclassifier3::Status DocumentStore::ResetDocumentKeyMapper() {
+ // Only one type of KeyMapper (either DynamicTrieKeyMapper or
+ // PersistentHashMapKeyMapper) will actually exist at any moment, but it is ok
+ // to call Delete() for both since Delete() returns OK if any of them doesn't
+ // exist.
// TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
document_key_mapper_.reset();
// TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
@@ -583,17 +711,21 @@ libtextclassifier3::Status DocumentStore::ResetDocumentKeyMapper() {
DynamicTrieKeyMapper<DocumentId>::Delete(*filesystem_, base_dir_);
if (!status.ok()) {
ICING_LOG(ERROR) << status.error_message()
- << "Failed to delete old key mapper";
+ << "Failed to delete old dynamic trie key mapper";
+ return status;
+ }
+ status = PersistentHashMapKeyMapper<DocumentId>::Delete(
+ *filesystem_, MakeUriHashMapperWorkingPath(base_dir_));
+ if (!status.ok()) {
+ ICING_LOG(ERROR) << status.error_message()
+ << "Failed to delete old persistent hash map key mapper";
return status;
}
// TODO(b/216487496): Implement a more robust version of TC_ASSIGN_OR_RETURN
// that can support error logging.
- auto document_key_mapper_or = DynamicTrieKeyMapper<
- DocumentId,
- fingerprint_util::FingerprintStringFormatter>::Create(*filesystem_,
- base_dir_,
- kUriMapperMaxSize);
+ auto document_key_mapper_or = CreateUriMapper(
+ *filesystem_, base_dir_, pre_mapping_fbv_, use_persistent_hash_map_);
if (!document_key_mapper_or.ok()) {
ICING_LOG(ERROR) << document_key_mapper_or.status().error_message()
<< "Failed to re-init key mapper";
@@ -824,7 +956,8 @@ bool DocumentStore::HeaderExists() {
libtextclassifier3::Status DocumentStore::UpdateHeader(const Crc32& checksum) {
// Write the header
DocumentStore::Header header;
- header.magic = DocumentStore::Header::kMagic;
+ header.magic =
+ DocumentStore::Header::GetCurrentMagic(namespace_id_fingerprint_);
header.checksum = checksum.Get();
// This should overwrite the header.
@@ -892,20 +1025,21 @@ libtextclassifier3::StatusOr<DocumentId> DocumentStore::InternalPut(
"some space.");
}
- ICING_RETURN_IF_ERROR(document_key_mapper_->Put(
- MakeFingerprint(name_space, uri), new_document_id));
- ICING_RETURN_IF_ERROR(document_id_mapper_->Set(new_document_id, file_offset));
-
// Update namespace maps
ICING_ASSIGN_OR_RETURN(
NamespaceId namespace_id,
namespace_mapper_->GetOrPut(name_space, namespace_mapper_->num_keys()));
+ // Updates key mapper and document_id mapper
+ ICING_RETURN_IF_ERROR(document_key_mapper_->Put(
+ MakeFingerprint(namespace_id, name_space, uri), new_document_id));
+ ICING_RETURN_IF_ERROR(document_id_mapper_->Set(new_document_id, file_offset));
+
// Update corpus maps
- ICING_ASSIGN_OR_RETURN(
- CorpusId corpusId,
- corpus_mapper_->GetOrPut(MakeFingerprint(name_space, schema),
- corpus_mapper_->num_keys()));
+ ICING_ASSIGN_OR_RETURN(CorpusId corpusId,
+ corpus_mapper_->GetOrPut(
+ MakeFingerprint(namespace_id, name_space, schema),
+ corpus_mapper_->num_keys()));
ICING_ASSIGN_OR_RETURN(CorpusAssociatedScoreData scoring_data,
GetCorpusAssociatedScoreDataToUpdate(corpusId));
@@ -937,7 +1071,8 @@ libtextclassifier3::StatusOr<DocumentId> DocumentStore::InternalPut(
// Delete the old document. It's fine if it's not found since it might have
// been deleted previously.
- auto delete_status = Delete(old_document_id);
+ auto delete_status =
+ Delete(old_document_id, clock_.GetSystemTimeMilliseconds());
if (!delete_status.ok() && !absl_ports::IsNotFound(delete_status)) {
// Real error, pass it up.
return delete_status;
@@ -982,7 +1117,9 @@ libtextclassifier3::StatusOr<DocumentProto> DocumentStore::Get(
libtextclassifier3::StatusOr<DocumentProto> DocumentStore::Get(
DocumentId document_id, bool clear_internal_fields) const {
- auto document_filter_data_optional_ = GetAliveDocumentFilterData(document_id);
+ int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
+ auto document_filter_data_optional_ =
+ GetAliveDocumentFilterData(document_id, current_time_ms);
if (!document_filter_data_optional_) {
// The document doesn't exist. Let's check if the document id is invalid, we
// will return InvalidArgumentError. Otherwise we should return NOT_FOUND
@@ -1023,17 +1160,21 @@ libtextclassifier3::StatusOr<DocumentProto> DocumentStore::Get(
libtextclassifier3::StatusOr<DocumentId> DocumentStore::GetDocumentId(
const std::string_view name_space, const std::string_view uri) const {
- auto document_id_or =
- document_key_mapper_->Get(MakeFingerprint(name_space, uri));
- if (!document_id_or.ok()) {
- return absl_ports::Annotate(
- document_id_or.status(),
- absl_ports::StrCat("Failed to find DocumentId by key: ", name_space,
- ", ", uri));
+ auto namespace_id_or = namespace_mapper_->Get(name_space);
+ libtextclassifier3::Status status = namespace_id_or.status();
+ if (status.ok()) {
+ NamespaceId namespace_id = namespace_id_or.ValueOrDie();
+ auto document_id_or = document_key_mapper_->Get(
+ MakeFingerprint(namespace_id, name_space, uri));
+ status = document_id_or.status();
+ if (status.ok()) {
+ // Guaranteed to have a DocumentId now
+ return document_id_or.ValueOrDie();
+ }
}
-
- // Guaranteed to have a DocumentId now
- return document_id_or.ValueOrDie();
+ return absl_ports::Annotate(
+ status, absl_ports::StrCat(
+ "Failed to find DocumentId by key: ", name_space, ", ", uri));
}
std::vector<std::string> DocumentStore::GetAllNamespaces() const {
@@ -1041,6 +1182,7 @@ std::vector<std::string> DocumentStore::GetAllNamespaces() const {
GetNamespaceIdsToNamespaces(namespace_mapper_.get());
std::unordered_set<NamespaceId> existing_namespace_ids;
+ int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
for (DocumentId document_id = 0; document_id < filter_cache_->num_elements();
++document_id) {
// filter_cache_->Get can only fail if document_id is < 0
@@ -1053,7 +1195,7 @@ std::vector<std::string> DocumentStore::GetAllNamespaces() const {
}
const DocumentFilterData* data = status_or_data.ValueOrDie();
- if (GetAliveDocumentFilterData(document_id)) {
+ if (GetAliveDocumentFilterData(document_id, current_time_ms)) {
existing_namespace_ids.insert(data->namespace_id());
}
}
@@ -1067,14 +1209,11 @@ std::vector<std::string> DocumentStore::GetAllNamespaces() const {
}
std::optional<DocumentFilterData> DocumentStore::GetAliveDocumentFilterData(
- DocumentId document_id) const {
- if (!IsDocumentIdValid(document_id)) {
- return std::nullopt;
- }
+ DocumentId document_id, int64_t current_time_ms) const {
if (IsDeleted(document_id)) {
return std::nullopt;
}
- return GetNonExpiredDocumentFilterData(document_id);
+ return GetNonExpiredDocumentFilterData(document_id, current_time_ms);
}
bool DocumentStore::IsDeleted(DocumentId document_id) const {
@@ -1094,7 +1233,8 @@ bool DocumentStore::IsDeleted(DocumentId document_id) const {
// Returns DocumentFilterData if the document is not expired. Otherwise,
// std::nullopt.
std::optional<DocumentFilterData>
-DocumentStore::GetNonExpiredDocumentFilterData(DocumentId document_id) const {
+DocumentStore::GetNonExpiredDocumentFilterData(DocumentId document_id,
+ int64_t current_time_ms) const {
auto filter_data_or = filter_cache_->GetCopy(document_id);
if (!filter_data_or.ok()) {
// This would only happen if document_id is out of range of the
@@ -1107,15 +1247,15 @@ DocumentStore::GetNonExpiredDocumentFilterData(DocumentId document_id) const {
DocumentFilterData document_filter_data = filter_data_or.ValueOrDie();
// Check if it's past the expiration time
- if (clock_.GetSystemTimeMilliseconds() >=
- document_filter_data.expiration_timestamp_ms()) {
+ if (current_time_ms >= document_filter_data.expiration_timestamp_ms()) {
return std::nullopt;
}
return document_filter_data;
}
libtextclassifier3::Status DocumentStore::Delete(
- const std::string_view name_space, const std::string_view uri) {
+ const std::string_view name_space, const std::string_view uri,
+ int64_t current_time_ms) {
// Try to get the DocumentId first
auto document_id_or = GetDocumentId(name_space, uri);
if (!document_id_or.ok()) {
@@ -1124,11 +1264,13 @@ libtextclassifier3::Status DocumentStore::Delete(
absl_ports::StrCat("Failed to delete Document. namespace: ", name_space,
", uri: ", uri));
}
- return Delete(document_id_or.ValueOrDie());
+ return Delete(document_id_or.ValueOrDie(), current_time_ms);
}
-libtextclassifier3::Status DocumentStore::Delete(DocumentId document_id) {
- auto document_filter_data_optional_ = GetAliveDocumentFilterData(document_id);
+libtextclassifier3::Status DocumentStore::Delete(DocumentId document_id,
+ int64_t current_time_ms) {
+ auto document_filter_data_optional_ =
+ GetAliveDocumentFilterData(document_id, current_time_ms);
if (!document_filter_data_optional_) {
// The document doesn't exist. We should return InvalidArgumentError if the
// document id is invalid. Otherwise we should return NOT_FOUND error.
@@ -1158,7 +1300,9 @@ libtextclassifier3::StatusOr<NamespaceId> DocumentStore::GetNamespaceId(
libtextclassifier3::StatusOr<CorpusId> DocumentStore::GetCorpusId(
const std::string_view name_space, const std::string_view schema) const {
- return corpus_mapper_->Get(MakeFingerprint(name_space, schema));
+ ICING_ASSIGN_OR_RETURN(NamespaceId namespace_id,
+ namespace_mapper_->Get(name_space));
+ return corpus_mapper_->Get(MakeFingerprint(namespace_id, name_space, schema));
}
libtextclassifier3::StatusOr<int32_t> DocumentStore::GetResultGroupingEntryId(
@@ -1213,24 +1357,16 @@ libtextclassifier3::StatusOr<int32_t> DocumentStore::GetResultGroupingEntryId(
libtextclassifier3::StatusOr<DocumentAssociatedScoreData>
DocumentStore::GetDocumentAssociatedScoreData(DocumentId document_id) const {
- if (!GetAliveDocumentFilterData(document_id)) {
- return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
- "Can't get usage scores, document id '%d' doesn't exist", document_id));
- }
-
auto score_data_or = score_cache_->GetCopy(document_id);
if (!score_data_or.ok()) {
ICING_LOG(ERROR) << " while trying to access DocumentId " << document_id
<< " from score_cache_";
- return score_data_or.status();
+ return absl_ports::NotFoundError(
+ std::move(score_data_or).status().error_message());
}
DocumentAssociatedScoreData document_associated_score_data =
std::move(score_data_or).ValueOrDie();
- if (document_associated_score_data.document_score() < 0) {
- // An negative / invalid score means that the score data has been deleted.
- return absl_ports::NotFoundError("Document score data not found.");
- }
return document_associated_score_data;
}
@@ -1262,13 +1398,25 @@ DocumentStore::GetCorpusAssociatedScoreDataToUpdate(CorpusId corpus_id) const {
return corpus_scoring_data_or.status();
}
-libtextclassifier3::StatusOr<UsageStore::UsageScores>
-DocumentStore::GetUsageScores(DocumentId document_id) const {
- if (!GetAliveDocumentFilterData(document_id)) {
- return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
- "Can't get usage scores, document id '%d' doesn't exist", document_id));
+// TODO(b/273826815): Decide on and adopt a consistent pattern for handling
+// NOT_FOUND 'errors' returned by our internal classes.
+std::optional<UsageStore::UsageScores> DocumentStore::GetUsageScores(
+ DocumentId document_id, int64_t current_time_ms) const {
+ std::optional<DocumentFilterData> opt =
+ GetAliveDocumentFilterData(document_id, current_time_ms);
+ if (!opt) {
+ return std::nullopt;
+ }
+ if (document_id >= usage_store_->num_elements()) {
+ return std::nullopt;
}
- return usage_store_->GetUsageScores(document_id);
+ auto usage_scores_or = usage_store_->GetUsageScores(document_id);
+ if (!usage_scores_or.ok()) {
+ ICING_LOG(ERROR) << "Error retrieving usage for " << document_id << ": "
+ << usage_scores_or.status().error_message();
+ return std::nullopt;
+ }
+ return std::move(usage_scores_or).ValueOrDie();
}
libtextclassifier3::Status DocumentStore::ReportUsage(
@@ -1279,7 +1427,8 @@ libtextclassifier3::Status DocumentStore::ReportUsage(
// We can use the internal version here because we got our document_id from
// our internal data structures. We would have thrown some error if the
// namespace and/or uri were incorrect.
- if (!GetAliveDocumentFilterData(document_id)) {
+ int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
+ if (!GetAliveDocumentFilterData(document_id, current_time_ms)) {
// Document was probably deleted or expired.
return absl_ports::NotFoundError(absl_ports::StrCat(
"Couldn't report usage on a nonexistent document: (namespace: '",
@@ -1355,6 +1504,7 @@ libtextclassifier3::StatusOr<int> DocumentStore::BatchDelete(
// Traverse FilterCache and delete all docs that match namespace_id and
// schema_type_id.
+ int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
for (DocumentId document_id = 0; document_id < filter_cache_->num_elements();
++document_id) {
// filter_cache_->Get can only fail if document_id is < 0
@@ -1382,7 +1532,8 @@ libtextclassifier3::StatusOr<int> DocumentStore::BatchDelete(
// The document has the desired namespace and schema type, it either
// exists or has expired.
- libtextclassifier3::Status delete_status = Delete(document_id);
+ libtextclassifier3::Status delete_status =
+ Delete(document_id, current_time_ms);
if (absl_ports::IsNotFound(delete_status)) {
continue;
} else if (!delete_status.ok()) {
@@ -1454,6 +1605,7 @@ DocumentStorageInfoProto DocumentStore::CalculateDocumentStatusCounts(
std::unordered_map<std::string, NamespaceStorageInfoProto>
namespace_to_storage_info;
+ int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
for (DocumentId document_id = 0;
document_id < document_id_mapper_->num_elements(); ++document_id) {
// Check if it's deleted first.
@@ -1497,7 +1649,7 @@ DocumentStorageInfoProto DocumentStore::CalculateDocumentStatusCounts(
UsageStore::UsageScores usage_scores = usage_scores_or.ValueOrDie();
// Update our stats
- if (!GetNonExpiredDocumentFilterData(document_id)) {
+ if (!GetNonExpiredDocumentFilterData(document_id, current_time_ms)) {
++total_num_expired;
namespace_storage_info.set_num_expired_documents(
namespace_storage_info.num_expired_documents() + 1);
@@ -1560,6 +1712,7 @@ libtextclassifier3::Status DocumentStore::UpdateSchemaStore(
document_validator_.UpdateSchemaStore(schema_store);
int size = document_id_mapper_->num_elements();
+ int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
for (DocumentId document_id = 0; document_id < size; document_id++) {
auto document_or = Get(document_id);
if (absl_ports::IsNotFound(document_or.status())) {
@@ -1589,7 +1742,8 @@ libtextclassifier3::Status DocumentStore::UpdateSchemaStore(
} else {
// Document is no longer valid with the new SchemaStore. Mark as
// deleted
- auto delete_status = Delete(document.namespace_(), document.uri());
+ auto delete_status =
+ Delete(document.namespace_(), document.uri(), current_time_ms);
if (!delete_status.ok() && !absl_ports::IsNotFound(delete_status)) {
// Real error, pass up
return delete_status;
@@ -1613,8 +1767,9 @@ libtextclassifier3::Status DocumentStore::OptimizedUpdateSchemaStore(
document_validator_.UpdateSchemaStore(schema_store);
int size = document_id_mapper_->num_elements();
+ int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
for (DocumentId document_id = 0; document_id < size; document_id++) {
- if (!GetAliveDocumentFilterData(document_id)) {
+ if (!GetAliveDocumentFilterData(document_id, current_time_ms)) {
// Skip nonexistent documents
continue;
}
@@ -1658,7 +1813,7 @@ libtextclassifier3::Status DocumentStore::OptimizedUpdateSchemaStore(
if (delete_document) {
// Document is no longer valid with the new SchemaStore. Mark as deleted
- auto delete_status = Delete(document_id);
+ auto delete_status = Delete(document_id, current_time_ms);
if (!delete_status.ok() && !absl_ports::IsNotFound(delete_status)) {
// Real error, pass up
return delete_status;
@@ -1684,9 +1839,13 @@ DocumentStore::OptimizeInto(const std::string& new_directory,
"New directory is the same as the current one.");
}
- ICING_ASSIGN_OR_RETURN(auto doc_store_create_result,
- DocumentStore::Create(filesystem_, new_directory,
- &clock_, schema_store_));
+ ICING_ASSIGN_OR_RETURN(
+ auto doc_store_create_result,
+ DocumentStore::Create(filesystem_, new_directory, &clock_, schema_store_,
+ /*force_recovery_and_revalidate_documents=*/false,
+ namespace_id_fingerprint_, pre_mapping_fbv_,
+ use_persistent_hash_map_, compression_level_,
+ /*initialize_stats=*/nullptr));
std::unique_ptr<DocumentStore> new_doc_store =
std::move(doc_store_create_result.document_store);
@@ -1696,12 +1855,14 @@ DocumentStore::OptimizeInto(const std::string& new_directory,
int num_expired = 0;
UsageStore::UsageScores default_usage;
std::vector<DocumentId> document_id_old_to_new(size, kInvalidDocumentId);
+ int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
for (DocumentId document_id = 0; document_id < size; document_id++) {
auto document_or = Get(document_id, /*clear_internal_fields=*/false);
if (absl_ports::IsNotFound(document_or.status())) {
if (IsDeleted(document_id)) {
++num_deleted;
- } else if (!GetNonExpiredDocumentFilterData(document_id)) {
+ } else if (!GetNonExpiredDocumentFilterData(document_id,
+ current_time_ms)) {
++num_expired;
}
continue;
@@ -1771,9 +1932,10 @@ DocumentStore::GetOptimizeInfo() const {
// Figure out our ratio of optimizable/total docs.
int32_t num_documents = document_id_mapper_->num_elements();
+ int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
for (DocumentId document_id = kMinDocumentId; document_id < num_documents;
++document_id) {
- if (!GetAliveDocumentFilterData(document_id)) {
+ if (!GetAliveDocumentFilterData(document_id, current_time_ms)) {
++optimize_info.optimizable_docs;
}
@@ -1873,8 +2035,7 @@ libtextclassifier3::Status DocumentStore::SetUsageScores(
libtextclassifier3::StatusOr<
google::protobuf::RepeatedPtrField<DocumentDebugInfoProto::CorpusInfo>>
DocumentStore::CollectCorpusInfo() const {
- google::protobuf::RepeatedPtrField<DocumentDebugInfoProto::CorpusInfo>
- corpus_info;
+ google::protobuf::RepeatedPtrField<DocumentDebugInfoProto::CorpusInfo> corpus_info;
libtextclassifier3::StatusOr<const SchemaProto*> schema_proto_or =
schema_store_->GetSchema();
if (!schema_proto_or.ok()) {
@@ -1885,9 +2046,10 @@ DocumentStore::CollectCorpusInfo() const {
std::unordered_map<NamespaceId, std::string> namespace_id_to_namespace =
GetNamespaceIdsToNamespaces(namespace_mapper_.get());
const SchemaProto* schema_proto = schema_proto_or.ValueOrDie();
+ int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
for (DocumentId document_id = 0; document_id < filter_cache_->num_elements();
++document_id) {
- if (!GetAliveDocumentFilterData(document_id)) {
+ if (!GetAliveDocumentFilterData(document_id, current_time_ms)) {
continue;
}
ICING_ASSIGN_OR_RETURN(const DocumentFilterData* filter_data,
@@ -1919,10 +2081,10 @@ DocumentStore::GetDebugInfo(int verbosity) const {
ICING_ASSIGN_OR_RETURN(Crc32 crc, ComputeChecksum());
debug_info.set_crc(crc.Get());
if (verbosity > 0) {
- ICING_ASSIGN_OR_RETURN(google::protobuf::RepeatedPtrField<
- DocumentDebugInfoProto::CorpusInfo>
- corpus_info,
- CollectCorpusInfo());
+ ICING_ASSIGN_OR_RETURN(
+ google::protobuf::RepeatedPtrField<DocumentDebugInfoProto::CorpusInfo>
+ corpus_info,
+ CollectCorpusInfo());
*debug_info.mutable_corpus_info() = std::move(corpus_info);
}
return debug_info;
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index bda351d..92d4286 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -59,13 +59,19 @@ namespace lib {
class DocumentStore {
public:
struct Header {
- static constexpr int32_t kMagic = 0x746f7265;
+ static int32_t GetCurrentMagic(bool namespace_id_fingerprint) {
+ return namespace_id_fingerprint ? kNewMagic : kOldMagic;
+ }
// Holds the magic as a quick sanity check against file corruption.
int32_t magic;
// Checksum of the DocumentStore's sub-component's checksums.
uint32_t checksum;
+
+ private:
+ static constexpr int32_t kOldMagic = 0x746f7265;
+ static constexpr int32_t kNewMagic = 0x1b99c8b0;
};
struct OptimizeInfo {
@@ -135,8 +141,18 @@ class DocumentStore {
static libtextclassifier3::StatusOr<DocumentStore::CreateResult> Create(
const Filesystem* filesystem, const std::string& base_dir,
const Clock* clock, const SchemaStore* schema_store,
- bool force_recovery_and_revalidate_documents = false,
- InitializeStatsProto* initialize_stats = nullptr);
+ bool force_recovery_and_revalidate_documents,
+ bool namespace_id_fingerprint, bool pre_mapping_fbv,
+ bool use_persistent_hash_map, int32_t compression_level,
+ InitializeStatsProto* initialize_stats);
+
+ // Discards all derived data in the document store.
+ //
+ // Returns:
+ // OK on success or nothing to discard
+ // INTERNAL_ERROR on any I/O errors
+ static libtextclassifier3::Status DiscardDerivedFiles(
+ const Filesystem* filesystem, const std::string& base_dir);
// Returns the maximum DocumentId that the DocumentStore has assigned. If
// there has not been any DocumentIds assigned, i.e. the DocumentStore is
@@ -213,7 +229,8 @@ class DocumentStore {
// NOT_FOUND if no document exists with namespace, uri
// INTERNAL_ERROR on IO error
libtextclassifier3::Status Delete(std::string_view name_space,
- std::string_view uri);
+ std::string_view uri,
+ int64_t current_time_ms);
// Deletes the document identified by the given document_id. The document
// proto will be erased immediately.
@@ -227,7 +244,8 @@ class DocumentStore {
// NOT_FOUND if the document doesn't exist (i.e. deleted or expired)
// INTERNAL_ERROR on IO error
// INVALID_ARGUMENT if document_id is invalid.
- libtextclassifier3::Status Delete(DocumentId document_id);
+ libtextclassifier3::Status Delete(DocumentId document_id,
+ int64_t current_time_ms);
// Returns the NamespaceId of the string namespace
//
@@ -323,16 +341,15 @@ class DocumentStore {
// True:DocumentFilterData if the given document exists.
// False if the given document doesn't exist.
std::optional<DocumentFilterData> GetAliveDocumentFilterData(
- DocumentId document_id) const;
+ DocumentId document_id, int64_t current_time_ms) const;
// Gets the usage scores of a document.
//
// Returns:
// UsageScores on success
- // NOT_FOUND if document_id no longer exists.
- // INVALID_ARGUMENT if document_id is invalid
- libtextclassifier3::StatusOr<UsageStore::UsageScores> GetUsageScores(
- DocumentId document_id) const;
+ // nullopt if there are no usage scores stored for the requested docid.
+ std::optional<UsageStore::UsageScores> GetUsageScores(
+ DocumentId document_id, int64_t current_time_ms) const;
// Reports usage. The corresponding usage scores of the specified document in
// the report will be updated.
@@ -471,8 +488,12 @@ class DocumentStore {
private:
// Use DocumentStore::Create() to instantiate.
- DocumentStore(const Filesystem* filesystem, std::string_view base_dir,
- const Clock* clock, const SchemaStore* schema_store);
+ explicit DocumentStore(const Filesystem* filesystem,
+ std::string_view base_dir, const Clock* clock,
+ const SchemaStore* schema_store,
+ bool namespace_id_fingerprint, bool pre_mapping_fbv,
+ bool use_persistent_hash_map,
+ int32_t compression_level);
const Filesystem* const filesystem_;
const std::string base_dir_;
@@ -485,6 +506,21 @@ class DocumentStore {
// Used to validate incoming documents
DocumentValidator document_validator_;
+ // Whether to use namespace id or namespace name to build up fingerprint for
+ // document_key_mapper_ and corpus_mapper_.
+ bool namespace_id_fingerprint_;
+
+ // Flag indicating whether memory map max possible file size for underlying
+ // FileBackedVector before growing the actual file size.
+ bool pre_mapping_fbv_;
+
+ // Flag indicating whether use persistent hash map as the key mapper (if
+ // false, then fall back to dynamic trie key mapper). Note: we only use
+ // persistent hash map for uri mapper if it is true.
+ bool use_persistent_hash_map_;
+
+ const int32_t compression_level_;
+
// A log used to store all documents, it serves as a ground truth of doc
// store. key_mapper_ and document_id_mapper_ can be regenerated from it.
std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log_;
@@ -692,7 +728,7 @@ class DocumentStore {
// True:DocumentFilterData if the given document isn't expired.
// False if the given doesn't document is expired.
std::optional<DocumentFilterData> GetNonExpiredDocumentFilterData(
- DocumentId document_id) const;
+ DocumentId document_id, int64_t current_time_ms) const;
// Updates the entry in the score cache for document_id.
libtextclassifier3::Status UpdateDocumentAssociatedScoreCache(
@@ -730,9 +766,16 @@ class DocumentStore {
// Returns:
// - on success, a RepeatedPtrField for CorpusInfo collected.
// - OUT_OF_RANGE, this should never happen.
- libtextclassifier3::StatusOr<google::protobuf::RepeatedPtrField<
- DocumentDebugInfoProto::CorpusInfo>>
+ libtextclassifier3::StatusOr<
+ google::protobuf::RepeatedPtrField<DocumentDebugInfoProto::CorpusInfo>>
CollectCorpusInfo() const;
+
+ // Build fingerprint for the keys of document_key_mapper_ and corpus_mapper_.
+ // Note that namespace_id_fingerprint_ controls the way that a fingerprint is
+ // built.
+ std::string MakeFingerprint(NamespaceId namespace_id,
+ std::string_view namespace_,
+ std::string_view uri_or_schema) const;
};
} // namespace lib
diff --git a/icing/store/document-store_benchmark.cc b/icing/store/document-store_benchmark.cc
index a4b3a17..5b9c568 100644
--- a/icing/store/document-store_benchmark.cc
+++ b/icing/store/document-store_benchmark.cc
@@ -116,7 +116,9 @@ std::unique_ptr<SchemaStore> CreateSchemaStore(Filesystem filesystem,
std::unique_ptr<SchemaStore> schema_store =
SchemaStore::Create(&filesystem, schema_store_dir, clock).ValueOrDie();
- auto set_schema_status = schema_store->SetSchema(CreateSchema());
+ auto set_schema_status = schema_store->SetSchema(
+ CreateSchema(), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false);
if (!set_schema_status.ok()) {
ICING_LOG(ERROR) << set_schema_status.status().error_message();
}
@@ -124,6 +126,18 @@ std::unique_ptr<SchemaStore> CreateSchemaStore(Filesystem filesystem,
return schema_store;
}
+libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, const SchemaStore* schema_store) {
+ return DocumentStore::Create(
+ filesystem, base_dir, clock, schema_store,
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr);
+}
+
void BM_DoesDocumentExistBenchmark(benchmark::State& state) {
Filesystem filesystem;
Clock clock;
@@ -138,8 +152,8 @@ void BM_DoesDocumentExistBenchmark(benchmark::State& state) {
filesystem.CreateDirectoryRecursively(document_store_dir.data());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem, document_store_dir, &clock,
- schema_store.get()));
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -149,7 +163,8 @@ void BM_DoesDocumentExistBenchmark(benchmark::State& state) {
// stuff.
ICING_ASSERT_OK(document_store->Put(
CreateDocument("namespace", /*uri=*/std::to_string(i))));
- document_store->Delete("namespace", /*uri=*/std::to_string(i));
+ document_store->Delete("namespace", /*uri=*/std::to_string(i),
+ clock.GetSystemTimeMilliseconds());
}
std::default_random_engine random;
@@ -158,8 +173,8 @@ void BM_DoesDocumentExistBenchmark(benchmark::State& state) {
// Check random document ids to see if they exist. Hopefully to simulate
// page faulting in different sections of our mmapped derived files.
int document_id = dist(random);
- benchmark::DoNotOptimize(
- document_store->GetAliveDocumentFilterData(document_id));
+ benchmark::DoNotOptimize(document_store->GetAliveDocumentFilterData(
+ document_id, clock.GetSystemTimeMilliseconds()));
}
}
BENCHMARK(BM_DoesDocumentExistBenchmark);
@@ -178,8 +193,8 @@ void BM_Put(benchmark::State& state) {
filesystem.CreateDirectoryRecursively(document_store_dir.data());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem, document_store_dir, &clock,
- schema_store.get()));
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -207,8 +222,8 @@ void BM_GetSameDocument(benchmark::State& state) {
filesystem.CreateDirectoryRecursively(document_store_dir.data());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem, document_store_dir, &clock,
- schema_store.get()));
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -234,8 +249,8 @@ void BM_Delete(benchmark::State& state) {
filesystem.CreateDirectoryRecursively(document_store_dir.data());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem, document_store_dir, &clock,
- schema_store.get()));
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -246,7 +261,8 @@ void BM_Delete(benchmark::State& state) {
ICING_ASSERT_OK(document_store->Put(document));
state.ResumeTiming();
- benchmark::DoNotOptimize(document_store->Delete("namespace", "uri"));
+ benchmark::DoNotOptimize(document_store->Delete(
+ "namespace", "uri", clock.GetSystemTimeMilliseconds()));
}
}
BENCHMARK(BM_Delete);
@@ -268,8 +284,8 @@ void BM_Create(benchmark::State& state) {
filesystem.CreateDirectoryRecursively(document_store_dir.data());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem, document_store_dir, &clock,
- schema_store.get()));
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -284,7 +300,7 @@ void BM_Create(benchmark::State& state) {
filesystem.CreateDirectoryRecursively(document_store_dir.data());
for (auto s : state) {
- benchmark::DoNotOptimize(DocumentStore::Create(
+ benchmark::DoNotOptimize(CreateDocumentStore(
&filesystem, document_store_dir, &clock, schema_store.get()));
}
}
@@ -304,8 +320,8 @@ void BM_ComputeChecksum(benchmark::State& state) {
filesystem.CreateDirectoryRecursively(document_store_dir.data());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem, document_store_dir, &clock,
- schema_store.get()));
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
index 7cf951a..a9c47f0 100644
--- a/icing/store/document-store_test.cc
+++ b/icing/store/document-store_test.cc
@@ -18,6 +18,7 @@
#include <filesystem>
#include <limits>
#include <memory>
+#include <optional>
#include <string>
#include "icing/text_classifier/lib3/utils/base/status.h"
@@ -88,11 +89,13 @@ const NamespaceStorageInfoProto& GetNamespaceStorageInfo(
// Didn't find our namespace, fail the test.
EXPECT_TRUE(false) << "Failed to find namespace '" << name_space
<< "' in DocumentStorageInfoProto.";
- return std::move(NamespaceStorageInfoProto());
+ static const auto& default_namespace_storage_info =
+ *new NamespaceStorageInfoProto();
+ return default_namespace_storage_info;
}
UsageReport CreateUsageReport(std::string name_space, std::string uri,
- int64 timestamp_ms,
+ int64_t timestamp_ms,
UsageReport::UsageType usage_type) {
UsageReport usage_report;
usage_report.set_document_namespace(name_space);
@@ -118,7 +121,21 @@ void WriteDocumentLogHeader(
sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header));
}
-class DocumentStoreTest : public ::testing::Test {
+struct DocumentStoreTestParam {
+ bool namespace_id_fingerprint;
+ bool pre_mapping_fbv;
+ bool use_persistent_hash_map;
+
+ explicit DocumentStoreTestParam(bool namespace_id_fingerprint_in,
+ bool pre_mapping_fbv_in,
+ bool use_persistent_hash_map_in)
+ : namespace_id_fingerprint(namespace_id_fingerprint_in),
+ pre_mapping_fbv(pre_mapping_fbv_in),
+ use_persistent_hash_map(use_persistent_hash_map_in) {}
+};
+
+class DocumentStoreTest
+ : public ::testing::TestWithParam<DocumentStoreTestParam> {
protected:
DocumentStoreTest()
: test_dir_(GetTestTempDir() + "/icing"),
@@ -186,7 +203,10 @@ class DocumentStoreTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -195,6 +215,8 @@ class DocumentStoreTest : public ::testing::Test {
}
void TearDown() override {
+ lang_segmenter_.reset();
+ schema_store_.reset();
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
}
@@ -205,12 +227,25 @@ class DocumentStoreTest : public ::testing::Test {
const std::string header_file =
absl_ports::StrCat(document_store_dir_, "/document_store_header");
DocumentStore::Header header;
- header.magic = DocumentStore::Header::kMagic;
+ header.magic = DocumentStore::Header::GetCurrentMagic(
+ GetParam().namespace_id_fingerprint);
header.checksum = 10; // Arbitrary garbage checksum
filesystem_.DeleteFile(header_file.c_str());
filesystem_.Write(header_file.c_str(), &header, sizeof(header));
}
+ libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, const SchemaStore* schema_store) {
+ return DocumentStore::Create(
+ filesystem, base_dir, clock, schema_store,
+ /*force_recovery_and_revalidate_documents=*/false,
+ GetParam().namespace_id_fingerprint, GetParam().pre_mapping_fbv,
+ GetParam().use_persistent_hash_map,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr);
+ }
+
const Filesystem filesystem_;
const std::string test_dir_;
FakeClock fake_clock_;
@@ -235,34 +270,34 @@ class DocumentStoreTest : public ::testing::Test {
const int64_t document2_expiration_timestamp_ = 3; // creation + ttl
};
-TEST_F(DocumentStoreTest, CreationWithNullPointerShouldFail) {
- EXPECT_THAT(DocumentStore::Create(/*filesystem=*/nullptr, document_store_dir_,
- &fake_clock_, schema_store_.get()),
+TEST_P(DocumentStoreTest, CreationWithNullPointerShouldFail) {
+ EXPECT_THAT(CreateDocumentStore(/*filesystem=*/nullptr, document_store_dir_,
+ &fake_clock_, schema_store_.get()),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(DocumentStore::Create(&filesystem_, document_store_dir_,
- /*clock=*/nullptr, schema_store_.get()),
+ EXPECT_THAT(CreateDocumentStore(&filesystem_, document_store_dir_,
+ /*clock=*/nullptr, schema_store_.get()),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(DocumentStore::Create(&filesystem_, document_store_dir_,
- &fake_clock_, /*schema_store=*/nullptr),
+ EXPECT_THAT(CreateDocumentStore(&filesystem_, document_store_dir_,
+ &fake_clock_, /*schema_store=*/nullptr),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
-TEST_F(DocumentStoreTest, CreationWithBadFilesystemShouldFail) {
+TEST_P(DocumentStoreTest, CreationWithBadFilesystemShouldFail) {
MockFilesystem mock_filesystem;
ON_CALL(mock_filesystem, OpenForWrite(_)).WillByDefault(Return(false));
- EXPECT_THAT(DocumentStore::Create(&mock_filesystem, document_store_dir_,
- &fake_clock_, schema_store_.get()),
+ EXPECT_THAT(CreateDocumentStore(&mock_filesystem, document_store_dir_,
+ &fake_clock_, schema_store_.get()),
StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
-TEST_F(DocumentStoreTest, PutAndGetInSameNamespaceOk) {
+TEST_P(DocumentStoreTest, PutAndGetInSameNamespaceOk) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -278,11 +313,11 @@ TEST_F(DocumentStoreTest, PutAndGetInSameNamespaceOk) {
IsOkAndHolds(EqualsProto(test_document2_)));
}
-TEST_F(DocumentStoreTest, PutAndGetAcrossNamespacesOk) {
+TEST_P(DocumentStoreTest, PutAndGetAcrossNamespacesOk) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -311,11 +346,11 @@ TEST_F(DocumentStoreTest, PutAndGetAcrossNamespacesOk) {
// Validates that putting an document with the same key will overwrite previous
// document and old doc ids are not getting reused.
-TEST_F(DocumentStoreTest, PutSameKey) {
+TEST_P(DocumentStoreTest, PutSameKey) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -340,11 +375,11 @@ TEST_F(DocumentStoreTest, PutSameKey) {
EXPECT_THAT(doc_store->Put(document3), IsOkAndHolds(Not(document_id1)));
}
-TEST_F(DocumentStoreTest, IsDocumentExistingWithoutStatus) {
+TEST_P(DocumentStoreTest, IsDocumentExistingWithoutStatus) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -353,29 +388,34 @@ TEST_F(DocumentStoreTest, IsDocumentExistingWithoutStatus) {
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
doc_store->Put(DocumentProto(test_document2_)));
- EXPECT_TRUE(doc_store->GetAliveDocumentFilterData(document_id1));
- EXPECT_TRUE(doc_store->GetAliveDocumentFilterData(document_id2));
+ EXPECT_TRUE(doc_store->GetAliveDocumentFilterData(
+ document_id1, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_TRUE(doc_store->GetAliveDocumentFilterData(
+ document_id2, fake_clock_.GetSystemTimeMilliseconds()));
DocumentId invalid_document_id_negative = -1;
- EXPECT_FALSE(
- doc_store->GetAliveDocumentFilterData(invalid_document_id_negative));
+ EXPECT_FALSE(doc_store->GetAliveDocumentFilterData(
+ invalid_document_id_negative, fake_clock_.GetSystemTimeMilliseconds()));
DocumentId invalid_document_id_greater_than_max = kMaxDocumentId + 2;
EXPECT_FALSE(doc_store->GetAliveDocumentFilterData(
- invalid_document_id_greater_than_max));
+ invalid_document_id_greater_than_max,
+ fake_clock_.GetSystemTimeMilliseconds()));
- EXPECT_FALSE(doc_store->GetAliveDocumentFilterData(kInvalidDocumentId));
+ EXPECT_FALSE(doc_store->GetAliveDocumentFilterData(
+ kInvalidDocumentId, fake_clock_.GetSystemTimeMilliseconds()));
DocumentId invalid_document_id_out_of_range = document_id2 + 1;
- EXPECT_FALSE(
- doc_store->GetAliveDocumentFilterData(invalid_document_id_out_of_range));
+ EXPECT_FALSE(doc_store->GetAliveDocumentFilterData(
+ invalid_document_id_out_of_range,
+ fake_clock_.GetSystemTimeMilliseconds()));
}
-TEST_F(DocumentStoreTest, GetDeletedDocumentNotFound) {
+TEST_P(DocumentStoreTest, GetDeletedDocumentNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -384,14 +424,15 @@ TEST_F(DocumentStoreTest, GetDeletedDocumentNotFound) {
document_store->Get(test_document1_.namespace_(), test_document1_.uri()),
IsOkAndHolds(EqualsProto(test_document1_)));
- ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
- test_document1_.uri()));
+ ICING_EXPECT_OK(document_store->Delete(
+ test_document1_.namespace_(), test_document1_.uri(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(
document_store->Get(test_document1_.namespace_(), test_document1_.uri()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, GetExpiredDocumentNotFound) {
+TEST_P(DocumentStoreTest, GetExpiredDocumentNotFound) {
DocumentProto document = DocumentBuilder()
.SetKey("namespace", "uri")
.SetSchema("email")
@@ -401,8 +442,8 @@ TEST_F(DocumentStoreTest, GetExpiredDocumentNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -426,11 +467,11 @@ TEST_F(DocumentStoreTest, GetExpiredDocumentNotFound) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, GetInvalidDocumentId) {
+TEST_P(DocumentStoreTest, GetInvalidDocumentId) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -453,11 +494,11 @@ TEST_F(DocumentStoreTest, GetInvalidDocumentId) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, DeleteNonexistentDocumentNotFound) {
+TEST_P(DocumentStoreTest, DeleteNonexistentDocumentNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -468,9 +509,9 @@ TEST_F(DocumentStoreTest, DeleteNonexistentDocumentNotFound) {
DocumentLogCreator::GetDocumentLogFilename())
.c_str());
- EXPECT_THAT(
- document_store->Delete("nonexistent_namespace", "nonexistent_uri"),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(document_store->Delete("nonexistent_namespace", "nonexistent_uri",
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
int64_t document_log_size_after = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/",
@@ -479,11 +520,11 @@ TEST_F(DocumentStoreTest, DeleteNonexistentDocumentNotFound) {
EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
-TEST_F(DocumentStoreTest, DeleteNonexistentDocumentPrintableErrorMessage) {
+TEST_P(DocumentStoreTest, DeleteNonexistentDocumentPrintableErrorMessage) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -494,8 +535,8 @@ TEST_F(DocumentStoreTest, DeleteNonexistentDocumentPrintableErrorMessage) {
DocumentLogCreator::GetDocumentLogFilename())
.c_str());
- libtextclassifier3::Status status =
- document_store->Delete("android$contacts/", "661");
+ libtextclassifier3::Status status = document_store->Delete(
+ "android$contacts/", "661", fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(status, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
for (char c : status.error_message()) {
EXPECT_THAT(std::isprint(c), IsTrue());
@@ -508,31 +549,33 @@ TEST_F(DocumentStoreTest, DeleteNonexistentDocumentPrintableErrorMessage) {
EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
-TEST_F(DocumentStoreTest, DeleteAlreadyDeletedDocumentNotFound) {
+TEST_P(DocumentStoreTest, DeleteAlreadyDeletedDocumentNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
ICING_EXPECT_OK(document_store->Put(test_document1_));
// First time is OK
- ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
- test_document1_.uri()));
+ ICING_EXPECT_OK(document_store->Delete(
+ test_document1_.namespace_(), test_document1_.uri(),
+ fake_clock_.GetSystemTimeMilliseconds()));
// Deleting it again is NOT_FOUND
EXPECT_THAT(document_store->Delete(test_document1_.namespace_(),
- test_document1_.uri()),
+ test_document1_.uri(),
+ fake_clock_.GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, DeleteByNamespaceOk) {
+TEST_P(DocumentStoreTest, DeleteByNamespaceOk) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -572,11 +615,11 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceOk) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, DeleteByNamespaceNonexistentNamespaceNotFound) {
+TEST_P(DocumentStoreTest, DeleteByNamespaceNonexistentNamespaceNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -597,17 +640,18 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceNonexistentNamespaceNotFound) {
EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
-TEST_F(DocumentStoreTest, DeleteByNamespaceNoExistingDocumentsNotFound) {
+TEST_P(DocumentStoreTest, DeleteByNamespaceNoExistingDocumentsNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
ICING_EXPECT_OK(document_store->Put(test_document1_));
- ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
- test_document1_.uri()));
+ ICING_EXPECT_OK(document_store->Delete(
+ test_document1_.namespace_(), test_document1_.uri(),
+ fake_clock_.GetSystemTimeMilliseconds()));
// At this point, there are no existing documents with the namespace, even
// though Icing's derived files know about this namespace. We should still
@@ -617,7 +661,7 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceNoExistingDocumentsNotFound) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
+TEST_P(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
DocumentProto document1 = test_document1_;
document1.set_namespace_("namespace.1");
document1.set_uri("uri1");
@@ -638,8 +682,8 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
{
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -665,8 +709,8 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -687,7 +731,7 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, DeleteBySchemaTypeOk) {
+TEST_P(DocumentStoreTest, DeleteBySchemaTypeOk) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
@@ -702,12 +746,14 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeOk) {
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -772,11 +818,11 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeOk) {
IsOkAndHolds(EqualsProto(person_document)));
}
-TEST_F(DocumentStoreTest, DeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
+TEST_P(DocumentStoreTest, DeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -798,24 +844,25 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
-TEST_F(DocumentStoreTest, DeleteBySchemaTypeNoExistingDocumentsNotFound) {
+TEST_P(DocumentStoreTest, DeleteBySchemaTypeNoExistingDocumentsNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
ICING_EXPECT_OK(document_store->Put(test_document1_));
- ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
- test_document1_.uri()));
+ ICING_EXPECT_OK(document_store->Delete(
+ test_document1_.namespace_(), test_document1_.uri(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(
document_store->DeleteBySchemaType(test_document1_.schema()).status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
+TEST_P(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
@@ -829,7 +876,9 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
DocumentId email_document_id;
DocumentId message_document_id;
@@ -849,8 +898,8 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
{
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -875,8 +924,8 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -893,20 +942,21 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
IsOkAndHolds(EqualsProto(message_document)));
}
-TEST_F(DocumentStoreTest, PutDeleteThenPut) {
+TEST_P(DocumentStoreTest, PutDeleteThenPut) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
ICING_EXPECT_OK(doc_store->Put(test_document1_));
- ICING_EXPECT_OK(
- doc_store->Delete(test_document1_.namespace_(), test_document1_.uri()));
+ ICING_EXPECT_OK(doc_store->Delete(test_document1_.namespace_(),
+ test_document1_.uri(),
+ fake_clock_.GetSystemTimeMilliseconds()));
ICING_EXPECT_OK(doc_store->Put(test_document1_));
}
-TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
+TEST_P(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
@@ -920,7 +970,9 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
DocumentId email_document_id;
DocumentId message_document_id;
@@ -940,8 +992,8 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
{
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -974,13 +1026,14 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
.AddType(SchemaTypeConfigBuilder().SetType("message"))
.Build();
ICING_EXPECT_OK(schema_store->SetSchema(
- new_schema, /*ignore_errors_and_delete_documents=*/true));
+ new_schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -997,11 +1050,11 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
IsOkAndHolds(EqualsProto(message_document)));
}
-TEST_F(DocumentStoreTest, OptimizeInto) {
+TEST_P(DocumentStoreTest, OptimizeInto) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1063,7 +1116,8 @@ TEST_F(DocumentStoreTest, OptimizeInto) {
// is deleted
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
- ICING_ASSERT_OK(doc_store->Delete("namespace", "uri1"));
+ ICING_ASSERT_OK(doc_store->Delete("namespace", "uri1",
+ fake_clock_.GetSystemTimeMilliseconds()));
// DocumentId 0 is removed.
EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()),
IsOkAndHolds(ElementsAre(kInvalidDocumentId, 0, 1)));
@@ -1090,7 +1144,8 @@ TEST_F(DocumentStoreTest, OptimizeInto) {
// Delete the last document
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
- ICING_ASSERT_OK(doc_store->Delete("namespace", "uri2"));
+ ICING_ASSERT_OK(doc_store->Delete("namespace", "uri2",
+ fake_clock_.GetSystemTimeMilliseconds()));
// DocumentId 0 and 1 is removed, and DocumentId 2 is expired.
EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()),
IsOkAndHolds(ElementsAre(kInvalidDocumentId, kInvalidDocumentId,
@@ -1100,11 +1155,11 @@ TEST_F(DocumentStoreTest, OptimizeInto) {
EXPECT_THAT(optimized_size3, Gt(optimized_size4));
}
-TEST_F(DocumentStoreTest, OptimizeIntoForEmptyDocumentStore) {
+TEST_P(DocumentStoreTest, OptimizeIntoForEmptyDocumentStore) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
std::string optimized_dir = document_store_dir_ + "_optimize";
@@ -1114,14 +1169,14 @@ TEST_F(DocumentStoreTest, OptimizeIntoForEmptyDocumentStore) {
IsOkAndHolds(IsEmpty()));
}
-TEST_F(DocumentStoreTest, ShouldRecoverFromDataLoss) {
+TEST_P(DocumentStoreTest, ShouldRecoverFromDataLoss) {
DocumentId document_id1, document_id2;
{
// Can put and delete fine.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1151,7 +1206,9 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromDataLoss) {
/*num_docs=*/2, /*sum_length_in_tokens=*/8)));
// Delete document 1
- EXPECT_THAT(doc_store->Delete("icing", "email/1"), IsOk());
+ EXPECT_THAT(doc_store->Delete("icing", "email/1",
+ fake_clock_.GetSystemTimeMilliseconds()),
+ IsOk());
EXPECT_THAT(doc_store->Get(document_id1),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(doc_store->Get(document_id2),
@@ -1173,8 +1230,8 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromDataLoss) {
// Successfully recover from a data loss issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1185,7 +1242,8 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromDataLoss) {
// Checks derived filter cache
ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData doc_filter_data,
- doc_store->GetAliveDocumentFilterData(document_id2));
+ doc_store->GetAliveDocumentFilterData(
+ document_id2, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(doc_filter_data,
Eq(DocumentFilterData(
/*namespace_id=*/0,
@@ -1202,14 +1260,14 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromDataLoss) {
/*num_docs=*/1, /*sum_length_in_tokens=*/4)));
}
-TEST_F(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) {
+TEST_P(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) {
DocumentId document_id1, document_id2;
{
// Can put and delete fine.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1238,16 +1296,23 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) {
IsOkAndHolds(CorpusAssociatedScoreData(
/*num_docs=*/2, /*sum_length_in_tokens=*/8)));
// Delete document 1
- EXPECT_THAT(doc_store->Delete("icing", "email/1"), IsOk());
+ EXPECT_THAT(doc_store->Delete("icing", "email/1",
+ fake_clock_.GetSystemTimeMilliseconds()),
+ IsOk());
EXPECT_THAT(doc_store->Get(document_id1),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(doc_store->Get(document_id2),
IsOkAndHolds(EqualsProto(test_document2_)));
+
+ EXPECT_THAT(doc_store->ReportUsage(CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/2",
+ /*timestamp_ms=*/0, UsageReport::USAGE_TYPE1)),
+ IsOk());
}
- // "Corrupt" one of the derived files by adding non-checksummed data to
- // it. This will mess up the checksum and throw an error on the derived file's
- // initialization.
+ // "Corrupt" one of the derived files by modifying an existing data without
+ // calling PersistToDisk() or updating its checksum. This will mess up the
+ // checksum and throw an error on the derived file's initialization.
const std::string document_id_mapper_file =
absl_ports::StrCat(document_store_dir_, "/document_id_mapper");
ICING_ASSERT_OK_AND_ASSIGN(
@@ -1255,17 +1320,18 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) {
FileBackedVector<int64_t>::Create(
filesystem_, document_id_mapper_file,
MemoryMappedFile::READ_WRITE_AUTO_SYNC));
- int64_t corrupt_document_id = 3;
- int64_t corrupt_offset = 3;
+ int64_t corrupt_document_id = 1;
+ int64_t corrupt_offset = 123456;
EXPECT_THAT(document_id_mapper->Set(corrupt_document_id, corrupt_offset),
IsOk());
+ // Will get error when initializing document id mapper file, so it will
+ // trigger RegenerateDerivedFiles.
// Successfully recover from a corrupt derived file issue.
- // NOTE: this doesn't trigger RegenerateDerivedFiles.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1277,14 +1343,112 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) {
// Checks derived filter cache
ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData doc_filter_data,
- doc_store->GetAliveDocumentFilterData(document_id2));
+ doc_store->GetAliveDocumentFilterData(
+ document_id2, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(doc_filter_data,
Eq(DocumentFilterData(
/*namespace_id=*/0,
/*schema_type_id=*/0, document2_expiration_timestamp_)));
- // Checks derived score cache - note that they aren't regenerated from
+ // Checks derived score cache
+ EXPECT_THAT(
+ doc_store->GetDocumentAssociatedScoreData(document_id2),
+ IsOkAndHolds(DocumentAssociatedScoreData(
+ /*corpus_id=*/0, document2_score_, document2_creation_timestamp_,
+ /*length_in_tokens=*/4)));
+ EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+ IsOkAndHolds(CorpusAssociatedScoreData(
+ /*num_docs=*/1, /*sum_length_in_tokens=*/4)));
+
+ // Checks usage score data - note that they aren't regenerated from
// scratch.
+ UsageStore::UsageScores expected_scores;
+ expected_scores.usage_type1_count = 1;
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ UsageStore::UsageScores actual_scores,
+ doc_store->GetUsageScores(document_id2,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
+}
+
+TEST_P(DocumentStoreTest, ShouldRecoverFromDiscardDerivedFiles) {
+ DocumentId document_id1, document_id2;
+ {
+ // Can put and delete fine.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ document_id1,
+ doc_store->Put(DocumentProto(test_document1_), /*num_tokens=*/4));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ document_id2,
+ doc_store->Put(DocumentProto(test_document2_), /*num_tokens=*/4));
+ EXPECT_THAT(doc_store->Get(document_id1),
+ IsOkAndHolds(EqualsProto(test_document1_)));
+ EXPECT_THAT(doc_store->Get(document_id2),
+ IsOkAndHolds(EqualsProto(test_document2_)));
+ // Checks derived score cache
+ EXPECT_THAT(
+ doc_store->GetDocumentAssociatedScoreData(document_id1),
+ IsOkAndHolds(DocumentAssociatedScoreData(
+ /*corpus_id=*/0, document1_score_, document1_creation_timestamp_,
+ /*length_in_tokens=*/4)));
+ EXPECT_THAT(
+ doc_store->GetDocumentAssociatedScoreData(document_id2),
+ IsOkAndHolds(DocumentAssociatedScoreData(
+ /*corpus_id=*/0, document2_score_, document2_creation_timestamp_,
+ /*length_in_tokens=*/4)));
+ EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+ IsOkAndHolds(CorpusAssociatedScoreData(
+ /*num_docs=*/2, /*sum_length_in_tokens=*/8)));
+ // Delete document 1
+ EXPECT_THAT(doc_store->Delete("icing", "email/1",
+ fake_clock_.GetSystemTimeMilliseconds()),
+ IsOk());
+ EXPECT_THAT(doc_store->Get(document_id1),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(doc_store->Get(document_id2),
+ IsOkAndHolds(EqualsProto(test_document2_)));
+
+ EXPECT_THAT(doc_store->ReportUsage(CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/2",
+ /*timestamp_ms=*/0, UsageReport::USAGE_TYPE1)),
+ IsOk());
+ }
+
+ // Discard all derived files.
+ ICING_ASSERT_OK(
+ DocumentStore::DiscardDerivedFiles(&filesystem_, document_store_dir_));
+
+ // Successfully recover after discarding all derived files.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ EXPECT_THAT(doc_store->Get(document_id1),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(doc_store->Get(document_id2),
+ IsOkAndHolds(EqualsProto(test_document2_)));
+
+ // Checks derived filter cache
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ DocumentFilterData doc_filter_data,
+ doc_store->GetAliveDocumentFilterData(
+ document_id2, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(doc_filter_data,
+ Eq(DocumentFilterData(
+ /*namespace_id=*/0,
+ /*schema_type_id=*/0, document2_expiration_timestamp_)));
+
+ // Checks derived score cache.
EXPECT_THAT(
doc_store->GetDocumentAssociatedScoreData(document_id2),
IsOkAndHolds(DocumentAssociatedScoreData(
@@ -1292,17 +1456,27 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) {
/*length_in_tokens=*/4)));
EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
IsOkAndHolds(CorpusAssociatedScoreData(
- /*num_docs=*/2, /*sum_length_in_tokens=*/8)));
+ /*num_docs=*/1, /*sum_length_in_tokens=*/4)));
+
+ // Checks usage score data - note that they aren't regenerated from
+ // scratch.
+ UsageStore::UsageScores expected_scores;
+ expected_scores.usage_type1_count = 1;
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ UsageStore::UsageScores actual_scores,
+ doc_store->GetUsageScores(document_id2,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
}
-TEST_F(DocumentStoreTest, ShouldRecoverFromBadChecksum) {
+TEST_P(DocumentStoreTest, ShouldRecoverFromBadChecksum) {
DocumentId document_id1, document_id2;
{
// Can put and delete fine.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1330,7 +1504,9 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromBadChecksum) {
EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
IsOkAndHolds(CorpusAssociatedScoreData(
/*num_docs=*/2, /*sum_length_in_tokens=*/8)));
- EXPECT_THAT(doc_store->Delete("icing", "email/1"), IsOk());
+ EXPECT_THAT(doc_store->Delete("icing", "email/1",
+ fake_clock_.GetSystemTimeMilliseconds()),
+ IsOk());
EXPECT_THAT(doc_store->Get(document_id1),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(doc_store->Get(document_id2),
@@ -1341,8 +1517,8 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromBadChecksum) {
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1354,7 +1530,8 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromBadChecksum) {
// Checks derived filter cache
ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData doc_filter_data,
- doc_store->GetAliveDocumentFilterData(document_id2));
+ doc_store->GetAliveDocumentFilterData(
+ document_id2, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(doc_filter_data,
Eq(DocumentFilterData(
/*namespace_id=*/0,
@@ -1370,11 +1547,11 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromBadChecksum) {
/*num_docs=*/1, /*sum_length_in_tokens=*/4)));
}
-TEST_F(DocumentStoreTest, GetStorageInfo) {
+TEST_P(DocumentStoreTest, GetStorageInfo) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1404,9 +1581,8 @@ TEST_F(DocumentStoreTest, GetStorageInfo) {
ON_CALL(mock_filesystem, GetDiskUsage(A<const char*>()))
.WillByDefault(Return(Filesystem::kBadFileSize));
ICING_ASSERT_OK_AND_ASSIGN(
- create_result,
- DocumentStore::Create(&mock_filesystem, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ create_result, CreateDocumentStore(&mock_filesystem, document_store_dir_,
+ &fake_clock_, schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store_with_mock_filesystem =
std::move(create_result.document_store);
@@ -1414,11 +1590,11 @@ TEST_F(DocumentStoreTest, GetStorageInfo) {
EXPECT_THAT(doc_store_storage_info.document_store_size(), Eq(-1));
}
-TEST_F(DocumentStoreTest, MaxDocumentId) {
+TEST_P(DocumentStoreTest, MaxDocumentId) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1430,7 +1606,8 @@ TEST_F(DocumentStoreTest, MaxDocumentId) {
EXPECT_THAT(doc_store->last_added_document_id(), Eq(document_id1));
// Still returns the last DocumentId even if it was deleted
- ICING_ASSERT_OK(doc_store->Delete("icing", "email/1"));
+ ICING_ASSERT_OK(doc_store->Delete("icing", "email/1",
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(doc_store->last_added_document_id(), Eq(document_id1));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
@@ -1438,11 +1615,11 @@ TEST_F(DocumentStoreTest, MaxDocumentId) {
EXPECT_THAT(doc_store->last_added_document_id(), Eq(document_id2));
}
-TEST_F(DocumentStoreTest, GetNamespaceId) {
+TEST_P(DocumentStoreTest, GetNamespaceId) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1471,11 +1648,11 @@ TEST_F(DocumentStoreTest, GetNamespaceId) {
EXPECT_THAT(doc_store->GetNamespaceId("namespace1"), IsOkAndHolds(Eq(0)));
}
-TEST_F(DocumentStoreTest, GetDuplicateNamespaceId) {
+TEST_P(DocumentStoreTest, GetDuplicateNamespaceId) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1491,11 +1668,11 @@ TEST_F(DocumentStoreTest, GetDuplicateNamespaceId) {
EXPECT_THAT(doc_store->GetNamespaceId("namespace"), IsOkAndHolds(Eq(0)));
}
-TEST_F(DocumentStoreTest, NonexistentNamespaceNotFound) {
+TEST_P(DocumentStoreTest, NonexistentNamespaceNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1503,11 +1680,11 @@ TEST_F(DocumentStoreTest, NonexistentNamespaceNotFound) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, GetCorpusDuplicateCorpusId) {
+TEST_P(DocumentStoreTest, GetCorpusDuplicateCorpusId) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1524,11 +1701,11 @@ TEST_F(DocumentStoreTest, GetCorpusDuplicateCorpusId) {
IsOkAndHolds(Eq(0)));
}
-TEST_F(DocumentStoreTest, GetCorpusId) {
+TEST_P(DocumentStoreTest, GetCorpusId) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1559,11 +1736,11 @@ TEST_F(DocumentStoreTest, GetCorpusId) {
EXPECT_THAT(doc_store->GetNamespaceId("namespace1"), IsOkAndHolds(Eq(0)));
}
-TEST_F(DocumentStoreTest, NonexistentCorpusNotFound) {
+TEST_P(DocumentStoreTest, NonexistentCorpusNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1583,11 +1760,11 @@ TEST_F(DocumentStoreTest, NonexistentCorpusNotFound) {
StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
}
-TEST_F(DocumentStoreTest, GetCorpusAssociatedScoreDataSameCorpus) {
+TEST_P(DocumentStoreTest, GetCorpusAssociatedScoreDataSameCorpus) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1608,11 +1785,11 @@ TEST_F(DocumentStoreTest, GetCorpusAssociatedScoreDataSameCorpus) {
StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
}
-TEST_F(DocumentStoreTest, GetCorpusAssociatedScoreData) {
+TEST_P(DocumentStoreTest, GetCorpusAssociatedScoreData) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1646,11 +1823,11 @@ TEST_F(DocumentStoreTest, GetCorpusAssociatedScoreData) {
/*num_docs=*/1, /*sum_length_in_tokens=*/5)));
}
-TEST_F(DocumentStoreTest, NonexistentCorpusAssociatedScoreDataOutOfRange) {
+TEST_P(DocumentStoreTest, NonexistentCorpusAssociatedScoreDataOutOfRange) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1658,11 +1835,11 @@ TEST_F(DocumentStoreTest, NonexistentCorpusAssociatedScoreDataOutOfRange) {
StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
}
-TEST_F(DocumentStoreTest, GetDocumentAssociatedScoreDataSameCorpus) {
+TEST_P(DocumentStoreTest, GetDocumentAssociatedScoreDataSameCorpus) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1702,11 +1879,11 @@ TEST_F(DocumentStoreTest, GetDocumentAssociatedScoreDataSameCorpus) {
/*length_in_tokens=*/7)));
}
-TEST_F(DocumentStoreTest, GetDocumentAssociatedScoreDataDifferentCorpus) {
+TEST_P(DocumentStoreTest, GetDocumentAssociatedScoreDataDifferentCorpus) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1746,11 +1923,11 @@ TEST_F(DocumentStoreTest, GetDocumentAssociatedScoreDataDifferentCorpus) {
/*length_in_tokens=*/7)));
}
-TEST_F(DocumentStoreTest, NonexistentDocumentAssociatedScoreDataNotFound) {
+TEST_P(DocumentStoreTest, NonexistentDocumentAssociatedScoreDataNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1758,22 +1935,23 @@ TEST_F(DocumentStoreTest, NonexistentDocumentAssociatedScoreDataNotFound) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, NonexistentDocumentFilterDataNotFound) {
+TEST_P(DocumentStoreTest, NonexistentDocumentFilterDataNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
- EXPECT_FALSE(doc_store->GetAliveDocumentFilterData(/*document_id=*/0));
+ EXPECT_FALSE(doc_store->GetAliveDocumentFilterData(
+ /*document_id=*/0, fake_clock_.GetSystemTimeMilliseconds()));
}
-TEST_F(DocumentStoreTest, DeleteClearsFilterCache) {
+TEST_P(DocumentStoreTest, DeleteClearsFilterCache) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1782,22 +1960,25 @@ TEST_F(DocumentStoreTest, DeleteClearsFilterCache) {
ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData doc_filter_data,
- doc_store->GetAliveDocumentFilterData(document_id));
+ doc_store->GetAliveDocumentFilterData(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(doc_filter_data,
Eq(DocumentFilterData(
/*namespace_id=*/0,
/*schema_type_id=*/0, document1_expiration_timestamp_)));
- ICING_ASSERT_OK(doc_store->Delete("icing", "email/1"));
+ ICING_ASSERT_OK(doc_store->Delete("icing", "email/1",
+ fake_clock_.GetSystemTimeMilliseconds()));
// Associated entry of the deleted document is removed.
- EXPECT_FALSE(doc_store->GetAliveDocumentFilterData(document_id));
+ EXPECT_FALSE(doc_store->GetAliveDocumentFilterData(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
}
-TEST_F(DocumentStoreTest, DeleteClearsScoreCache) {
+TEST_P(DocumentStoreTest, DeleteClearsScoreCache) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1811,17 +1992,22 @@ TEST_F(DocumentStoreTest, DeleteClearsScoreCache) {
/*creation_timestamp_ms=*/document1_creation_timestamp_,
/*length_in_tokens=*/4)));
- ICING_ASSERT_OK(doc_store->Delete("icing", "email/1"));
+ ICING_ASSERT_OK(doc_store->Delete("icing", "email/1",
+ fake_clock_.GetSystemTimeMilliseconds()));
// Associated entry of the deleted document is removed.
- EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(
+ doc_store->GetDocumentAssociatedScoreData(document_id),
+ IsOkAndHolds(DocumentAssociatedScoreData(kInvalidCorpusId,
+ /*document_score=*/-1,
+ /*creation_timestamp_ms=*/-1,
+ /*length_in_tokens=*/0)));
}
-TEST_F(DocumentStoreTest, DeleteShouldPreventUsageScores) {
+TEST_P(DocumentStoreTest, DeleteShouldPreventUsageScores) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1836,11 +2022,15 @@ TEST_F(DocumentStoreTest, DeleteShouldPreventUsageScores) {
UsageStore::UsageScores expected_scores;
expected_scores.usage_type1_count = 1;
- ASSERT_THAT(doc_store->GetUsageScores(document_id),
- IsOkAndHolds(expected_scores));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ UsageStore::UsageScores actual_scores,
+ doc_store->GetUsageScores(document_id,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
// Delete the document.
- ICING_ASSERT_OK(doc_store->Delete("icing", "email/1"));
+ ICING_ASSERT_OK(doc_store->Delete("icing", "email/1",
+ fake_clock_.GetSystemTimeMilliseconds()));
// Can't report or get usage scores on the deleted document
ASSERT_THAT(
@@ -1848,16 +2038,15 @@ TEST_F(DocumentStoreTest, DeleteShouldPreventUsageScores) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND,
HasSubstr("Couldn't report usage on a nonexistent document")));
- ASSERT_THAT(doc_store->GetUsageScores(document_id),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND,
- HasSubstr("Can't get usage scores")));
+ EXPECT_FALSE(doc_store->GetUsageScores(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
}
-TEST_F(DocumentStoreTest, ExpirationShouldPreventUsageScores) {
+TEST_P(DocumentStoreTest, ExpirationShouldPreventUsageScores) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1884,8 +2073,11 @@ TEST_F(DocumentStoreTest, ExpirationShouldPreventUsageScores) {
UsageStore::UsageScores expected_scores;
expected_scores.usage_type1_count = 1;
- ASSERT_THAT(doc_store->GetUsageScores(document_id),
- IsOkAndHolds(expected_scores));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ UsageStore::UsageScores actual_scores,
+ doc_store->GetUsageScores(document_id,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
// Some arbitrary time past the document's creation time (10) + ttl (100)
fake_clock_.SetSystemTimeMilliseconds(200);
@@ -1896,12 +2088,11 @@ TEST_F(DocumentStoreTest, ExpirationShouldPreventUsageScores) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND,
HasSubstr("Couldn't report usage on a nonexistent document")));
- ASSERT_THAT(doc_store->GetUsageScores(document_id),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND,
- HasSubstr("Can't get usage scores")));
+ EXPECT_FALSE(doc_store->GetUsageScores(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
}
-TEST_F(DocumentStoreTest,
+TEST_P(DocumentStoreTest,
ExpirationTimestampIsSumOfNonZeroTtlAndCreationTimestamp) {
DocumentProto document = DocumentBuilder()
.SetKey("namespace1", "1")
@@ -1912,22 +2103,23 @@ TEST_F(DocumentStoreTest,
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, doc_store->Put(document));
ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData doc_filter_data,
- doc_store->GetAliveDocumentFilterData(document_id));
+ doc_store->GetAliveDocumentFilterData(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(doc_filter_data, Eq(DocumentFilterData(
/*namespace_id=*/0,
/*schema_type_id=*/0,
/*expiration_timestamp_ms=*/1100)));
}
-TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxIfTtlIsZero) {
+TEST_P(DocumentStoreTest, ExpirationTimestampIsInt64MaxIfTtlIsZero) {
DocumentProto document = DocumentBuilder()
.SetKey("namespace1", "1")
.SetSchema("email")
@@ -1937,8 +2129,8 @@ TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxIfTtlIsZero) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1946,7 +2138,8 @@ TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxIfTtlIsZero) {
ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData doc_filter_data,
- doc_store->GetAliveDocumentFilterData(document_id));
+ doc_store->GetAliveDocumentFilterData(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(
doc_filter_data,
@@ -1956,7 +2149,7 @@ TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxIfTtlIsZero) {
/*expiration_timestamp_ms=*/std::numeric_limits<int64_t>::max())));
}
-TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxOnOverflow) {
+TEST_P(DocumentStoreTest, ExpirationTimestampIsInt64MaxOnOverflow) {
DocumentProto document =
DocumentBuilder()
.SetKey("namespace1", "1")
@@ -1967,8 +2160,8 @@ TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxOnOverflow) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1976,7 +2169,8 @@ TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxOnOverflow) {
ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData doc_filter_data,
- doc_store->GetAliveDocumentFilterData(document_id));
+ doc_store->GetAliveDocumentFilterData(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(
doc_filter_data,
@@ -1986,7 +2180,7 @@ TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxOnOverflow) {
/*expiration_timestamp_ms=*/std::numeric_limits<int64_t>::max())));
}
-TEST_F(DocumentStoreTest, CreationTimestampShouldBePopulated) {
+TEST_P(DocumentStoreTest, CreationTimestampShouldBePopulated) {
// Creates a document without a given creation timestamp
DocumentProto document_without_creation_timestamp =
DocumentBuilder()
@@ -2000,8 +2194,8 @@ TEST_F(DocumentStoreTest, CreationTimestampShouldBePopulated) {
fake_clock_.SetSystemTimeMilliseconds(fake_real_time);
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -2017,7 +2211,7 @@ TEST_F(DocumentStoreTest, CreationTimestampShouldBePopulated) {
Eq(fake_real_time));
}
-TEST_F(DocumentStoreTest, ShouldWriteAndReadScoresCorrectly) {
+TEST_P(DocumentStoreTest, ShouldWriteAndReadScoresCorrectly) {
DocumentProto document1 = DocumentBuilder()
.SetKey("icing", "email/1")
.SetSchema("email")
@@ -2033,8 +2227,8 @@ TEST_F(DocumentStoreTest, ShouldWriteAndReadScoresCorrectly) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -2056,11 +2250,11 @@ TEST_F(DocumentStoreTest, ShouldWriteAndReadScoresCorrectly) {
/*length_in_tokens=*/0)));
}
-TEST_F(DocumentStoreTest, ComputeChecksumSameBetweenCalls) {
+TEST_P(DocumentStoreTest, ComputeChecksumSameBetweenCalls) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2071,11 +2265,11 @@ TEST_F(DocumentStoreTest, ComputeChecksumSameBetweenCalls) {
EXPECT_THAT(document_store->ComputeChecksum(), IsOkAndHolds(checksum));
}
-TEST_F(DocumentStoreTest, ComputeChecksumSameAcrossInstances) {
+TEST_P(DocumentStoreTest, ComputeChecksumSameAcrossInstances) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2085,18 +2279,18 @@ TEST_F(DocumentStoreTest, ComputeChecksumSameAcrossInstances) {
// Destroy the previous instance and recreate DocumentStore
document_store.reset();
ICING_ASSERT_OK_AND_ASSIGN(
- create_result, DocumentStore::Create(&filesystem_, document_store_dir_,
- &fake_clock_, schema_store_.get()));
+ create_result, CreateDocumentStore(&filesystem_, document_store_dir_,
+ &fake_clock_, schema_store_.get()));
document_store = std::move(create_result.document_store);
EXPECT_THAT(document_store->ComputeChecksum(), IsOkAndHolds(checksum));
}
-TEST_F(DocumentStoreTest, ComputeChecksumChangesOnNewDocument) {
+TEST_P(DocumentStoreTest, ComputeChecksumChangesOnNewDocument) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2108,11 +2302,11 @@ TEST_F(DocumentStoreTest, ComputeChecksumChangesOnNewDocument) {
IsOkAndHolds(Not(Eq(checksum))));
}
-TEST_F(DocumentStoreTest, ComputeChecksumDoesntChangeOnNewUsage) {
+TEST_P(DocumentStoreTest, ComputeChecksumDoesntChangeOnNewUsage) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2126,7 +2320,7 @@ TEST_F(DocumentStoreTest, ComputeChecksumDoesntChangeOnNewUsage) {
EXPECT_THAT(document_store->ComputeChecksum(), IsOkAndHolds(Eq(checksum)));
}
-TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
+TEST_P(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
const std::string schema_store_dir = schema_store_dir_ + "_custom";
DocumentId email_document_id;
@@ -2159,7 +2353,9 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.AddType(SchemaTypeConfigBuilder().SetType("message"))
.Build();
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id,
schema_store->GetSchemaTypeId("email"));
@@ -2168,8 +2364,8 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2180,7 +2376,8 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
IsOkAndHolds(EqualsProto(email_document)));
ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData email_data,
- document_store->GetAliveDocumentFilterData(email_document_id));
+ document_store->GetAliveDocumentFilterData(
+ email_document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(email_data.schema_type_id(), Eq(email_schema_type_id));
email_namespace_id = email_data.namespace_id();
email_expiration_timestamp = email_data.expiration_timestamp_ms();
@@ -2193,7 +2390,8 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
IsOkAndHolds(EqualsProto(message_document)));
ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData message_data,
- document_store->GetAliveDocumentFilterData(message_document_id));
+ document_store->GetAliveDocumentFilterData(
+ message_document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(message_data.schema_type_id(), Eq(message_schema_type_id));
message_namespace_id = message_data.namespace_id();
message_expiration_timestamp = message_data.expiration_timestamp_ms();
@@ -2214,7 +2412,9 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id,
schema_store->GetSchemaTypeId("email"));
@@ -2223,8 +2423,8 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
// because the "message" schema type is missing
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2233,7 +2433,8 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
IsOkAndHolds(EqualsProto(email_document)));
ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData email_data,
- document_store->GetAliveDocumentFilterData(email_document_id));
+ document_store->GetAliveDocumentFilterData(
+ email_document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(email_data.schema_type_id(), Eq(email_schema_type_id));
// Make sure that all the other fields are stll valid/the same
EXPECT_THAT(email_data.namespace_id(), Eq(email_namespace_id));
@@ -2245,7 +2446,8 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
IsOkAndHolds(EqualsProto(message_document)));
ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData message_data,
- document_store->GetAliveDocumentFilterData(message_document_id));
+ document_store->GetAliveDocumentFilterData(
+ message_document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(message_data.schema_type_id(), Eq(-1));
// Make sure that all the other fields are stll valid/the same
EXPECT_THAT(message_data.namespace_id(), Eq(message_namespace_id));
@@ -2253,7 +2455,7 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
Eq(message_expiration_timestamp));
}
-TEST_F(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) {
+TEST_P(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) {
const std::string schema_store_dir = test_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
@@ -2268,7 +2470,9 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId old_email_schema_type_id,
schema_store->GetSchemaTypeId("email"));
@@ -2290,8 +2494,8 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) {
// Add the documents and check SchemaTypeIds match
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2299,14 +2503,16 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) {
document_store->Put(email_document));
ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData email_data,
- document_store->GetAliveDocumentFilterData(email_document_id));
+ document_store->GetAliveDocumentFilterData(
+ email_document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(email_data.schema_type_id(), Eq(old_email_schema_type_id));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
document_store->Put(message_document));
ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData message_data,
- document_store->GetAliveDocumentFilterData(message_document_id));
+ document_store->GetAliveDocumentFilterData(
+ message_document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(message_data.schema_type_id(), Eq(old_message_schema_type_id));
// Rearrange the schema types. Since SchemaTypeId is assigned based on order,
@@ -2316,7 +2522,9 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) {
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId new_email_schema_type_id,
schema_store->GetSchemaTypeId("email"));
@@ -2332,16 +2540,18 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) {
// Check that the FilterCache holds the new SchemaTypeIds
ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
email_data,
- document_store->GetAliveDocumentFilterData(email_document_id));
+ document_store->GetAliveDocumentFilterData(
+ email_document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(email_data.schema_type_id(), Eq(new_email_schema_type_id));
ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
message_data,
- document_store->GetAliveDocumentFilterData(message_document_id));
+ document_store->GetAliveDocumentFilterData(
+ message_document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(message_data.schema_type_id(), Eq(new_message_schema_type_id));
}
-TEST_F(DocumentStoreTest, UpdateSchemaStoreDeletesInvalidDocuments) {
+TEST_P(DocumentStoreTest, UpdateSchemaStoreDeletesInvalidDocuments) {
const std::string schema_store_dir = test_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
@@ -2359,7 +2569,9 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreDeletesInvalidDocuments) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Add two documents, with and without a subject
DocumentProto email_without_subject = DocumentBuilder()
@@ -2380,8 +2592,8 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreDeletesInvalidDocuments) {
// Insert documents and check they're ok
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2401,7 +2613,8 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreDeletesInvalidDocuments) {
PropertyConfigProto::Cardinality::REQUIRED);
ICING_EXPECT_OK(schema_store->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true));
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_EXPECT_OK(document_store->UpdateSchemaStore(schema_store.get()));
@@ -2414,7 +2627,7 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreDeletesInvalidDocuments) {
IsOkAndHolds(EqualsProto(email_with_subject)));
}
-TEST_F(DocumentStoreTest,
+TEST_P(DocumentStoreTest,
UpdateSchemaStoreDeletesDocumentsByDeletedSchemaType) {
const std::string schema_store_dir = test_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
@@ -2430,7 +2643,9 @@ TEST_F(DocumentStoreTest,
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Add a "email" and "message" document
DocumentProto email_document = DocumentBuilder()
@@ -2450,8 +2665,8 @@ TEST_F(DocumentStoreTest,
// Insert documents and check they're ok
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2472,7 +2687,8 @@ TEST_F(DocumentStoreTest,
ICING_EXPECT_OK(
schema_store->SetSchema(new_schema,
- /*ignore_errors_and_delete_documents=*/true));
+ /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_EXPECT_OK(document_store->UpdateSchemaStore(schema_store.get()));
@@ -2485,7 +2701,7 @@ TEST_F(DocumentStoreTest,
IsOkAndHolds(EqualsProto(message_document)));
}
-TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) {
+TEST_P(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) {
const std::string schema_store_dir = test_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
@@ -2500,7 +2716,9 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId old_email_schema_type_id,
schema_store->GetSchemaTypeId("email"));
@@ -2522,8 +2740,8 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) {
// Add the documents and check SchemaTypeIds match
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2531,14 +2749,16 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) {
document_store->Put(email_document));
ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData email_data,
- document_store->GetAliveDocumentFilterData(email_document_id));
+ document_store->GetAliveDocumentFilterData(
+ email_document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(email_data.schema_type_id(), Eq(old_email_schema_type_id));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
document_store->Put(message_document));
ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData message_data,
- document_store->GetAliveDocumentFilterData(message_document_id));
+ document_store->GetAliveDocumentFilterData(
+ message_document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(message_data.schema_type_id(), Eq(old_message_schema_type_id));
// Rearrange the schema types. Since SchemaTypeId is assigned based on order,
@@ -2548,8 +2768,11 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) {
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ICING_ASSERT_OK_AND_ASSIGN(SchemaStore::SetSchemaResult set_schema_result,
- schema_store->SetSchema(schema));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaStore::SetSchemaResult set_schema_result,
+ schema_store->SetSchema(schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId new_email_schema_type_id,
schema_store->GetSchemaTypeId("email"));
@@ -2566,16 +2789,18 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) {
// Check that the FilterCache holds the new SchemaTypeIds
ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
email_data,
- document_store->GetAliveDocumentFilterData(email_document_id));
+ document_store->GetAliveDocumentFilterData(
+ email_document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(email_data.schema_type_id(), Eq(new_email_schema_type_id));
ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
message_data,
- document_store->GetAliveDocumentFilterData(message_document_id));
+ document_store->GetAliveDocumentFilterData(
+ message_document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(message_data.schema_type_id(), Eq(new_message_schema_type_id));
}
-TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreDeletesInvalidDocuments) {
+TEST_P(DocumentStoreTest, OptimizedUpdateSchemaStoreDeletesInvalidDocuments) {
const std::string schema_store_dir = test_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
@@ -2593,7 +2818,9 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreDeletesInvalidDocuments) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Add two documents, with and without a subject
DocumentProto email_without_subject = DocumentBuilder()
@@ -2614,8 +2841,8 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreDeletesInvalidDocuments) {
// Insert documents and check they're ok
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2637,7 +2864,8 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreDeletesInvalidDocuments) {
ICING_ASSERT_OK_AND_ASSIGN(
SchemaStore::SetSchemaResult set_schema_result,
schema_store->SetSchema(schema,
- /*ignore_errors_and_delete_documents=*/true));
+ /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_EXPECT_OK(document_store->OptimizedUpdateSchemaStore(
schema_store.get(), set_schema_result));
@@ -2651,7 +2879,7 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreDeletesInvalidDocuments) {
IsOkAndHolds(EqualsProto(email_with_subject)));
}
-TEST_F(DocumentStoreTest,
+TEST_P(DocumentStoreTest,
OptimizedUpdateSchemaStoreDeletesDocumentsByDeletedSchemaType) {
const std::string schema_store_dir = test_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
@@ -2667,7 +2895,9 @@ TEST_F(DocumentStoreTest,
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Add a "email" and "message" document
DocumentProto email_document = DocumentBuilder()
@@ -2687,8 +2917,8 @@ TEST_F(DocumentStoreTest,
// Insert documents and check they're ok
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2710,7 +2940,8 @@ TEST_F(DocumentStoreTest,
ICING_ASSERT_OK_AND_ASSIGN(
SchemaStore::SetSchemaResult set_schema_result,
schema_store->SetSchema(new_schema,
- /*ignore_errors_and_delete_documents=*/true));
+ /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_EXPECT_OK(document_store->OptimizedUpdateSchemaStore(
schema_store.get(), set_schema_result));
@@ -2724,11 +2955,11 @@ TEST_F(DocumentStoreTest,
IsOkAndHolds(EqualsProto(message_document)));
}
-TEST_F(DocumentStoreTest, GetOptimizeInfo) {
+TEST_P(DocumentStoreTest, GetOptimizeInfo) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2748,8 +2979,9 @@ TEST_F(DocumentStoreTest, GetOptimizeInfo) {
EXPECT_THAT(optimize_info.estimated_optimizable_bytes, Eq(0));
// Delete a document. Now something is optimizable
- ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
- test_document1_.uri()));
+ ICING_EXPECT_OK(document_store->Delete(
+ test_document1_.namespace_(), test_document1_.uri(),
+ fake_clock_.GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(optimize_info, document_store->GetOptimizeInfo());
EXPECT_THAT(optimize_info.total_docs, Eq(1));
EXPECT_THAT(optimize_info.optimizable_docs, Eq(1));
@@ -2764,8 +2996,8 @@ TEST_F(DocumentStoreTest, GetOptimizeInfo) {
document_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
document_store.reset();
ICING_ASSERT_OK_AND_ASSIGN(
- create_result, DocumentStore::Create(&filesystem_, optimized_dir,
- &fake_clock_, schema_store_.get()));
+ create_result, CreateDocumentStore(&filesystem_, optimized_dir,
+ &fake_clock_, schema_store_.get()));
std::unique_ptr<DocumentStore> optimized_document_store =
std::move(create_result.document_store);
@@ -2776,11 +3008,11 @@ TEST_F(DocumentStoreTest, GetOptimizeInfo) {
EXPECT_THAT(optimize_info.estimated_optimizable_bytes, Eq(0));
}
-TEST_F(DocumentStoreTest, GetAllNamespaces) {
+TEST_P(DocumentStoreTest, GetAllNamespaces) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2828,13 +3060,15 @@ TEST_F(DocumentStoreTest, GetAllNamespaces) {
// After deleting namespace2_uri1, there's still namespace2_uri2, so
// "namespace2" still shows up in results
- ICING_EXPECT_OK(document_store->Delete("namespace2", "uri1"));
+ ICING_EXPECT_OK(document_store->Delete(
+ "namespace2", "uri1", fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(document_store->GetAllNamespaces(),
UnorderedElementsAre("namespace1", "namespace2", "namespace3"));
// After deleting namespace2_uri2, there's no more documents in "namespace2"
- ICING_EXPECT_OK(document_store->Delete("namespace2", "uri2"));
+ ICING_EXPECT_OK(document_store->Delete(
+ "namespace2", "uri2", fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(document_store->GetAllNamespaces(),
UnorderedElementsAre("namespace1", "namespace3"));
@@ -2846,11 +3080,11 @@ TEST_F(DocumentStoreTest, GetAllNamespaces) {
UnorderedElementsAre("namespace1"));
}
-TEST_F(DocumentStoreTest, ReportUsageWithDifferentTimestampsAndGetUsageScores) {
+TEST_P(DocumentStoreTest, ReportUsageWithDifferentTimestampsAndGetUsageScores) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2866,8 +3100,11 @@ TEST_F(DocumentStoreTest, ReportUsageWithDifferentTimestampsAndGetUsageScores) {
UsageStore::UsageScores expected_scores;
expected_scores.usage_type1_last_used_timestamp_s = 1;
++expected_scores.usage_type1_count;
- ASSERT_THAT(document_store->GetUsageScores(document_id),
- IsOkAndHolds(expected_scores));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ UsageStore::UsageScores actual_scores,
+ document_store->GetUsageScores(document_id,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
// Report usage with type 1 and time 5, time should be updated.
UsageReport usage_report_type1_time5 = CreateUsageReport(
@@ -2877,8 +3114,10 @@ TEST_F(DocumentStoreTest, ReportUsageWithDifferentTimestampsAndGetUsageScores) {
expected_scores.usage_type1_last_used_timestamp_s = 5;
++expected_scores.usage_type1_count;
- ASSERT_THAT(document_store->GetUsageScores(document_id),
- IsOkAndHolds(expected_scores));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ actual_scores, document_store->GetUsageScores(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
// Report usage with type 2 and time 1.
UsageReport usage_report_type2_time1 = CreateUsageReport(
@@ -2888,8 +3127,10 @@ TEST_F(DocumentStoreTest, ReportUsageWithDifferentTimestampsAndGetUsageScores) {
expected_scores.usage_type2_last_used_timestamp_s = 1;
++expected_scores.usage_type2_count;
- ASSERT_THAT(document_store->GetUsageScores(document_id),
- IsOkAndHolds(expected_scores));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ actual_scores, document_store->GetUsageScores(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
// Report usage with type 2 and time 5.
UsageReport usage_report_type2_time5 = CreateUsageReport(
@@ -2899,8 +3140,10 @@ TEST_F(DocumentStoreTest, ReportUsageWithDifferentTimestampsAndGetUsageScores) {
expected_scores.usage_type2_last_used_timestamp_s = 5;
++expected_scores.usage_type2_count;
- ASSERT_THAT(document_store->GetUsageScores(document_id),
- IsOkAndHolds(expected_scores));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ actual_scores, document_store->GetUsageScores(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
// Report usage with type 3 and time 1.
UsageReport usage_report_type3_time1 = CreateUsageReport(
@@ -2910,8 +3153,10 @@ TEST_F(DocumentStoreTest, ReportUsageWithDifferentTimestampsAndGetUsageScores) {
expected_scores.usage_type3_last_used_timestamp_s = 1;
++expected_scores.usage_type3_count;
- ASSERT_THAT(document_store->GetUsageScores(document_id),
- IsOkAndHolds(expected_scores));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ actual_scores, document_store->GetUsageScores(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
// Report usage with type 3 and time 5.
UsageReport usage_report_type3_time5 = CreateUsageReport(
@@ -2921,15 +3166,17 @@ TEST_F(DocumentStoreTest, ReportUsageWithDifferentTimestampsAndGetUsageScores) {
expected_scores.usage_type3_last_used_timestamp_s = 5;
++expected_scores.usage_type3_count;
- ASSERT_THAT(document_store->GetUsageScores(document_id),
- IsOkAndHolds(expected_scores));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ actual_scores, document_store->GetUsageScores(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
}
-TEST_F(DocumentStoreTest, ReportUsageWithDifferentTypesAndGetUsageScores) {
+TEST_P(DocumentStoreTest, ReportUsageWithDifferentTypesAndGetUsageScores) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2944,8 +3191,11 @@ TEST_F(DocumentStoreTest, ReportUsageWithDifferentTypesAndGetUsageScores) {
UsageStore::UsageScores expected_scores;
++expected_scores.usage_type1_count;
- ASSERT_THAT(document_store->GetUsageScores(document_id),
- IsOkAndHolds(expected_scores));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ UsageStore::UsageScores actual_scores,
+ document_store->GetUsageScores(document_id,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
// Report usage with type 2.
UsageReport usage_report_type2 = CreateUsageReport(
@@ -2954,8 +3204,10 @@ TEST_F(DocumentStoreTest, ReportUsageWithDifferentTypesAndGetUsageScores) {
ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type2));
++expected_scores.usage_type2_count;
- ASSERT_THAT(document_store->GetUsageScores(document_id),
- IsOkAndHolds(expected_scores));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ actual_scores, document_store->GetUsageScores(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
// Report usage with type 3.
UsageReport usage_report_type3 = CreateUsageReport(
@@ -2964,18 +3216,20 @@ TEST_F(DocumentStoreTest, ReportUsageWithDifferentTypesAndGetUsageScores) {
ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type3));
++expected_scores.usage_type3_count;
- ASSERT_THAT(document_store->GetUsageScores(document_id),
- IsOkAndHolds(expected_scores));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ actual_scores, document_store->GetUsageScores(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
}
-TEST_F(DocumentStoreTest, UsageScoresShouldNotBeClearedOnChecksumMismatch) {
+TEST_P(DocumentStoreTest, UsageScoresShouldNotBeClearedOnChecksumMismatch) {
UsageStore::UsageScores expected_scores;
DocumentId document_id;
{
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2989,32 +3243,38 @@ TEST_F(DocumentStoreTest, UsageScoresShouldNotBeClearedOnChecksumMismatch) {
ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1));
++expected_scores.usage_type1_count;
- ASSERT_THAT(document_store->GetUsageScores(document_id),
- IsOkAndHolds(expected_scores));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ UsageStore::UsageScores actual_scores,
+ document_store->GetUsageScores(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
}
CorruptDocStoreHeaderChecksumFile();
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
// Usage scores should be the same.
- ASSERT_THAT(document_store->GetUsageScores(document_id),
- IsOkAndHolds(expected_scores));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ UsageStore::UsageScores actual_scores,
+ document_store->GetUsageScores(document_id,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
}
-TEST_F(DocumentStoreTest, UsageScoresShouldBeAvailableAfterDataLoss) {
+TEST_P(DocumentStoreTest, UsageScoresShouldBeAvailableAfterDataLoss) {
UsageStore::UsageScores expected_scores;
DocumentId document_id;
{
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -3028,8 +3288,11 @@ TEST_F(DocumentStoreTest, UsageScoresShouldBeAvailableAfterDataLoss) {
ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1));
++expected_scores.usage_type1_count;
- ASSERT_THAT(document_store->GetUsageScores(document_id),
- IsOkAndHolds(expected_scores));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ UsageStore::UsageScores actual_scores,
+ document_store->GetUsageScores(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
}
// "Corrupt" the content written in the log by adding non-checksummed data to
@@ -3047,21 +3310,24 @@ TEST_F(DocumentStoreTest, UsageScoresShouldBeAvailableAfterDataLoss) {
// Successfully recover from a data loss issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
// Usage scores should still be available.
- ASSERT_THAT(document_store->GetUsageScores(document_id),
- IsOkAndHolds(expected_scores));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ UsageStore::UsageScores actual_scores,
+ document_store->GetUsageScores(document_id,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
}
-TEST_F(DocumentStoreTest, UsageScoresShouldBeCopiedOverToUpdatedDocument) {
+TEST_P(DocumentStoreTest, UsageScoresShouldBeCopiedOverToUpdatedDocument) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -3077,8 +3343,11 @@ TEST_F(DocumentStoreTest, UsageScoresShouldBeCopiedOverToUpdatedDocument) {
UsageStore::UsageScores expected_scores;
++expected_scores.usage_type1_count;
- ASSERT_THAT(document_store->GetUsageScores(document_id),
- IsOkAndHolds(expected_scores));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ UsageStore::UsageScores actual_scores,
+ document_store->GetUsageScores(document_id,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
// Update the document.
ICING_ASSERT_OK_AND_ASSIGN(
@@ -3088,15 +3357,18 @@ TEST_F(DocumentStoreTest, UsageScoresShouldBeCopiedOverToUpdatedDocument) {
ASSERT_THAT(updated_document_id, Not(Eq(document_id)));
// Usage scores should be the same.
- EXPECT_THAT(document_store->GetUsageScores(updated_document_id),
- IsOkAndHolds(expected_scores));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ actual_scores,
+ document_store->GetUsageScores(updated_document_id,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
}
-TEST_F(DocumentStoreTest, UsageScoresShouldPersistOnOptimize) {
+TEST_P(DocumentStoreTest, UsageScoresShouldPersistOnOptimize) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -3106,7 +3378,8 @@ TEST_F(DocumentStoreTest, UsageScoresShouldPersistOnOptimize) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id2,
document_store->Put(DocumentProto(test_document2_)));
- ICING_ASSERT_OK(document_store->Delete(document_id1));
+ ICING_ASSERT_OK(document_store->Delete(
+ document_id1, fake_clock_.GetSystemTimeMilliseconds()));
// Report usage of document 2.
UsageReport usage_report = CreateUsageReport(
@@ -3116,8 +3389,11 @@ TEST_F(DocumentStoreTest, UsageScoresShouldPersistOnOptimize) {
UsageStore::UsageScores expected_scores;
++expected_scores.usage_type1_count;
- ASSERT_THAT(document_store->GetUsageScores(document_id2),
- IsOkAndHolds(expected_scores));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ UsageStore::UsageScores actual_scores,
+ document_store->GetUsageScores(document_id2,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
// Run optimize
std::string optimized_dir = document_store_dir_ + "/optimize_test";
@@ -3127,24 +3403,27 @@ TEST_F(DocumentStoreTest, UsageScoresShouldPersistOnOptimize) {
// Get optimized document store
ICING_ASSERT_OK_AND_ASSIGN(
- create_result, DocumentStore::Create(&filesystem_, optimized_dir,
- &fake_clock_, schema_store_.get()));
+ create_result, CreateDocumentStore(&filesystem_, optimized_dir,
+ &fake_clock_, schema_store_.get()));
std::unique_ptr<DocumentStore> optimized_document_store =
std::move(create_result.document_store);
// Usage scores should be the same.
// The original document_id2 should have become document_id2 - 1.
- ASSERT_THAT(optimized_document_store->GetUsageScores(document_id2 - 1),
- IsOkAndHolds(expected_scores));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ actual_scores,
+ optimized_document_store->GetUsageScores(
+ document_id2 - 1, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
}
-TEST_F(DocumentStoreTest, DetectPartialDataLoss) {
+TEST_P(DocumentStoreTest, DetectPartialDataLoss) {
{
// Can put and delete fine.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
@@ -3172,14 +3451,14 @@ TEST_F(DocumentStoreTest, DetectPartialDataLoss) {
// Successfully recover from a data loss issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
ASSERT_THAT(create_result.data_loss, Eq(DataLoss::PARTIAL));
}
-TEST_F(DocumentStoreTest, DetectCompleteDataLoss) {
+TEST_P(DocumentStoreTest, DetectCompleteDataLoss) {
int64_t corruptible_offset;
const std::string document_log_file = absl_ports::StrCat(
document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename());
@@ -3187,8 +3466,8 @@ TEST_F(DocumentStoreTest, DetectCompleteDataLoss) {
// Can put and delete fine.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
@@ -3237,14 +3516,14 @@ TEST_F(DocumentStoreTest, DetectCompleteDataLoss) {
// Successfully recover from a data loss issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
ASSERT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE));
}
-TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) {
+TEST_P(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) {
// The directory testdata/score_cache_without_length_in_tokens/document_store
// contains only the scoring_cache and the document_store_header (holding the
// crc for the scoring_cache). If the current code is compatible with the
@@ -3283,10 +3562,13 @@ TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) {
InitializeStatsProto initialize_stats;
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- &initialize_stats));
+ DocumentStore::Create(
+ &filesystem_, document_store_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ GetParam().namespace_id_fingerprint, GetParam().pre_mapping_fbv,
+ GetParam().use_persistent_hash_map,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ &initialize_stats));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
// The document log is using the legacy v0 format so that a migration is
@@ -3295,11 +3577,11 @@ TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) {
InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT);
}
-TEST_F(DocumentStoreTest, DocumentStoreStorageInfo) {
+TEST_P(DocumentStoreTest, DocumentStoreStorageInfo) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -3351,7 +3633,8 @@ TEST_F(DocumentStoreTest, DocumentStoreStorageInfo) {
ICING_ASSERT_OK(doc_store->ReportUsage(usage_report_type1));
// Delete the first doc.
- ICING_ASSERT_OK(doc_store->Delete(document1.namespace_(), document1.uri()));
+ ICING_ASSERT_OK(doc_store->Delete(document1.namespace_(), document1.uri(),
+ fake_clock_.GetSystemTimeMilliseconds()));
// Expire the second doc.
fake_clock_.SetSystemTimeMilliseconds(document2.creation_timestamp_ms() +
@@ -3404,7 +3687,7 @@ TEST_F(DocumentStoreTest, DocumentStoreStorageInfo) {
Eq(0));
}
-TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) {
+TEST_P(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) {
// Start fresh and set the schema with one type.
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
@@ -3427,7 +3710,10 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
- ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// The typeid for "email" should be 0.
ASSERT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0));
@@ -3436,8 +3722,8 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) {
// Create the document store the first time and add an email document.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -3455,7 +3741,8 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) {
ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(doc));
ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData filter_data,
- doc_store->GetAliveDocumentFilterData(docid));
+ doc_store->GetAliveDocumentFilterData(
+ docid, fake_clock_.GetSystemTimeMilliseconds()));
ASSERT_THAT(filter_data.schema_type_id(), Eq(0));
}
@@ -3476,7 +3763,10 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) {
.SetCardinality(CARDINALITY_OPTIONAL)))
.AddType(email_type_config)
.Build();
- ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// Adding a new type should cause ids to be reassigned. Ids are assigned in
// order of appearance so 'alarm' should be 0 and 'email' should be 1.
ASSERT_THAT(schema_store->GetSchemaTypeId("alarm"), IsOkAndHolds(0));
@@ -3487,24 +3777,29 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) {
InitializeStatsProto initialize_stats;
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get(),
- /*force_recovery_and_revalidate_documents=*/true,
- &initialize_stats));
+ DocumentStore::Create(
+ &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/true,
+ GetParam().namespace_id_fingerprint, GetParam().pre_mapping_fbv,
+ GetParam().use_persistent_hash_map,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ &initialize_stats));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
// Ensure that the type id of the email document has been correctly updated.
ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData filter_data,
- doc_store->GetAliveDocumentFilterData(docid));
+ doc_store->GetAliveDocumentFilterData(
+ docid, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(filter_data.schema_type_id(), Eq(1));
EXPECT_THAT(initialize_stats.document_store_recovery_cause(),
Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
}
}
-TEST_F(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) {
+TEST_P(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) {
// Start fresh and set the schema with one type.
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
@@ -3527,7 +3822,10 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
- ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// The typeid for "email" should be 0.
ASSERT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0));
@@ -3536,8 +3834,8 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) {
// Create the document store the first time and add an email document.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -3555,7 +3853,8 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) {
ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(doc));
ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData filter_data,
- doc_store->GetAliveDocumentFilterData(docid));
+ doc_store->GetAliveDocumentFilterData(
+ docid, fake_clock_.GetSystemTimeMilliseconds()));
ASSERT_THAT(filter_data.schema_type_id(), Eq(0));
}
@@ -3576,7 +3875,10 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) {
.SetCardinality(CARDINALITY_OPTIONAL)))
.AddType(email_type_config)
.Build();
- ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// Adding a new type should cause ids to be reassigned. Ids are assigned in
// order of appearance so 'alarm' should be 0 and 'email' should be 1.
ASSERT_THAT(schema_store->GetSchemaTypeId("alarm"), IsOkAndHolds(0));
@@ -3586,21 +3888,21 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) {
// Create the document store the second time. Don't force recovery.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(
- &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(),
- /*force_recovery_and_revalidate_documents=*/false));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
// Check that the type id of the email document has not been updated.
ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData filter_data,
- doc_store->GetAliveDocumentFilterData(docid));
+ doc_store->GetAliveDocumentFilterData(
+ docid, fake_clock_.GetSystemTimeMilliseconds()));
ASSERT_THAT(filter_data.schema_type_id(), Eq(0));
}
}
-TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) {
+TEST_P(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) {
// Start fresh and set the schema with one type.
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
@@ -3623,7 +3925,10 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
- ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
DocumentProto docWithBody =
DocumentBuilder()
@@ -3652,8 +3957,8 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) {
// that has the 'body' section and one that doesn't.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -3683,7 +3988,8 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) {
.Build();
schema = SchemaBuilder().AddType(email_type_config).Build();
ASSERT_THAT(schema_store->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true),
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false),
IsOk());
{
@@ -3693,7 +3999,12 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) {
DocumentStore::CreateResult create_result,
DocumentStore::Create(
&filesystem_, document_store_dir_, &fake_clock_, schema_store.get(),
- /*force_recovery_and_revalidate_documents=*/true));
+ /*force_recovery_and_revalidate_documents=*/true,
+ GetParam().namespace_id_fingerprint, GetParam().pre_mapping_fbv,
+ GetParam().use_persistent_hash_map,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -3705,7 +4016,7 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) {
}
}
-TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) {
+TEST_P(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) {
// Start fresh and set the schema with one type.
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
@@ -3728,7 +4039,10 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
- ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
DocumentProto docWithBody =
DocumentBuilder()
@@ -3757,8 +4071,8 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) {
// that has the 'body' section and one that doesn't.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -3788,7 +4102,8 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) {
.Build();
schema = SchemaBuilder().AddType(email_type_config).Build();
ASSERT_THAT(schema_store->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true),
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false),
IsOk());
{
@@ -3797,9 +4112,8 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) {
CorruptDocStoreHeaderChecksumFile();
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(
- &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(),
- /*force_recovery_and_revalidate_documents=*/false));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -3811,7 +4125,7 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) {
}
}
-TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) {
+TEST_P(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) {
// Set up schema.
SchemaProto schema =
SchemaBuilder()
@@ -3836,7 +4150,10 @@ TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) {
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// Create dst directory that we'll initialize the DocumentStore over.
std::string document_store_dir = document_store_dir_ + "_migrate";
@@ -3873,10 +4190,13 @@ TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) {
InitializeStatsProto initialize_stats;
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir, &fake_clock_,
- schema_store.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- &initialize_stats));
+ DocumentStore::Create(
+ &filesystem_, document_store_dir, &fake_clock_, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ GetParam().pre_mapping_fbv, GetParam().use_persistent_hash_map,
+ GetParam().namespace_id_fingerprint,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ &initialize_stats));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -3932,7 +4252,7 @@ TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) {
IsOkAndHolds(EqualsProto(document3)));
}
-TEST_F(DocumentStoreTest, GetDebugInfo) {
+TEST_P(DocumentStoreTest, GetDebugInfo) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -3960,12 +4280,14 @@ TEST_F(DocumentStoreTest, GetDebugInfo) {
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -4033,7 +4355,8 @@ TEST_F(DocumentStoreTest, GetDebugInfo) {
EqualsProto(info3)));
// Delete document3.
- ICING_ASSERT_OK(document_store->Delete("namespace2", "email/3"));
+ ICING_ASSERT_OK(document_store->Delete(
+ "namespace2", "email/3", fake_clock_.GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentDebugInfoProto out2,
document_store->GetDebugInfo(DebugInfoVerbosity::DETAILED));
@@ -4054,7 +4377,7 @@ TEST_F(DocumentStoreTest, GetDebugInfo) {
EXPECT_THAT(out3.corpus_info(), IsEmpty());
}
-TEST_F(DocumentStoreTest, GetDebugInfoWithoutSchema) {
+TEST_P(DocumentStoreTest, GetDebugInfoWithoutSchema) {
std::string schema_store_dir = schema_store_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
@@ -4064,8 +4387,8 @@ TEST_F(DocumentStoreTest, GetDebugInfoWithoutSchema) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -4078,11 +4401,11 @@ TEST_F(DocumentStoreTest, GetDebugInfoWithoutSchema) {
EXPECT_THAT(out.corpus_info(), IsEmpty());
}
-TEST_F(DocumentStoreTest, GetDebugInfoForEmptyDocumentStore) {
+TEST_P(DocumentStoreTest, GetDebugInfoForEmptyDocumentStore) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -4095,6 +4418,198 @@ TEST_F(DocumentStoreTest, GetDebugInfoForEmptyDocumentStore) {
EXPECT_THAT(out.corpus_info(), IsEmpty());
}
+TEST_P(DocumentStoreTest, SwitchKeyMapperTypeShouldRegenerateDerivedFiles) {
+ std::string dynamic_trie_uri_mapper_dir =
+ document_store_dir_ + "/key_mapper_dir";
+ std::string persistent_hash_map_uri_mapper_dir =
+ document_store_dir_ + "/uri_mapper";
+ DocumentId document_id1;
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ GetParam().namespace_id_fingerprint,
+ GetParam().pre_mapping_fbv,
+ GetParam().use_persistent_hash_map,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ ICING_ASSERT_OK_AND_ASSIGN(document_id1, doc_store->Put(test_document1_));
+
+ if (GetParam().use_persistent_hash_map) {
+ EXPECT_THAT(filesystem_.DirectoryExists(
+ persistent_hash_map_uri_mapper_dir.c_str()),
+ IsTrue());
+ EXPECT_THAT(
+ filesystem_.DirectoryExists(dynamic_trie_uri_mapper_dir.c_str()),
+ IsFalse());
+ } else {
+ EXPECT_THAT(filesystem_.DirectoryExists(
+ persistent_hash_map_uri_mapper_dir.c_str()),
+ IsFalse());
+ EXPECT_THAT(
+ filesystem_.DirectoryExists(dynamic_trie_uri_mapper_dir.c_str()),
+ IsTrue());
+ }
+ }
+
+ // Switch key mapper. We should get I/O error and derived files should be
+ // regenerated.
+ {
+ bool switch_key_mapper_flag = !GetParam().use_persistent_hash_map;
+ InitializeStatsProto initialize_stats;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ GetParam().namespace_id_fingerprint, GetParam().pre_mapping_fbv,
+ /*use_persistent_hash_map=*/switch_key_mapper_flag,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ &initialize_stats));
+ EXPECT_THAT(initialize_stats.document_store_recovery_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ EXPECT_THAT(doc_store->GetDocumentId(test_document1_.namespace_(),
+ test_document1_.uri()),
+ IsOkAndHolds(document_id1));
+
+ if (switch_key_mapper_flag) {
+ EXPECT_THAT(filesystem_.DirectoryExists(
+ persistent_hash_map_uri_mapper_dir.c_str()),
+ IsTrue());
+ EXPECT_THAT(
+ filesystem_.DirectoryExists(dynamic_trie_uri_mapper_dir.c_str()),
+ IsFalse());
+ } else {
+ EXPECT_THAT(filesystem_.DirectoryExists(
+ persistent_hash_map_uri_mapper_dir.c_str()),
+ IsFalse());
+ EXPECT_THAT(
+ filesystem_.DirectoryExists(dynamic_trie_uri_mapper_dir.c_str()),
+ IsTrue());
+ }
+ }
+}
+
+TEST_P(DocumentStoreTest, SameKeyMapperTypeShouldNotRegenerateDerivedFiles) {
+ std::string dynamic_trie_uri_mapper_dir =
+ document_store_dir_ + "/key_mapper_dir";
+ std::string persistent_hash_map_uri_mapper_dir =
+ document_store_dir_ + "/uri_mapper";
+ DocumentId document_id1;
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ GetParam().namespace_id_fingerprint,
+ GetParam().pre_mapping_fbv,
+ GetParam().use_persistent_hash_map,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ ICING_ASSERT_OK_AND_ASSIGN(document_id1, doc_store->Put(test_document1_));
+
+ if (GetParam().use_persistent_hash_map) {
+ EXPECT_THAT(filesystem_.DirectoryExists(
+ persistent_hash_map_uri_mapper_dir.c_str()),
+ IsTrue());
+ EXPECT_THAT(
+ filesystem_.DirectoryExists(dynamic_trie_uri_mapper_dir.c_str()),
+ IsFalse());
+ } else {
+ EXPECT_THAT(filesystem_.DirectoryExists(
+ persistent_hash_map_uri_mapper_dir.c_str()),
+ IsFalse());
+ EXPECT_THAT(
+ filesystem_.DirectoryExists(dynamic_trie_uri_mapper_dir.c_str()),
+ IsTrue());
+ }
+ }
+
+ // Use the same key mapper type. Derived files should not be regenerated.
+ {
+ InitializeStatsProto initialize_stats;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ GetParam().namespace_id_fingerprint,
+ GetParam().pre_mapping_fbv,
+ GetParam().use_persistent_hash_map,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ &initialize_stats));
+ EXPECT_THAT(initialize_stats.document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ EXPECT_THAT(doc_store->GetDocumentId(test_document1_.namespace_(),
+ test_document1_.uri()),
+ IsOkAndHolds(document_id1));
+
+ if (GetParam().use_persistent_hash_map) {
+ EXPECT_THAT(filesystem_.DirectoryExists(
+ persistent_hash_map_uri_mapper_dir.c_str()),
+ IsTrue());
+ EXPECT_THAT(
+ filesystem_.DirectoryExists(dynamic_trie_uri_mapper_dir.c_str()),
+ IsFalse());
+ } else {
+ EXPECT_THAT(filesystem_.DirectoryExists(
+ persistent_hash_map_uri_mapper_dir.c_str()),
+ IsFalse());
+ EXPECT_THAT(
+ filesystem_.DirectoryExists(dynamic_trie_uri_mapper_dir.c_str()),
+ IsTrue());
+ }
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ DocumentStoreTest, DocumentStoreTest,
+ testing::Values(
+ DocumentStoreTestParam(/*namespace_id_fingerprint_in=*/false,
+ /*pre_mapping_fbv_in=*/false,
+ /*use_persistent_hash_map_in=*/false),
+ DocumentStoreTestParam(/*namespace_id_fingerprint_in=*/true,
+ /*pre_mapping_fbv_in=*/false,
+ /*use_persistent_hash_map_in=*/false),
+ DocumentStoreTestParam(/*namespace_id_fingerprint_in=*/false,
+ /*pre_mapping_fbv_in=*/true,
+ /*use_persistent_hash_map_in=*/false),
+ DocumentStoreTestParam(/*namespace_id_fingerprint_in=*/true,
+ /*pre_mapping_fbv_in=*/true,
+ /*use_persistent_hash_map_in=*/false),
+ DocumentStoreTestParam(/*namespace_id_fingerprint_in=*/false,
+ /*pre_mapping_fbv_in=*/false,
+ /*use_persistent_hash_map_in=*/true),
+ DocumentStoreTestParam(/*namespace_id_fingerprint_in=*/true,
+ /*pre_mapping_fbv_in=*/false,
+ /*use_persistent_hash_map_in=*/true),
+ DocumentStoreTestParam(/*namespace_id_fingerprint_in=*/false,
+ /*pre_mapping_fbv_in=*/true,
+ /*use_persistent_hash_map_in=*/true),
+ DocumentStoreTestParam(/*namespace_id_fingerprint_in=*/true,
+ /*pre_mapping_fbv_in=*/true,
+ /*use_persistent_hash_map_in=*/true)));
+
} // namespace
} // namespace lib
diff --git a/icing/store/key-mapper_benchmark.cc b/icing/store/key-mapper_benchmark.cc
index b649bc7..c25fe30 100644
--- a/icing/store/key-mapper_benchmark.cc
+++ b/icing/store/key-mapper_benchmark.cc
@@ -35,6 +35,7 @@ namespace lib {
namespace {
using ::testing::Eq;
+using ::testing::IsTrue;
using ::testing::Not;
class KeyMapperBenchmark {
@@ -78,9 +79,11 @@ class KeyMapperBenchmark {
template <>
libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<int>>>
CreateKeyMapper<PersistentHashMapKeyMapper<int>>(int max_num_entries) {
+ std::string working_path =
+ absl_ports::StrCat(base_dir, "/", "key_mapper_dir");
return PersistentHashMapKeyMapper<int>::Create(
- filesystem, base_dir, max_num_entries,
- /*average_kv_byte_size=*/kKeyLength + 1 + sizeof(int),
+ filesystem, std::move(working_path), /*pre_mapping_fbv=*/true,
+ max_num_entries, /*average_kv_byte_size=*/kKeyLength + 1 + sizeof(int),
/*max_load_factor_percent=*/100);
}
@@ -109,6 +112,7 @@ void BM_PutMany(benchmark::State& state) {
state.PauseTiming();
benchmark.filesystem.DeleteDirectoryRecursively(benchmark.base_dir.c_str());
DestructibleDirectory ddir(&benchmark.filesystem, benchmark.base_dir);
+ ASSERT_THAT(ddir.is_valid(), IsTrue());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<KeyMapper<int>> key_mapper,
benchmark.CreateKeyMapper<KeyMapperType>(num_keys));
@@ -166,6 +170,7 @@ void BM_Put(benchmark::State& state) {
KeyMapperBenchmark benchmark;
benchmark.filesystem.DeleteDirectoryRecursively(benchmark.base_dir.c_str());
DestructibleDirectory ddir(&benchmark.filesystem, benchmark.base_dir);
+ ASSERT_THAT(ddir.is_valid(), IsTrue());
// The overhead of state.PauseTiming is too large and affects the benchmark
// result a lot, so pre-generate enough kvps to avoid calling too many times
@@ -206,6 +211,7 @@ void BM_Get(benchmark::State& state) {
KeyMapperBenchmark benchmark;
benchmark.filesystem.DeleteDirectoryRecursively(benchmark.base_dir.c_str());
DestructibleDirectory ddir(&benchmark.filesystem, benchmark.base_dir);
+ ASSERT_THAT(ddir.is_valid(), IsTrue());
// Create a key mapper with num_keys entries.
ICING_ASSERT_OK_AND_ASSIGN(
@@ -260,6 +266,7 @@ void BM_Iterator(benchmark::State& state) {
KeyMapperBenchmark benchmark;
benchmark.filesystem.DeleteDirectoryRecursively(benchmark.base_dir.c_str());
DestructibleDirectory ddir(&benchmark.filesystem, benchmark.base_dir);
+ ASSERT_THAT(ddir.is_valid(), IsTrue());
// Create a key mapper with num_keys entries.
ICING_ASSERT_OK_AND_ASSIGN(
diff --git a/icing/store/key-mapper_test.cc b/icing/store/key-mapper_test.cc
index 682888d..fa7d1e8 100644
--- a/icing/store/key-mapper_test.cc
+++ b/icing/store/key-mapper_test.cc
@@ -16,14 +16,12 @@
#include <memory>
#include <string>
-#include <type_traits>
#include <unordered_map>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
-#include "icing/absl_ports/canonical_errors.h"
#include "icing/file/filesystem.h"
#include "icing/store/document-id.h"
#include "icing/store/dynamic-trie-key-mapper.h"
@@ -32,6 +30,7 @@
#include "icing/testing/tmp-directory.h"
using ::testing::IsEmpty;
+using ::testing::IsTrue;
using ::testing::Pair;
using ::testing::UnorderedElementsAre;
@@ -42,45 +41,65 @@ namespace {
constexpr int kMaxDynamicTrieKeyMapperSize = 3 * 1024 * 1024; // 3 MiB
-template <typename T>
-class KeyMapperTest : public ::testing::Test {
+enum class KeyMapperType {
+ kDynamicTrie,
+ kPersistentHashMap,
+};
+
+struct KeyMapperTestParam {
+ KeyMapperType key_mapper_type;
+ bool pre_mapping_fbv;
+
+ explicit KeyMapperTestParam(KeyMapperType key_mapper_type_in,
+ bool pre_mapping_fbv_in)
+ : key_mapper_type(key_mapper_type_in),
+ pre_mapping_fbv(pre_mapping_fbv_in) {}
+};
+
+class KeyMapperTest : public ::testing::TestWithParam<KeyMapperTestParam> {
protected:
- using KeyMapperType = T;
+ void SetUp() override {
+ base_dir_ = GetTestTempDir() + "/icing";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
- void SetUp() override { base_dir_ = GetTestTempDir() + "/key_mapper"; }
+ working_dir_ = base_dir_ + "/key_mapper";
+ }
void TearDown() override {
filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
}
- template <typename UnknownKeyMapperType>
libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<DocumentId>>>
CreateKeyMapper() {
- return absl_ports::InvalidArgumentError("Unknown type");
+ const KeyMapperTestParam& param = GetParam();
+ switch (param.key_mapper_type) {
+ case KeyMapperType::kDynamicTrie:
+ return DynamicTrieKeyMapper<DocumentId>::Create(
+ filesystem_, working_dir_, kMaxDynamicTrieKeyMapperSize);
+ case KeyMapperType::kPersistentHashMap:
+ return PersistentHashMapKeyMapper<DocumentId>::Create(
+ filesystem_, working_dir_, param.pre_mapping_fbv);
+ }
}
- template <>
- libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<DocumentId>>>
- CreateKeyMapper<DynamicTrieKeyMapper<DocumentId>>() {
- return DynamicTrieKeyMapper<DocumentId>::Create(
- filesystem_, base_dir_, kMaxDynamicTrieKeyMapperSize);
- }
-
- template <>
- libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<DocumentId>>>
- CreateKeyMapper<PersistentHashMapKeyMapper<DocumentId>>() {
- return PersistentHashMapKeyMapper<DocumentId>::Create(filesystem_,
- base_dir_);
+ libtextclassifier3::Status DeleteKeyMapper() {
+ const KeyMapperTestParam& param = GetParam();
+ switch (param.key_mapper_type) {
+ case KeyMapperType::kDynamicTrie:
+ return DynamicTrieKeyMapper<DocumentId>::Delete(filesystem_,
+ working_dir_);
+ case KeyMapperType::kPersistentHashMap:
+ return PersistentHashMapKeyMapper<DocumentId>::Delete(filesystem_,
+ working_dir_);
+ }
}
std::string base_dir_;
+ std::string working_dir_;
Filesystem filesystem_;
};
-using TestTypes = ::testing::Types<DynamicTrieKeyMapper<DocumentId>,
- PersistentHashMapKeyMapper<DocumentId>>;
-TYPED_TEST_SUITE(KeyMapperTest, TestTypes);
-
std::unordered_map<std::string, DocumentId> GetAllKeyValuePairs(
const KeyMapper<DocumentId>* key_mapper) {
std::unordered_map<std::string, DocumentId> ret;
@@ -93,15 +112,15 @@ std::unordered_map<std::string, DocumentId> GetAllKeyValuePairs(
return ret;
}
-TYPED_TEST(KeyMapperTest, CreateNewKeyMapper) {
+TEST_P(KeyMapperTest, CreateNewKeyMapper) {
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
- this->template CreateKeyMapper<TypeParam>());
+ CreateKeyMapper());
EXPECT_THAT(key_mapper->num_keys(), 0);
}
-TYPED_TEST(KeyMapperTest, CanUpdateSameKeyMultipleTimes) {
+TEST_P(KeyMapperTest, CanUpdateSameKeyMultipleTimes) {
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
- this->template CreateKeyMapper<TypeParam>());
+ CreateKeyMapper());
ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100));
ICING_EXPECT_OK(key_mapper->Put("default-youtube.com", 50));
@@ -117,9 +136,9 @@ TYPED_TEST(KeyMapperTest, CanUpdateSameKeyMultipleTimes) {
EXPECT_THAT(key_mapper->num_keys(), 2);
}
-TYPED_TEST(KeyMapperTest, GetOrPutOk) {
+TEST_P(KeyMapperTest, GetOrPutOk) {
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
- this->template CreateKeyMapper<TypeParam>());
+ CreateKeyMapper());
EXPECT_THAT(key_mapper->Get("foo"),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -127,9 +146,9 @@ TYPED_TEST(KeyMapperTest, GetOrPutOk) {
EXPECT_THAT(key_mapper->Get("foo"), IsOkAndHolds(1));
}
-TYPED_TEST(KeyMapperTest, CanPersistToDiskRegularly) {
+TEST_P(KeyMapperTest, CanPersistToDiskRegularly) {
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
- this->template CreateKeyMapper<TypeParam>());
+ CreateKeyMapper());
// Can persist an empty DynamicTrieKeyMapper.
ICING_EXPECT_OK(key_mapper->PersistToDisk());
@@ -152,16 +171,15 @@ TYPED_TEST(KeyMapperTest, CanPersistToDiskRegularly) {
EXPECT_THAT(key_mapper->num_keys(), 2);
}
-TYPED_TEST(KeyMapperTest, CanUseAcrossMultipleInstances) {
+TEST_P(KeyMapperTest, CanUseAcrossMultipleInstances) {
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
- this->template CreateKeyMapper<TypeParam>());
+ CreateKeyMapper());
ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100));
ICING_EXPECT_OK(key_mapper->PersistToDisk());
key_mapper.reset();
- ICING_ASSERT_OK_AND_ASSIGN(key_mapper,
- this->template CreateKeyMapper<TypeParam>());
+ ICING_ASSERT_OK_AND_ASSIGN(key_mapper, CreateKeyMapper());
EXPECT_THAT(key_mapper->num_keys(), 1);
EXPECT_THAT(key_mapper->Get("default-google.com"), IsOkAndHolds(100));
@@ -173,29 +191,26 @@ TYPED_TEST(KeyMapperTest, CanUseAcrossMultipleInstances) {
EXPECT_THAT(key_mapper->Get("default-google.com"), IsOkAndHolds(300));
}
-TYPED_TEST(KeyMapperTest, CanDeleteAndRestartKeyMapping) {
+TEST_P(KeyMapperTest, CanDeleteAndRestartKeyMapping) {
// Can delete even if there's nothing there
- ICING_EXPECT_OK(
- TestFixture::KeyMapperType::Delete(this->filesystem_, this->base_dir_));
+ ICING_EXPECT_OK(DeleteKeyMapper());
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
- this->template CreateKeyMapper<TypeParam>());
+ CreateKeyMapper());
ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100));
ICING_EXPECT_OK(key_mapper->PersistToDisk());
- ICING_EXPECT_OK(
- TestFixture::KeyMapperType::Delete(this->filesystem_, this->base_dir_));
+ ICING_EXPECT_OK(DeleteKeyMapper());
key_mapper.reset();
- ICING_ASSERT_OK_AND_ASSIGN(key_mapper,
- this->template CreateKeyMapper<TypeParam>());
+ ICING_ASSERT_OK_AND_ASSIGN(key_mapper, CreateKeyMapper());
EXPECT_THAT(key_mapper->num_keys(), 0);
ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100));
EXPECT_THAT(key_mapper->num_keys(), 1);
}
-TYPED_TEST(KeyMapperTest, Iterator) {
+TEST_P(KeyMapperTest, Iterator) {
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
- this->template CreateKeyMapper<TypeParam>());
+ CreateKeyMapper());
EXPECT_THAT(GetAllKeyValuePairs(key_mapper.get()), IsEmpty());
ICING_EXPECT_OK(key_mapper->Put("foo", /*value=*/1));
@@ -209,6 +224,15 @@ TYPED_TEST(KeyMapperTest, Iterator) {
UnorderedElementsAre(Pair("foo", 1), Pair("bar", 2), Pair("baz", 3)));
}
+INSTANTIATE_TEST_SUITE_P(
+ KeyMapperTest, KeyMapperTest,
+ testing::Values(KeyMapperTestParam(KeyMapperType::kDynamicTrie,
+ /*pre_mapping_fbv_in=*/true),
+ KeyMapperTestParam(KeyMapperType::kPersistentHashMap,
+ /*pre_mapping_fbv_in=*/true),
+ KeyMapperTestParam(KeyMapperType::kPersistentHashMap,
+ /*pre_mapping_fbv_in=*/false)));
+
} // namespace
} // namespace lib
diff --git a/icing/store/persistent-hash-map-key-mapper.h b/icing/store/persistent-hash-map-key-mapper.h
index a13ec11..0596fe3 100644
--- a/icing/store/persistent-hash-map-key-mapper.h
+++ b/icing/store/persistent-hash-map-key-mapper.h
@@ -38,16 +38,28 @@ namespace lib {
template <typename T, typename Formatter = absl_ports::DefaultFormatter>
class PersistentHashMapKeyMapper : public KeyMapper<T, Formatter> {
public:
+ static constexpr int32_t kDefaultMaxNumEntries =
+ PersistentHashMap::Entry::kMaxNumEntries;
+ static constexpr int32_t kDefaultAverageKVByteSize =
+ PersistentHashMap::Options::kDefaultAverageKVByteSize;
+ static constexpr int32_t kDefaultMaxLoadFactorPercent =
+ PersistentHashMap::Options::kDefaultMaxLoadFactorPercent;
+
// Returns an initialized instance of PersistentHashMapKeyMapper that can
// immediately handle read/write operations.
// Returns any encountered IO errors.
//
// filesystem: Object to make system level calls
- // base_dir : Base directory used to save all the files required to persist
- // PersistentHashMapKeyMapper. If this base_dir was previously used
- // to create a PersistentHashMapKeyMapper, then this existing data
- // would be loaded. Otherwise, an empty PersistentHashMapKeyMapper
- // would be created.
+ // working_path: Working directory used to save all the files required to
+ // persist PersistentHashMapKeyMapper. If this working_path was
+ // previously used to create a PersistentHashMapKeyMapper, then
+ // this existing data would be loaded. Otherwise, an empty
+ // PersistentHashMapKeyMapper would be created. See
+ // PersistentStorage for more details about the concept of
+ // working_path.
+ // pre_mapping_fbv: flag indicating whether memory map max possible file size
+ // for underlying FileBackedVector before growing the actual
+ // file size.
// max_num_entries: max # of kvps. It will be used to compute 3 storages size.
// average_kv_byte_size: average byte size of a single key + serialized value.
// It will be used to compute kv_storage size.
@@ -60,24 +72,23 @@ class PersistentHashMapKeyMapper : public KeyMapper<T, Formatter> {
// considered valid.
static libtextclassifier3::StatusOr<
std::unique_ptr<PersistentHashMapKeyMapper<T, Formatter>>>
- Create(const Filesystem& filesystem, std::string_view base_dir,
- int32_t max_num_entries = PersistentHashMap::Entry::kMaxNumEntries,
- int32_t average_kv_byte_size =
- PersistentHashMap::Options::kDefaultAverageKVByteSize,
- int32_t max_load_factor_percent =
- PersistentHashMap::Options::kDefaultMaxLoadFactorPercent);
-
- // Deletes all the files associated with the PersistentHashMapKeyMapper.
+ Create(const Filesystem& filesystem, std::string working_path,
+ bool pre_mapping_fbv, int32_t max_num_entries = kDefaultMaxNumEntries,
+ int32_t average_kv_byte_size = kDefaultAverageKVByteSize,
+ int32_t max_load_factor_percent = kDefaultMaxLoadFactorPercent);
+
+ // Deletes working_path (and all the files under it recursively) associated
+ // with the PersistentHashMapKeyMapper.
//
- // base_dir : Base directory used to save all the files required to persist
- // PersistentHashMapKeyMapper. Should be the same as passed into
- // Create().
+ // working_path: Working directory used to save all the files required to
+ // persist PersistentHashMapKeyMapper. Should be the same as
+ // passed into Create().
//
// Returns:
// OK on success
// INTERNAL_ERROR on I/O error
static libtextclassifier3::Status Delete(const Filesystem& filesystem,
- std::string_view base_dir);
+ const std::string& working_path);
~PersistentHashMapKeyMapper() override = default;
@@ -122,7 +133,7 @@ class PersistentHashMapKeyMapper : public KeyMapper<T, Formatter> {
}
libtextclassifier3::StatusOr<Crc32> ComputeChecksum() override {
- return persistent_hash_map_->ComputeChecksum();
+ return persistent_hash_map_->UpdateChecksums();
}
private:
@@ -147,8 +158,6 @@ class PersistentHashMapKeyMapper : public KeyMapper<T, Formatter> {
PersistentHashMap::Iterator itr_;
};
- static constexpr std::string_view kKeyMapperDir = "key_mapper_dir";
-
// Use PersistentHashMapKeyMapper::Create() to instantiate.
explicit PersistentHashMapKeyMapper(
std::unique_ptr<PersistentHashMap> persistent_hash_map)
@@ -164,26 +173,21 @@ template <typename T, typename Formatter>
/* static */ libtextclassifier3::StatusOr<
std::unique_ptr<PersistentHashMapKeyMapper<T, Formatter>>>
PersistentHashMapKeyMapper<T, Formatter>::Create(
- const Filesystem& filesystem, std::string_view base_dir,
- int32_t max_num_entries, int32_t average_kv_byte_size,
+ const Filesystem& filesystem, std::string working_path,
+ bool pre_mapping_fbv, int32_t max_num_entries, int32_t average_kv_byte_size,
int32_t max_load_factor_percent) {
- const std::string key_mapper_dir =
- absl_ports::StrCat(base_dir, "/", kKeyMapperDir);
- if (!filesystem.CreateDirectoryRecursively(key_mapper_dir.c_str())) {
- return absl_ports::InternalError(absl_ports::StrCat(
- "Failed to create PersistentHashMapKeyMapper directory: ",
- key_mapper_dir));
- }
-
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem, key_mapper_dir,
+ filesystem, std::move(working_path),
PersistentHashMap::Options(
/*value_type_size_in=*/sizeof(T),
/*max_num_entries_in=*/max_num_entries,
/*max_load_factor_percent_in=*/max_load_factor_percent,
- /*average_kv_byte_size_in=*/average_kv_byte_size)));
+ /*average_kv_byte_size_in=*/average_kv_byte_size,
+ /*init_num_buckets_in=*/
+ PersistentHashMap::Options::kDefaultInitNumBuckets,
+ /*pre_mapping_fbv_in=*/pre_mapping_fbv)));
return std::unique_ptr<PersistentHashMapKeyMapper<T, Formatter>>(
new PersistentHashMapKeyMapper<T, Formatter>(
std::move(persistent_hash_map)));
@@ -191,16 +195,9 @@ PersistentHashMapKeyMapper<T, Formatter>::Create(
template <typename T, typename Formatter>
/* static */ libtextclassifier3::Status
-PersistentHashMapKeyMapper<T, Formatter>::Delete(const Filesystem& filesystem,
- std::string_view base_dir) {
- const std::string key_mapper_dir =
- absl_ports::StrCat(base_dir, "/", kKeyMapperDir);
- if (!filesystem.DeleteDirectoryRecursively(key_mapper_dir.c_str())) {
- return absl_ports::InternalError(absl_ports::StrCat(
- "Failed to delete PersistentHashMapKeyMapper directory: ",
- key_mapper_dir));
- }
- return libtextclassifier3::Status::OK;
+PersistentHashMapKeyMapper<T, Formatter>::Delete(
+ const Filesystem& filesystem, const std::string& working_path) {
+ return PersistentHashMap::Discard(filesystem, working_path);
}
} // namespace lib
diff --git a/icing/store/persistent-hash-map-key-mapper_test.cc b/icing/store/persistent-hash-map-key-mapper_test.cc
index c937c43..0d610e9 100644
--- a/icing/store/persistent-hash-map-key-mapper_test.cc
+++ b/icing/store/persistent-hash-map-key-mapper_test.cc
@@ -41,9 +41,9 @@ class PersistentHashMapKeyMapperTest : public testing::Test {
};
TEST_F(PersistentHashMapKeyMapperTest, InvalidBaseDir) {
- EXPECT_THAT(
- PersistentHashMapKeyMapper<DocumentId>::Create(filesystem_, "/dev/null"),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ EXPECT_THAT(PersistentHashMapKeyMapper<DocumentId>::Create(
+ filesystem_, "/dev/null", /*pre_mapping_fbv=*/false),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
} // namespace
diff --git a/icing/store/suggestion-result-checker-impl.h b/icing/store/suggestion-result-checker-impl.h
index 89e7214..4e01f81 100644
--- a/icing/store/suggestion-result-checker-impl.h
+++ b/icing/store/suggestion-result-checker-impl.h
@@ -15,6 +15,7 @@
#ifndef ICING_STORE_SUGGESTION_RESULT_CHECKER_IMPL_H_
#define ICING_STORE_SUGGESTION_RESULT_CHECKER_IMPL_H_
+#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
#include "icing/store/namespace-id.h"
@@ -26,23 +27,85 @@ namespace lib {
class SuggestionResultCheckerImpl : public SuggestionResultChecker {
public:
explicit SuggestionResultCheckerImpl(
- const DocumentStore* document_store,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
std::unordered_set<NamespaceId> target_namespace_ids,
std::unordered_map<NamespaceId, std::unordered_set<DocumentId>>
document_id_filter_map,
std::unordered_set<SchemaTypeId> target_schema_type_ids,
- std::unordered_map<SchemaTypeId, SectionIdMask> property_filter_map)
+ std::unordered_map<SchemaTypeId, SectionIdMask> property_filter_map,
+ std::string target_section, std::unordered_set<DocumentId> search_base,
+ int64_t current_time_ms)
: document_store_(*document_store),
+ schema_store_(*schema_store),
target_namespace_ids_(std::move(target_namespace_ids)),
document_id_filter_map_(std::move(document_id_filter_map)),
target_schema_type_ids_(std::move(target_schema_type_ids)),
- property_filter_map_(std::move(property_filter_map)) {}
+ property_filter_map_(std::move(property_filter_map)),
+ target_section_(std::move(target_section)),
+ search_base_(std::move(search_base)),
+ current_time_ms_(current_time_ms) {}
+
+ bool MatchesTargetNamespace(NamespaceId namespace_id) const {
+ return target_namespace_ids_.empty() ||
+ target_namespace_ids_.find(namespace_id) !=
+ target_namespace_ids_.end();
+ }
+
+ bool MatchesTargetDocumentIds(NamespaceId namespace_id,
+ DocumentId document_id) const {
+ if (document_id_filter_map_.empty()) {
+ return true;
+ }
+ auto document_ids_itr = document_id_filter_map_.find(namespace_id);
+ // The client doesn't set desired document ids in this namespace, or the
+ // client doesn't want this document.
+ return document_ids_itr == document_id_filter_map_.end() ||
+ document_ids_itr->second.find(document_id) !=
+ document_ids_itr->second.end();
+ }
+
+ bool MatchesTargetSchemaType(SchemaTypeId schema_type_id) const {
+ return target_schema_type_ids_.empty() ||
+ target_schema_type_ids_.find(schema_type_id) !=
+ target_schema_type_ids_.end();
+ }
+
+ bool MatchesTargetSection(SchemaTypeId schema_type_id,
+ SectionId section_id) const {
+ if (target_section_.empty()) {
+ return true;
+ }
+ auto section_metadata_or =
+ schema_store_.GetSectionMetadata(schema_type_id, section_id);
+ if (!section_metadata_or.ok()) {
+ // cannot find the target section metadata.
+ return false;
+ }
+ const SectionMetadata* section_metadata = section_metadata_or.ValueOrDie();
+ return section_metadata->path == target_section_;
+ }
+
+ bool MatchesSearchBase(DocumentId document_id) const {
+ return search_base_.empty() ||
+ search_base_.find(document_id) != search_base_.end();
+ }
+
+ bool MatchesPropertyFilter(SchemaTypeId schema_type_id,
+ SectionId section_id) const {
+ if (property_filter_map_.empty()) {
+ return true;
+ }
+ auto section_mask_itr = property_filter_map_.find(schema_type_id);
+ return section_mask_itr == property_filter_map_.end() ||
+ (section_mask_itr->second & (UINT64_C(1) << section_id)) != 0;
+ }
bool BelongsToTargetResults(DocumentId document_id,
SectionId section_id) const override {
// Get the document filter data first.
auto document_filter_data_optional_ =
- document_store_.GetAliveDocumentFilterData(document_id);
+ document_store_.GetAliveDocumentFilterData(document_id,
+ current_time_ms_);
if (!document_filter_data_optional_) {
// The document doesn't exist.
return false;
@@ -50,54 +113,39 @@ class SuggestionResultCheckerImpl : public SuggestionResultChecker {
DocumentFilterData document_filter_data =
document_filter_data_optional_.value();
- // 1: Check the namespace filter
- if (!target_namespace_ids_.empty() &&
- target_namespace_ids_.find(document_filter_data.namespace_id()) ==
- target_namespace_ids_.end()) {
- // User gives a namespace filter, and the current namespace isn't desired.
+ if (!MatchesTargetNamespace(document_filter_data.namespace_id())) {
return false;
}
-
- // 2: Check the document id filter
- if (!document_id_filter_map_.empty()) {
- auto document_ids_itr =
- document_id_filter_map_.find(document_filter_data.namespace_id());
- if (document_ids_itr != document_id_filter_map_.end() &&
- document_ids_itr->second.find(document_id) ==
- document_ids_itr->second.end()) {
- // The client doesn't set desired document ids in this namespace, or the
- // client doesn't want this document.
- return false;
- }
+ if (!MatchesTargetDocumentIds(document_filter_data.namespace_id(),
+ document_id)) {
+ return false;
}
-
- // 3: Check the schema type filter
- if (!target_schema_type_ids_.empty() &&
- target_schema_type_ids_.find(document_filter_data.schema_type_id()) ==
- target_schema_type_ids_.end()) {
- // User gives a schema type filter, and the current schema type isn't
- // desired.
+ if (!MatchesTargetSchemaType(document_filter_data.schema_type_id())) {
return false;
}
-
- if (!property_filter_map_.empty()) {
- auto section_mask_itr =
- property_filter_map_.find(document_filter_data.schema_type_id());
- if (section_mask_itr != property_filter_map_.end() &&
- (section_mask_itr->second & (UINT64_C(1) << section_id)) == 0) {
- // The client doesn't set desired properties in this schema, or the
- // client doesn't want this property.
- return false;
- }
+ if (!MatchesTargetSection(document_filter_data.schema_type_id(),
+ section_id)) {
+ return false;
+ }
+ if (!MatchesSearchBase(document_id)) {
+ return false;
+ }
+ if (!MatchesPropertyFilter(document_filter_data.schema_type_id(),
+ section_id)) {
+ return false;
}
return true;
}
const DocumentStore& document_store_;
+ const SchemaStore& schema_store_;
std::unordered_set<NamespaceId> target_namespace_ids_;
std::unordered_map<NamespaceId, std::unordered_set<DocumentId>>
document_id_filter_map_;
std::unordered_set<SchemaTypeId> target_schema_type_ids_;
std::unordered_map<SchemaTypeId, SectionIdMask> property_filter_map_;
+ std::string target_section_;
+ std::unordered_set<DocumentId> search_base_;
+ int64_t current_time_ms_;
};
} // namespace lib
diff --git a/icing/store/usage-store.h b/icing/store/usage-store.h
index fd77df4..3c7a55e 100644
--- a/icing/store/usage-store.h
+++ b/icing/store/usage-store.h
@@ -180,6 +180,8 @@ class UsageStore {
// INTERNAL_ERROR on I/O error
libtextclassifier3::Status Reset();
+ int32_t num_elements() const { return usage_score_cache_->num_elements(); }
+
private:
explicit UsageStore(std::unique_ptr<FileBackedVector<UsageScores>>
document_id_to_scores_mapper,
diff --git a/icing/store/usage-store_test.cc b/icing/store/usage-store_test.cc
index b2dbe4b..2b17f13 100644
--- a/icing/store/usage-store_test.cc
+++ b/icing/store/usage-store_test.cc
@@ -44,7 +44,7 @@ class UsageStoreTest : public testing::Test {
};
UsageReport CreateUsageReport(std::string name_space, std::string uri,
- int64 timestamp_ms,
+ int64_t timestamp_ms,
UsageReport::UsageType usage_type) {
UsageReport usage_report;
usage_report.set_document_namespace(name_space);
@@ -450,7 +450,7 @@ TEST_F(UsageStoreTest, Reset) {
TEST_F(UsageStoreTest, TimestampInSecondsShouldNotOverflow) {
// Create a report with the max value of timestamps.
UsageReport usage_report = CreateUsageReport(
- "namespace", "uri", /*timestamp_ms=*/std::numeric_limits<int64>::max(),
+ "namespace", "uri", /*timestamp_ms=*/std::numeric_limits<int64_t>::max(),
UsageReport::USAGE_TYPE1);
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
diff --git a/icing/testing/common-matchers.h b/icing/testing/common-matchers.h
index db7b7ef..c6500db 100644
--- a/icing/testing/common-matchers.h
+++ b/icing/testing/common-matchers.h
@@ -16,6 +16,7 @@
#define ICING_TESTING_COMMON_MATCHERS_H_
#include <algorithm>
+#include <cinttypes>
#include <cmath>
#include <string>
#include <vector>
@@ -32,6 +33,7 @@
#include "icing/portable/equals-proto.h"
#include "icing/proto/search.pb.h"
#include "icing/proto/status.pb.h"
+#include "icing/schema/joinable-property.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/scoring/scored-document-hit.h"
@@ -239,7 +241,9 @@ MATCHER_P(EqualsSetSchemaResult, expected, "") {
actual.schema_types_changed_fully_compatible_by_name ==
expected.schema_types_changed_fully_compatible_by_name &&
actual.schema_types_index_incompatible_by_name ==
- expected.schema_types_index_incompatible_by_name) {
+ expected.schema_types_index_incompatible_by_name &&
+ actual.schema_types_join_incompatible_by_name ==
+ expected.schema_types_join_incompatible_by_name) {
return true;
}
@@ -336,6 +340,21 @@ MATCHER_P(EqualsSetSchemaResult, expected, "") {
","),
"]");
+ // Format schema_types_join_incompatible_by_name
+ std::string actual_schema_types_join_incompatible_by_name =
+ absl_ports::StrCat(
+ "[",
+ absl_ports::StrJoin(actual.schema_types_join_incompatible_by_name,
+ ","),
+ "]");
+
+ std::string expected_schema_types_join_incompatible_by_name =
+ absl_ports::StrCat(
+ "[",
+ absl_ports::StrJoin(expected.schema_types_join_incompatible_by_name,
+ ","),
+ "]");
+
*result_listener << IcingStringUtil::StringPrintf(
"\nExpected {\n"
"\tsuccess=%d,\n"
@@ -345,8 +364,9 @@ MATCHER_P(EqualsSetSchemaResult, expected, "") {
"\tschema_types_incompatible_by_name=%s,\n"
"\tschema_types_incompatible_by_id=%s\n"
"\tschema_types_new_by_name=%s,\n"
- "\tschema_types_index_incompatible_by_name=%s,\n"
"\tschema_types_changed_fully_compatible_by_name=%s\n"
+ "\tschema_types_index_incompatible_by_name=%s,\n"
+ "\tschema_types_join_incompatible_by_name=%s\n"
"}\n"
"Actual {\n"
"\tsuccess=%d,\n"
@@ -356,8 +376,9 @@ MATCHER_P(EqualsSetSchemaResult, expected, "") {
"\tschema_types_incompatible_by_name=%s,\n"
"\tschema_types_incompatible_by_id=%s\n"
"\tschema_types_new_by_name=%s,\n"
- "\tschema_types_index_incompatible_by_name=%s,\n"
"\tschema_types_changed_fully_compatible_by_name=%s\n"
+ "\tschema_types_index_incompatible_by_name=%s,\n"
+ "\tschema_types_join_incompatible_by_name=%s\n"
"}\n",
expected.success, expected_old_schema_type_ids_changed.c_str(),
expected_schema_types_deleted_by_name.c_str(),
@@ -366,7 +387,8 @@ MATCHER_P(EqualsSetSchemaResult, expected, "") {
expected_schema_types_incompatible_by_id.c_str(),
expected_schema_types_new_by_name.c_str(),
expected_schema_types_changed_fully_compatible_by_name.c_str(),
- expected_schema_types_index_incompatible_by_name.c_str(), actual.success,
+ expected_schema_types_index_incompatible_by_name.c_str(),
+ expected_schema_types_join_incompatible_by_name.c_str(), actual.success,
actual_old_schema_type_ids_changed.c_str(),
actual_schema_types_deleted_by_name.c_str(),
actual_schema_types_deleted_by_id.c_str(),
@@ -374,10 +396,36 @@ MATCHER_P(EqualsSetSchemaResult, expected, "") {
actual_schema_types_incompatible_by_id.c_str(),
actual_schema_types_new_by_name.c_str(),
actual_schema_types_changed_fully_compatible_by_name.c_str(),
- actual_schema_types_index_incompatible_by_name.c_str());
+ actual_schema_types_index_incompatible_by_name.c_str(),
+ actual_schema_types_join_incompatible_by_name.c_str());
return false;
}
+MATCHER_P3(EqualsSectionMetadata, expected_id, expected_property_path,
+ expected_property_config_proto, "") {
+ const SectionMetadata& actual = arg;
+ return actual.id == expected_id && actual.path == expected_property_path &&
+ actual.data_type == expected_property_config_proto.data_type() &&
+ actual.tokenizer ==
+ expected_property_config_proto.string_indexing_config()
+ .tokenizer_type() &&
+ actual.term_match_type ==
+ expected_property_config_proto.string_indexing_config()
+ .term_match_type() &&
+ actual.numeric_match_type ==
+ expected_property_config_proto.integer_indexing_config()
+ .numeric_match_type();
+}
+
+MATCHER_P3(EqualsJoinablePropertyMetadata, expected_id, expected_property_path,
+ expected_property_config_proto, "") {
+ const JoinablePropertyMetadata& actual = arg;
+ return actual.id == expected_id && actual.path == expected_property_path &&
+ actual.data_type == expected_property_config_proto.data_type() &&
+ actual.value_type ==
+ expected_property_config_proto.joinable_config().value_type();
+}
+
std::string StatusCodeToString(libtextclassifier3::StatusCode code);
std::string ProtoStatusCodeToString(StatusProto::Code code);
diff --git a/icing/testing/numeric/normal-distribution-number-generator.h b/icing/testing/numeric/normal-distribution-number-generator.h
new file mode 100644
index 0000000..73cdd1f
--- /dev/null
+++ b/icing/testing/numeric/normal-distribution-number-generator.h
@@ -0,0 +1,42 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TESTING_NUMERIC_NORMAL_DISTRIBUTION_NUMBER_GENERATOR_H_
+#define ICING_TESTING_NUMERIC_NORMAL_DISTRIBUTION_NUMBER_GENERATOR_H_
+
+#include <cmath>
+#include <random>
+
+#include "icing/testing/numeric/number-generator.h"
+
+namespace icing {
+namespace lib {
+
+template <typename T>
+class NormalDistributionNumberGenerator : public NumberGenerator<T> {
+ public:
+ explicit NormalDistributionNumberGenerator(int seed, double mean,
+ double stddev)
+ : NumberGenerator<T>(seed), distribution_(mean, stddev) {}
+
+ T Generate() override { return std::round(distribution_(this->engine_)); }
+
+ private:
+ std::normal_distribution<> distribution_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TESTING_NUMERIC_NORMAL_DISTRIBUTION_NUMBER_GENERATOR_H_
diff --git a/icing/testing/numeric/number-generator.h b/icing/testing/numeric/number-generator.h
new file mode 100644
index 0000000..bb601b4
--- /dev/null
+++ b/icing/testing/numeric/number-generator.h
@@ -0,0 +1,39 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TESTING_NUMERIC_NUMBER_GENERATOR_H_
+#define ICING_TESTING_NUMERIC_NUMBER_GENERATOR_H_
+
+#include <random>
+
+namespace icing {
+namespace lib {
+
+template <typename T>
+class NumberGenerator {
+ public:
+ virtual ~NumberGenerator() = default;
+
+ virtual T Generate() = 0;
+
+ protected:
+ explicit NumberGenerator(int seed) : engine_(seed) {}
+
+ std::default_random_engine engine_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TESTING_NUMERIC_NUMBER_GENERATOR_H_
diff --git a/icing/testing/numeric/uniform-distribution-integer-generator.h b/icing/testing/numeric/uniform-distribution-integer-generator.h
new file mode 100644
index 0000000..569eebd
--- /dev/null
+++ b/icing/testing/numeric/uniform-distribution-integer-generator.h
@@ -0,0 +1,41 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TESTING_NUMERIC_UNIFORM_DISTRIBUTION_INTEGER_GENERATOR_H_
+#define ICING_TESTING_NUMERIC_UNIFORM_DISTRIBUTION_INTEGER_GENERATOR_H_
+
+#include <random>
+
+#include "icing/testing/numeric/number-generator.h"
+
+namespace icing {
+namespace lib {
+
+template <typename T>
+class UniformDistributionIntegerGenerator : public NumberGenerator<T> {
+ public:
+ explicit UniformDistributionIntegerGenerator(int seed, T range_lower,
+ T range_upper)
+ : NumberGenerator<T>(seed), distribution_(range_lower, range_upper) {}
+
+ T Generate() override { return distribution_(this->engine_); }
+
+ private:
+ std::uniform_int_distribution<T> distribution_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TESTING_NUMERIC_UNIFORM_DISTRIBUTION_INTEGER_GENERATOR_H_
diff --git a/icing/testing/random-string.h b/icing/testing/random-string.h
index fd8d87b..a313c1c 100644
--- a/icing/testing/random-string.h
+++ b/icing/testing/random-string.h
@@ -25,6 +25,15 @@ namespace lib {
inline constexpr std::string_view kAlNumAlphabet =
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
+// Average length of word in English is 4.7 characters.
+inline constexpr int kAvgTokenLen = 5;
+// Made up value. This results in a fairly reasonable language - the majority of
+// generated words are 3-9 characters, ~3% of words are >=20 chars, and the
+// longest ones are 27 chars, (roughly consistent with the longest,
+// non-contrived English words
+// https://en.wikipedia.org/wiki/Longest_word_in_English)
+inline constexpr int kTokenStdDev = 7;
+
template <typename Gen>
std::string RandomString(const std::string_view alphabet, size_t len,
Gen* gen) {
@@ -37,6 +46,22 @@ std::string RandomString(const std::string_view alphabet, size_t len,
return result;
}
+// Creates a vector containing num_words randomly-generated words for use by
+// documents.
+template <typename Rand>
+std::vector<std::string> CreateLanguages(int num_words, Rand* r) {
+ std::vector<std::string> language;
+ std::normal_distribution<> norm_dist(kAvgTokenLen, kTokenStdDev);
+ while (--num_words >= 0) {
+ int word_length = 0;
+ while (word_length < 1) {
+ word_length = std::round(norm_dist(*r));
+ }
+ language.push_back(RandomString(kAlNumAlphabet, word_length, r));
+ }
+ return language;
+}
+
// Returns a vector containing num_terms unique terms. Terms are created in
// non-random order starting with "a" to "z" to "aa" to "zz", etc.
std::vector<std::string> GenerateUniqueTerms(int num_terms);
diff --git a/icing/text_classifier/lib3/utils/base/statusor.h b/icing/text_classifier/lib3/utils/base/statusor.h
index 9ec3d91..aa1e598 100644
--- a/icing/text_classifier/lib3/utils/base/statusor.h
+++ b/icing/text_classifier/lib3/utils/base/statusor.h
@@ -201,12 +201,19 @@ template <typename T>
inline StatusOr<T>::StatusOr(T&& value) : value_(std::move(value)) {}
template <typename T>
-inline StatusOr<T>::StatusOr(const StatusOr& other)
- : status_(other.status_), value_(other.value_) {}
+inline StatusOr<T>::StatusOr(const StatusOr& other) : status_(other.status_) {
+ if (other.ok()) {
+ MakeValue(other.value_);
+ }
+}
template <typename T>
inline StatusOr<T>::StatusOr(StatusOr&& other)
- : status_(other.status_), value_(std::move(other.value_)) {}
+ : status_(std::move(other.status_)) {
+ if (other.ok()) {
+ MakeValue(std::move(other.value_));
+ }
+}
template <typename T>
template <
@@ -216,7 +223,11 @@ template <
std::is_convertible<const U&, T>>::value,
int>>
inline StatusOr<T>::StatusOr(const StatusOr<U>& other)
- : status_(other.status_), value_(other.value_) {}
+ : status_(other.status_) {
+ if (other.ok()) {
+ MakeValue(other.value_);
+ }
+}
template <typename T>
template <typename U,
@@ -225,7 +236,11 @@ template <typename U,
std::is_convertible<U&&, T>>::value,
int>>
inline StatusOr<T>::StatusOr(StatusOr<U>&& other)
- : status_(other.status_), value_(std::move(other.value_)) {}
+ : status_(std::move(other.status_)) {
+ if (other.ok()) {
+ MakeValue(std::move(other.value_));
+ }
+}
template <typename T>
template <
diff --git a/icing/tokenization/icu/icu-language-segmenter-factory.cc b/icing/tokenization/icu/icu-language-segmenter-factory.cc
index 363bc6d..7b095b4 100644
--- a/icing/tokenization/icu/icu-language-segmenter-factory.cc
+++ b/icing/tokenization/icu/icu-language-segmenter-factory.cc
@@ -47,7 +47,7 @@ libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter>> Create(
<< " not supported. Converting to locale " << ULOC_US;
options.locale = ULOC_US;
}
- return std::make_unique<IcuLanguageSegmenter>(std::move(options.locale));
+ return IcuLanguageSegmenter::Create(std::move(options.locale));
}
} // namespace language_segmenter_factory
diff --git a/icing/tokenization/icu/icu-language-segmenter.cc b/icing/tokenization/icu/icu-language-segmenter.cc
index dc7b0a4..cac12f7 100644
--- a/icing/tokenization/icu/icu-language-segmenter.cc
+++ b/icing/tokenization/icu/icu-language-segmenter.cc
@@ -24,6 +24,7 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/mutex.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/util/character-iterator.h"
#include "icing/util/i18n-utils.h"
@@ -48,9 +49,11 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
// INTERNAL_ERROR if unable to create
static libtextclassifier3::StatusOr<
std::unique_ptr<LanguageSegmenter::Iterator>>
- Create(std::string_view text, std::string_view locale) {
+ Create(const IcuLanguageSegmenter* creator, UBreakIterator* break_iterator,
+ std::string_view text, std::string_view locale) {
std::unique_ptr<IcuLanguageSegmenterIterator> iterator(
- new IcuLanguageSegmenterIterator(text, locale));
+ new IcuLanguageSegmenterIterator(creator, break_iterator, text,
+ locale));
if (iterator->Initialize()) {
return iterator;
}
@@ -58,8 +61,8 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
}
~IcuLanguageSegmenterIterator() {
- ubrk_close(break_iterator_);
utext_close(u_text_);
+ creator_.ReturnBreakIterator(break_iterator_);
}
// Advances to the next term. Returns false if it has reached the end.
@@ -244,9 +247,12 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
}
private:
- explicit IcuLanguageSegmenterIterator(std::string_view text,
+ explicit IcuLanguageSegmenterIterator(const IcuLanguageSegmenter* creator,
+ UBreakIterator* break_iterator,
+ std::string_view text,
std::string_view locale)
- : break_iterator_(nullptr),
+ : creator_(*creator),
+ break_iterator_(break_iterator),
text_(text),
locale_(locale),
u_text_(nullptr),
@@ -256,13 +262,14 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
// Returns true on success
bool Initialize() {
+ if (break_iterator_ == nullptr) {
+ return false;
+ }
UErrorCode status = U_ZERO_ERROR;
u_text_ = utext_openUTF8(nullptr, text_.data(), text_.length(), &status);
if (u_text_ == nullptr) {
return false;
}
- break_iterator_ = ubrk_open(UBRK_WORD, locale_.data(), /*text=*/nullptr,
- /*textLength=*/0, &status);
ubrk_setUText(break_iterator_, u_text_, &status);
return !U_FAILURE(status);
}
@@ -290,9 +297,11 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
term_start_index_ = 0;
}
+ const IcuLanguageSegmenter& creator_; // Does not own.
+
// The underlying class that does the segmentation, ubrk_close() must be
// called after using.
- UBreakIterator* break_iterator_;
+ UBreakIterator* break_iterator_; // Does not own
// Text to be segmented
std::string_view text_;
@@ -321,18 +330,61 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
int term_end_index_exclusive_;
};
-IcuLanguageSegmenter::IcuLanguageSegmenter(std::string locale)
- : locale_(std::move(locale)) {}
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IcuLanguageSegmenter>>
+IcuLanguageSegmenter::Create(std::string&& locale) {
+ UErrorCode status = U_ZERO_ERROR;
+ UBreakIterator* break_iterator = ubrk_open(
+ UBRK_WORD, locale.c_str(), /*text=*/nullptr, /*textLength=*/0, &status);
+ if (U_FAILURE(status) || break_iterator == nullptr) {
+ return absl_ports::AbortedError(
+ "Unable to create ICU break_iterator for language segmentation");
+ }
+ return std::unique_ptr<IcuLanguageSegmenter>(
+ new IcuLanguageSegmenter(std::move(locale), break_iterator));
+}
+
+UBreakIterator* IcuLanguageSegmenter::ProduceBreakIterator() const {
+ UBreakIterator* itr = nullptr;
+ {
+ absl_ports::unique_lock l(&mutex_);
+ if (cached_break_iterator_ != nullptr) {
+ itr = cached_break_iterator_;
+ cached_break_iterator_ = nullptr;
+ }
+ }
+ if (itr == nullptr) {
+ UErrorCode status = U_ZERO_ERROR;
+ itr = ubrk_open(UBRK_WORD, locale_.c_str(), /*text=*/nullptr,
+ /*textLength=*/0, &status);
+ if (U_FAILURE(status)) {
+ itr = nullptr;
+ }
+ }
+ return itr;
+}
+
+void IcuLanguageSegmenter::ReturnBreakIterator(UBreakIterator* itr) const {
+ {
+ absl_ports::unique_lock l(&mutex_);
+ if (cached_break_iterator_ == nullptr) {
+ cached_break_iterator_ = itr;
+ return;
+ }
+ }
+ ubrk_close(itr);
+}
libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter::Iterator>>
IcuLanguageSegmenter::Segment(const std::string_view text) const {
- return IcuLanguageSegmenterIterator::Create(text, locale_);
+ return IcuLanguageSegmenterIterator::Create(this, ProduceBreakIterator(),
+ text, locale_);
}
libtextclassifier3::StatusOr<std::vector<std::string_view>>
IcuLanguageSegmenter::GetAllTerms(const std::string_view text) const {
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<LanguageSegmenter::Iterator> iterator,
- Segment(text));
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<LanguageSegmenter::Iterator> iterator,
+ Segment(text));
std::vector<std::string_view> terms;
while (iterator->Advance()) {
terms.push_back(iterator->GetTerm());
diff --git a/icing/tokenization/icu/icu-language-segmenter.h b/icing/tokenization/icu/icu-language-segmenter.h
index 4115461..44de5a2 100644
--- a/icing/tokenization/icu/icu-language-segmenter.h
+++ b/icing/tokenization/icu/icu-language-segmenter.h
@@ -22,7 +22,9 @@
#include <vector>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/mutex.h"
#include "icing/tokenization/language-segmenter.h"
+#include "unicode/ubrk.h"
namespace icing {
namespace lib {
@@ -41,7 +43,14 @@ namespace lib {
// class. Other special tokenization logic will be in each tokenizer.
class IcuLanguageSegmenter : public LanguageSegmenter {
public:
- explicit IcuLanguageSegmenter(std::string locale);
+ static libtextclassifier3::StatusOr<std::unique_ptr<IcuLanguageSegmenter>>
+ Create(std::string&& locale);
+
+ ~IcuLanguageSegmenter() override {
+ if (cached_break_iterator_ != nullptr) {
+ ubrk_close(cached_break_iterator_);
+ }
+ }
IcuLanguageSegmenter(const IcuLanguageSegmenter&) = delete;
IcuLanguageSegmenter& operator=(const IcuLanguageSegmenter&) = delete;
@@ -69,8 +78,32 @@ class IcuLanguageSegmenter : public LanguageSegmenter {
std::string_view text) const override;
private:
+ // Declared a friend so that it can call AcceptBreakIterator.
+ friend class IcuLanguageSegmenterIterator;
+
+ explicit IcuLanguageSegmenter(std::string&& locale, UBreakIterator* iterator)
+ : locale_(std::move(locale)), cached_break_iterator_(iterator) {}
+
+ // Returns a UBreakIterator that the caller owns.
+ // If cached_break_iterator_ is non-null, transfers ownership to caller and
+ // sets cached_break_iterator_ to null.
+ // If cached_break_iterator is null, creates a new UBreakIterator and
+ // transfers ownership to caller.
+ UBreakIterator* ProduceBreakIterator() const;
+
+ // Caller transfers ownership of itr to IcuLanguageSegmenter.
+ // If cached_break_iterator_ is null, itr becomes the cached_break_iterator_
+ // If cached_break_iterator_ is non-null, then itr will be closed.
+ void ReturnBreakIterator(UBreakIterator* itr) const;
+
// Used to help segment text
const std::string locale_;
+
+ // The underlying class that does the segmentation, ubrk_close() must be
+ // called after using.
+ mutable UBreakIterator* cached_break_iterator_ ICING_GUARDED_BY(mutex_);
+
+ mutable absl_ports::shared_mutex mutex_;
};
} // namespace lib
diff --git a/icing/tokenization/icu/icu-language-segmenter_test.cc b/icing/tokenization/icu/icu-language-segmenter_test.cc
index 6771050..3bacbc6 100644
--- a/icing/tokenization/icu/icu-language-segmenter_test.cc
+++ b/icing/tokenization/icu/icu-language-segmenter_test.cc
@@ -1290,6 +1290,50 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, QuerySyntax) {
"subproperty2", ":", "term3"));
}
+TEST_P(IcuLanguageSegmenterAllLocalesTest, MultipleLangSegmentersTest) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<LanguageSegmenter::Iterator> iterator_one,
+ language_segmenter->Segment("foo bar baz"));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<LanguageSegmenter::Iterator> iterator_two,
+ language_segmenter->Segment("abra kadabra alakazam"));
+
+ ASSERT_TRUE(iterator_one->Advance());
+ ASSERT_TRUE(iterator_two->Advance());
+ EXPECT_THAT(iterator_one->GetTerm(), Eq("foo"));
+ EXPECT_THAT(iterator_two->GetTerm(), Eq("abra"));
+
+ ASSERT_TRUE(iterator_one->Advance());
+ ASSERT_TRUE(iterator_two->Advance());
+ EXPECT_THAT(iterator_one->GetTerm(), Eq(" "));
+ EXPECT_THAT(iterator_two->GetTerm(), Eq(" "));
+
+ ASSERT_TRUE(iterator_one->Advance());
+ EXPECT_THAT(iterator_one->GetTerm(), Eq("bar"));
+ EXPECT_THAT(iterator_two->GetTerm(), Eq(" "));
+ ASSERT_TRUE(iterator_two->Advance());
+ EXPECT_THAT(iterator_one->GetTerm(), Eq("bar"));
+ EXPECT_THAT(iterator_two->GetTerm(), Eq("kadabra"));
+
+ ASSERT_TRUE(iterator_one->Advance());
+ ASSERT_TRUE(iterator_two->Advance());
+ EXPECT_THAT(iterator_one->GetTerm(), Eq(" "));
+ EXPECT_THAT(iterator_two->GetTerm(), Eq(" "));
+
+ ASSERT_TRUE(iterator_two->Advance());
+ ASSERT_TRUE(iterator_one->Advance());
+ EXPECT_THAT(iterator_one->GetTerm(), Eq("baz"));
+ EXPECT_THAT(iterator_two->GetTerm(), Eq("alakazam"));
+
+ ASSERT_FALSE(iterator_two->Advance());
+ ASSERT_FALSE(iterator_one->Advance());
+}
+
INSTANTIATE_TEST_SUITE_P(
LocaleName, IcuLanguageSegmenterAllLocalesTest,
testing::Values(ULOC_US, ULOC_UK, ULOC_CANADA, ULOC_CANADA_FRENCH,
diff --git a/icing/tokenization/raw-query-tokenizer.cc b/icing/tokenization/raw-query-tokenizer.cc
index 8cd8d05..1dcbf9b 100644
--- a/icing/tokenization/raw-query-tokenizer.cc
+++ b/icing/tokenization/raw-query-tokenizer.cc
@@ -26,8 +26,8 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_join.h"
+#include "icing/schema/property-util.h"
#include "icing/schema/schema-util.h"
-#include "icing/schema/section-manager.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/tokenization/token.h"
#include "icing/tokenization/tokenizer.h"
@@ -252,41 +252,45 @@ std::string_view GetErrorMessage(ActionOrError maybe_error) {
// like "+", "&", "@", "#" in indexing and query tokenizers.
constexpr State state_transition_rules[STATE_COUNT][TYPE_COUNT] = {
/*State: Ready*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHANUMERIC_TERM,
- OPENING_PARENTHESES, CLOSING_PARENTHESES, PROCESSING_EXCLUSION,
- PROCESSING_OR, READY, READY},
+ {READY, PROCESSING_ALPHANUMERIC_TERM,
+ PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+ CLOSING_PARENTHESES, PROCESSING_EXCLUSION, PROCESSING_OR, READY, READY},
/*State: PROCESSING_ALPHANUMERIC_TERM*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHANUMERIC_TERM,
- OPENING_PARENTHESES, CLOSING_PARENTHESES, READY, INVALID,
- PROCESSING_PROPERTY_RESTRICT, READY},
+ {READY, PROCESSING_ALPHANUMERIC_TERM,
+ PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+ CLOSING_PARENTHESES, READY, INVALID, PROCESSING_PROPERTY_RESTRICT, READY},
/*State: PROCESSING_EXCLUSION*/
{READY, PROCESSING_EXCLUSION_TERM, PROCESSING_EXCLUSION_TERM, INVALID,
CLOSING_PARENTHESES, PROCESSING_EXCLUSION, INVALID, INVALID, READY},
/*State: PROCESSING_EXCLUSION_TERM*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHANUMERIC_TERM,
- OPENING_PARENTHESES, CLOSING_PARENTHESES, READY, INVALID, INVALID, READY},
+ {READY, PROCESSING_ALPHANUMERIC_TERM,
+ PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+ CLOSING_PARENTHESES, READY, INVALID, INVALID, READY},
/*State: PROCESSING_PROPERTY_RESTRICT*/
{READY, PROCESSING_PROPERTY_TERM, PROCESSING_PROPERTY_TERM, INVALID,
CLOSING_PARENTHESES, INVALID, INVALID, PROCESSING_PROPERTY_RESTRICT,
READY},
/*State: PROCESSING_PROPERTY_TERM*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHANUMERIC_TERM,
- OPENING_PARENTHESES, CLOSING_PARENTHESES, READY, INVALID,
- PROCESSING_PROPERTY_TERM_APPENDING, READY},
+ {READY, PROCESSING_ALPHANUMERIC_TERM,
+ PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+ CLOSING_PARENTHESES, READY, INVALID, PROCESSING_PROPERTY_TERM_APPENDING,
+ READY},
/*State: PROCESSING_OR*/
{READY, INVALID, INVALID, OPENING_PARENTHESES, CLOSING_PARENTHESES, INVALID,
INVALID, INVALID, READY},
/*State: OPENING_PARENTHESES*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHANUMERIC_TERM,
- OPENING_PARENTHESES, CLOSING_PARENTHESES, PROCESSING_EXCLUSION,
- OPENING_PARENTHESES, READY, READY},
+ {READY, PROCESSING_ALPHANUMERIC_TERM,
+ PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+ CLOSING_PARENTHESES, PROCESSING_EXCLUSION, OPENING_PARENTHESES, READY,
+ READY},
/*State: CLOSING_PARENTHESES*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHANUMERIC_TERM,
- OPENING_PARENTHESES, CLOSING_PARENTHESES, PROCESSING_EXCLUSION,
- PROCESSING_OR, INVALID, READY},
+ {READY, PROCESSING_ALPHANUMERIC_TERM,
+ PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+ CLOSING_PARENTHESES, PROCESSING_EXCLUSION, PROCESSING_OR, INVALID, READY},
/*State: PROCESSING_NON_ASCII_ALPHANUMERIC_TERM*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHANUMERIC_TERM,
- OPENING_PARENTHESES, CLOSING_PARENTHESES, READY, INVALID, INVALID, READY},
+ {READY, PROCESSING_ALPHANUMERIC_TERM,
+ PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+ CLOSING_PARENTHESES, READY, INVALID, INVALID, READY},
/*State: PROCESSING_PROPERTY_TERM_APPENDING*/
{READY, PROCESSING_PROPERTY_TERM_APPENDING,
PROCESSING_PROPERTY_TERM_APPENDING, OPENING_PARENTHESES,
@@ -504,7 +508,7 @@ libtextclassifier3::Status OutputToken(State new_state,
// Asserts extra rule 1: each property name in the property path is a
// valid term.
for (std::string_view property :
- absl_ports::StrSplit(current_term, kPropertySeparator)) {
+ property_util::SplitPropertyPathExpr(current_term)) {
if (!SchemaUtil::ValidatePropertyName(property).ok()) {
return absl_ports::InvalidArgumentError(
GetErrorMessage(ERROR_NON_ASCII_AS_PROPERTY_NAME));
diff --git a/icing/tokenization/rfc822-tokenizer_test.cc b/icing/tokenization/rfc822-tokenizer_test.cc
index f114943..ee3a95d 100644
--- a/icing/tokenization/rfc822-tokenizer_test.cc
+++ b/icing/tokenization/rfc822-tokenizer_test.cc
@@ -21,9 +21,6 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/testing/common-matchers.h"
-#include "icing/testing/jni-test-helpers.h"
-#include "icing/tokenization/language-segmenter-factory.h"
-#include "unicode/uloc.h"
namespace icing {
namespace lib {
@@ -31,21 +28,7 @@ namespace {
using ::testing::ElementsAre;
using ::testing::IsEmpty;
-class Rfc822TokenizerTest : public testing::Test {
- protected:
- void SetUp() override {
- jni_cache_ = GetTestJniCache();
- language_segmenter_factory::SegmenterOptions options(ULOC_US,
- jni_cache_.get());
- ICING_ASSERT_OK_AND_ASSIGN(
- language_segmenter_,
- language_segmenter_factory::Create(std::move(options)));
- }
- std::unique_ptr<const JniCache> jni_cache_;
- std::unique_ptr<LanguageSegmenter> language_segmenter_;
-};
-
-TEST_F(Rfc822TokenizerTest, StartingState) {
+TEST(Rfc822TokenizerTest, StartingState) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = "a@g.c";
auto token_iterator = rfc822_tokenizer.Tokenize(text).ValueOrDie();
@@ -55,7 +38,7 @@ TEST_F(Rfc822TokenizerTest, StartingState) {
ASSERT_THAT(token_iterator->GetTokens(), Not(IsEmpty()));
}
-TEST_F(Rfc822TokenizerTest, EmptyMiddleToken) {
+TEST(Rfc822TokenizerTest, EmptyMiddleToken) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string s("<alex>,,<tom>");
@@ -73,7 +56,7 @@ TEST_F(Rfc822TokenizerTest, EmptyMiddleToken) {
EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "tom"))));
}
-TEST_F(Rfc822TokenizerTest, Simple) {
+TEST(Rfc822TokenizerTest, Simple) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string_view s("<你alex@google.com>");
@@ -90,7 +73,7 @@ TEST_F(Rfc822TokenizerTest, Simple) {
EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
-TEST_F(Rfc822TokenizerTest, Small) {
+TEST(Rfc822TokenizerTest, Small) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string s = "\"a\"";
@@ -123,7 +106,7 @@ TEST_F(Rfc822TokenizerTest, Small) {
EqualsToken(Token::Type::RFC822_COMMENT, "a"))));
}
-TEST_F(Rfc822TokenizerTest, PB) {
+TEST(Rfc822TokenizerTest, PB) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string_view s("peanut (comment) butter, <alex@google.com>");
@@ -150,7 +133,7 @@ TEST_F(Rfc822TokenizerTest, PB) {
EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
-TEST_F(Rfc822TokenizerTest, NoBrackets) {
+TEST(Rfc822TokenizerTest, NoBrackets) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string_view s("alex@google.com");
@@ -167,7 +150,7 @@ TEST_F(Rfc822TokenizerTest, NoBrackets) {
EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"))));
}
-TEST_F(Rfc822TokenizerTest, TwoAddresses) {
+TEST(Rfc822TokenizerTest, TwoAddresses) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string_view s("<你alex@google.com>; <alexsav@gmail.com>");
@@ -191,7 +174,7 @@ TEST_F(Rfc822TokenizerTest, TwoAddresses) {
EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
-TEST_F(Rfc822TokenizerTest, Comment) {
+TEST(Rfc822TokenizerTest, Comment) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string_view s("(a comment) <alex@google.com>");
@@ -210,7 +193,7 @@ TEST_F(Rfc822TokenizerTest, Comment) {
EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
-TEST_F(Rfc822TokenizerTest, NameAndComment) {
+TEST(Rfc822TokenizerTest, NameAndComment) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string_view s("\"a name\" also a name <alex@google.com>");
@@ -233,7 +216,7 @@ TEST_F(Rfc822TokenizerTest, NameAndComment) {
}
// Test from tokenizer_test.cc.
-TEST_F(Rfc822TokenizerTest, Rfc822SanityCheck) {
+TEST(Rfc822TokenizerTest, Rfc822SanityCheck) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string addr1("A name (A comment) <address@domain.com>");
@@ -293,7 +276,7 @@ TEST_F(Rfc822TokenizerTest, Rfc822SanityCheck) {
}
// Tests from rfc822 converter.
-TEST_F(Rfc822TokenizerTest, SimpleRfcText) {
+TEST(Rfc822TokenizerTest, SimpleRfcText) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string test_string =
"foo@google.com,bar@google.com,baz@google.com,foo+hello@google.com,baz@"
@@ -345,7 +328,7 @@ TEST_F(Rfc822TokenizerTest, SimpleRfcText) {
EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "corp.google.com"))));
}
-TEST_F(Rfc822TokenizerTest, ComplicatedRfcText) {
+TEST(Rfc822TokenizerTest, ComplicatedRfcText) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string test_string =
R"raw("Weird, But&(Also)\\Valid" Name (!With, "an" \\odd\\ cmt too¡) <Foo B(a)r,Baz@g.co>
@@ -386,7 +369,7 @@ TEST_F(Rfc822TokenizerTest, ComplicatedRfcText) {
EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
-TEST_F(Rfc822TokenizerTest, FromHtmlBugs) {
+TEST(Rfc822TokenizerTest, FromHtmlBugs) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
// This input used to cause HTML parsing exception. We don't do HTML parsing
// any more (b/8388100) so we are just checking that it does not crash and
@@ -418,7 +401,7 @@ TEST_F(Rfc822TokenizerTest, FromHtmlBugs) {
EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
-TEST_F(Rfc822TokenizerTest, EmptyComponentsTest) {
+TEST(Rfc822TokenizerTest, EmptyComponentsTest) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
EXPECT_THAT(rfc822_tokenizer.TokenizeAll(""),
IsOkAndHolds(testing::IsEmpty()));
@@ -459,7 +442,7 @@ TEST_F(Rfc822TokenizerTest, EmptyComponentsTest) {
EqualsToken(Token::Type::RFC822_COMMENT, "comment"))));
}
-TEST_F(Rfc822TokenizerTest, NameTest) {
+TEST(Rfc822TokenizerTest, NameTest) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
// Name spread between address or comment.
@@ -525,7 +508,7 @@ TEST_F(Rfc822TokenizerTest, NameTest) {
EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "addr"))));
}
-TEST_F(Rfc822TokenizerTest, CommentEscapeTest) {
+TEST(Rfc822TokenizerTest, CommentEscapeTest) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
// '(', ')', '\\' chars should be escaped. All other escaped chars should be
// unescaped.
@@ -560,7 +543,7 @@ TEST_F(Rfc822TokenizerTest, CommentEscapeTest) {
EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"))));
}
-TEST_F(Rfc822TokenizerTest, QuoteEscapeTest) {
+TEST(Rfc822TokenizerTest, QuoteEscapeTest) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
// All names that include non-alphanumeric chars must be quoted and have '\\'
// and '"' chars escaped.
@@ -589,7 +572,7 @@ TEST_F(Rfc822TokenizerTest, QuoteEscapeTest) {
EqualsToken(Token::Type::RFC822_HOST_ADDRESS, R"(n\\a\m\"e)"))));
}
-TEST_F(Rfc822TokenizerTest, UnterminatedComponentTest) {
+TEST(Rfc822TokenizerTest, UnterminatedComponentTest) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
EXPECT_THAT(
@@ -657,7 +640,7 @@ TEST_F(Rfc822TokenizerTest, UnterminatedComponentTest) {
EqualsToken(Token::Type::RFC822_COMMENT, "comment"))));
}
-TEST_F(Rfc822TokenizerTest, Tokenize) {
+TEST(Rfc822TokenizerTest, Tokenize) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text =
@@ -710,7 +693,7 @@ TEST_F(Rfc822TokenizerTest, Tokenize) {
EqualsToken(Token::Type::RFC822_COMMENT, "something"))));
}
-TEST_F(Rfc822TokenizerTest, EdgeCases) {
+TEST(Rfc822TokenizerTest, EdgeCases) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
// Text to trigger the scenario where you have a non-alphabetic followed
@@ -772,7 +755,7 @@ TEST_F(Rfc822TokenizerTest, EdgeCases) {
EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
-TEST_F(Rfc822TokenizerTest, NumberInAddress) {
+TEST(Rfc822TokenizerTest, NumberInAddress) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = "<3alex@google.com>";
EXPECT_THAT(
@@ -787,7 +770,7 @@ TEST_F(Rfc822TokenizerTest, NumberInAddress) {
EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
-TEST_F(Rfc822TokenizerTest, DoubleQuoteDoubleSlash) {
+TEST(Rfc822TokenizerTest, DoubleQuoteDoubleSlash) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = R"("alex\"")";
EXPECT_THAT(
@@ -809,7 +792,7 @@ TEST_F(Rfc822TokenizerTest, DoubleQuoteDoubleSlash) {
EqualsToken(Token::Type::RFC822_HOST_ADDRESS, R"(alex\\\a)"))));
}
-TEST_F(Rfc822TokenizerTest, TwoEmails) {
+TEST(Rfc822TokenizerTest, TwoEmails) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = "tjbarron@google.com alexsav@google.com";
EXPECT_THAT(
@@ -831,7 +814,7 @@ TEST_F(Rfc822TokenizerTest, TwoEmails) {
EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"))));
}
-TEST_F(Rfc822TokenizerTest, BackSlashes) {
+TEST(Rfc822TokenizerTest, BackSlashes) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = R"("\name")";
EXPECT_THAT(
@@ -855,7 +838,7 @@ TEST_F(Rfc822TokenizerTest, BackSlashes) {
EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "foo\\@gmail"))));
}
-TEST_F(Rfc822TokenizerTest, BigWhitespace) {
+TEST(Rfc822TokenizerTest, BigWhitespace) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = "\"quoted\" <address>";
EXPECT_THAT(
@@ -868,7 +851,7 @@ TEST_F(Rfc822TokenizerTest, BigWhitespace) {
EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "address"))));
}
-TEST_F(Rfc822TokenizerTest, AtSignFirst) {
+TEST(Rfc822TokenizerTest, AtSignFirst) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = "\"@foo\"";
EXPECT_THAT(
@@ -880,7 +863,7 @@ TEST_F(Rfc822TokenizerTest, AtSignFirst) {
EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "foo"))));
}
-TEST_F(Rfc822TokenizerTest, SlashThenUnicode) {
+TEST(Rfc822TokenizerTest, SlashThenUnicode) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = R"("quoted\你cjk")";
EXPECT_THAT(
@@ -893,7 +876,7 @@ TEST_F(Rfc822TokenizerTest, SlashThenUnicode) {
EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "quoted\\你cjk"))));
}
-TEST_F(Rfc822TokenizerTest, AddressEmptyAddress) {
+TEST(Rfc822TokenizerTest, AddressEmptyAddress) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = "<address> <> Name";
EXPECT_THAT(
@@ -906,7 +889,7 @@ TEST_F(Rfc822TokenizerTest, AddressEmptyAddress) {
EqualsToken(Token::Type::RFC822_NAME, "Name"))));
}
-TEST_F(Rfc822TokenizerTest, ProperComment) {
+TEST(Rfc822TokenizerTest, ProperComment) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = "(comment)alex@google.com";
EXPECT_THAT(
@@ -922,7 +905,7 @@ TEST_F(Rfc822TokenizerTest, ProperComment) {
EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"))));
}
-TEST_F(Rfc822TokenizerTest, SmallNameToEmail) {
+TEST(Rfc822TokenizerTest, SmallNameToEmail) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = "a@g.c,b@g.c";
EXPECT_THAT(rfc822_tokenizer.TokenizeAll(text),
@@ -954,7 +937,7 @@ TEST_F(Rfc822TokenizerTest, SmallNameToEmail) {
EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "g.c"))));
}
-TEST_F(Rfc822TokenizerTest, AtSignLast) {
+TEST(Rfc822TokenizerTest, AtSignLast) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string_view text("<alex@>, tim@");
EXPECT_THAT(
@@ -970,13 +953,13 @@ TEST_F(Rfc822TokenizerTest, AtSignLast) {
EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "tim"))));
}
-TEST_F(Rfc822TokenizerTest, Commas) {
+TEST(Rfc822TokenizerTest, Commas) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = ",,,,,,,,,,,,,,,,,,,,,,,,,,;";
EXPECT_THAT(rfc822_tokenizer.TokenizeAll(text), IsOkAndHolds(IsEmpty()));
}
-TEST_F(Rfc822TokenizerTest, ResetToTokenStartingAfter) {
+TEST(Rfc822TokenizerTest, ResetToTokenStartingAfter) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = "a@g.c,b@g.c";
auto token_iterator = rfc822_tokenizer.Tokenize(text).ValueOrDie();
@@ -992,7 +975,7 @@ TEST_F(Rfc822TokenizerTest, ResetToTokenStartingAfter) {
ASSERT_FALSE(token_iterator->ResetToTokenStartingAfter(6));
}
-TEST_F(Rfc822TokenizerTest, ResetToTokenEndingBefore) {
+TEST(Rfc822TokenizerTest, ResetToTokenEndingBefore) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = "a@g.c,b@g.c";
auto token_iterator = rfc822_tokenizer.Tokenize(text).ValueOrDie();
diff --git a/icing/transform/icu/icu-normalizer.cc b/icing/transform/icu/icu-normalizer.cc
index aceb11d..f32e541 100644
--- a/icing/transform/icu/icu-normalizer.cc
+++ b/icing/transform/icu/icu-normalizer.cc
@@ -134,17 +134,16 @@ std::string IcuNormalizer::NormalizeTerm(const std::string_view term) const {
ICING_LOG(WARNING) << "Failed to create a UNormalizer2 instance";
}
- // Checks if the first character is within ASCII range or can be transformed
- // into an ASCII char. Since the term is tokenized, we know that the whole
- // term can be transformed into ASCII if the first character can.
- UChar32 first_uchar32 =
- i18n_utils::GetUChar32At(term.data(), term.length(), 0);
- if (normalizer2 != nullptr && first_uchar32 != i18n_utils::kInvalidUChar32 &&
- DiacriticCharToAscii(normalizer2, first_uchar32, nullptr)) {
- // This is a faster method to normalize Latin terms.
- normalized_text = NormalizeLatin(normalizer2, term);
- } else {
- normalized_text = term_transformer_->Transform(term);
+ // Normalize the prefix that can be transformed into ASCII.
+ // This is a faster method to normalize Latin terms.
+ NormalizeLatinResult result = NormalizeLatin(normalizer2, term);
+ normalized_text = std::move(result.text);
+ if (result.end_pos < term.length()) {
+ // Some portion of term couldn't be normalized via NormalizeLatin. Use
+ // term_transformer to handle this portion.
+ std::string_view rest_term = term.substr(result.end_pos);
+ absl_ports::StrAppend(&normalized_text,
+ term_transformer_->Transform(rest_term));
}
if (normalized_text.length() > max_term_byte_size_) {
@@ -154,40 +153,32 @@ std::string IcuNormalizer::NormalizeTerm(const std::string_view term) const {
return normalized_text;
}
-std::string IcuNormalizer::NormalizeLatin(const UNormalizer2* normalizer2,
- const std::string_view term) const {
- std::string result;
- result.reserve(term.length());
- int current_pos = 0;
- while (current_pos < term.length()) {
- if (i18n_utils::IsAscii(term[current_pos])) {
- result.push_back(std::tolower(term[current_pos]));
- ++current_pos;
+IcuNormalizer::NormalizeLatinResult IcuNormalizer::NormalizeLatin(
+ const UNormalizer2* normalizer2, const std::string_view term) const {
+ NormalizeLatinResult result = {};
+ if (normalizer2 == nullptr) {
+ return result;
+ }
+ CharacterIterator char_itr(term);
+ result.text.reserve(term.length());
+ char ascii_char;
+ while (char_itr.utf8_index() < term.length()) {
+ UChar32 c = char_itr.GetCurrentChar();
+ if (i18n_utils::IsAscii(c)) {
+ result.text.push_back(std::tolower(c));
+ } else if (DiacriticCharToAscii(normalizer2, c, &ascii_char)) {
+ result.text.push_back(std::tolower(ascii_char));
} else {
- UChar32 uchar32 =
- i18n_utils::GetUChar32At(term.data(), term.length(), current_pos);
- if (uchar32 == i18n_utils::kInvalidUChar32) {
- ICING_LOG(WARNING) << "Unable to get uchar32 from " << term
- << " at position" << current_pos;
- current_pos += i18n_utils::GetUtf8Length(uchar32);
- continue;
- }
- char ascii_char;
- if (DiacriticCharToAscii(normalizer2, uchar32, &ascii_char)) {
- result.push_back(std::tolower(ascii_char));
- } else {
- // We don't know how to transform / decompose this Unicode character, it
- // probably means that some other Unicode characters are mixed with
- // Latin characters. This shouldn't happen if input term is properly
- // tokenized. We handle it here in case there're something wrong with
- // the tokenizers.
- int utf8_length = i18n_utils::GetUtf8Length(uchar32);
- absl_ports::StrAppend(&result, term.substr(current_pos, utf8_length));
- }
- current_pos += i18n_utils::GetUtf8Length(uchar32);
+ // We don't know how to transform / decompose this Unicode character, it
+ // probably means that some other Unicode characters are mixed with Latin
+ // characters. We return the partial result here and let the caller handle
+ // the rest.
+ result.end_pos = char_itr.utf8_index();
+ return result;
}
+ char_itr.AdvanceToUtf32(char_itr.utf32_index() + 1);
}
-
+ result.end_pos = term.length();
return result;
}
@@ -267,10 +258,13 @@ std::string IcuNormalizer::TermTransformer::Transform(
return std::move(utf8_term_or).ValueOrDie();
}
-CharacterIterator FindNormalizedLatinMatchEndPosition(
+bool IcuNormalizer::FindNormalizedLatinMatchEndPosition(
const UNormalizer2* normalizer2, std::string_view term,
- CharacterIterator char_itr, std::string_view normalized_term) {
- CharacterIterator normalized_char_itr(normalized_term);
+ CharacterIterator& char_itr, std::string_view normalized_term,
+ CharacterIterator& normalized_char_itr) const {
+ if (normalizer2 == nullptr) {
+ return false;
+ }
char ascii_char;
while (char_itr.utf8_index() < term.length() &&
normalized_char_itr.utf8_index() < normalized_term.length()) {
@@ -278,16 +272,18 @@ CharacterIterator FindNormalizedLatinMatchEndPosition(
if (i18n_utils::IsAscii(c)) {
c = std::tolower(c);
} else if (DiacriticCharToAscii(normalizer2, c, &ascii_char)) {
- c = ascii_char;
+ c = std::tolower(ascii_char);
+ } else {
+ return false;
}
UChar32 normalized_c = normalized_char_itr.GetCurrentChar();
if (c != normalized_c) {
- return char_itr;
+ return true;
}
char_itr.AdvanceToUtf32(char_itr.utf32_index() + 1);
normalized_char_itr.AdvanceToUtf32(normalized_char_itr.utf32_index() + 1);
}
- return char_itr;
+ return true;
}
CharacterIterator
@@ -357,15 +353,18 @@ CharacterIterator IcuNormalizer::FindNormalizedMatchEndPosition(
}
CharacterIterator char_itr(term);
- UChar32 first_uchar32 = char_itr.GetCurrentChar();
- if (normalizer2 != nullptr && first_uchar32 != i18n_utils::kInvalidUChar32 &&
- DiacriticCharToAscii(normalizer2, first_uchar32, /*char_out=*/nullptr)) {
- return FindNormalizedLatinMatchEndPosition(normalizer2, term, char_itr,
- normalized_term);
- } else {
- return term_transformer_->FindNormalizedNonLatinMatchEndPosition(
- term, char_itr, normalized_term);
+ CharacterIterator normalized_char_itr(normalized_term);
+ if (FindNormalizedLatinMatchEndPosition(
+ normalizer2, term, char_itr, normalized_term, normalized_char_itr)) {
+ return char_itr;
}
+ // Some portion of term couldn't be normalized via
+ // FindNormalizedLatinMatchEndPosition. Use term_transformer to handle this
+ // portion.
+ std::string_view rest_normalized_term =
+ normalized_term.substr(normalized_char_itr.utf8_index());
+ return term_transformer_->FindNormalizedNonLatinMatchEndPosition(
+ term, char_itr, rest_normalized_term);
}
} // namespace lib
diff --git a/icing/transform/icu/icu-normalizer.h b/icing/transform/icu/icu-normalizer.h
index d4f1ebd..7c64506 100644
--- a/icing/transform/icu/icu-normalizer.h
+++ b/icing/transform/icu/icu-normalizer.h
@@ -101,14 +101,36 @@ class IcuNormalizer : public Normalizer {
UTransliterator* u_transliterator_;
};
+ struct NormalizeLatinResult {
+ // A string representing the maximum prefix of term (can be empty or term
+ // itself) that can be normalized into ASCII.
+ std::string text;
+ // The first position of the char within term that normalization failed to
+ // transform into an ASCII char, or term.length() if all chars can be
+ // transformed.
+ size_t end_pos;
+ };
+
explicit IcuNormalizer(std::unique_ptr<TermTransformer> term_transformer,
int max_term_byte_size);
// Helper method to normalize Latin terms only. Rules applied:
// 1. Uppercase to lowercase
// 2. Remove diacritic (accent) marks
- std::string NormalizeLatin(const UNormalizer2* normalizer2,
- std::string_view term) const;
+ NormalizeLatinResult NormalizeLatin(const UNormalizer2* normalizer2,
+ std::string_view term) const;
+
+ // Set char_itr and normalized_char_itr to point to one past the end of the
+ // segments of term and normalized_term that can match if normalized into
+ // ASCII. In this case, true will be returned.
+ //
+ // The method stops at the position when char_itr cannot be normalized into
+ // ASCII and returns false, so that term_transformer can handle the remaining
+ // portion.
+ bool FindNormalizedLatinMatchEndPosition(
+ const UNormalizer2* normalizer2, std::string_view term,
+ CharacterIterator& char_itr, std::string_view normalized_term,
+ CharacterIterator& normalized_char_itr) const;
// Used to transform terms into their normalized forms.
std::unique_ptr<TermTransformer> term_transformer_;
diff --git a/icing/transform/icu/icu-normalizer_test.cc b/icing/transform/icu/icu-normalizer_test.cc
index 143da17..719f7be 100644
--- a/icing/transform/icu/icu-normalizer_test.cc
+++ b/icing/transform/icu/icu-normalizer_test.cc
@@ -111,6 +111,7 @@ TEST_F(IcuNormalizerTest, LatinLetterRemoveAccent) {
EXPECT_THAT(normalizer_->NormalizeTerm("ÝŶŸẎẏŷýÿ"), Eq("yyyyyyyy"));
EXPECT_THAT(normalizer_->NormalizeTerm("ŹŻŽẐẒẔẑẓẕźżž"),
Eq("zzzzzzzzzzzz"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("Barış"), Eq("baris"));
}
// Accent / diacritic marks won't be removed in non-latin chars, e.g. in
@@ -278,6 +279,14 @@ TEST_F(IcuNormalizerTest, PrefixMatchLength) {
term = "Buenos días";
match_end = normalizer->FindNormalizedMatchEndPosition(term, "buenos di");
EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Buenos dí"));
+
+ term = "BarışIcing";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "baris");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Barış"));
+
+ term = "ÀĄḁáIcing";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "aaaa");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("ÀĄḁá"));
}
TEST_F(IcuNormalizerTest, SharedPrefixMatchLength) {
@@ -327,6 +336,10 @@ TEST_F(IcuNormalizerTest, SharedPrefixMatchLength) {
term = "días";
match_end = normalizer->FindNormalizedMatchEndPosition(term, "diamond");
EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("día"));
+
+ term = "BarışIcing";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "barismdi");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Barış"));
}
} // namespace
diff --git a/icing/util/document-validator.cc b/icing/util/document-validator.cc
index ca15ee3..9d5fea7 100644
--- a/icing/util/document-validator.cc
+++ b/icing/util/document-validator.cc
@@ -151,15 +151,19 @@ libtextclassifier3::Status DocumentValidator::Validate(
// fail, we don't need to validate the extra documents.
if (property_config.data_type() ==
PropertyConfigProto::DataType::DOCUMENT) {
- const std::string_view nested_type_expected =
- property_config.schema_type();
+ ICING_ASSIGN_OR_RETURN(
+ const std::unordered_set<SchemaTypeId>* nested_type_ids_expected,
+ schema_store_->GetSchemaTypeIdsWithChildren(
+ property_config.schema_type()));
for (const DocumentProto& nested_document : property.document_values()) {
- if (nested_type_expected.compare(nested_document.schema()) != 0) {
+ libtextclassifier3::StatusOr<SchemaTypeId> nested_document_type_id_or =
+ schema_store_->GetSchemaTypeId(nested_document.schema());
+ if (!nested_document_type_id_or.ok() ||
+ nested_type_ids_expected->count(
+ nested_document_type_id_or.ValueOrDie()) == 0) {
return absl_ports::InvalidArgumentError(absl_ports::StrCat(
- "Property '", property.name(), "' should have type '",
- nested_type_expected,
- "' but actual "
- "value has type '",
+ "Property '", property.name(), "' should be type or subtype of '",
+ property_config.schema_type(), "' but actual value has type '",
nested_document.schema(), "' for key: (", document.namespace_(),
", ", document.uri(), ")."));
}
diff --git a/icing/util/document-validator_test.cc b/icing/util/document-validator_test.cc
index 310494a..9d10b36 100644
--- a/icing/util/document-validator_test.cc
+++ b/icing/util/document-validator_test.cc
@@ -35,13 +35,16 @@ namespace {
using ::testing::HasSubstr;
-// type and property names of EmailMessage
+// type and property names of EmailMessage and EmailMessageWithNote
constexpr char kTypeEmail[] = "EmailMessage";
+constexpr char kTypeEmailWithNote[] = "EmailMessageWithNote";
constexpr char kPropertySubject[] = "subject";
constexpr char kPropertyText[] = "text";
constexpr char kPropertyRecipients[] = "recipients";
+constexpr char kPropertyNote[] = "note";
// type and property names of Conversation
constexpr char kTypeConversation[] = "Conversation";
+constexpr char kTypeConversationWithEmailNote[] = "ConversationWithEmailNote";
constexpr char kPropertyName[] = "name";
constexpr char kPropertyEmails[] = "emails";
// Other values
@@ -72,6 +75,26 @@ class DocumentValidatorTest : public ::testing::Test {
.SetCardinality(CARDINALITY_REPEATED)))
.AddType(
SchemaTypeConfigBuilder()
+ .SetType(kTypeEmailWithNote)
+ .AddParentType(kTypeEmail)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertySubject)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyText)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyRecipients)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyNote)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
.SetType(kTypeConversation)
.AddProperty(PropertyConfigBuilder()
.SetName(kPropertyName)
@@ -83,6 +106,19 @@ class DocumentValidatorTest : public ::testing::Test {
.SetDataTypeDocument(
kTypeEmail, /*index_nested_properties=*/true)
.SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kTypeConversationWithEmailNote)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyEmails)
+ .SetDataTypeDocument(
+ kTypeEmailWithNote,
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
.Build();
schema_dir_ = GetTestTempDir() + "/schema_store";
@@ -90,13 +126,16 @@ class DocumentValidatorTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, schema_dir_, &fake_clock_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
document_validator_ =
std::make_unique<DocumentValidator>(schema_store_.get());
}
- static DocumentBuilder SimpleEmailBuilder() {
+ DocumentBuilder SimpleEmailBuilder() {
return DocumentBuilder()
.SetKey(kDefaultNamespace, "email/1")
.SetSchema(kTypeEmail)
@@ -106,7 +145,18 @@ class DocumentValidatorTest : public ::testing::Test {
kDefaultString);
}
- static DocumentBuilder SimpleConversationBuilder() {
+ DocumentBuilder SimpleEmailWithNoteBuilder() {
+ return DocumentBuilder()
+ .SetKey(kDefaultNamespace, "email_with_note/1")
+ .SetSchema(kTypeEmailWithNote)
+ .AddStringProperty(kPropertySubject, kDefaultString)
+ .AddStringProperty(kPropertyText, kDefaultString)
+ .AddStringProperty(kPropertyRecipients, kDefaultString, kDefaultString,
+ kDefaultString)
+ .AddStringProperty(kPropertyNote, kDefaultString);
+ }
+
+ DocumentBuilder SimpleConversationBuilder() {
return DocumentBuilder()
.SetKey(kDefaultNamespace, "conversation/1")
.SetSchema(kTypeConversation)
@@ -299,10 +349,82 @@ TEST_F(DocumentValidatorTest,
SimpleEmailBuilder().Build())
.Build();
- EXPECT_THAT(document_validator_->Validate(conversation),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
- HasSubstr("'emails' should have type 'EmailMessage' but "
- "actual value has type 'Conversation'")));
+ EXPECT_THAT(
+ document_validator_->Validate(conversation),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("'emails' should be type or subtype of 'EmailMessage' "
+ "but actual value has type 'Conversation'")));
+}
+
+TEST_F(DocumentValidatorTest, ValidateNestedPropertyMatchSubtypeOk) {
+ DocumentProto conversation =
+ DocumentBuilder()
+ .SetKey(kDefaultNamespace, "conversation/1")
+ .SetSchema(kTypeConversation)
+ .AddStringProperty(kPropertyName, kDefaultString)
+ .AddDocumentProperty(kPropertyEmails, SimpleEmailBuilder().Build(),
+ // This is a subtype, which is ok.
+ SimpleEmailWithNoteBuilder().Build(),
+ SimpleEmailBuilder().Build())
+ .Build();
+
+ EXPECT_THAT(document_validator_->Validate(conversation), IsOk());
+}
+
+TEST_F(DocumentValidatorTest, ValidateNestedPropertyNonexistentTypeInvalid) {
+ DocumentProto conversation =
+ DocumentBuilder()
+ .SetKey(kDefaultNamespace, "conversation/1")
+ .SetSchema(kTypeConversation)
+ .AddStringProperty(kPropertyName, kDefaultString)
+ .AddDocumentProperty(
+ kPropertyEmails, SimpleEmailBuilder().Build(),
+ // Nonexistent type is not allowed
+ DocumentBuilder()
+ .SetKey(kDefaultNamespace, "email_with_note/1")
+ .SetSchema("Nonexistent")
+ .Build(),
+ SimpleEmailBuilder().Build())
+ .Build();
+
+ EXPECT_THAT(
+ document_validator_->Validate(conversation),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("'emails' should be type or subtype of 'EmailMessage' "
+ "but actual value has type 'Nonexistent'")));
+}
+
+TEST_F(DocumentValidatorTest, ValidateNestedPropertyMatchSuperTypeInvalid) {
+ DocumentProto conversation1 =
+ DocumentBuilder()
+ .SetKey(kDefaultNamespace, "conversation_with_email_note/1")
+ .SetSchema(kTypeConversationWithEmailNote)
+ .AddStringProperty(kPropertyName, kDefaultString)
+ .AddDocumentProperty(kPropertyEmails,
+ SimpleEmailWithNoteBuilder().Build(),
+ SimpleEmailWithNoteBuilder().Build(),
+ SimpleEmailWithNoteBuilder().Build())
+ .Build();
+ EXPECT_THAT(document_validator_->Validate(conversation1), IsOk());
+
+ DocumentProto conversation2 =
+ DocumentBuilder()
+ .SetKey(kDefaultNamespace, "conversation_with_email_note/2")
+ .SetSchema(kTypeConversationWithEmailNote)
+ .AddStringProperty(kPropertyName, kDefaultString)
+ .AddDocumentProperty(kPropertyEmails,
+ SimpleEmailWithNoteBuilder().Build(),
+ // This is a super type, which is not ok.
+ SimpleEmailBuilder().Build(),
+ SimpleEmailWithNoteBuilder().Build())
+ .Build();
+ EXPECT_THAT(
+ document_validator_->Validate(conversation2),
+ StatusIs(
+ libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr(
+ "'emails' should be type or subtype of 'EmailMessageWithNote' "
+ "but actual value has type 'EmailMessage'")));
}
TEST_F(DocumentValidatorTest, ValidateNestedPropertyInvalid) {
@@ -351,7 +473,10 @@ TEST_F(DocumentValidatorTest, HandleTypeConfigMapChangesOk) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, custom_schema_dir, &fake_clock_));
- ASSERT_THAT(schema_store->SetSchema(email_schema), IsOk());
+ ASSERT_THAT(schema_store->SetSchema(
+ email_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
DocumentValidator document_validator(schema_store.get());
@@ -381,7 +506,11 @@ TEST_F(DocumentValidatorTest, HandleTypeConfigMapChangesOk) {
// DocumentValidator should be able to handle the SchemaStore getting updated
// separately
- ASSERT_THAT(schema_store->SetSchema(email_and_conversation_schema), IsOk());
+ ASSERT_THAT(
+ schema_store->SetSchema(email_and_conversation_schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
ICING_EXPECT_OK(document_validator.Validate(conversation));
}
diff --git a/icing/util/encode-util.cc b/icing/util/encode-util.cc
new file mode 100644
index 0000000..2642da7
--- /dev/null
+++ b/icing/util/encode-util.cc
@@ -0,0 +1,50 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/encode-util.h"
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+
+namespace icing {
+namespace lib {
+
+namespace encode_util {
+
+std::string EncodeIntToCString(uint64_t value) {
+ std::string encoded_str;
+ // Encode it in base128 and add 1 to make sure that there is no 0-byte. This
+ // increases the size of the encoded_str from 8-bytes to 10-bytes at worst.
+ do {
+ encoded_str.push_back((value & 0x7F) + 1);
+ value >>= 7;
+ } while (value);
+ return encoded_str;
+}
+
+uint64_t DecodeIntFromCString(std::string_view encoded_str) {
+ uint64_t value = 0;
+ for (int i = encoded_str.length() - 1; i >= 0; --i) {
+ value <<= 7;
+ char c = encoded_str[i] - 1;
+ value |= (c & 0x7F);
+ }
+ return value;
+}
+
+} // namespace encode_util
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/util/encode-util.h b/icing/util/encode-util.h
new file mode 100644
index 0000000..5a31acb
--- /dev/null
+++ b/icing/util/encode-util.h
@@ -0,0 +1,45 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_UTIL_ENCODE_UTIL_H_
+#define ICING_UTIL_ENCODE_UTIL_H_
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+
+namespace icing {
+namespace lib {
+
+namespace encode_util {
+
+// Converts an unsigned 64-bit integer to a C string that doesn't contain 0-byte
+// since C string uses 0-byte as terminator. This increases the size of the
+// encoded_str from 8-bytes to 10-bytes at worst.
+//
+// Note that it is compatible with unsigned 32-bit integers, i.e. casting an
+// uint32_t to uint64_t with the same value and encoding it by this method will
+// get the same string.
+std::string EncodeIntToCString(uint64_t value);
+
+// Converts a C string (encoded from EncodeIntToCString()) to an unsigned 64-bit
+// integer.
+uint64_t DecodeIntFromCString(std::string_view encoded_str);
+
+} // namespace encode_util
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_UTIL_ENCODE_UTIL_H_
diff --git a/icing/util/encode-util_test.cc b/icing/util/encode-util_test.cc
new file mode 100644
index 0000000..c6cb984
--- /dev/null
+++ b/icing/util/encode-util_test.cc
@@ -0,0 +1,91 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/encode-util.h"
+
+#include <cstdint>
+#include <string>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace icing {
+namespace lib {
+namespace encode_util {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::Gt;
+using ::testing::SizeIs;
+
+TEST(EncodeUtilTest, IntCStringZeroConversion) {
+ uint64_t value = 0;
+ std::string encoded_str = EncodeIntToCString(value);
+
+ EXPECT_THAT(encoded_str, SizeIs(Gt(0)));
+ EXPECT_THAT(DecodeIntFromCString(encoded_str), Eq(value));
+}
+
+TEST(EncodeUtilTest, IntCStringConversionIsReversible) {
+ uint64_t value = 123456;
+ std::string encoded_str = EncodeIntToCString(value);
+ EXPECT_THAT(DecodeIntFromCString(encoded_str), Eq(value));
+}
+
+TEST(EncodeUtilTest, MultipleIntCStringConversionsAreReversible) {
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(25)), Eq(25));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(766)), Eq(766));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(2305)), Eq(2305));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(6922)), Eq(6922));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(62326)), Eq(62326));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(186985)), Eq(186985));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(560962)), Eq(560962));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(1682893)), Eq(1682893));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(15146065)), Eq(15146065));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(136314613)),
+ Eq(136314613));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(1226831545)),
+ Eq(1226831545));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(11041483933)),
+ Eq(11041483933));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(2683080596566)),
+ Eq(2683080596566));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(72443176107373)),
+ Eq(72443176107373));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(1955965754899162)),
+ Eq(1955965754899162));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(52811075382277465)),
+ Eq(52811075382277465));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(4277697105964474945)),
+ Eq(4277697105964474945));
+}
+
+TEST(EncodeUtilTest, MultipleValidEncodedCStringIntConversionsAreReversible) {
+ // Only valid encoded C string (no zero bytes, length is between 1 and 10) are
+ // reversible.
+ EXPECT_THAT(EncodeIntToCString(DecodeIntFromCString("foo")), Eq("foo"));
+ EXPECT_THAT(EncodeIntToCString(DecodeIntFromCString("bar")), Eq("bar"));
+ EXPECT_THAT(EncodeIntToCString(DecodeIntFromCString("baz")), Eq("baz"));
+ EXPECT_THAT(EncodeIntToCString(DecodeIntFromCString("Icing")), Eq("Icing"));
+ EXPECT_THAT(EncodeIntToCString(DecodeIntFromCString("Google")), Eq("Google"));
+ EXPECT_THAT(EncodeIntToCString(DecodeIntFromCString("Youtube")),
+ Eq("Youtube"));
+}
+
+} // namespace
+
+} // namespace encode_util
+} // namespace lib
+} // namespace icing
diff --git a/icing/util/snippet-helpers.cc b/icing/util/snippet-helpers.cc
index 6d6277f..ca6f423 100644
--- a/icing/util/snippet-helpers.cc
+++ b/icing/util/snippet-helpers.cc
@@ -17,47 +17,13 @@
#include <algorithm>
#include <string_view>
-#include "icing/absl_ports/str_join.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/search.pb.h"
-#include "icing/schema/section-manager.h"
+#include "icing/schema/property-util.h"
namespace icing {
namespace lib {
-namespace {
-
-// Returns the property index and the property name with the index removed.
-// Examples:
-// GetPropertyIndex("foo") will return ["foo", 0]
-// GetPropertyIndex("foo[5]") will return ["foo", 5]
-std::pair<std::string_view, int> GetPropertyIndex(std::string_view property) {
- size_t l_bracket = property.find(kLBracket);
- if (l_bracket == std::string_view::npos || l_bracket >= property.length()) {
- return {property, 0};
- }
- size_t r_bracket = property.find(kRBracket, l_bracket);
- if (r_bracket == std::string_view::npos || r_bracket - l_bracket < 2) {
- return {property, 0};
- }
- std::string index_string =
- std::string(property.substr(l_bracket + 1, r_bracket - l_bracket - 1));
- return {property.substr(0, l_bracket), std::stoi(index_string)};
-}
-
-} // namespace
-
-const PropertyProto* GetProperty(const DocumentProto& document,
- std::string_view property_name) {
- const PropertyProto* property = nullptr;
- for (const PropertyProto& prop : document.properties()) {
- if (prop.name() == property_name) {
- property = &prop;
- }
- }
- return property;
-}
-
std::vector<std::string_view> GetWindows(
std::string_view content, const SnippetProto::EntryProto& snippet_proto) {
std::vector<std::string_view> windows;
@@ -89,31 +55,36 @@ std::vector<std::string_view> GetSubMatches(
}
std::string_view GetString(const DocumentProto* document,
- std::string_view property_path) {
+ std::string_view property_path_expr) {
std::vector<std::string_view> properties =
- absl_ports::StrSplit(property_path, kPropertySeparator);
+ property_util::SplitPropertyPathExpr(property_path_expr);
for (int i = 0; i < properties.size(); ++i) {
- std::string_view property = properties.at(i);
- int property_index;
- std::tie(property, property_index) = GetPropertyIndex(property);
- const PropertyProto* prop = GetProperty(*document, property);
+ property_util::PropertyInfo property_info =
+ property_util::ParsePropertyNameExpr(properties.at(i));
+ if (property_info.index == property_util::kWildcardPropertyIndex) {
+ // Use index = 0 by default.
+ property_info.index = 0;
+ }
+
+ const PropertyProto* prop =
+ property_util::GetPropertyProto(*document, property_info.name);
if (prop == nullptr) {
// requested property doesn't exist in the document. Return empty string.
return "";
}
if (i == properties.size() - 1) {
// The last property. Get the string_value
- if (prop->string_values_size() - 1 < property_index) {
+ if (prop->string_values_size() - 1 < property_info.index) {
// The requested string doesn't exist. Return empty string.
return "";
}
- return prop->string_values(property_index);
- } else if (prop->document_values_size() - 1 < property_index) {
+ return prop->string_values(property_info.index);
+ } else if (prop->document_values_size() - 1 < property_info.index) {
// The requested subproperty doesn't exist. return an empty string.
return "";
} else {
// Go to the next subproperty.
- document = &prop->document_values(property_index);
+ document = &prop->document_values(property_info.index);
}
}
return "";
diff --git a/icing/util/snippet-helpers.h b/icing/util/snippet-helpers.h
index 73b2ce2..d7349ba 100644
--- a/icing/util/snippet-helpers.h
+++ b/icing/util/snippet-helpers.h
@@ -45,14 +45,14 @@ std::vector<std::string_view> GetSubMatches(
std::string_view content, const SnippetProto::EntryProto& snippet_proto);
// Retrieves the string value held in the document corresponding to the
-// property_path.
+// property_path_expr.
// Example:
// - GetString(doc, "foo") will retrieve the first string value in the
// property "foo" in document or an empty string if it doesn't exist.
// - GetString(doc, "foo[1].bar[2]") will retrieve the third string value in
// the subproperty "bar" of the second document value in the property "foo".
std::string_view GetString(const DocumentProto* document,
- std::string_view property_path);
+ std::string_view property_path_expr);
} // namespace lib
} // namespace icing
diff --git a/icing/util/tokenized-document.cc b/icing/util/tokenized-document.cc
index facb267..19aaddf 100644
--- a/icing/util/tokenized-document.cc
+++ b/icing/util/tokenized-document.cc
@@ -20,6 +20,7 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/proto/document.pb.h"
+#include "icing/schema/joinable-property.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/tokenization/language-segmenter.h"
@@ -72,6 +73,9 @@ TokenizedDocument::Create(const SchemaStore* schema_store,
ICING_ASSIGN_OR_RETURN(SectionGroup section_group,
schema_store->ExtractSections(document));
+ ICING_ASSIGN_OR_RETURN(JoinablePropertyGroup joinable_property_group,
+ schema_store->ExtractJoinableProperties(document));
+
// Tokenize string sections
ICING_ASSIGN_OR_RETURN(
std::vector<TokenizedSection> tokenized_string_sections,
@@ -80,7 +84,8 @@ TokenizedDocument::Create(const SchemaStore* schema_store,
return TokenizedDocument(std::move(document),
std::move(tokenized_string_sections),
- std::move(section_group.integer_sections));
+ std::move(section_group.integer_sections),
+ std::move(joinable_property_group));
}
} // namespace lib
diff --git a/icing/util/tokenized-document.h b/icing/util/tokenized-document.h
index 5729df2..7cc34e3 100644
--- a/icing/util/tokenized-document.h
+++ b/icing/util/tokenized-document.h
@@ -21,6 +21,7 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/proto/document.pb.h"
+#include "icing/schema/joinable-property.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/tokenization/language-segmenter.h"
@@ -62,19 +63,27 @@ class TokenizedDocument {
return integer_sections_;
}
+ const std::vector<JoinableProperty<std::string_view>>&
+ qualified_id_join_properties() const {
+ return joinable_property_group_.qualified_id_properties;
+ }
+
private:
// Use TokenizedDocument::Create() to instantiate.
explicit TokenizedDocument(
DocumentProto&& document,
std::vector<TokenizedSection>&& tokenized_string_sections,
- std::vector<Section<int64_t>>&& integer_sections)
+ std::vector<Section<int64_t>>&& integer_sections,
+ JoinablePropertyGroup&& joinable_property_group)
: document_(std::move(document)),
tokenized_string_sections_(std::move(tokenized_string_sections)),
- integer_sections_(std::move(integer_sections)) {}
+ integer_sections_(std::move(integer_sections)),
+ joinable_property_group_(std::move(joinable_property_group)) {}
DocumentProto document_;
std::vector<TokenizedSection> tokenized_string_sections_;
std::vector<Section<int64_t>> integer_sections_;
+ JoinablePropertyGroup joinable_property_group_;
};
} // namespace lib
diff --git a/icing/util/tokenized-document_test.cc b/icing/util/tokenized-document_test.cc
index 3497bef..7c97776 100644
--- a/icing/util/tokenized-document_test.cc
+++ b/icing/util/tokenized-document_test.cc
@@ -27,6 +27,7 @@
#include "icing/proto/schema.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/schema-builder.h"
+#include "icing/schema/joinable-property.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/testing/common-matchers.h"
@@ -43,26 +44,36 @@ namespace lib {
namespace {
+using ::icing::lib::portable_equals_proto::EqualsProto;
using ::testing::ElementsAre;
using ::testing::Eq;
-using ::testing::EqualsProto;
using ::testing::IsEmpty;
using ::testing::SizeIs;
// schema types
-constexpr std::string_view kFakeType = "FakeType";
+static constexpr std::string_view kFakeType = "FakeType";
// Indexable properties and section Id. Section Id is determined by the
// lexicographical order of indexable property path.
-constexpr std::string_view kIndexableIntegerProperty1 = "indexableInteger1";
-constexpr std::string_view kIndexableIntegerProperty2 = "indexableInteger2";
-constexpr std::string_view kStringExactProperty = "stringExact";
-constexpr std::string_view kStringPrefixProperty = "stringPrefix";
-
-constexpr SectionId kIndexableInteger1SectionId = 0;
-constexpr SectionId kIndexableInteger2SectionId = 1;
-constexpr SectionId kStringExactSectionId = 2;
-constexpr SectionId kStringPrefixSectionId = 3;
+static constexpr std::string_view kIndexableIntegerProperty1 =
+ "indexableInteger1";
+static constexpr std::string_view kIndexableIntegerProperty2 =
+ "indexableInteger2";
+static constexpr std::string_view kStringExactProperty = "stringExact";
+static constexpr std::string_view kStringPrefixProperty = "stringPrefix";
+
+static constexpr SectionId kIndexableInteger1SectionId = 0;
+static constexpr SectionId kIndexableInteger2SectionId = 1;
+static constexpr SectionId kStringExactSectionId = 2;
+static constexpr SectionId kStringPrefixSectionId = 3;
+
+// Joinable properties and joinable property id. Joinable property id is
+// determined by the lexicographical order of joinable property path.
+static constexpr std::string_view kQualifiedId1 = "qualifiedId1";
+static constexpr std::string_view kQualifiedId2 = "qualifiedId2";
+
+static constexpr JoinablePropertyId kQualifiedId1JoinablePropertyId = 0;
+static constexpr JoinablePropertyId kQualifiedId2JoinablePropertyId = 1;
const SectionMetadata kIndexableInteger1SectionMetadata(
kIndexableInteger1SectionId, TYPE_INT64, TOKENIZER_NONE, TERM_MATCH_UNKNOWN,
@@ -80,7 +91,15 @@ const SectionMetadata kStringPrefixSectionMetadata(
kStringPrefixSectionId, TYPE_STRING, TOKENIZER_PLAIN, TERM_MATCH_PREFIX,
NUMERIC_MATCH_UNKNOWN, std::string(kStringPrefixProperty));
-// Other non-indexable properties.
+const JoinablePropertyMetadata kQualifiedId1JoinablePropertyMetadata(
+ kQualifiedId1JoinablePropertyId, TYPE_STRING,
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID, std::string(kQualifiedId1));
+
+const JoinablePropertyMetadata kQualifiedId2JoinablePropertyMetadata(
+ kQualifiedId2JoinablePropertyId, TYPE_STRING,
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID, std::string(kQualifiedId2));
+
+// Other non-indexable/joinable properties.
constexpr std::string_view kUnindexedStringProperty = "unindexedString";
constexpr std::string_view kUnindexedIntegerProperty = "unindexedInteger";
@@ -137,9 +156,21 @@ class TokenizedDocumentTest : public ::testing::Test {
.SetName(kStringPrefixProperty)
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kQualifiedId1)
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kQualifiedId2)
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
}
void TearDown() override {
@@ -177,6 +208,8 @@ TEST_F(TokenizedDocumentTest, CreateAll) {
.AddInt64Property(std::string(kUnindexedIntegerProperty), 789)
.AddInt64Property(std::string(kIndexableIntegerProperty1), 1, 2, 3)
.AddInt64Property(std::string(kIndexableIntegerProperty2), 456)
+ .AddStringProperty(std::string(kQualifiedId1), "pkg$db/ns#uri1")
+ .AddStringProperty(std::string(kQualifiedId2), "pkg$db/ns#uri2")
.Build();
ICING_ASSERT_OK_AND_ASSIGN(
@@ -210,6 +243,17 @@ TEST_F(TokenizedDocumentTest, CreateAll) {
Eq(kIndexableInteger2SectionMetadata));
EXPECT_THAT(tokenized_document.integer_sections().at(1).content,
ElementsAre(456));
+
+ // Qualified id join properties
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties(), SizeIs(2));
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties().at(0).metadata,
+ Eq(kQualifiedId1JoinablePropertyMetadata));
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties().at(0).values,
+ ElementsAre("pkg$db/ns#uri1"));
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties().at(1).metadata,
+ Eq(kQualifiedId2JoinablePropertyMetadata));
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties().at(1).values,
+ ElementsAre("pkg$db/ns#uri2"));
}
TEST_F(TokenizedDocumentTest, CreateNoIndexableIntegerProperties) {
@@ -233,6 +277,9 @@ TEST_F(TokenizedDocumentTest, CreateNoIndexableIntegerProperties) {
// integer sections
EXPECT_THAT(tokenized_document.integer_sections(), IsEmpty());
+
+ // Qualified id join properties
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties(), IsEmpty());
}
TEST_F(TokenizedDocumentTest, CreateMultipleIndexableIntegerProperties) {
@@ -266,6 +313,9 @@ TEST_F(TokenizedDocumentTest, CreateMultipleIndexableIntegerProperties) {
Eq(kIndexableInteger2SectionMetadata));
EXPECT_THAT(tokenized_document.integer_sections().at(1).content,
ElementsAre(456));
+
+ // Qualified id join properties
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties(), IsEmpty());
}
TEST_F(TokenizedDocumentTest, CreateNoIndexableStringProperties) {
@@ -290,6 +340,9 @@ TEST_F(TokenizedDocumentTest, CreateNoIndexableStringProperties) {
// integer sections
EXPECT_THAT(tokenized_document.integer_sections(), IsEmpty());
+
+ // Qualified id join properties
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties(), IsEmpty());
}
TEST_F(TokenizedDocumentTest, CreateMultipleIndexableStringProperties) {
@@ -327,6 +380,73 @@ TEST_F(TokenizedDocumentTest, CreateMultipleIndexableStringProperties) {
// integer sections
EXPECT_THAT(tokenized_document.integer_sections(), IsEmpty());
+
+ // Qualified id join properties
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties(), IsEmpty());
+}
+
+TEST_F(TokenizedDocumentTest, CreateNoJoinQualifiedIdProperties) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kUnindexedStringProperty),
+ "hello world unindexed")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ EXPECT_THAT(tokenized_document.document(), EqualsProto(document));
+ EXPECT_THAT(tokenized_document.num_string_tokens(), Eq(0));
+
+ // string sections
+ EXPECT_THAT(tokenized_document.tokenized_string_sections(), IsEmpty());
+
+ // integer sections
+ EXPECT_THAT(tokenized_document.integer_sections(), IsEmpty());
+
+ // Qualified id join properties
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties(), IsEmpty());
+}
+
+TEST_F(TokenizedDocumentTest, CreateMultipleJoinQualifiedIdProperties) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kUnindexedStringProperty),
+ "hello world unindexed")
+ .AddStringProperty(std::string(kQualifiedId1), "pkg$db/ns#uri1")
+ .AddStringProperty(std::string(kQualifiedId2), "pkg$db/ns#uri2")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ EXPECT_THAT(tokenized_document.document(), EqualsProto(document));
+ EXPECT_THAT(tokenized_document.num_string_tokens(), Eq(0));
+
+ // string sections
+ EXPECT_THAT(tokenized_document.tokenized_string_sections(), IsEmpty());
+
+ // integer sections
+ EXPECT_THAT(tokenized_document.integer_sections(), IsEmpty());
+
+ // Qualified id join properties
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties(), SizeIs(2));
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties().at(0).metadata,
+ Eq(kQualifiedId1JoinablePropertyMetadata));
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties().at(0).values,
+ ElementsAre("pkg$db/ns#uri1"));
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties().at(1).metadata,
+ Eq(kQualifiedId2JoinablePropertyMetadata));
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties().at(1).values,
+ ElementsAre("pkg$db/ns#uri2"));
}
} // namespace
diff --git a/java/src/com/google/android/icing/IcingSearchEngine.java b/java/src/com/google/android/icing/IcingSearchEngine.java
index 47b94a5..79fcdb8 100644
--- a/java/src/com/google/android/icing/IcingSearchEngine.java
+++ b/java/src/com/google/android/icing/IcingSearchEngine.java
@@ -77,6 +77,7 @@ public class IcingSearchEngine implements IcingSearchEngineInterface {
icingSearchEngineImpl.close();
}
+ @SuppressWarnings("deprecation")
@Override
protected void finalize() throws Throwable {
icingSearchEngineImpl.close();
diff --git a/java/src/com/google/android/icing/IcingSearchEngineImpl.java b/java/src/com/google/android/icing/IcingSearchEngineImpl.java
index 8e79a88..57744c4 100644
--- a/java/src/com/google/android/icing/IcingSearchEngineImpl.java
+++ b/java/src/com/google/android/icing/IcingSearchEngineImpl.java
@@ -71,6 +71,7 @@ public class IcingSearchEngineImpl implements Closeable {
closed = true;
}
+ @SuppressWarnings("deprecation")
@Override
protected void finalize() throws Throwable {
close();
diff --git a/lint-baseline.xml b/lint-baseline.xml
new file mode 100644
index 0000000..5d2b935
--- /dev/null
+++ b/lint-baseline.xml
@@ -0,0 +1,487 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<issues format="6" by="lint 8.1.0-beta02" type="baseline" client="gradle" dependencies="false" name="AGP (8.1.0-beta02)" variant="all" version="8.1.0-beta02">
+
+ <issue
+ id="KotlinPropertyAccess"
+ message="The getter return type (`GetSchemaResultProto`) and setter parameter type (`SchemaProto`) getter and setter methods for property `schema` should have exactly the same type to allow be accessed as a property from Kotlin; see https://android.github.io/kotlin-guides/interop.html#property-prefixes"
+ errorLine1=" public GetSchemaResultProto getSchema() {"
+ errorLine2=" ~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngine.java"/>
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngine.java"
+ message="Setter here"/>
+ </issue>
+
+ <issue
+ id="KotlinPropertyAccess"
+ message="The getter return type (`GetSchemaResultProto`) and setter parameter type (`SchemaProto`) getter and setter methods for property `schema` should have exactly the same type to allow be accessed as a property from Kotlin; see https://android.github.io/kotlin-guides/interop.html#property-prefixes"
+ errorLine1=" GetSchemaResultProto getSchema();"
+ errorLine2=" ~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"
+ message="Setter here"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" public BreakIteratorBatcher(Locale locale) {"
+ errorLine2=" ~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/BreakIteratorBatcher.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" public void setText(String text) {"
+ errorLine2=" ~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/BreakIteratorBatcher.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" public int[] next(int batchSize) {"
+ errorLine2=" ~~~~~">
+ <location
+ file="java/src/com/google/android/icing/BreakIteratorBatcher.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" public DebugInfoResultProto getDebugInfo(DebugInfoVerbosity.Code verbosity) {"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngine.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" public static boolean shouldLog(LogSeverity.Code severity) {"
+ errorLine2=" ~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngine.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" public static boolean shouldLog(LogSeverity.Code severity, short verbosity) {"
+ errorLine2=" ~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngine.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" public static boolean setLoggingLevel(LogSeverity.Code severity) {"
+ errorLine2=" ~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngine.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" public static boolean setLoggingLevel(LogSeverity.Code severity, short verbosity) {"
+ errorLine2=" ~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngine.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" InitializeResultProto initialize();"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" SetSchemaResultProto setSchema(SchemaProto schema);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" SetSchemaResultProto setSchema(SchemaProto schema);"
+ errorLine2=" ~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" SetSchemaResultProto setSchema(SchemaProto schema, boolean ignoreErrorsAndDeleteDocuments);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" SetSchemaResultProto setSchema(SchemaProto schema, boolean ignoreErrorsAndDeleteDocuments);"
+ errorLine2=" ~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" GetSchemaResultProto getSchema();"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" GetSchemaTypeResultProto getSchemaType(String schemaType);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" GetSchemaTypeResultProto getSchemaType(String schemaType);"
+ errorLine2=" ~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" PutResultProto put(DocumentProto document);"
+ errorLine2=" ~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" PutResultProto put(DocumentProto document);"
+ errorLine2=" ~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" GetResultProto get(String namespace, String uri, GetResultSpecProto getResultSpec);"
+ errorLine2=" ~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" GetResultProto get(String namespace, String uri, GetResultSpecProto getResultSpec);"
+ errorLine2=" ~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" GetResultProto get(String namespace, String uri, GetResultSpecProto getResultSpec);"
+ errorLine2=" ~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" GetResultProto get(String namespace, String uri, GetResultSpecProto getResultSpec);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" ReportUsageResultProto reportUsage(UsageReport usageReport);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" ReportUsageResultProto reportUsage(UsageReport usageReport);"
+ errorLine2=" ~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" GetAllNamespacesResultProto getAllNamespaces();"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" SearchResultProto search("
+ errorLine2=" ~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" SearchSpecProto searchSpec, ScoringSpecProto scoringSpec, ResultSpecProto resultSpec);"
+ errorLine2=" ~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" SearchSpecProto searchSpec, ScoringSpecProto scoringSpec, ResultSpecProto resultSpec);"
+ errorLine2=" ~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" SearchSpecProto searchSpec, ScoringSpecProto scoringSpec, ResultSpecProto resultSpec);"
+ errorLine2=" ~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" SearchResultProto getNextPage(long nextPageToken);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" DeleteResultProto delete(String namespace, String uri);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" DeleteResultProto delete(String namespace, String uri);"
+ errorLine2=" ~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" DeleteResultProto delete(String namespace, String uri);"
+ errorLine2=" ~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" SuggestionResponse searchSuggestions(SuggestionSpecProto suggestionSpec);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" SuggestionResponse searchSuggestions(SuggestionSpecProto suggestionSpec);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" DeleteByNamespaceResultProto deleteByNamespace(String namespace);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" DeleteByNamespaceResultProto deleteByNamespace(String namespace);"
+ errorLine2=" ~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" DeleteBySchemaTypeResultProto deleteBySchemaType(String schemaType);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" DeleteBySchemaTypeResultProto deleteBySchemaType(String schemaType);"
+ errorLine2=" ~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" DeleteByQueryResultProto deleteByQuery(SearchSpecProto searchSpec);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" DeleteByQueryResultProto deleteByQuery(SearchSpecProto searchSpec);"
+ errorLine2=" ~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" DeleteByQueryResultProto deleteByQuery("
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" SearchSpecProto searchSpec, boolean returnDeletedDocumentInfo);"
+ errorLine2=" ~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" PersistToDiskResultProto persistToDisk(PersistType.Code persistTypeCode);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" PersistToDiskResultProto persistToDisk(PersistType.Code persistTypeCode);"
+ errorLine2=" ~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" OptimizeResultProto optimize();"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" GetOptimizeInfoResultProto getOptimizeInfo();"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" StorageInfoResultProto getStorageInfo();"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" DebugInfoResultProto getDebugInfo(DebugInfoVerbosity.Code verbosity);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" DebugInfoResultProto getDebugInfo(DebugInfoVerbosity.Code verbosity);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" ResetResultProto reset();"
+ errorLine2=" ~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+</issues>
diff --git a/proto/icing/index/numeric/wildcard-property-storage.proto b/proto/icing/index/numeric/wildcard-property-storage.proto
new file mode 100644
index 0000000..7f02b77
--- /dev/null
+++ b/proto/icing/index/numeric/wildcard-property-storage.proto
@@ -0,0 +1,22 @@
+// Copyright 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto2";
+
+package icing.lib;
+
+// Next tag: 2
+message WildcardPropertyStorage {
+ repeated string property_entries = 1;
+}
diff --git a/proto/icing/proto/initialize.proto b/proto/icing/proto/initialize.proto
index 7fe1e6f..958767b 100644
--- a/proto/icing/proto/initialize.proto
+++ b/proto/icing/proto/initialize.proto
@@ -23,7 +23,7 @@ option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";
-// Next tag: 5
+// Next tag: 14
message IcingSearchEngineOptions {
// Directory to persist files for Icing. Required.
// If Icing was previously initialized with this directory, it will reload
@@ -58,6 +58,75 @@ message IcingSearchEngineOptions {
// Optional.
optional int32 index_merge_size = 4 [default = 1048576]; // 1 MiB
+ // Whether to use namespace id or namespace name to build up fingerprint for
+ // document_key_mapper_ and corpus_mapper_ in document store.
+ // TODO(b/259969017) Flip the default value of this flag to true at the time
+ // when we switch to use persistent hash map for document_key_mapper_ so that
+ // we just need one reconstruction of the internal mappers.
+ optional bool document_store_namespace_id_fingerprint = 5;
+
+ // The threshold of the percentage of invalid documents to rebuild index
+ // during optimize, i.e. we rebuild index if and only if
+ // |invalid_documents| / |all_documents| >= optimize_rebuild_index_threshold
+ //
+ // Rebuilding the index could be faster than optimizing the index if we have
+ // removed most of the documents.
+ // Based on benchmarks, 85%~95% seems to be a good threshold for most cases.
+ //
+ // Default to 0 for better rollout of the new index optimize.
+ optional float optimize_rebuild_index_threshold = 6 [default = 0.0];
+
+ // Level of compression, NO_COMPRESSION = 0, BEST_SPEED = 1,
+ // BEST_COMPRESSION = 9
+ // Valid values: [0, 9]
+ // Optional.
+ optional int32 compression_level = 7 [default = 3];
+
+ // OPTIONAL: Whether to allow circular references between schema types for
+ // the schema definition.
+ //
+ // Even when set to true, circular references are still not allowed in the
+ // following cases:
+ // 1. All edges of a cycle have index_nested_properties=true
+ // 2. One of the types in the cycle has a joinable property, or depends on
+ // a type with a joinable property.
+ // This is because such a cycle would lead to an infinite number of
+ // indexed/joinable properties:
+ //
+ // The default value is false.
+ optional bool allow_circular_schema_definitions = 8;
+
+ // Whether memory map max possible file size for FileBackedVector before
+ // growing the actual file size.
+ optional bool pre_mapping_fbv = 9;
+
+ // Whether use persistent hash map as the key mapper (if false, then fall back
+ // to dynamic trie key mapper).
+ optional bool use_persistent_hash_map = 10;
+
+ // Integer index bucket split threshold.
+ optional int32 integer_index_bucket_split_threshold = 11 [default = 65536];
+
+ // Whether Icing should sort and merge its lite index HitBuffer unsorted tail
+ // at indexing time.
+ //
+ // If set to true, the HitBuffer will be sorted at indexing time after
+ // exceeding the sort threshold. If false, the HifBuffer will be sorted at
+ // querying time, before the first query after inserting new elements into the
+ // HitBuffer.
+ //
+ // The default value is false.
+ optional bool lite_index_sort_at_indexing = 12;
+
+ // Size (in bytes) at which Icing's lite index should sort and merge the
+ // HitBuffer's unsorted tail into the sorted head for sorting at indexing
+ // time. Size specified here is the maximum byte size to allow for the
+ // unsorted tail section.
+ //
+ // Setting a lower sort size reduces querying latency at the expense of
+ // indexing latency.
+ optional int32 lite_index_sort_size = 13 [default = 8192]; // 8 KiB
+
reserved 2;
}
diff --git a/proto/icing/proto/logging.proto b/proto/icing/proto/logging.proto
index 6f168bd..418fc88 100644
--- a/proto/icing/proto/logging.proto
+++ b/proto/icing/proto/logging.proto
@@ -23,7 +23,7 @@ option java_multiple_files = true;
option objc_class_prefix = "ICNG";
// Stats of the top-level function IcingSearchEngine::Initialize().
-// Next tag: 12
+// Next tag: 14
message InitializeStatsProto {
// Overall time used for the function call.
optional int32 latency_ms = 1;
@@ -49,6 +49,9 @@ message InitializeStatsProto {
// The document log is using legacy format.
LEGACY_DOCUMENT_LOG_FORMAT = 5;
+
+ // The current code version is different from existing data version.
+ VERSION_CHANGED = 6;
}
// Possible recovery causes for document store:
@@ -73,7 +76,7 @@ message InitializeStatsProto {
// Time used to restore the index.
optional int32 index_restoration_latency_ms = 6;
- // Time used to restore the index.
+ // Time used to restore the schema store.
optional int32 schema_store_recovery_latency_ms = 7;
// Status regarding how much data is lost during the initialization.
@@ -99,10 +102,22 @@ message InitializeStatsProto {
// Number of consecutive initialization failures that immediately preceded
// this initialization.
optional int32 num_previous_init_failures = 11;
+
+ // Possible recovery causes for integer index:
+ // - INCONSISTENT_WITH_GROUND_TRUTH
+ // - SCHEMA_CHANGES_OUT_OF_SYNC
+ // - IO_ERROR
+ optional RecoveryCause integer_index_restoration_cause = 12;
+
+ // Possible recovery causes for qualified id join index:
+ // - INCONSISTENT_WITH_GROUND_TRUTH
+ // - SCHEMA_CHANGES_OUT_OF_SYNC
+ // - IO_ERROR
+ optional RecoveryCause qualified_id_join_index_restoration_cause = 13;
}
// Stats of the top-level function IcingSearchEngine::Put().
-// Next tag: 7
+// Next tag: 11
message PutDocumentStatsProto {
// Overall time used for the function call.
optional int32 latency_ms = 1;
@@ -110,8 +125,7 @@ message PutDocumentStatsProto {
// Time used to store the document.
optional int32 document_store_latency_ms = 2;
- // Time used to index the document. It does not include the time to merge
- // indices.
+ // Time used to index the document.
optional int32 index_latency_ms = 3;
// Time used to merge the indices.
@@ -127,11 +141,24 @@ message PutDocumentStatsProto {
reserved 2;
}
optional TokenizationStats tokenization_stats = 6;
+
+ // Time used to index all indexable string terms in the document. It does not
+ // include the time to merge indices.
+ optional int32 term_index_latency_ms = 7;
+
+ // Time used to index all indexable integers in the document.
+ optional int32 integer_index_latency_ms = 8;
+
+ // Time used to index all qualified id join strings in the document.
+ optional int32 qualified_id_join_index_latency_ms = 9;
+
+ // Time used to sort and merge the LiteIndex's HitBuffer.
+ optional int32 lite_index_sort_latency_ms = 10;
}
// Stats of the top-level function IcingSearchEngine::Search() and
// IcingSearchEngine::GetNextPage().
-// Next tag: 21
+// Next tag: 23
message QueryStatsProto {
// The UTF-8 length of the query string
optional int32 query_length = 16;
@@ -195,6 +222,12 @@ message QueryStatsProto {
// Time used to send protos across the JNI boundary from native to java side.
optional int32 native_to_java_jni_latency_ms = 20;
+ // The native latency due to the join operation.
+ optional int32 join_latency_ms = 21;
+
+ // Number of documents scored.
+ optional int32 num_joined_results_returned_current_page = 22;
+
reserved 9;
}
diff --git a/proto/icing/proto/schema.proto b/proto/icing/proto/schema.proto
index 5d1685c..c716dba 100644
--- a/proto/icing/proto/schema.proto
+++ b/proto/icing/proto/schema.proto
@@ -34,7 +34,7 @@ option objc_class_prefix = "ICNG";
// TODO(cassiewang) Define a sample proto file that can be used by tests and for
// documentation.
//
-// Next tag: 6
+// Next tag: 7
message SchemaTypeConfigProto {
// REQUIRED: Named type that uniquely identifies the structured, logical
// schema being defined.
@@ -60,6 +60,12 @@ message SchemaTypeConfigProto {
// it will default to value == 0.
optional int32 version = 5;
+ // An experimental field to make the type as a subtype of parent_types, which
+ // enables parent_types to be interpreted as its subtypes in the context of
+ // the Search APIs, including schema type filters and projections specified in
+ // TypePropertyMask.
+ repeated string parent_types = 6;
+
reserved 2, 3;
}
@@ -132,15 +138,22 @@ message StringIndexingConfig {
}
// Describes how a document property should be indexed.
-// Next tag: 2
+// Next tag: 3
message DocumentIndexingConfig {
// OPTIONAL: Whether nested properties within the document property should be
- // indexed. If true, then the nested properties will be indexed according to
+ // indexed. If true, then all nested properties will be indexed according to
// the property's own indexing configurations. If false, nested documents'
// properties will not be indexed even if they have an indexing configuration.
//
// The default value is false.
optional bool index_nested_properties = 1;
+
+ // List of nested properties within the document to index. Only the
+ // provided list of properties will be indexed according to the property's
+ // indexing configurations.
+ //
+ // index_nested_properties must be false in order to use this feature.
+ repeated string indexable_nested_properties_list = 2;
}
// Describes how a int64 property should be indexed.
@@ -166,7 +179,7 @@ message IntegerIndexingConfig {
// Describes how a property can be used to join this document with another
// document. See JoinSpecProto (in search.proto) for more details.
-// Next tag: 2
+// Next tag: 3
message JoinableConfig {
// OPTIONAL: Indicates what joinable type the content value of this property
// is.
@@ -185,6 +198,11 @@ message JoinableConfig {
}
}
optional ValueType.Code value_type = 1;
+
+ // If the parent document a child document is joined to is deleted, delete the
+ // child document as well. This will only apply to children joined through
+ // QUALIFIED_ID, other (future) joinable value types won't use it.
+ optional bool propagate_delete = 2 [default = false];
}
// Describes the schema of a single property of Documents that belong to a
@@ -290,7 +308,7 @@ message SchemaProto {
}
// Result of a call to IcingSearchEngine.SetSchema
-// Next tag: 8
+// Next tag: 9
message SetSchemaResultProto {
// Status code can be one of:
// OK
@@ -329,6 +347,14 @@ message SetSchemaResultProto {
// Overall time used for the function call.
optional int32 latency_ms = 7;
+
+ // Schema types that were changed in a way that was backwards compatible, but
+ // invalidated the joinable cache.
+ //
+ // For example, a property was set non joinable in the old schema definition,
+ // but changed to joinable in the new definition. In this case, this property
+ // will be considered join incompatible when setting new schema.
+ repeated string join_incompatible_changed_schema_types = 8;
}
// Result of a call to IcingSearchEngine.GetSchema
diff --git a/proto/icing/proto/search.proto b/proto/icing/proto/search.proto
index c9e2b1d..7f4fb3e 100644
--- a/proto/icing/proto/search.proto
+++ b/proto/icing/proto/search.proto
@@ -27,7 +27,7 @@ option java_multiple_files = true;
option objc_class_prefix = "ICNG";
// Client-supplied specifications on what documents to retrieve.
-// Next tag: 9
+// Next tag: 11
message SearchSpecProto {
// REQUIRED: The "raw" query string that users may type. For example, "cat"
// will search for documents with the term cat in it.
@@ -85,7 +85,8 @@ message SearchSpecProto {
// enable testing.
// TODO(b/208654892) Remove this field once EXPERIMENTAL_ICING_ADVANCED_QUERY
// is fully supported.
- optional SearchType.Code search_type = 6 [default = ICING_RAW_QUERY];
+ optional SearchType.Code search_type = 6
+ [default = EXPERIMENTAL_ICING_ADVANCED_QUERY];
// OPTIONAL: If this field is present, join documents based on a nested
// SearchSpec.
@@ -93,11 +94,29 @@ message SearchSpecProto {
// Features enabled in this search spec.
repeated string enabled_features = 8;
+
+ // OPTIONAL: Whether to use the read-only implementation of
+ // IcingSearchEngine::Search.
+ // The read-only version enables multiple queries to be performed concurrently
+ // as it only acquires the read lock at IcingSearchEngine's level.
+ // Finer-grained locks are implemented around code paths that write changes to
+ // Icing during Search.
+ optional bool use_read_only_search = 9 [default = true];
+
+ // TODO(b/294266822): Handle multiple property filter lists for same schema
+ // type.
+ // How to specify a subset of properties to be searched. If no type property
+ // filter has been specified for a schema type (no TypePropertyMask for the
+ // given schema type), then *all* properties of that schema type will be
+ // searched. If an empty property filter is specified for a given schema type
+ // (TypePropertyMask for the given schema type has empty paths field), no
+ // properties of that schema type will be searched.
+ repeated TypePropertyMask type_property_filters = 10;
}
// Client-supplied specifications on what to include/how to format the search
// results.
-// Next tag: 8
+// Next tag: 10
message ResultSpecProto {
// The results will be returned in pages, and num_per_page specifies the
// number of documents in one page.
@@ -198,6 +217,22 @@ message ResultSpecProto {
NAMESPACE_AND_SCHEMA_TYPE = 3;
}
optional ResultGroupingType result_group_type = 7;
+
+ // The max # of child documents will be attached and returned in the result
+ // for each parent. It is only used for join API.
+ optional int32 max_joined_children_per_parent_to_return = 8;
+
+ // The max # of results being scored and ranked.
+ // Running time of ScoringProcessor and Ranker is O(num_to_score) according to
+ // results of //icing/scoring:score-and-rank_benchmark. Note that
+ // the process includes scoring, building a heap, and popping results from the
+ // heap.
+ //
+ // 30000 results can be scored and ranked within 3 ms on a Pixel 3 XL
+ // according to results of
+ // //icing/scoring:score-and-rank_benchmark, so set it as the
+ // default value.
+ optional int32 num_to_score = 9 [default = 30000];
}
// The representation of a single match within a DocumentProto property.
@@ -498,7 +533,10 @@ message JoinSpecProto {
optional string child_property_expression = 3;
// The max number of child documents to join to a parent document.
- optional int32 max_joined_child_count = 4;
+ // DEPRECATED: use ResultSpecProto.max_joined_children_per_parent_to_return to
+ // control the number of children that are returned. There is no supported
+ // control for the number of children being scored at this time.
+ optional int32 max_joined_child_count = 4 [deprecated = true];
// The strategy by which to score the aggregation of child documents. For
// example, you might want to know which entity document has the most actions
diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt
index a4f3a30..bd3f395 100644
--- a/synced_AOSP_CL_number.txt
+++ b/synced_AOSP_CL_number.txt
@@ -1 +1 @@
-set(synced_AOSP_CL_number=500254546)
+set(synced_AOSP_CL_number=561560020)