aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTim Barron <tjbarron@google.com>2023-03-14 09:57:47 -0700
committerTim Barron <tjbarron@google.com>2023-03-14 09:57:47 -0700
commitc1e7edff54723138756063ee4b7948c1ee91277e (patch)
treeb2a55e543a6c9396631feaab459bfd671a8bc400
parent140aaee3e7b269f02599310e42d6172090ce02d2 (diff)
parentd5c81ae0c41ae9c1aefb3601f3836570b9f686c7 (diff)
downloadicing-c1e7edff54723138756063ee4b7948c1ee91277e.tar.gz
Merge remote-tracking branch 'goog/upstream-master' into androidx-platform-dev
* goog/upstream-master: Update Icing from upstream. Update Icing from upstream. Descriptions: ======================================================================== Cache an instance of UBreakIterator to reduce unnecessary creations. ======================================================================== Cap number of individual IntegerIndexStorages that IntegerIndex creates. ======================================================================== Change error in trimRightMostNode from Unimplemented to InvalidArgument. ======================================================================== Add detection for new language features of List Filters Query Language. ======================================================================== Add option to control threshold to rebuild index during optimize by flag ======================================================================== Add option to control use of namespace id to build urimapper by flag. ======================================================================== Enforce schema validation for joinable config. ======================================================================== Adopt bucket splitting for IntegerIndexStorage. ======================================================================== Implement bucket splitting function. ======================================================================== Add Icing initialization unit tests for QualifiedIdTypeJoinableIndex. ======================================================================== Add Icing schema change unit tests for QualifiedIdTypeJoinableIndex. ======================================================================== Add Icing optimization unit tests for QualifiedIdTypeJoinableIndex. ======================================================================== Integrate QualifiedIdTypeJoinableIndex into IcingSearchEngine. 
======================================================================== Implement QualifiedIdJoinablePropertyIndexingHandler. ======================================================================== Change QualifiedIdTypeJoinableIndex to store raw qualified id string. ======================================================================== Pass info about unnormalized query terms through lexer/parser/visitor. ======================================================================== Integrate Advanced Query w/ Suggest, make ADVANCED_QUERY default parser. ====================================================================== Bug: 208654892 Bug: 263890397 Bug: 259743562 Bug: 272145329 Bug: 227356108 Change-Id: I44de5853bb6c55b42800ae34d8071016be6c87cd
-rw-r--r--icing/file/file-backed-proto.h44
-rw-r--r--icing/file/persistent-hash-map.cc4
-rw-r--r--icing/file/posting_list/flash-index-storage.cc28
-rw-r--r--icing/file/posting_list/flash-index-storage.h3
-rw-r--r--icing/icing-search-engine.cc254
-rw-r--r--icing/icing-search-engine.h63
-rw-r--r--icing/icing-search-engine_initialization_test.cc2246
-rw-r--r--icing/icing-search-engine_optimize_test.cc886
-rw-r--r--icing/icing-search-engine_schema_test.cc515
-rw-r--r--icing/icing-search-engine_search_test.cc90
-rw-r--r--icing/icing-search-engine_suggest_test.cc92
-rw-r--r--icing/index/index-processor_test.cc52
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-not.cc4
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-not_test.cc2
-rw-r--r--icing/index/numeric/doc-hit-info-iterator-numeric.h4
-rw-r--r--icing/index/numeric/dummy-numeric-index.h9
-rw-r--r--icing/index/numeric/integer-index-bucket-util.cc205
-rw-r--r--icing/index/numeric/integer-index-bucket-util.h81
-rw-r--r--icing/index/numeric/integer-index-bucket-util_test.cc1112
-rw-r--r--icing/index/numeric/integer-index-storage.cc174
-rw-r--r--icing/index/numeric/integer-index-storage.h27
-rw-r--r--icing/index/numeric/integer-index-storage_benchmark.cc17
-rw-r--r--icing/index/numeric/integer-index-storage_test.cc152
-rw-r--r--icing/index/numeric/integer-index.cc284
-rw-r--r--icing/index/numeric/integer-index.h87
-rw-r--r--icing/index/numeric/integer-index_test.cc1313
-rw-r--r--icing/index/numeric/numeric-index.h11
-rw-r--r--icing/index/numeric/posting-list-integer-index-accessor.cc83
-rw-r--r--icing/index/numeric/posting-list-integer-index-accessor.h49
-rw-r--r--icing/index/numeric/posting-list-integer-index-accessor_test.cc127
-rw-r--r--icing/index/numeric/posting-list-integer-index-serializer.h11
-rw-r--r--icing/join/join-processor.cc82
-rw-r--r--icing/join/join-processor.h32
-rw-r--r--icing/join/join-processor_test.cc150
-rw-r--r--icing/join/qualified-id-joinable-property-indexing-handler.cc96
-rw-r--r--icing/join/qualified-id-joinable-property-indexing-handler.h71
-rw-r--r--icing/join/qualified-id-joinable-property-indexing-handler_test.cc332
-rw-r--r--icing/join/qualified-id-type-joinable-index.cc161
-rw-r--r--icing/join/qualified-id-type-joinable-index.h32
-rw-r--r--icing/join/qualified-id-type-joinable-index_test.cc255
-rw-r--r--icing/join/qualified-id.cc5
-rw-r--r--icing/join/qualified-id_test.cc18
-rw-r--r--icing/monkey_test/icing-monkey-test-runner.cc6
-rw-r--r--icing/query/advanced_query_parser/abstract-syntax-tree.h26
-rw-r--r--icing/query/advanced_query_parser/abstract-syntax-tree_test.cc32
-rw-r--r--icing/query/advanced_query_parser/function_test.cc78
-rw-r--r--icing/query/advanced_query_parser/lexer.cc71
-rw-r--r--icing/query/advanced_query_parser/lexer.h13
-rw-r--r--icing/query/advanced_query_parser/parser.cc10
-rw-r--r--icing/query/advanced_query_parser/parser_test.cc458
-rw-r--r--icing/query/advanced_query_parser/pending-value.cc2
-rw-r--r--icing/query/advanced_query_parser/pending-value.h13
-rw-r--r--icing/query/advanced_query_parser/query-visitor.cc141
-rw-r--r--icing/query/advanced_query_parser/query-visitor.h12
-rw-r--r--icing/query/advanced_query_parser/query-visitor_test.cc518
-rw-r--r--icing/query/advanced_query_parser/util/string-util.cc106
-rw-r--r--icing/query/advanced_query_parser/util/string-util.h49
-rw-r--r--icing/query/advanced_query_parser/util/string-util_test.cc125
-rw-r--r--icing/query/query-features.h10
-rw-r--r--icing/query/query-processor.cc4
-rw-r--r--icing/query/suggestion-processor_test.cc109
-rw-r--r--icing/schema/joinable-property-manager.cc3
-rw-r--r--icing/schema/joinable-property-manager.h4
-rw-r--r--icing/schema/joinable-property-manager_test.cc5
-rw-r--r--icing/schema/schema-store.cc5
-rw-r--r--icing/schema/schema-store.h4
-rw-r--r--icing/schema/schema-util.cc100
-rw-r--r--icing/schema/schema-util.h46
-rw-r--r--icing/schema/schema-util_test.cc646
-rw-r--r--icing/store/document-store.cc63
-rw-r--r--icing/store/document-store.h23
-rw-r--r--icing/store/document-store_test.cc27
-rw-r--r--icing/tokenization/icu/icu-language-segmenter-factory.cc2
-rw-r--r--icing/tokenization/icu/icu-language-segmenter.cc78
-rw-r--r--icing/tokenization/icu/icu-language-segmenter.h35
-rw-r--r--icing/tokenization/icu/icu-language-segmenter_test.cc47
-rw-r--r--icing/tokenization/rfc822-tokenizer_test.cc85
-rw-r--r--icing/util/tokenized-document.cc7
-rw-r--r--icing/util/tokenized-document.h13
-rw-r--r--icing/util/tokenized-document_test.cc140
-rw-r--r--proto/icing/index/numeric/wildcard-property-storage.proto22
-rw-r--r--proto/icing/proto/initialize.proto19
-rw-r--r--proto/icing/proto/logging.proto8
-rw-r--r--proto/icing/proto/search.proto3
-rw-r--r--synced_AOSP_CL_number.txt2
85 files changed, 10741 insertions, 1692 deletions
diff --git a/icing/file/file-backed-proto.h b/icing/file/file-backed-proto.h
index 8deb7a6..8c5743b 100644
--- a/icing/file/file-backed-proto.h
+++ b/icing/file/file-backed-proto.h
@@ -22,6 +22,7 @@
#ifndef ICING_FILE_FILE_BACKED_PROTO_H_
#define ICING_FILE_FILE_BACKED_PROTO_H_
+#include <algorithm>
#include <cstdint>
#include <memory>
#include <string>
@@ -37,6 +38,7 @@
#include "icing/legacy/core/icing-string-util.h"
#include "icing/util/crc32.h"
#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
@@ -74,6 +76,13 @@ class FileBackedProto {
file_path_ = swapped_to_file_path;
}
+ // Computes the checksum of the proto stored in this file and returns it.
+ // RETURNS:
+ // - the checksum of the proto or 0 if the file is empty/non-existent
+ // - INTERNAL_ERROR if an IO error or a corruption was encountered.
+ libtextclassifier3::StatusOr<Crc32> ComputeChecksum() const
+ ICING_LOCKS_EXCLUDED(mutex_);
+
// Returns a reference to the proto read from the file. It
// internally caches the read proto so that future calls are fast.
//
@@ -103,6 +112,11 @@ class FileBackedProto {
FileBackedProto& operator=(const FileBackedProto&) = delete;
private:
+ // Internal method to handle reading the proto from disk.
+ // Requires the caller to hold an exclusive lock on mutex_.
+ libtextclassifier3::StatusOr<const ProtoT*> ReadInternal() const
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
// Upper bound of file-size that is supported.
static constexpr int32_t kMaxFileSize = 1 * 1024 * 1024; // 1 MiB.
@@ -113,6 +127,8 @@ class FileBackedProto {
std::string file_path_;
mutable std::unique_ptr<ProtoT> cached_proto_ ICING_GUARDED_BY(mutex_);
+
+ mutable std::unique_ptr<Header> cached_header_ ICING_GUARDED_BY(mutex_);
};
template <typename ProtoT>
@@ -124,12 +140,35 @@ FileBackedProto<ProtoT>::FileBackedProto(const Filesystem& filesystem,
: filesystem_(&filesystem), file_path_(file_path) {}
template <typename ProtoT>
+libtextclassifier3::StatusOr<Crc32> FileBackedProto<ProtoT>::ComputeChecksum()
+ const {
+ absl_ports::unique_lock l(&mutex_);
+ if (cached_proto_ == nullptr) {
+ auto read_status = ReadInternal();
+ if (!read_status.ok()) {
+ if (absl_ports::IsNotFound(read_status.status())) {
+ // File doesn't exist. So simply return 0.
+ return Crc32();
+ }
+ return read_status.status();
+ }
+ }
+ return Crc32(cached_header_->proto_checksum);
+}
+
+template <typename ProtoT>
libtextclassifier3::StatusOr<const ProtoT*> FileBackedProto<ProtoT>::Read()
const {
ICING_VLOG(1) << "Reading proto from file: " << file_path_;
absl_ports::unique_lock l(&mutex_);
+ return ReadInternal();
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<const ProtoT*>
+FileBackedProto<ProtoT>::ReadInternal() const {
// Return cached proto if we've already read from disk.
if (cached_proto_ != nullptr) {
ICING_VLOG(1) << "Reusing cached proto for file: " << file_path_;
@@ -157,8 +196,7 @@ libtextclassifier3::StatusOr<const ProtoT*> FileBackedProto<ProtoT>::Read()
<< " of size: " << file_size;
Header header;
- if (!filesystem_->PRead(fd.get(), &header, sizeof(Header),
- /*offset=*/0)) {
+ if (!filesystem_->PRead(fd.get(), &header, sizeof(Header), /*offset=*/0)) {
return absl_ports::InternalError(
absl_ports::StrCat("Unable to read header of: ", file_path_));
}
@@ -193,6 +231,7 @@ libtextclassifier3::StatusOr<const ProtoT*> FileBackedProto<ProtoT>::Read()
ICING_VLOG(1) << "Successfully read proto from file: " << file_path_;
cached_proto_ = std::move(proto);
+ cached_header_ = std::make_unique<Header>(std::move(header));
return cached_proto_.get();
}
@@ -253,6 +292,7 @@ libtextclassifier3::Status FileBackedProto<ProtoT>::Write(
ICING_VLOG(1) << "Successfully wrote proto to file: " << file_path_;
cached_proto_ = std::move(new_proto);
+ cached_header_ = std::make_unique<Header>(std::move(header));
return libtextclassifier3::Status::OK;
}
diff --git a/icing/file/persistent-hash-map.cc b/icing/file/persistent-hash-map.cc
index 14a1251..ce8310b 100644
--- a/icing/file/persistent-hash-map.cc
+++ b/icing/file/persistent-hash-map.cc
@@ -147,7 +147,9 @@ PersistentHashMap::Create(const Filesystem& filesystem,
!filesystem.FileExists(
GetKeyValueStorageFilePath(working_path).c_str())) {
// Discard working_path if any of them is missing, and reinitialize.
- ICING_RETURN_IF_ERROR(Discard(filesystem, working_path));
+ if (filesystem.DirectoryExists(working_path.c_str())) {
+ ICING_RETURN_IF_ERROR(Discard(filesystem, working_path));
+ }
return InitializeNewFiles(filesystem, std::move(working_path),
std::move(options));
}
diff --git a/icing/file/posting_list/flash-index-storage.cc b/icing/file/posting_list/flash-index-storage.cc
index 657bd96..2ba24a3 100644
--- a/icing/file/posting_list/flash-index-storage.cc
+++ b/icing/file/posting_list/flash-index-storage.cc
@@ -37,22 +37,6 @@
namespace icing {
namespace lib {
-namespace {
-
-uint32_t SelectBlockSize() {
- // This should be close to the flash page size.
- static constexpr uint32_t kMinBlockSize = 4096;
-
- // Determine a good block size.
- uint32_t page_size = getpagesize();
- uint32_t block_size = std::max(kMinBlockSize, page_size);
-
- // Align up to the nearest page size.
- return math_util::RoundUpTo(block_size, page_size);
-}
-
-} // namespace
-
libtextclassifier3::StatusOr<FlashIndexStorage> FlashIndexStorage::Create(
std::string index_filename, const Filesystem* filesystem,
PostingListSerializer* serializer, bool in_memory) {
@@ -75,6 +59,18 @@ FlashIndexStorage::~FlashIndexStorage() {
}
}
+/* static */ uint32_t FlashIndexStorage::SelectBlockSize() {
+ // This should be close to the flash page size.
+ static constexpr uint32_t kMinBlockSize = 4096;
+
+ // Determine a good block size.
+ uint32_t page_size = getpagesize();
+ uint32_t block_size = std::max(kMinBlockSize, page_size);
+
+ // Align up to the nearest page size.
+ return math_util::RoundUpTo(block_size, page_size);
+}
+
bool FlashIndexStorage::Init() {
storage_sfd_ = ScopedFd(filesystem_->OpenForWrite(index_filename_.c_str()));
if (!storage_sfd_.is_valid()) {
diff --git a/icing/file/posting_list/flash-index-storage.h b/icing/file/posting_list/flash-index-storage.h
index 1813637..05feb08 100644
--- a/icing/file/posting_list/flash-index-storage.h
+++ b/icing/file/posting_list/flash-index-storage.h
@@ -105,6 +105,9 @@ class FlashIndexStorage {
~FlashIndexStorage();
+ // Selects block size to use.
+ static uint32_t SelectBlockSize();
+
// Retrieves the PostingList referred to by PostingListIdentifier. This
// posting list must have been previously allocated by a prior call to
// AllocatePostingList.
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc
index 1b193af..7800e7e 100644
--- a/icing/icing-search-engine.cc
+++ b/icing/icing-search-engine.cc
@@ -41,6 +41,8 @@
#include "icing/index/numeric/integer-index.h"
#include "icing/index/string-section-indexing-handler.h"
#include "icing/join/join-processor.h"
+#include "icing/join/qualified-id-joinable-property-indexing-handler.h"
+#include "icing/join/qualified-id-type-joinable-index.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/portable/endian.h"
#include "icing/proto/debug.pb.h"
@@ -96,6 +98,8 @@ namespace {
constexpr std::string_view kDocumentSubfolderName = "document_dir";
constexpr std::string_view kIndexSubfolderName = "index_dir";
constexpr std::string_view kIntegerIndexSubfolderName = "integer_index_dir";
+constexpr std::string_view kQualifiedIdJoinIndexSubfolderName =
+ "qualified_id_join_index_dir";
constexpr std::string_view kSchemaSubfolderName = "schema_dir";
constexpr std::string_view kSetSchemaMarkerFilename = "set_schema_marker";
constexpr std::string_view kInitMarkerFilename = "init_marker";
@@ -240,6 +244,14 @@ std::string MakeIntegerIndexWorkingPath(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kIntegerIndexSubfolderName);
}
+// Working path for qualified id join index. It is derived from
+// PersistentStorage and it will take full ownership of this working path,
+// including creation/deletion. See PersistentStorage for more details about
+// working path.
+std::string MakeQualifiedIdJoinIndexWorkingPath(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", kQualifiedIdJoinIndexSubfolderName);
+}
+
// SchemaStore files are in a standalone subfolder for easier file management.
// We can delete and recreate the subfolder and not touch/affect anything
// else.
@@ -347,15 +359,12 @@ libtextclassifier3::Status RetrieveAndAddDocumentInfo(
return libtextclassifier3::Status::OK;
}
-bool ShouldRebuildIndex(const OptimizeStatsProto& optimize_stats) {
+bool ShouldRebuildIndex(const OptimizeStatsProto& optimize_stats,
+ float optimize_rebuild_index_threshold) {
int num_invalid_documents = optimize_stats.num_deleted_documents() +
optimize_stats.num_expired_documents();
- // Rebuilding the index could be faster than optimizing the index if we have
- // removed most of the documents.
- // Based on benchmarks, 85%~95% seems to be a good threshold for most cases.
- // TODO(b/238236206): Try using the number of remaining hits in this
- // condition, and allow clients to configure the threshold.
- return num_invalid_documents >= optimize_stats.num_original_documents() * 0.9;
+ return num_invalid_documents >= optimize_stats.num_original_documents() *
+ optimize_rebuild_index_threshold;
}
// Useful method to get RankingStrategy if advanced scoring is enabled. When the
@@ -428,6 +437,7 @@ void IcingSearchEngine::ResetMembers() {
normalizer_.reset();
index_.reset();
integer_index_.reset();
+ qualified_id_join_index_.reset();
}
libtextclassifier3::Status IcingSearchEngine::CheckInitMarkerFile(
@@ -559,12 +569,17 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir());
const std::string integer_index_dir =
MakeIntegerIndexWorkingPath(options_.base_dir());
+ const std::string qualified_id_join_index_dir =
+ MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir());
if (!filesystem_->DeleteDirectoryRecursively(doc_store_dir.c_str()) ||
!filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) ||
- !IntegerIndex::Discard(*filesystem_, integer_index_dir).ok()) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Could not delete directories: ", index_dir, ", ",
- integer_index_dir, " and", doc_store_dir));
+ !IntegerIndex::Discard(*filesystem_, integer_index_dir).ok() ||
+ !QualifiedIdTypeJoinableIndex::Discard(*filesystem_,
+ qualified_id_join_index_dir)
+ .ok()) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Could not delete directories: ", index_dir, ", ", integer_index_dir,
+ ", ", qualified_id_join_index_dir, " and ", doc_store_dir));
}
ICING_RETURN_IF_ERROR(InitializeDocumentStore(
/*force_recovery_and_revalidate_documents=*/false, initialize_stats));
@@ -601,6 +616,16 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
integer_index_,
IntegerIndex::Create(*filesystem_, std::move(integer_index_dir)));
+ // Discard qualified id join index directory and instantiate a new one.
+ std::string qualified_id_join_index_dir =
+ MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir());
+ ICING_RETURN_IF_ERROR(QualifiedIdTypeJoinableIndex::Discard(
+ *filesystem_, qualified_id_join_index_dir));
+ ICING_ASSIGN_OR_RETURN(
+ qualified_id_join_index_,
+ QualifiedIdTypeJoinableIndex::Create(
+ *filesystem_, std::move(qualified_id_join_index_dir)));
+
std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer();
IndexRestorationResult restore_result = RestoreIndexIfNeeded();
index_init_status = std::move(restore_result.status);
@@ -621,6 +646,8 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC);
initialize_stats->set_integer_index_restoration_cause(
InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC);
+ initialize_stats->set_qualified_id_join_index_restoration_cause(
+ InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC);
} else {
ICING_RETURN_IF_ERROR(InitializeDocumentStore(
/*force_recovery_and_revalidate_documents=*/false, initialize_stats));
@@ -673,9 +700,11 @@ libtextclassifier3::Status IcingSearchEngine::InitializeDocumentStore(
}
ICING_ASSIGN_OR_RETURN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(
- filesystem_.get(), document_dir, clock_.get(), schema_store_.get(),
- force_recovery_and_revalidate_documents, initialize_stats));
+ DocumentStore::Create(filesystem_.get(), document_dir, clock_.get(),
+ schema_store_.get(),
+ force_recovery_and_revalidate_documents,
+ options_.document_store_namespace_id_fingerprint(),
+ initialize_stats));
document_store_ = std::move(create_result.document_store);
return libtextclassifier3::Status::OK;
@@ -693,6 +722,7 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
}
Index::Options index_options(index_dir, options_.index_merge_size());
+ // Term index
InitializeStatsProto::RecoveryCause index_recovery_cause;
auto index_or =
Index::Create(index_options, filesystem_.get(), icing_filesystem_.get());
@@ -717,6 +747,7 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
index_recovery_cause = InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH;
}
+ // Integer index
std::string integer_index_dir =
MakeIntegerIndexWorkingPath(options_.base_dir());
InitializeStatsProto::RecoveryCause integer_index_recovery_cause;
@@ -740,10 +771,38 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH;
}
+ // Qualified id join index
+ std::string qualified_id_join_index_dir =
+ MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir());
+ InitializeStatsProto::RecoveryCause qualified_id_join_index_recovery_cause;
+ auto qualified_id_join_index_or = QualifiedIdTypeJoinableIndex::Create(
+ *filesystem_, qualified_id_join_index_dir);
+ if (!qualified_id_join_index_or.ok()) {
+ ICING_RETURN_IF_ERROR(QualifiedIdTypeJoinableIndex::Discard(
+ *filesystem_, qualified_id_join_index_dir));
+
+ qualified_id_join_index_recovery_cause = InitializeStatsProto::IO_ERROR;
+
+ // Try recreating it from scratch and rebuild everything.
+ ICING_ASSIGN_OR_RETURN(
+ qualified_id_join_index_,
+ QualifiedIdTypeJoinableIndex::Create(
+ *filesystem_, std::move(qualified_id_join_index_dir)));
+ } else {
+ // Qualified id join index was created fine.
+ qualified_id_join_index_ =
+ std::move(qualified_id_join_index_or).ValueOrDie();
+ // If a recover does have to happen, then it must be because the index is
+ // out of sync with the document store.
+ qualified_id_join_index_recovery_cause =
+ InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH;
+ }
+
std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer();
IndexRestorationResult restore_result = RestoreIndexIfNeeded();
if (restore_result.index_needed_restoration ||
- restore_result.integer_index_needed_restoration) {
+ restore_result.integer_index_needed_restoration ||
+ restore_result.qualified_id_join_index_needed_restoration) {
initialize_stats->set_index_restoration_latency_ms(
restore_timer->GetElapsedMilliseconds());
@@ -754,6 +813,10 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
initialize_stats->set_integer_index_restoration_cause(
integer_index_recovery_cause);
}
+ if (restore_result.qualified_id_join_index_needed_restoration) {
+ initialize_stats->set_qualified_id_join_index_restoration_cause(
+ qualified_id_join_index_recovery_cause);
+ }
}
return restore_result.status;
}
@@ -863,20 +926,25 @@ SetSchemaResultProto IcingSearchEngine::SetSchema(
}
}
- if (lost_previous_schema || join_incompatible) {
- // TODO(b/256022027): rebuild joinable cache if not join compatible. This
- // should be done together with index (see RestoreIndexIfNeeded) because
- // we want to "replay" documents only once to cover all rebuild.
+ if (lost_previous_schema || index_incompatible) {
+ // Clears search indices
+ status = ClearSearchIndices();
+ if (!status.ok()) {
+ TransformStatus(status, result_status);
+ return result_proto;
+ }
}
- if (lost_previous_schema || index_incompatible) {
- // Clears all indices
- status = ClearIndices();
+ if (lost_previous_schema || join_incompatible) {
+ // Clears join indices
+ status = ClearJoinIndices();
if (!status.ok()) {
TransformStatus(status, result_status);
return result_proto;
}
+ }
+ if (lost_previous_schema || index_incompatible || join_incompatible) {
IndexRestorationResult restore_result = RestoreIndexIfNeeded();
// DATA_LOSS means that we have successfully re-added content to the
// index. Some indexed content was lost, but otherwise the index is in a
@@ -996,12 +1064,12 @@ PutResultProto IcingSearchEngine::Put(DocumentProto&& document) {
auto index_status = index_processor.IndexDocument(
tokenized_document, document_id, put_document_stats);
// Getting an internal error from the index could possibly mean that the index
- // is broken. Try to rebuild the index to recover.
+ // is broken. Try to rebuild them to recover.
if (absl_ports::IsInternal(index_status)) {
ICING_LOG(ERROR) << "Got an internal error from the index. Trying to "
"rebuild the index!\n"
<< index_status.error_message();
- index_status = ClearIndices();
+ index_status = ClearAllIndices();
if (index_status.ok()) {
index_status = RestoreIndexIfNeeded().status;
if (!index_status.ok()) {
@@ -1009,8 +1077,8 @@ PutResultProto IcingSearchEngine::Put(DocumentProto&& document) {
"indexing a document.";
}
} else {
- ICING_LOG(ERROR) << "Failed to clear the index after a failure of "
- "indexing a document.";
+ ICING_LOG(ERROR)
+ << "Failed to clear indices after a failure of indexing a document.";
}
}
@@ -1411,7 +1479,9 @@ OptimizeResultProto IcingSearchEngine::Optimize() {
// guaranteed to work, so we update index according to the new document store.
std::unique_ptr<Timer> optimize_index_timer = clock_->GetNewTimer();
bool should_rebuild_index =
- !document_id_old_to_new_or.ok() || ShouldRebuildIndex(*optimize_stats);
+ !document_id_old_to_new_or.ok() ||
+ ShouldRebuildIndex(*optimize_stats,
+ options_.optimize_rebuild_index_threshold());
if (!should_rebuild_index) {
optimize_stats->set_index_restoration_mode(
OptimizeStatsProto::INDEX_TRANSLATION);
@@ -1432,6 +1502,17 @@ OptimizeResultProto IcingSearchEngine::Optimize() {
<< integer_index_optimize_status.error_message();
should_rebuild_index = true;
}
+
+ libtextclassifier3::Status qualified_id_join_index_optimize_status =
+ qualified_id_join_index_->Optimize(
+ document_id_old_to_new_or.ValueOrDie(),
+ document_store_->last_added_document_id());
+ if (!qualified_id_join_index_optimize_status.ok()) {
+ ICING_LOG(WARNING)
+ << "Failed to optimize qualified id join index. Error: "
+ << qualified_id_join_index_optimize_status.error_message();
+ should_rebuild_index = true;
+ }
}
// If we received a DATA_LOSS error from OptimizeDocumentStore, we have a
// valid document store, but it might be the old one or the new one. So throw
@@ -1445,7 +1526,7 @@ OptimizeResultProto IcingSearchEngine::Optimize() {
OptimizeStatsProto::FULL_INDEX_REBUILD);
ICING_LOG(WARNING) << "Clearing the entire index!";
- libtextclassifier3::Status index_clear_status = ClearIndices();
+ libtextclassifier3::Status index_clear_status = ClearAllIndices();
if (!index_clear_status.ok()) {
status = absl_ports::Annotate(
absl_ports::InternalError("Failed to clear index."),
@@ -1652,6 +1733,7 @@ libtextclassifier3::Status IcingSearchEngine::InternalPersistToDisk(
ICING_RETURN_IF_ERROR(document_store_->PersistToDisk(PersistType::FULL));
ICING_RETURN_IF_ERROR(index_->PersistToDisk());
ICING_RETURN_IF_ERROR(integer_index_->PersistToDisk());
+ ICING_RETURN_IF_ERROR(qualified_id_join_index_->PersistToDisk());
return libtextclassifier3::Status::OK;
}
@@ -1714,7 +1796,8 @@ SearchResultProto IcingSearchEngine::Search(
return result_proto;
}
- JoinProcessor join_processor(document_store_.get());
+ JoinProcessor join_processor(document_store_.get(), schema_store_.get(),
+ qualified_id_join_index_.get());
// Building a JoinChildrenFetcher where child documents are grouped by
// their joinable values.
libtextclassifier3::StatusOr<JoinChildrenFetcher> join_children_fetcher_or =
@@ -1756,7 +1839,8 @@ SearchResultProto IcingSearchEngine::Search(
std::unique_ptr<ScoredDocumentHitsRanker> ranker;
if (join_children_fetcher != nullptr) {
// Join 2 scored document hits
- JoinProcessor join_processor(document_store_.get());
+ JoinProcessor join_processor(document_store_.get(), schema_store_.get(),
+ qualified_id_join_index_.get());
libtextclassifier3::StatusOr<std::vector<JoinedScoredDocumentHit>>
joined_result_document_hits_or = join_processor.Join(
join_spec, std::move(query_scoring_results.scored_document_hits),
@@ -2042,9 +2126,10 @@ IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) {
// Tries to rebuild document store if swapping fails, to avoid leaving the
// system in the broken state for future operations.
- auto create_result_or =
- DocumentStore::Create(filesystem_.get(), current_document_dir,
- clock_.get(), schema_store_.get());
+ auto create_result_or = DocumentStore::Create(
+ filesystem_.get(), current_document_dir, clock_.get(),
+ schema_store_.get(), /*force_recovery_and_revalidate_documents=*/false,
+ options_.document_store_namespace_id_fingerprint());
// TODO(b/144458732): Implement a more robust version of
// TC_ASSIGN_OR_RETURN that can support error logging.
if (!create_result_or.ok()) {
@@ -2068,9 +2153,10 @@ IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) {
}
// Recreates the doc store instance
- auto create_result_or =
- DocumentStore::Create(filesystem_.get(), current_document_dir,
- clock_.get(), schema_store_.get());
+ auto create_result_or = DocumentStore::Create(
+ filesystem_.get(), current_document_dir, clock_.get(),
+ schema_store_.get(), /*force_recovery_and_revalidate_documents=*/false,
+ options_.document_store_namespace_id_fingerprint());
if (!create_result_or.ok()) {
// Unable to create DocumentStore from the new file. Mark as uninitialized
// and return INTERNAL.
@@ -2098,34 +2184,37 @@ IcingSearchEngine::RestoreIndexIfNeeded() {
DocumentId last_stored_document_id =
document_store_->last_added_document_id();
if (last_stored_document_id == index_->last_added_document_id() &&
- last_stored_document_id == integer_index_->last_added_document_id()) {
+ last_stored_document_id == integer_index_->last_added_document_id() &&
+ last_stored_document_id ==
+ qualified_id_join_index_->last_added_document_id()) {
// No need to recover.
- return {libtextclassifier3::Status::OK, false, false};
+ return {libtextclassifier3::Status::OK, false, false, false};
}
if (last_stored_document_id == kInvalidDocumentId) {
// Document store is empty but index is not. Clear the index.
- return {ClearIndices(), false, false};
+ return {ClearAllIndices(), false, false, false};
}
// Truncate indices first.
auto truncate_result_or = TruncateIndicesTo(last_stored_document_id);
if (!truncate_result_or.ok()) {
- return {std::move(truncate_result_or).status(), false, false};
+ return {std::move(truncate_result_or).status(), false, false, false};
}
TruncateIndexResult truncate_result =
std::move(truncate_result_or).ValueOrDie();
if (truncate_result.first_document_to_reindex > last_stored_document_id) {
// Nothing to restore. Just return.
- return {libtextclassifier3::Status::OK, false, false};
+ return {libtextclassifier3::Status::OK, false, false, false};
}
auto data_indexing_handlers_or = CreateDataIndexingHandlers();
if (!data_indexing_handlers_or.ok()) {
return {data_indexing_handlers_or.status(),
truncate_result.index_needed_restoration,
- truncate_result.integer_index_needed_restoration};
+ truncate_result.integer_index_needed_restoration,
+ truncate_result.qualified_id_join_index_needed_restoration};
}
// By using recovery_mode for IndexProcessor, we're able to replay documents
// from smaller document id and it will skip documents that are already been
@@ -2151,7 +2240,8 @@ IcingSearchEngine::RestoreIndexIfNeeded() {
} else {
// Returns other errors
return {document_or.status(), truncate_result.index_needed_restoration,
- truncate_result.integer_index_needed_restoration};
+ truncate_result.integer_index_needed_restoration,
+ truncate_result.qualified_id_join_index_needed_restoration};
}
}
DocumentProto document(std::move(document_or).ValueOrDie());
@@ -2163,7 +2253,8 @@ IcingSearchEngine::RestoreIndexIfNeeded() {
if (!tokenized_document_or.ok()) {
return {tokenized_document_or.status(),
truncate_result.index_needed_restoration,
- truncate_result.integer_index_needed_restoration};
+ truncate_result.integer_index_needed_restoration,
+ truncate_result.qualified_id_join_index_needed_restoration};
}
TokenizedDocument tokenized_document(
std::move(tokenized_document_or).ValueOrDie());
@@ -2174,7 +2265,8 @@ IcingSearchEngine::RestoreIndexIfNeeded() {
if (!absl_ports::IsDataLoss(status)) {
// Real error. Stop recovering and pass it up.
return {status, truncate_result.index_needed_restoration,
- truncate_result.integer_index_needed_restoration};
+ truncate_result.integer_index_needed_restoration,
+ truncate_result.qualified_id_join_index_needed_restoration};
}
// FIXME: why can we skip data loss error here?
// Just a data loss. Keep trying to add the remaining docs, but report the
@@ -2184,7 +2276,8 @@ IcingSearchEngine::RestoreIndexIfNeeded() {
}
return {overall_status, truncate_result.index_needed_restoration,
- truncate_result.integer_index_needed_restoration};
+ truncate_result.integer_index_needed_restoration,
+ truncate_result.qualified_id_join_index_needed_restoration};
}
libtextclassifier3::StatusOr<bool> IcingSearchEngine::LostPreviousSchema() {
@@ -2228,7 +2321,14 @@ IcingSearchEngine::CreateDataIndexingHandlers() {
clock_.get(), integer_index_.get()));
handlers.push_back(std::move(integer_section_indexing_handler));
- // TODO(b/263890397): add QualifiedIdJoinablePropertyIndexingHandler
+ // Qualified id joinable property index handler
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler>
+ qualified_id_joinable_property_indexing_handler,
+ QualifiedIdJoinablePropertyIndexingHandler::Create(
+ clock_.get(), qualified_id_join_index_.get()));
+ handlers.push_back(
+ std::move(qualified_id_joinable_property_indexing_handler));
return handlers;
}
@@ -2265,11 +2365,12 @@ IcingSearchEngine::TruncateIndicesTo(DocumentId last_stored_document_id) {
// starting from integer_index_last_added_document_id + 1. Also use std::min
// since we might need to replay even smaller doc ids for term index.
integer_index_needed_restoration = true;
- first_document_to_reindex =
- integer_index_last_added_document_id != kInvalidDocumentId
- ? std::min(first_document_to_reindex,
- integer_index_last_added_document_id + 1)
- : kMinDocumentId;
+ if (integer_index_last_added_document_id != kInvalidDocumentId) {
+ first_document_to_reindex = std::min(
+ first_document_to_reindex, integer_index_last_added_document_id + 1);
+ } else {
+ first_document_to_reindex = kMinDocumentId;
+ }
} else if (last_stored_document_id < integer_index_last_added_document_id) {
// Clear the entire integer index if last_stored_document_id is smaller than
// integer_index_last_added_document_id, because there is no way to remove
@@ -2283,17 +2384,62 @@ IcingSearchEngine::TruncateIndicesTo(DocumentId last_stored_document_id) {
first_document_to_reindex = kMinDocumentId;
}
+ // Attempt to truncate qualified id join index
+ bool qualified_id_join_index_needed_restoration = false;
+ DocumentId qualified_id_join_index_last_added_document_id =
+ qualified_id_join_index_->last_added_document_id();
+ if (qualified_id_join_index_last_added_document_id == kInvalidDocumentId ||
+ last_stored_document_id >
+ qualified_id_join_index_last_added_document_id) {
+ // If last_stored_document_id is greater than
+ // qualified_id_join_index_last_added_document_id, then we only have to
+ // replay docs starting from (qualified_id_join_index_last_added_document_id
+ // + 1). Also use std::min since we might need to replay even smaller doc
+ // ids for other components.
+ qualified_id_join_index_needed_restoration = true;
+ if (qualified_id_join_index_last_added_document_id != kInvalidDocumentId) {
+ first_document_to_reindex =
+ std::min(first_document_to_reindex,
+ qualified_id_join_index_last_added_document_id + 1);
+ } else {
+ first_document_to_reindex = kMinDocumentId;
+ }
+ } else if (last_stored_document_id <
+ qualified_id_join_index_last_added_document_id) {
+ // Clear the entire qualified id join index if last_stored_document_id is
+ // smaller than qualified_id_join_index_last_added_document_id, because
+ // there is no way to remove data with doc_id > last_stored_document_id from
+ // join index efficiently and we have to rebuild.
+ ICING_RETURN_IF_ERROR(qualified_id_join_index_->Clear());
+
+ // Since the entire qualified id join index is discarded, we start to
+ // rebuild it by setting first_document_to_reindex to kMinDocumentId.
+ qualified_id_join_index_needed_restoration = true;
+ first_document_to_reindex = kMinDocumentId;
+ }
+
return TruncateIndexResult(first_document_to_reindex,
index_needed_restoration,
- integer_index_needed_restoration);
+ integer_index_needed_restoration,
+ qualified_id_join_index_needed_restoration);
}
-libtextclassifier3::Status IcingSearchEngine::ClearIndices() {
+libtextclassifier3::Status IcingSearchEngine::ClearSearchIndices() {
ICING_RETURN_IF_ERROR(index_->Reset());
ICING_RETURN_IF_ERROR(integer_index_->Clear());
return libtextclassifier3::Status::OK;
}
+libtextclassifier3::Status IcingSearchEngine::ClearJoinIndices() {
+ return qualified_id_join_index_->Clear();
+}
+
+libtextclassifier3::Status IcingSearchEngine::ClearAllIndices() {
+ ICING_RETURN_IF_ERROR(ClearSearchIndices());
+ ICING_RETURN_IF_ERROR(ClearJoinIndices());
+ return libtextclassifier3::Status::OK;
+}
+
ResetResultProto IcingSearchEngine::Reset() {
absl_ports::unique_lock l(&mutex_);
return ResetInternal();
diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h
index 678fc77..3e85f69 100644
--- a/icing/icing-search-engine.h
+++ b/icing/icing-search-engine.h
@@ -31,6 +31,7 @@
#include "icing/index/numeric/numeric-index.h"
#include "icing/jni/jni-cache.h"
#include "icing/join/join-children-fetcher.h"
+#include "icing/join/qualified-id-type-joinable-index.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/performance-configuration.h"
#include "icing/proto/debug.pb.h"
@@ -474,10 +475,13 @@ class IcingSearchEngine {
std::unique_ptr<Index> index_ ICING_GUARDED_BY(mutex_);
// Storage for all hits of numeric contents from the document store.
- // TODO(b/249829533): integrate more functions with integer_index_.
std::unique_ptr<NumericIndex<int64_t>> integer_index_
ICING_GUARDED_BY(mutex_);
+ // Storage for all join qualified ids from the document store.
+ std::unique_ptr<QualifiedIdTypeJoinableIndex> qualified_id_join_index_
+ ICING_GUARDED_BY(mutex_);
+
// Pointer to JNI class references
const std::unique_ptr<const JniCache> jni_cache_;
@@ -550,8 +554,8 @@ class IcingSearchEngine {
InitializeStatsProto* initialize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
- // Do any initialization/recovery necessary to create a DocumentStore
- // instance.
+ // Do any initialization/recovery necessary to create term index, integer
+ // index, and qualified id join index instances.
//
// Returns:
// OK on success
@@ -640,9 +644,10 @@ class IcingSearchEngine {
OptimizeStatsProto* optimize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
- // Helper method to restore missing document data in index_. All documents
- // will be reindexed. This does not clear the index, so it is recommended to
- // call Index::Reset first.
+ // Helper method to restore missing document data in index_, integer_index_,
+ // and qualified_id_join_index_. All documents will be reindexed. This does
+ // not clear the index, so it is recommended to call ClearAllIndices,
+ // ClearSearchIndices, or ClearJoinIndices first if needed.
//
// Returns:
// On success, OK and a bool indicating whether or not restoration was
@@ -657,6 +662,7 @@ class IcingSearchEngine {
libtextclassifier3::Status status;
bool index_needed_restoration;
bool integer_index_needed_restoration;
+ bool qualified_id_join_index_needed_restoration;
};
IndexRestorationResult RestoreIndexIfNeeded()
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
@@ -674,13 +680,18 @@ class IcingSearchEngine {
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Helper method to create all types of data indexing handlers to index term,
- // integer, and joinable qualified ids.
+ // integer, and join qualified ids.
libtextclassifier3::StatusOr<
std::vector<std::unique_ptr<DataIndexingHandler>>>
CreateDataIndexingHandlers() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
- // Helper method to discard parts of (term, integer) indices if they contain
- // data for document ids greater than last_stored_document_id.
+ // Helper method to discard parts of (term, integer, qualified id join)
+ // indices if they contain data for document ids greater than
+ // last_stored_document_id.
+ //
+ // REQUIRES: last_stored_document_id is valid (!= kInvalidDocumentId). Note:
+ // if we want to truncate everything in the index, then please call
+ // ClearSearchIndices/ClearJoinIndices/ClearAllIndices instead.
//
// Returns:
// On success, a DocumentId indicating the first document to start for
@@ -691,25 +702,45 @@ class IcingSearchEngine {
DocumentId first_document_to_reindex;
bool index_needed_restoration;
bool integer_index_needed_restoration;
+ bool qualified_id_join_index_needed_restoration;
- explicit TruncateIndexResult(DocumentId first_document_to_reindex_in,
- bool index_needed_restoration_in,
- bool integer_index_needed_restoration_in)
+ explicit TruncateIndexResult(
+ DocumentId first_document_to_reindex_in,
+ bool index_needed_restoration_in,
+ bool integer_index_needed_restoration_in,
+ bool qualified_id_join_index_needed_restoration_in)
: first_document_to_reindex(first_document_to_reindex_in),
index_needed_restoration(index_needed_restoration_in),
- integer_index_needed_restoration(
- integer_index_needed_restoration_in) {}
+ integer_index_needed_restoration(integer_index_needed_restoration_in),
+ qualified_id_join_index_needed_restoration(
+ qualified_id_join_index_needed_restoration_in) {}
};
libtextclassifier3::StatusOr<TruncateIndexResult> TruncateIndicesTo(
DocumentId last_stored_document_id)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
- // Helper method to discard the entire (term, integer) indices.
+ // Helper method to discard search (term, integer) indices.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL_ERROR on any I/O errors
+ libtextclassifier3::Status ClearSearchIndices()
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Helper method to discard join (qualified id) indices.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL_ERROR on any I/O errors
+ libtextclassifier3::Status ClearJoinIndices()
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Helper method to discard all search and join indices.
//
// Returns:
// OK on success
// INTERNAL_ERROR on any I/O errors
- libtextclassifier3::Status ClearIndices()
+ libtextclassifier3::Status ClearAllIndices()
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
};
diff --git a/icing/icing-search-engine_initialization_test.cc b/icing/icing-search-engine_initialization_test.cc
index f51abdf..6ba1737 100644
--- a/icing/icing-search-engine_initialization_test.cc
+++ b/icing/icing-search-engine_initialization_test.cc
@@ -28,6 +28,9 @@
#include "icing/index/index.h"
#include "icing/index/numeric/integer-index.h"
#include "icing/jni/jni-cache.h"
+#include "icing/join/doc-join-info.h"
+#include "icing/join/join-processor.h"
+#include "icing/join/qualified-id-type-joinable-index.h"
#include "icing/legacy/index/icing-mock-filesystem.h"
#include "icing/portable/endian.h"
#include "icing/portable/equals-proto.h"
@@ -66,10 +69,12 @@ namespace {
using ::icing::lib::portable_equals_proto::EqualsProto;
using ::testing::_;
+using ::testing::AtLeast;
using ::testing::DoDefault;
using ::testing::EndsWith;
using ::testing::Eq;
using ::testing::HasSubstr;
+using ::testing::IsEmpty;
using ::testing::Matcher;
using ::testing::Return;
using ::testing::SizeIs;
@@ -162,6 +167,10 @@ std::string GetIntegerIndexDir() {
return GetTestBaseDir() + "/integer_index_dir";
}
+std::string GetQualifiedIdJoinIndexDir() {
+ return GetTestBaseDir() + "/qualified_id_join_index_dir";
+}
+
std::string GetSchemaDir() { return GetTestBaseDir() + "/schema_dir"; }
std::string GetHeaderFilename() {
@@ -239,6 +248,10 @@ ScoringSpecProto GetDefaultScoringSpec() {
return scoring_spec;
}
+// TODO(b/272145329): create SearchSpecBuilder, JoinSpecBuilder,
+// SearchResultProtoBuilder and ResultProtoBuilder for unit tests and build all
+// instances by them.
+
TEST_F(IcingSearchEngineInitializationTest, UninitializedInstanceFailsSafely) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
@@ -876,21 +889,73 @@ TEST_F(IcingSearchEngineInitializationTest,
TEST_F(IcingSearchEngineInitializationTest,
RecoverFromInconsistentDocumentStore) {
- // Test the following scenario: document store is ahead of term and integer
- // index. IcingSearchEngine should be able to recover term index. Several
- // additional behaviors are also tested:
+ // Test the following scenario: document store is ahead of term, integer and
+ // qualified id join index. IcingSearchEngine should be able to recover all
+ // indices. Several additional behaviors are also tested:
// - Index directory handling:
// - Term index directory should be unaffected.
// - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
// - Truncate indices:
// - "TruncateTo()" for term index shouldn't take effect.
// - "Clear()" shouldn't be called for integer index, i.e. no integer index
// storage sub directories (path_expr = "*/integer_index_dir/*") should be
// discarded.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
// - Still, we need to replay and reindex documents.
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message1 =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message2 =
+ DocumentBuilder()
+ .SetKey("namespace", "message/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
{
// Initializes folder and schema, index one document
@@ -899,8 +964,9 @@ TEST_F(IcingSearchEngineInitializationTest,
std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
GetTestJniCache());
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message1).status(), ProtoIsOk());
} // This should shut down IcingSearchEngine and persist anything it needs to
{
@@ -910,7 +976,7 @@ TEST_F(IcingSearchEngineInitializationTest,
SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
ICING_EXPECT_OK(schema_store->SetSchema(CreateMessageSchema()));
- // Puts a second document into DocumentStore but doesn't index it.
+ // Puts message2 into DocumentStore but doesn't index it.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(filesystem(), GetDocumentDir(), &fake_clock,
@@ -918,11 +984,10 @@ TEST_F(IcingSearchEngineInitializationTest,
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
- ICING_EXPECT_OK(document_store->Put(document2));
+ ICING_EXPECT_OK(document_store->Put(message2));
}
- // Mock filesystem to observe and check the behavior of term index and
- // integer index.
+ // Mock filesystem to observe and check the behavior of all indices.
auto mock_filesystem = std::make_unique<MockFilesystem>();
EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
.WillRepeatedly(DoDefault());
@@ -939,6 +1004,15 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_CALL(*mock_filesystem,
DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
.Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
TestIcingSearchEngine icing(GetDefaultIcingOptions(),
std::move(mock_filesystem),
@@ -953,27 +1027,30 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(
initialize_result.initialize_stats().integer_index_restoration_cause(),
Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document1;
+ *expected_get_result_proto.mutable_document() = message1;
// DocumentStore kept the additional document
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
+ EXPECT_THAT(icing.Get("namespace", "message/1",
+ GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
+ *expected_get_result_proto.mutable_document() = message2;
+ EXPECT_THAT(icing.Get("namespace", "message/2",
+ GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
+ message2;
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
+ message1;
// We indexed the additional document in all indices.
// Verify term search
@@ -998,6 +1075,40 @@ TEST_F(IcingSearchEngineInitializationTest,
ResultSpecProto::default_instance());
EXPECT_THAT(search_result_google::protobuf, EqualsSearchResultIgnoreStatsAndScores(
expected_search_result_proto));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `body:message` based on the child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_max_joined_child_count(100);
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:message");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ SearchResultProto expected_join_search_result_proto;
+ expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_join_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message2;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message1;
+
+ SearchResultProto search_result_proto3 =
+ icing.Search(search_spec3, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto3, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_search_result_proto));
}
TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIndex) {
@@ -1008,20 +1119,67 @@ TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIndex) {
// - Should discard the entire term index directory and start it from
// scratch.
// - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
// - Truncate indices:
// - "TruncateTo()" for term index shouldn't take effect since we start it
// from scratch.
// - "Clear()" shouldn't be called for integer index, i.e. no integer index
// storage sub directories (path_expr = "*/integer_index_dir/*") should be
// discarded.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
SearchSpecProto search_spec;
- search_spec.set_query("message");
+ search_spec.set_query("body:message");
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
+ message;
{
// Initializes folder and schema, index one document
@@ -1030,9 +1188,9 @@ TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIndex) {
std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
GetTestJniCache());
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
@@ -1048,8 +1206,7 @@ TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIndex) {
ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
}
- // Mock filesystem to observe and check the behavior of term index and integer
- // index.
+ // Mock filesystem to observe and check the behavior of all indices.
auto mock_filesystem = std::make_unique<MockFilesystem>();
EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
.WillRepeatedly(DoDefault());
@@ -1066,6 +1223,15 @@ TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIndex) {
EXPECT_CALL(*mock_filesystem,
DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
.Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
TestIcingSearchEngine icing(GetDefaultIcingOptions(),
std::move(mock_filesystem),
@@ -1078,6 +1244,9 @@ TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIndex) {
EXPECT_THAT(
initialize_result.initialize_stats().integer_index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
// Check that our index is ok by searching over the restored index
SearchResultProto search_result_proto =
@@ -1095,11 +1264,58 @@ TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIntegerIndex) {
// - Term index directory should be unaffected.
// - Should discard the entire integer index directory and start it from
// scratch.
+ // - Qualified id join index directory should be unaffected.
// - Truncate indices:
// - "TruncateTo()" for term index shouldn't take effect.
// - "Clear()" shouldn't be called for integer index, i.e. no integer index
// storage sub directories (path_expr = "*/integer_index_dir/*") should be
// discarded, since we start it from scratch.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
SearchSpecProto search_spec;
search_spec.set_query("indexableInteger == 123");
search_spec.set_search_type(
@@ -1109,7 +1325,7 @@ TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIntegerIndex) {
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
+ message;
{
// Initializes folder and schema, index one document
@@ -1118,9 +1334,9 @@ TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIntegerIndex) {
std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
GetTestJniCache());
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
@@ -1138,8 +1354,7 @@ TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIntegerIndex) {
ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
}
- // Mock filesystem to observe and check the behavior of term index and integer
- // index.
+ // Mock filesystem to observe and check the behavior of all indices.
auto mock_filesystem = std::make_unique<MockFilesystem>();
EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
.WillRepeatedly(DoDefault());
@@ -1156,6 +1371,15 @@ TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIntegerIndex) {
EXPECT_CALL(*mock_filesystem,
DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
.Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
TestIcingSearchEngine icing(GetDefaultIcingOptions(),
std::move(mock_filesystem),
@@ -1168,6 +1392,175 @@ TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIntegerIndex) {
EXPECT_THAT(
initialize_result.initialize_stats().integer_index_restoration_cause(),
Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Check that our index is ok by searching over the restored index
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RecoverFromCorruptQualifiedIdJoinIndex) {
+ // Test the following scenario: qualified id join index is corrupted (e.g.
+ // checksum doesn't match). IcingSearchEngine should be able to recover
+ // qualified id join index. Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Should discard the entire qualified id join index directory and start
+ // it from scratch.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded, since we start
+ // it from scratch.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // Prepare join search spec to join a query for `name:person` with a child
+ // query for `body:message` based on the child's `senderQualifiedId` field.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_max_joined_child_count(100);
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:message");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message;
+
+ {
+ // Initializes folder and schema, index one document
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ // Manually corrupt qualified id join index
+ {
+ const std::string qualified_id_join_index_metadata_file =
+ GetQualifiedIdJoinIndexDir() + "/metadata";
+ ScopedFd fd(filesystem()->OpenForWrite(
+ qualified_id_join_index_metadata_file.c_str()));
+ ASSERT_TRUE(fd.is_valid());
+ ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
+ }
+
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory "*/integer_index_dir/*"
+ // should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should be discarded once, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(1);
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ EXPECT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
// Check that our index is ok by searching over the restored index
SearchResultProto search_result_proto =
@@ -1185,19 +1578,60 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseTermIndex) {
// - Term index directory should not be discarded since we've already lost
// it. Start it from scratch.
// - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
// - Truncate indices:
// - "TruncateTo()" for term index shouldn't take effect since we start it
// from scratch.
// - "Clear()" shouldn't be called for integer index, i.e. no integer index
// storage sub directories (path_expr = "*/integer_index_dir/*") should be
// discarded.
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", kIpsumText)
- .AddInt64Property("indexableInteger", 123)
- .Build();
- // 1. Create an index with 3 documents.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with 3 message documents.
{
TestIcingSearchEngine icing(
GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
@@ -1205,13 +1639,14 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseTermIndex) {
GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = DocumentBuilder(document).SetUri("fake_type/1").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = DocumentBuilder(document).SetUri("fake_type/2").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/3").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
}
// 2. Delete the term index directory to trigger RestoreIndexIfNeeded.
@@ -1220,8 +1655,7 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseTermIndex) {
// 3. Create the index again. This should trigger index restoration.
{
- // Mock filesystem to observe and check the behavior of term index and
- // integer index.
+ // Mock filesystem to observe and check the behavior of all indices.
auto mock_filesystem = std::make_unique<MockFilesystem>();
EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
.WillRepeatedly(DoDefault());
@@ -1239,6 +1673,16 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseTermIndex) {
EXPECT_CALL(*mock_filesystem,
DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
.Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
TestIcingSearchEngine icing(
GetDefaultIcingOptions(), std::move(mock_filesystem),
@@ -1251,10 +1695,13 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseTermIndex) {
EXPECT_THAT(
initialize_result.initialize_stats().integer_index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
// Verify term index works normally
SearchSpecProto search_spec1;
- search_spec1.set_query("consectetur");
+ search_spec1.set_query("body:consectetur");
search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
SearchResultProto results1 =
icing.Search(search_spec1, ScoringSpecProto::default_instance(),
@@ -1263,9 +1710,9 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseTermIndex) {
EXPECT_THAT(results1.next_page_token(), Eq(0));
// All documents should be retrievable.
ASSERT_THAT(results1.results(), SizeIs(3));
- EXPECT_THAT(results1.results(0).document().uri(), Eq("fake_type/2"));
- EXPECT_THAT(results1.results(1).document().uri(), Eq("fake_type/1"));
- EXPECT_THAT(results1.results(2).document().uri(), Eq("fake_type/0"));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
// Verify integer index works normally
SearchSpecProto search_spec2;
@@ -1278,9 +1725,43 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseTermIndex) {
icing.Search(search_spec2, ScoringSpecProto::default_instance(),
ResultSpecProto::default_instance());
ASSERT_THAT(results2.results(), SizeIs(3));
- EXPECT_THAT(results2.results(0).document().uri(), Eq("fake_type/2"));
- EXPECT_THAT(results2.results(1).document().uri(), Eq("fake_type/1"));
- EXPECT_THAT(results2.results(2).document().uri(), Eq("fake_type/0"));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_max_joined_child_count(100);
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ SearchResultProto results3 =
+ icing.Search(search_spec3, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results3.results(), SizeIs(1));
+ EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
+ EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
+ Eq("message/3"));
+ EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
+ Eq("message/1"));
}
}
@@ -1292,18 +1773,59 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseIntegerIndex) {
// - Term index directory should be unaffected.
// - Integer index directory should not be discarded since we've already
// lost it. Start it from scratch.
+ // - Qualified id join index directory should be unaffected.
// - Truncate indices:
// - "TruncateTo()" for term index shouldn't take effect.
// - "Clear()" shouldn't be called for integer index, i.e. no integer index
// storage sub directories (path_expr = "*/integer_index_dir/*") should be
// discarded, since we start it from scratch.
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", kIpsumText)
- .AddInt64Property("indexableInteger", 123)
- .Build();
- // 1. Create an index with 3 documents.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with 3 message documents.
{
TestIcingSearchEngine icing(
GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
@@ -1311,13 +1833,14 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseIntegerIndex) {
GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = DocumentBuilder(document).SetUri("fake_type/1").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = DocumentBuilder(document).SetUri("fake_type/2").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/3").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
}
// 2. Delete the integer index file to trigger RestoreIndexIfNeeded.
@@ -1326,8 +1849,7 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseIntegerIndex) {
// 3. Create the index again. This should trigger index restoration.
{
- // Mock filesystem to observe and check the behavior of term index and
- // integer index.
+ // Mock filesystem to observe and check the behavior of all indices.
auto mock_filesystem = std::make_unique<MockFilesystem>();
EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
.WillRepeatedly(DoDefault());
@@ -1345,6 +1867,16 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseIntegerIndex) {
EXPECT_CALL(*mock_filesystem,
DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
.Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
TestIcingSearchEngine icing(
GetDefaultIcingOptions(), std::move(mock_filesystem),
@@ -1357,10 +1889,13 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseIntegerIndex) {
EXPECT_THAT(
initialize_result.initialize_stats().integer_index_restoration_cause(),
Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
// Verify term index works normally
SearchSpecProto search_spec1;
- search_spec1.set_query("consectetur");
+ search_spec1.set_query("body:consectetur");
search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
SearchResultProto results1 =
icing.Search(search_spec1, ScoringSpecProto::default_instance(),
@@ -1369,9 +1904,9 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseIntegerIndex) {
EXPECT_THAT(results1.next_page_token(), Eq(0));
// All documents should be retrievable.
ASSERT_THAT(results1.results(), SizeIs(3));
- EXPECT_THAT(results1.results(0).document().uri(), Eq("fake_type/2"));
- EXPECT_THAT(results1.results(1).document().uri(), Eq("fake_type/1"));
- EXPECT_THAT(results1.results(2).document().uri(), Eq("fake_type/0"));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
// Verify integer index works normally
SearchSpecProto search_spec2;
@@ -1384,9 +1919,239 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseIntegerIndex) {
icing.Search(search_spec2, ScoringSpecProto::default_instance(),
ResultSpecProto::default_instance());
ASSERT_THAT(results2.results(), SizeIs(3));
- EXPECT_THAT(results2.results(0).document().uri(), Eq("fake_type/2"));
- EXPECT_THAT(results2.results(1).document().uri(), Eq("fake_type/1"));
- EXPECT_THAT(results2.results(2).document().uri(), Eq("fake_type/0"));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_max_joined_child_count(100);
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ SearchResultProto results3 =
+ icing.Search(search_spec3, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results3.results(), SizeIs(1));
+ EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
+ EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
+ Eq("message/3"));
+ EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
+ Eq("message/1"));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexLoseQualifiedIdJoinIndex) {
+ // Test the following scenario: losing the entire qualified id join index
+ // directory. IcingSearchEngine should be able to recover qualified id join
+ // index. Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should not be discarded since we've
+ // already lost it. Start it from scratch.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded, since we start
+ // it from scratch.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with 3 message documents.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/3").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+ // 2. Delete the qualified id join index file to trigger RestoreIndexIfNeeded.
+ std::string qualified_id_join_index_dir = GetQualifiedIdJoinIndexDir();
+ filesystem()->DeleteDirectoryRecursively(qualified_id_join_index_dir.c_str());
+
+ // 3. Create the index again. This should trigger index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+    // Ensure integer index directory should never be discarded, and Clear()
+    // should never be called (i.e. storage sub directory
+    // "*/integer_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+    // Ensure qualified id join index directory should never be discarded (it
+    // was already lost and is rebuilt from scratch), and Clear() should never
+    // be called (i.e. storage sub directory
+    // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+
+ // Verify term index works normally
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("body:consectetur");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results1 =
+ icing.Search(search_spec1, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results1.status(), ProtoIsOk());
+ EXPECT_THAT(results1.next_page_token(), Eq(0));
+ // All documents should be retrievable.
+ ASSERT_THAT(results1.results(), SizeIs(3));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results2.results(), SizeIs(3));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_max_joined_child_count(100);
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ SearchResultProto results3 =
+ icing.Search(search_spec3, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results3.results(), SizeIs(1));
+ EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
+ EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
+ Eq("message/3"));
+ EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
+ Eq("message/1"));
}
}
@@ -1398,6 +2163,7 @@ TEST_F(IcingSearchEngineInitializationTest,
// - Index directory handling:
// - Term index directory should be unaffected.
// - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
// - Truncate indices:
// - "TruncateTo()" for term index should take effect and throw out the
// entire lite index. This should be sufficient to make term index
@@ -1405,39 +2171,82 @@ TEST_F(IcingSearchEngineInitializationTest,
// - "Clear()" shouldn't be called for integer index, i.e. no integer index
// storage sub directories (path_expr = "*/integer_index_dir/*") should be
// discarded.
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", kIpsumText)
- .AddInt64Property("indexableInteger", 123)
- .Build();
- // 1. Create an index with a LiteIndex that will only allow one document
- // before needing a merge.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with a LiteIndex that will only allow a person and a
+ // message document before needing a merge.
{
IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(document.ByteSizeLong());
+ options.set_index_merge_size(person.ByteSizeLong() +
+ message.ByteSizeLong());
TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
std::make_unique<IcingFilesystem>(),
std::make_unique<FakeClock>(),
GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- // Add two documents. These should get merged into the main index.
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = DocumentBuilder(document).SetUri("fake_type/1").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ // Add two message documents. These should get merged into the main index.
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
}
// 2. Manually add some data into term lite index and increment
// last_added_document_id, but don't merge into the main index. This will
// cause mismatched last_added_document_id with term index.
- // - Document store: [0, 1]
+ // - Document store: [0, 1, 2]
// - Term index
- // - Main index: [0, 1]
- // - Lite index: [2]
- // - Integer index: [0, 1]
+ // - Main index: [0, 1, 2]
+ // - Lite index: [3]
+ // - Integer index: [0, 1, 2]
+ // - Qualified id join index: [0, 1, 2]
{
Filesystem filesystem;
IcingFilesystem icing_filesystem;
@@ -1445,7 +2254,7 @@ TEST_F(IcingSearchEngineInitializationTest,
std::unique_ptr<Index> index,
Index::Create(
Index::Options(GetIndexDir(),
- /*index_merge_size=*/document.ByteSizeLong()),
+ /*index_merge_size=*/message.ByteSizeLong()),
&filesystem, &icing_filesystem));
DocumentId original_last_added_doc_id = index->last_added_document_id();
index->set_last_added_document_id(original_last_added_doc_id + 1);
@@ -1458,8 +2267,7 @@ TEST_F(IcingSearchEngineInitializationTest,
// 3. Create the index again.
{
- // Mock filesystem to observe and check the behavior of term index and
- // integer index.
+ // Mock filesystem to observe and check the behavior of all indices.
auto mock_filesystem = std::make_unique<MockFilesystem>();
EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
.WillRepeatedly(DoDefault());
@@ -1477,9 +2285,19 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_CALL(*mock_filesystem,
DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
.Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(document.ByteSizeLong());
+ options.set_index_merge_size(message.ByteSizeLong());
TestIcingSearchEngine icing(options, std::move(mock_filesystem),
std::make_unique<IcingFilesystem>(),
std::make_unique<FakeClock>(),
@@ -1493,10 +2311,13 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(
initialize_result.initialize_stats().integer_index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
// Verify term index works normally
SearchSpecProto search_spec1;
- search_spec1.set_query("consectetur");
+ search_spec1.set_query("body:consectetur");
search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
SearchResultProto results1 =
icing.Search(search_spec1, ScoringSpecProto::default_instance(),
@@ -1505,8 +2326,8 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(results1.next_page_token(), Eq(0));
// Only the documents that were in the main index should be retrievable.
ASSERT_THAT(results1.results(), SizeIs(2));
- EXPECT_THAT(results1.results(0).document().uri(), Eq("fake_type/1"));
- EXPECT_THAT(results1.results(1).document().uri(), Eq("fake_type/0"));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("message/1"));
// Verify integer index works normally
SearchSpecProto search_spec2;
@@ -1519,11 +2340,43 @@ TEST_F(IcingSearchEngineInitializationTest,
icing.Search(search_spec2, ScoringSpecProto::default_instance(),
ResultSpecProto::default_instance());
ASSERT_THAT(results2.results(), SizeIs(2));
- EXPECT_THAT(results2.results(0).document().uri(), Eq("fake_type/1"));
- EXPECT_THAT(results2.results(1).document().uri(), Eq("fake_type/0"));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("message/1"));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_max_joined_child_count(100);
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ SearchResultProto results3 =
+ icing.Search(search_spec3, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results3.results(), SizeIs(1));
+ EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results3.results(0).joined_results(), SizeIs(2));
+ EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
+ Eq("message/1"));
}
- // 4. Since document 2 doesn't exist, testing query = "foo" is not enough to
+ // 4. Since document 3 doesn't exist, testing query = "foo" is not enough to
// verify the correctness of term index restoration. Instead, we have to check
// hits for "foo" should not be found in term index.
{
@@ -1533,7 +2386,7 @@ TEST_F(IcingSearchEngineInitializationTest,
std::unique_ptr<Index> index,
Index::Create(
Index::Options(GetIndexDir(),
- /*index_merge_size=*/document.ByteSizeLong()),
+ /*index_merge_size=*/message.ByteSizeLong()),
&filesystem, &icing_filesystem));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
@@ -1553,6 +2406,7 @@ TEST_F(IcingSearchEngineInitializationTest,
// - Index directory handling:
// - Term index directory should be unaffected.
// - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
// - Truncate indices:
// - "TruncateTo()" for term index should take effect and throw out the
// entire lite index. However, some valid data in term lite index were
@@ -1561,42 +2415,84 @@ TEST_F(IcingSearchEngineInitializationTest,
// - "Clear()" shouldn't be called for integer index, i.e. no integer index
// storage sub directories (path_expr = "*/integer_index_dir/*") should be
// discarded.
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", kIpsumText)
- .AddInt64Property("indexableInteger", 123)
- .Build();
- // 1. Create an index with a LiteIndex that will only allow one document
- // before needing a merge.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with a LiteIndex that will only allow a person and a
+ // message document before needing a merge.
{
IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(document.ByteSizeLong());
+ options.set_index_merge_size(message.ByteSizeLong());
TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
std::make_unique<IcingFilesystem>(),
std::make_unique<FakeClock>(),
GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- // Add two documents. These should get merged into the main index.
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = DocumentBuilder(document).SetUri("fake_type/1").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ // Add two message documents. These should get merged into the main index.
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
    // Add one document. This one should remain in the lite index.
- document = DocumentBuilder(document).SetUri("fake_type/2").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/3").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
}
// 2. Manually add some data into term lite index and increment
// last_added_document_id, but don't merge into the main index. This will
// cause mismatched last_added_document_id with term index.
- // - Document store: [0, 1, 2]
+ // - Document store: [0, 1, 2, 3]
// - Term index
- // - Main index: [0, 1]
- // - Lite index: [2, 3]
- // - Integer index: [0, 1, 2]
+ // - Main index: [0, 1, 2]
+ // - Lite index: [3, 4]
+ // - Integer index: [0, 1, 2, 3]
+ // - Qualified id join index: [0, 1, 2, 3]
{
Filesystem filesystem;
IcingFilesystem icing_filesystem;
@@ -1604,7 +2500,7 @@ TEST_F(IcingSearchEngineInitializationTest,
std::unique_ptr<Index> index,
Index::Create(
Index::Options(GetIndexDir(),
- /*index_merge_size=*/document.ByteSizeLong()),
+ /*index_merge_size=*/message.ByteSizeLong()),
&filesystem, &icing_filesystem));
DocumentId original_last_added_doc_id = index->last_added_document_id();
index->set_last_added_document_id(original_last_added_doc_id + 1);
@@ -1617,8 +2513,7 @@ TEST_F(IcingSearchEngineInitializationTest,
// 3. Create the index again.
{
- // Mock filesystem to observe and check the behavior of term index and
- // integer index.
+ // Mock filesystem to observe and check the behavior of all indices.
auto mock_filesystem = std::make_unique<MockFilesystem>();
EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
.WillRepeatedly(DoDefault());
@@ -1636,9 +2531,19 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_CALL(*mock_filesystem,
DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
.Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(document.ByteSizeLong());
+ options.set_index_merge_size(message.ByteSizeLong());
TestIcingSearchEngine icing(options, std::move(mock_filesystem),
std::make_unique<IcingFilesystem>(),
std::make_unique<FakeClock>(),
@@ -1653,10 +2558,13 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(
initialize_result.initialize_stats().integer_index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
// Verify term index works normally
SearchSpecProto search_spec1;
- search_spec1.set_query("consectetur");
+ search_spec1.set_query("body:consectetur");
search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
SearchResultProto results1 =
icing.Search(search_spec1, ScoringSpecProto::default_instance(),
@@ -1665,9 +2573,9 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(results1.next_page_token(), Eq(0));
// Only the documents that were in the main index should be retrievable.
ASSERT_THAT(results1.results(), SizeIs(3));
- EXPECT_THAT(results1.results(0).document().uri(), Eq("fake_type/2"));
- EXPECT_THAT(results1.results(1).document().uri(), Eq("fake_type/1"));
- EXPECT_THAT(results1.results(2).document().uri(), Eq("fake_type/0"));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
// Verify integer index works normally
SearchSpecProto search_spec2;
@@ -1680,12 +2588,46 @@ TEST_F(IcingSearchEngineInitializationTest,
icing.Search(search_spec2, ScoringSpecProto::default_instance(),
ResultSpecProto::default_instance());
ASSERT_THAT(results2.results(), SizeIs(3));
- EXPECT_THAT(results2.results(0).document().uri(), Eq("fake_type/2"));
- EXPECT_THAT(results2.results(1).document().uri(), Eq("fake_type/1"));
- EXPECT_THAT(results2.results(2).document().uri(), Eq("fake_type/0"));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_max_joined_child_count(100);
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ SearchResultProto results3 =
+ icing.Search(search_spec3, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results3.results(), SizeIs(1));
+ EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
+ EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
+ Eq("message/3"));
+ EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
+ Eq("message/1"));
}
- // 4. Since document 3 doesn't exist, testing query = "foo" is not enough to
+ // 4. Since document 4 doesn't exist, testing query = "foo" is not enough to
// verify the correctness of term index restoration. Instead, we have to check
// hits for "foo" should not be found in term index.
{
@@ -1695,7 +2637,7 @@ TEST_F(IcingSearchEngineInitializationTest,
std::unique_ptr<Index> index,
Index::Create(
Index::Options(GetIndexDir(),
- /*index_merge_size=*/document.ByteSizeLong()),
+ /*index_merge_size=*/message.ByteSizeLong()),
&filesystem, &icing_filesystem));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
@@ -1715,14 +2657,18 @@ TEST_F(IcingSearchEngineInitializationTest,
// - Index directory handling:
// - Term index directory should be unaffected.
// - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
// - Truncate indices:
// - "TruncateTo()" for term index should take effect and throw out the
// entire lite and main index. This should be sufficient to make term
// index consistent with document store (in this case, document store is
// empty as well), so reindexing should not take place.
- // - "Clear()" shouldn't be called for integer index, i.e. no integer index
- // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // - "Clear()" should be called for integer index. It is a special case when
+ // document store has no document. Since there is no integer index storage
+ // sub directories (path_expr = "*/integer_index_dir/*"), nothing will be
// discarded.
+ // - "Clear()" should be called for qualified id join index. It is a special
+ // case when document store has no document.
// 1. Create an index with no document.
{
@@ -1744,6 +2690,7 @@ TEST_F(IcingSearchEngineInitializationTest,
// - Main index: [0]
// - Lite index: [1]
// - Integer index: []
+ // - Qualified id join index: []
{
Filesystem filesystem;
IcingFilesystem icing_filesystem;
@@ -1774,8 +2721,7 @@ TEST_F(IcingSearchEngineInitializationTest,
// 3. Create the index again. This should throw out the lite and main index.
{
- // Mock filesystem to observe and check the behavior of term index and
- // integer index.
+ // Mock filesystem to observe and check the behavior of all indices.
auto mock_filesystem = std::make_unique<MockFilesystem>();
EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
.WillRepeatedly(DoDefault());
@@ -1784,15 +2730,25 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_CALL(*mock_filesystem,
DeleteDirectoryRecursively(EndsWith("/index_dir")))
.Times(0);
- // Ensure integer index directory should never be discarded, and Clear()
- // should never be called (i.e. storage sub directory
- // "*/integer_index_dir/*" should never be discarded).
+ // Ensure integer index directory should never be discarded. Even though
+ // Clear() was called, it shouldn't take effect since there is no storage
+ // sub directory ("*/integer_index_dir/*") and nothing will be discarded.
EXPECT_CALL(*mock_filesystem,
DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
.Times(0);
EXPECT_CALL(*mock_filesystem,
DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
.Times(0);
+ // Ensure qualified id join index directory should never be discarded.
+ // Clear() was called and should discard and reinitialize the underlying
+ // mapper.
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(AtLeast(1));
TestIcingSearchEngine icing(
GetDefaultIcingOptions(), std::move(mock_filesystem),
@@ -1808,6 +2764,9 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(
initialize_result.initialize_stats().integer_index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
}
// 4. Since document 0, 1 don't exist, testing queries = "foo", "bar" are not
@@ -1846,6 +2805,7 @@ TEST_F(IcingSearchEngineInitializationTest,
// - Index directory handling:
// - Term index directory should be unaffected.
// - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
// - In RestoreIndexIfNecessary():
// - "TruncateTo()" for term index should take effect and throw out the
// entire lite and main index. However, some valid data in term main index
@@ -1854,13 +2814,53 @@ TEST_F(IcingSearchEngineInitializationTest,
// - "Clear()" shouldn't be called for integer index, i.e. no integer index
// storage sub directories (path_expr = "*/integer_index_dir/*") should be
// discarded.
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", kIpsumText)
- .AddInt64Property("indexableInteger", 123)
- .Build();
- // 1. Create an index with 3 documents.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with 3 message documents.
{
TestIcingSearchEngine icing(
GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
@@ -1868,24 +2868,26 @@ TEST_F(IcingSearchEngineInitializationTest,
GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = DocumentBuilder(document).SetUri("fake_type/1").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = DocumentBuilder(document).SetUri("fake_type/2").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/3").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
}
// 2. Manually add some data into term lite index and increment
// last_added_document_id. Merge some of them into the main index and keep
// others in the lite index. This will cause mismatched document id with
// document store.
- // - Document store: [0, 1, 2]
+ // - Document store: [0, 1, 2, 3]
// - Term index
- // - Main index: [0, 1, 2, 3]
- // - Lite index: [4]
- // - Integer index: [0, 1, 2]
+ // - Main index: [0, 1, 2, 3, 4]
+ // - Lite index: [5]
+ // - Integer index: [0, 1, 2, 3]
+ // - Qualified id join index: [0, 1, 2, 3]
{
Filesystem filesystem;
IcingFilesystem icing_filesystem;
@@ -1893,9 +2895,9 @@ TEST_F(IcingSearchEngineInitializationTest,
std::unique_ptr<Index> index,
Index::Create(
Index::Options(GetIndexDir(),
- /*index_merge_size=*/document.ByteSizeLong()),
+ /*index_merge_size=*/message.ByteSizeLong()),
&filesystem, &icing_filesystem));
- // Add hits for document 3 and merge.
+ // Add hits for document 4 and merge.
DocumentId original_last_added_doc_id = index->last_added_document_id();
index->set_last_added_document_id(original_last_added_doc_id + 1);
Index::Editor editor =
@@ -1905,7 +2907,7 @@ TEST_F(IcingSearchEngineInitializationTest,
ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(index->Merge());
- // Add hits for document 4 and don't merge.
+ // Add hits for document 5 and don't merge.
index->set_last_added_document_id(original_last_added_doc_id + 2);
editor = index->Edit(original_last_added_doc_id + 2, /*section_id=*/0,
TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
@@ -1916,8 +2918,7 @@ TEST_F(IcingSearchEngineInitializationTest,
// 3. Create the index again. This should throw out the lite and main index
// and trigger index restoration.
{
- // Mock filesystem to observe and check the behavior of term index and
- // integer index.
+ // Mock filesystem to observe and check the behavior of all indices.
auto mock_filesystem = std::make_unique<MockFilesystem>();
EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
.WillRepeatedly(DoDefault());
@@ -1935,6 +2936,16 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_CALL(*mock_filesystem,
DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
.Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
TestIcingSearchEngine icing(
GetDefaultIcingOptions(), std::move(mock_filesystem),
@@ -1950,10 +2961,13 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(
initialize_result.initialize_stats().integer_index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
// Verify term index works normally
SearchSpecProto search_spec1;
- search_spec1.set_query("consectetur");
+ search_spec1.set_query("body:consectetur");
search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
SearchResultProto results1 =
icing.Search(search_spec1, ScoringSpecProto::default_instance(),
@@ -1962,9 +2976,9 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(results1.next_page_token(), Eq(0));
    // All three message documents should be retrievable after index
    // restoration.
ASSERT_THAT(results1.results(), SizeIs(3));
- EXPECT_THAT(results1.results(0).document().uri(), Eq("fake_type/2"));
- EXPECT_THAT(results1.results(1).document().uri(), Eq("fake_type/1"));
- EXPECT_THAT(results1.results(2).document().uri(), Eq("fake_type/0"));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
// Verify integer index works normally
SearchSpecProto search_spec2;
@@ -1977,12 +2991,46 @@ TEST_F(IcingSearchEngineInitializationTest,
icing.Search(search_spec2, ScoringSpecProto::default_instance(),
ResultSpecProto::default_instance());
ASSERT_THAT(results2.results(), SizeIs(3));
- EXPECT_THAT(results2.results(0).document().uri(), Eq("fake_type/2"));
- EXPECT_THAT(results2.results(1).document().uri(), Eq("fake_type/1"));
- EXPECT_THAT(results2.results(2).document().uri(), Eq("fake_type/0"));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_max_joined_child_count(100);
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ SearchResultProto results3 =
+ icing.Search(search_spec3, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results3.results(), SizeIs(1));
+ EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
+ EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
+ Eq("message/3"));
+ EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
+ Eq("message/1"));
}
- // 4. Since document 3, 4 don't exist, testing queries = "foo", "bar" are not
+ // 4. Since document 4, 5 don't exist, testing queries = "foo", "bar" are not
// enough to verify the correctness of term index restoration. Instead, we
// have to check hits for "foo", "bar" should not be found in term index.
{
@@ -2018,6 +3066,7 @@ TEST_F(IcingSearchEngineInitializationTest,
// - Index directory handling:
// - Term index directory should be unaffected.
// - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
// - Truncate indices:
// - "TruncateTo()" for term index shouldn't take effect.
// - "Clear()" should be called for integer index and throw out all integer
@@ -2025,6 +3074,8 @@ TEST_F(IcingSearchEngineInitializationTest,
// "*/integer_index_dir/*") should be discarded. This should be sufficient
// to make integer index consistent with document store (in this case,
// document store is empty as well), so reindexing should not take place.
+ // - "Clear()" should be called for qualified id join index. It is a special
+ // case when document store has no document.
// 1. Create an index with no document.
{
@@ -2043,6 +3094,7 @@ TEST_F(IcingSearchEngineInitializationTest,
// - Document store: []
// - Term index: []
// - Integer index: [0]
+ // - Qualified id join index: []
{
Filesystem filesystem;
ICING_ASSERT_OK_AND_ASSIGN(
@@ -2060,8 +3112,7 @@ TEST_F(IcingSearchEngineInitializationTest,
// 3. Create the index again. This should trigger index restoration.
{
- // Mock filesystem to observe and check the behavior of term index and
- // integer index.
+ // Mock filesystem to observe and check the behavior of all indices.
auto mock_filesystem = std::make_unique<MockFilesystem>();
EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
.WillRepeatedly(DoDefault());
@@ -2078,6 +3129,16 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_CALL(*mock_filesystem,
DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
.Times(1);
+ // Ensure qualified id join index directory should never be discarded.
+ // Clear() was called and should discard and reinitialize the underlying
+ // mapper.
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(AtLeast(1));
TestIcingSearchEngine icing(
GetDefaultIcingOptions(), std::move(mock_filesystem),
@@ -2092,23 +3153,36 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(
initialize_result.initialize_stats().integer_index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
- }
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
- // 4. Since document 0 doesn't exist, testing numeric query
- // "indexableInteger == 123" is not enough to verify the correctness of
- // integer index restoration. Instead, we have to check hits for 123 should
- // not be found in integer index.
- {
- Filesystem filesystem;
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<IntegerIndex> integer_index,
- IntegerIndex::Create(filesystem, GetIntegerIndexDir()));
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
- integer_index->GetIterator(/*property_path=*/"indexableInteger",
- /*key_lower=*/123, /*key_upper=*/123));
- EXPECT_THAT(doc_hit_info_iter->Advance(),
- StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ // Verify that the integer index safely wiped out the pre-existing hit for
+ // 'indexableInteger' == 123. Add a new document without that value for
+ // 'indexableInteger' that will take docid=0. If the integer index was not
+ // rebuilt correctly, then it will still have the previously added hit for
+ // 'indexableInteger' == 123 for docid 0 and incorrectly return this new
+ // doc in a query.
+ DocumentProto another_message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 456)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ EXPECT_THAT(icing.Put(another_message).status(), ProtoIsOk());
+ // Verify integer index works normally
+ SearchSpecProto search_spec;
+ search_spec.set_query("indexableInteger == 123");
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.results(), IsEmpty());
}
}
@@ -2120,6 +3194,7 @@ TEST_F(IcingSearchEngineInitializationTest,
// - Index directory handling:
// - Term index directory should be unaffected.
// - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
// - Truncate indices:
// - "TruncateTo()" for term index shouldn't take effect.
// - "Clear()" should be called for integer index and throw out all integer
@@ -2127,13 +3202,53 @@ TEST_F(IcingSearchEngineInitializationTest,
// "*/integer_index_dir/*") should be discarded. However, some valid data
// in integer index were discarded together, so reindexing should still
// take place to recover them after clearing.
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", kIpsumText)
- .AddInt64Property("indexableInteger", 123)
- .Build();
- // 1. Create an index with 3 documents.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with 1 person document and 3 message documents.
{
TestIcingSearchEngine icing(
GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
@@ -2141,27 +3256,29 @@ TEST_F(IcingSearchEngineInitializationTest,
GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = DocumentBuilder(document).SetUri("fake_type/1").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = DocumentBuilder(document).SetUri("fake_type/2").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/3").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
}
// 2. Manually add some data into integer index and increment
// last_added_document_id. This will cause mismatched document id with
// document store.
- // - Document store: [0, 1, 2]
- // - Term index: [0, 1, 2]
- // - Integer index: [0, 1, 2, 3]
+ // - Document store: [0, 1, 2, 3]
+ // - Term index: [0, 1, 2, 3]
+ // - Integer index: [0, 1, 2, 3, 4]
+ // - Qualified id join index: [0, 1, 2, 3]
{
Filesystem filesystem;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndex> integer_index,
IntegerIndex::Create(filesystem, GetIntegerIndexDir()));
- // Add hits for document 3.
+ // Add hits for document 4.
DocumentId original_last_added_doc_id =
integer_index->last_added_document_id();
integer_index->set_last_added_document_id(original_last_added_doc_id + 1);
@@ -2174,8 +3291,7 @@ TEST_F(IcingSearchEngineInitializationTest,
// 3. Create the index again. This should trigger index restoration.
{
- // Mock filesystem to observe and check the behavior of term index and
- // integer index.
+ // Mock filesystem to observe and check the behavior of all indices.
auto mock_filesystem = std::make_unique<MockFilesystem>();
EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
.WillRepeatedly(DoDefault());
@@ -2192,6 +3308,16 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_CALL(*mock_filesystem,
DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
.Times(1);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
TestIcingSearchEngine icing(
GetDefaultIcingOptions(), std::move(mock_filesystem),
@@ -2204,10 +3330,13 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(
initialize_result.initialize_stats().integer_index_restoration_cause(),
Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
// Verify term index works normally
SearchSpecProto search_spec1;
- search_spec1.set_query("consectetur");
+ search_spec1.set_query("body:consectetur");
search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
SearchResultProto results1 =
icing.Search(search_spec1, ScoringSpecProto::default_instance(),
@@ -2216,9 +3345,9 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(results1.next_page_token(), Eq(0));
// All documents should be retrievable.
ASSERT_THAT(results1.results(), SizeIs(3));
- EXPECT_THAT(results1.results(0).document().uri(), Eq("fake_type/2"));
- EXPECT_THAT(results1.results(1).document().uri(), Eq("fake_type/1"));
- EXPECT_THAT(results1.results(2).document().uri(), Eq("fake_type/0"));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
// Verify integer index works normally
SearchSpecProto search_spec2;
@@ -2231,26 +3360,425 @@ TEST_F(IcingSearchEngineInitializationTest,
icing.Search(search_spec2, ScoringSpecProto::default_instance(),
ResultSpecProto::default_instance());
ASSERT_THAT(results2.results(), SizeIs(3));
- EXPECT_THAT(results2.results(0).document().uri(), Eq("fake_type/2"));
- EXPECT_THAT(results2.results(1).document().uri(), Eq("fake_type/1"));
- EXPECT_THAT(results2.results(2).document().uri(), Eq("fake_type/0"));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_max_joined_child_count(100);
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ SearchResultProto results3 =
+ icing.Search(search_spec3, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results3.results(), SizeIs(1));
+ EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
+ EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
+ Eq("message/3"));
+ EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
+ Eq("message/1"));
+
+ // Verify that the integer index safely wiped out the pre-existing hit for
+ // 'indexableInteger' == 456. Add a new document without that value for
+ // 'indexableInteger' that will take docid=0. If the integer index was not
+ // rebuilt correctly, then it will still have the previously added hit for
+ // 'indexableInteger' == 456 for docid 0 and incorrectly return this new
+ // doc in a query.
+ DocumentProto another_message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/4")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ EXPECT_THAT(icing.Put(another_message).status(), ProtoIsOk());
+ // Verify integer index works normally
+ SearchSpecProto search_spec;
+ search_spec.set_query("indexableInteger == 456");
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.results(), IsEmpty());
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexTruncateQualifiedIdJoinIndexWithoutReindexing) {
+ // Test the following scenario: qualified id join index is *completely* ahead
+ // of document store. IcingSearchEngine should be able to recover qualified id
+ // join index. Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" should be called for integer index. It is a special case when
+ // document store has no document. Since there is no integer index storage
+ // sub directories (path_expr = "*/integer_index_dir/*"), nothing will be
+ // discarded.
+ // - "Clear()" should be called for qualified id join index and throw out
+ // all data, i.e. discarding the underlying mapper (path_expr =
+ // "*/qualified_id_join_index_dir/*") and reinitialize. This should be
+ // sufficient to make qualified id join index consistent with document
+ // store (in this case, document store is empty as well), so reindexing
+ // should not take place.
+
+ // 1. Create an index with no document.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
}
- // 4. Since document 3 doesn't exist, testing numeric query
- // "indexableInteger == 456" is not enough to verify the correctness of
- // integer index restoration. Instead, we have to check hits for 456 should
- // not be found in integer index.
+ // 2. Manually add some data into integer index and increment
+ // last_added_document_id. This will cause mismatched document id with
+ // document store.
+ // - Document store: []
+ // - Term index: []
+ // - Integer index: []
+ // - Qualified id join index: [0]
{
Filesystem filesystem;
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<IntegerIndex> integer_index,
- IntegerIndex::Create(filesystem, GetIntegerIndexDir()));
+ std::unique_ptr<QualifiedIdTypeJoinableIndex> qualified_id_join_index,
+ QualifiedIdTypeJoinableIndex::Create(filesystem,
+ GetQualifiedIdJoinIndexDir()));
+ // Add data for document 0.
+ ASSERT_THAT(qualified_id_join_index->last_added_document_id(),
+ kInvalidDocumentId);
+ qualified_id_join_index->set_last_added_document_id(0);
+ ICING_ASSERT_OK(qualified_id_join_index->Put(
+ DocJoinInfo(/*document_id=*/0, /*joinable_property_id=*/0),
+ /*ref_qualified_id_str=*/"namespace#person"));
+ }
+
+ // 3. Create the index again. This should trigger index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded. Even though
+ // Clear() was called, it shouldn't take effect since there is no storage
+ // sub directory ("*/integer_index_dir/*") and nothing will be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ // Clear() should be called to truncate qualified id join index and thus
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(AtLeast(1));
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ // Since truncating qualified id join index is sufficient to make it
+ // consistent with document store, replaying documents or reindexing
+ // shouldn't take place.
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ }
+
+ // 4. Since document 0 doesn't exist, testing join query is not enough to
+ // verify the correctness of qualified id join index restoration. Instead, we
+ // have to check the previously added data should not be found in qualified id
+ // join index.
+ {
+ Filesystem filesystem;
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
- integer_index->GetIterator(/*property_path=*/"indexableInteger",
- /*key_lower=*/456, /*key_upper=*/456));
- EXPECT_THAT(doc_hit_info_iter->Advance(),
- StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ std::unique_ptr<QualifiedIdTypeJoinableIndex> qualified_id_join_index,
+ QualifiedIdTypeJoinableIndex::Create(filesystem,
+ GetQualifiedIdJoinIndexDir()));
+ EXPECT_THAT(qualified_id_join_index->Get(
+ DocJoinInfo(/*document_id=*/0, /*joinable_property_id=*/0)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexTruncateQualifiedIdJoinIndexWithReindexing) {
+ // Test the following scenario: qualified id join index is *partially* ahead
+ // of document store. IcingSearchEngine should be able to recover qualified id
+ // join index. Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ // - "Clear()" should be called for qualified id join index and throw out
+ // all data, i.e. discarding the underlying mapper (path_expr =
+ // "*/qualified_id_join_index_dir/*") and reinitialize. However, some
+ // valid data in qualified id join index were discarded together, so
+ // reindexing should still take place to recover them after clearing.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with 1 person document and 3 message documents.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/3").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+ DocJoinInfo additional_data_key;
+ // 2. Manually add some data into qualified id join index and increment
+ // last_added_document_id. This will cause mismatched document id with
+ // document store.
+ // - Document store: [0, 1, 2, 3]
+ // - Term index: [0, 1, 2, 3]
+ // - Integer index: [0, 1, 2, 3]
+ // - Qualified id join index: [0, 1, 2, 3, 4]
+ {
+ Filesystem filesystem;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdTypeJoinableIndex> qualified_id_join_index,
+ QualifiedIdTypeJoinableIndex::Create(filesystem,
+ GetQualifiedIdJoinIndexDir()));
+ // Add data for document 4.
+ DocumentId original_last_added_doc_id =
+ qualified_id_join_index->last_added_document_id();
+ qualified_id_join_index->set_last_added_document_id(
+ original_last_added_doc_id + 1);
+ additional_data_key =
+ DocJoinInfo(/*document_id=*/original_last_added_doc_id + 1,
+ /*joinable_property_id=*/0);
+ ICING_ASSERT_OK(qualified_id_join_index->Put(
+ additional_data_key,
+ /*ref_qualified_id_str=*/"namespace#person"));
+ }
+
+ // 3. Create the index again. This should trigger index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory
+ // "*/integer_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ // Clear() should be called to truncate qualified id join index and thus
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(AtLeast(1));
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+
+ // Verify term index works normally
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("body:consectetur");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results1 =
+ icing.Search(search_spec1, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results1.status(), ProtoIsOk());
+ EXPECT_THAT(results1.next_page_token(), Eq(0));
+ // All documents should be retrievable.
+ ASSERT_THAT(results1.results(), SizeIs(3));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results2.results(), SizeIs(3));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+
+ // Add document 4 without "senderQualifiedId". If joinable index is not
+ // rebuilt correctly, then it will still have the previously added
+ // senderQualifiedId for document 4 and incorrectly include document 4 in
+ // the joined (child) results.
+ DocumentProto another_message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/4")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ EXPECT_THAT(icing.Put(another_message).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_max_joined_child_count(100);
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ SearchResultProto results3 =
+ icing.Search(search_spec3, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results3.results(), SizeIs(1));
+ EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
+ EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
+ Eq("message/3"));
+ EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
+ Eq("message/1"));
}
}
@@ -2308,6 +3836,9 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(
init_result.initialize_stats().integer_index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
}
}
@@ -2332,14 +3863,21 @@ TEST_F(IcingSearchEngineInitializationTest,
.AddProperty(PropertyConfigBuilder()
.SetName("indexableInteger")
.SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- // Set a schema for a single type that has no indexed contents.
+ // Set a schema for a single type that has no term, integer, or join
+ // indexed contents.
ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
// Add a document that contains:
// - No valid indexed string content - just punctuation
// - No integer content - since it is an optional property
+ // - No qualified id content - since it is an optional property
DocumentProto document = DocumentBuilder()
.SetKey("icing", "fake_type/0")
.SetSchema("Message")
@@ -2364,6 +3902,9 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(
init_result.initialize_stats().integer_index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
}
}
@@ -2454,6 +3995,9 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(initialize_result_proto.initialize_stats()
.integer_index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
EXPECT_THAT(
initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
Eq(0));
@@ -2525,6 +4069,9 @@ TEST_F(IcingSearchEngineInitializationTest,
.integer_index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
Eq(0));
EXPECT_THAT(initialize_result_proto.initialize_stats()
@@ -2616,6 +4163,9 @@ TEST_F(IcingSearchEngineInitializationTest,
.integer_index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
Eq(0));
EXPECT_THAT(initialize_result_proto.initialize_stats()
@@ -2667,6 +4217,9 @@ TEST_F(IcingSearchEngineInitializationTest,
.integer_index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
Eq(10));
EXPECT_THAT(initialize_result_proto.initialize_stats()
@@ -2728,6 +4281,111 @@ TEST_F(
.integer_index_restoration_cause(),
Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .index_restoration_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+ }
+}
+
+TEST_F(
+ IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCauseQualifiedIdJoinIndexInconsistentWithGroundTruth) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ {
+ // Initialize and put documents.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+ {
+ // Delete the qualified id join index file to trigger RestoreIndexIfNeeded.
+ std::string qualified_id_join_index_dir = GetQualifiedIdJoinIndexDir();
+ filesystem()->DeleteDirectoryRecursively(
+ qualified_id_join_index_dir.c_str());
+ }
+
+ {
+ // Index is empty but ground truth is not. Index should be restored due to
+ // the inconsistency.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
Eq(10));
EXPECT_THAT(initialize_result_proto.initialize_stats()
@@ -2808,6 +4466,9 @@ TEST_F(IcingSearchEngineInitializationTest,
.integer_index_restoration_cause(),
Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
Eq(10));
EXPECT_THAT(initialize_result_proto.initialize_stats()
@@ -2844,6 +4505,9 @@ TEST_F(IcingSearchEngineInitializationTest,
.integer_index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
Eq(0));
EXPECT_THAT(initialize_result_proto.initialize_stats()
@@ -2906,6 +4570,9 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(initialize_result_proto.initialize_stats()
.integer_index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
EXPECT_THAT(
initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
Eq(10));
@@ -2966,6 +4633,113 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(initialize_result_proto.initialize_stats()
.integer_index_restoration_cause(),
Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCauseQualifiedIdJoinIndexIOError) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ {
+ // Initialize and put documents.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+ std::string qualified_id_join_index_metadata_file =
+ absl_ports::StrCat(GetQualifiedIdJoinIndexDir(), "/metadata");
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, PRead(A<const char*>(), _, _, _))
+ .WillRepeatedly(DoDefault());
+ // This fails QualifiedIdTypeJoinableIndex::Create() once.
+ EXPECT_CALL(
+ *mock_filesystem,
+ PRead(Matcher<const char*>(Eq(qualified_id_join_index_metadata_file)), _,
+ _, _))
+ .WillOnce(Return(false))
+ .WillRepeatedly(DoDefault());
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
EXPECT_THAT(
initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
Eq(10));
@@ -3038,6 +4812,9 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(initialize_result_proto.initialize_stats()
.integer_index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
EXPECT_THAT(
initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
Eq(0));
@@ -3095,6 +4872,9 @@ TEST_F(IcingSearchEngineInitializationTest,
.integer_index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
Eq(0));
}
diff --git a/icing/icing-search-engine_optimize_test.cc b/icing/icing-search-engine_optimize_test.cc
index b2c7a62..0c5cb7a 100644
--- a/icing/icing-search-engine_optimize_test.cc
+++ b/icing/icing-search-engine_optimize_test.cc
@@ -28,6 +28,7 @@
#include "icing/file/mock-filesystem.h"
#include "icing/icing-search-engine.h"
#include "icing/jni/jni-cache.h"
+#include "icing/join/join-processor.h"
#include "icing/portable/endian.h"
#include "icing/portable/equals-proto.h"
#include "icing/portable/platform.h"
@@ -123,46 +124,46 @@ IcingSearchEngineOptions GetDefaultIcingOptions() {
return icing_options;
}
-DocumentProto CreateMessageDocument(std::string name_space, std::string uri) {
- return DocumentBuilder()
- .SetKey(std::move(name_space), std::move(uri))
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .AddInt64Property("indexableInteger", 123)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-}
-
-SchemaProto CreateMessageSchema() {
- return SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Message")
- .AddProperty(PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED))
- .AddProperty(PropertyConfigBuilder()
- .SetName("indexableInteger")
- .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
- .SetCardinality(CARDINALITY_REQUIRED)))
- .Build();
-}
-
ScoringSpecProto GetDefaultScoringSpec() {
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
return scoring_spec;
}
+// TODO(b/272145329): create SearchSpecBuilder, JoinSpecBuilder,
+// SearchResultProtoBuilder and ResultProtoBuilder for unit tests and build all
+// instances by them.
+
TEST_F(IcingSearchEngineOptimizeTest,
AllPageTokensShouldBeInvalidatedAfterOptimization) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
@@ -205,9 +206,24 @@ TEST_F(IcingSearchEngineOptimizeTest,
}
TEST_F(IcingSearchEngineOptimizeTest, OptimizationShouldRemoveDeletedDocs) {
- IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
@@ -216,7 +232,7 @@ TEST_F(IcingSearchEngineOptimizeTest, OptimizationShouldRemoveDeletedDocs) {
{
IcingSearchEngine icing(icing_options, GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
// Deletes document1
@@ -247,10 +263,19 @@ TEST_F(IcingSearchEngineOptimizeTest, OptimizationShouldRemoveDeletedDocs) {
TEST_F(IcingSearchEngineOptimizeTest,
OptimizationShouldDeleteTemporaryDirectory) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
IcingSearchEngine icing(icing_options, GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
// Create a tmp dir that will be used in Optimize() to swap files,
// this validates that any tmp dirs will be deleted before using.
@@ -271,12 +296,26 @@ TEST_F(IcingSearchEngineOptimizeTest,
}
TEST_F(IcingSearchEngineOptimizeTest, GetOptimizeInfoHasCorrectStats) {
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
DocumentProto document2 = DocumentBuilder()
.SetKey("namespace", "uri2")
.SetSchema("Message")
- .AddStringProperty("body", "message body")
- .AddInt64Property("indexableInteger", 456)
+ .AddStringProperty("body", "message body two")
.SetCreationTimestampMs(100)
.SetTtlMs(500)
.Build();
@@ -298,7 +337,7 @@ TEST_F(IcingSearchEngineOptimizeTest, GetOptimizeInfoHasCorrectStats) {
EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
// Only have active documents, nothing is optimizable yet.
@@ -356,11 +395,50 @@ TEST_F(IcingSearchEngineOptimizeTest, GetOptimizeInfoHasCorrectStats) {
}
TEST_F(IcingSearchEngineOptimizeTest, GetAndPutShouldWorkAfterOptimization) {
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
- DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
- DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body three")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document4 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri4")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body four")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document5 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri5")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body five")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
@@ -368,7 +446,7 @@ TEST_F(IcingSearchEngineOptimizeTest, GetAndPutShouldWorkAfterOptimization) {
{
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
@@ -474,12 +552,34 @@ TEST_F(IcingSearchEngineOptimizeTest,
}
TEST_F(IcingSearchEngineOptimizeTest, DeleteShouldWorkAfterOptimization) {
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
{
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
@@ -557,13 +657,14 @@ TEST_F(IcingSearchEngineOptimizeTest, OptimizationFailureUninitializesIcing) {
ASSERT_THAT(icing.Optimize().status(), ProtoStatusIs(StatusProto::INTERNAL));
// Ordinary operations should fail safely.
- SchemaProto simple_schema;
- auto type = simple_schema.add_types();
- type->set_schema_type("type0");
- auto property = type->add_properties();
- property->set_property_name("prop0");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ SchemaProto simple_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("type0").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop0")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
DocumentProto simple_doc = DocumentBuilder()
.SetKey("namespace0", "uri0")
@@ -606,27 +707,30 @@ TEST_F(IcingSearchEngineOptimizeTest, OptimizationFailureUninitializesIcing) {
TEST_F(IcingSearchEngineOptimizeTest, SetSchemaShouldWorkAfterOptimization) {
// Creates 3 test schemas
- SchemaProto schema1 = SchemaProto(CreateMessageSchema());
+ SchemaProto schema1 =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
SchemaProto schema2 = SchemaProto(schema1);
- auto new_property2 = schema2.mutable_types(0)->add_properties();
- new_property2->set_property_name("property2");
- new_property2->set_data_type(PropertyConfigProto::DataType::STRING);
- new_property2->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- new_property2->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- new_property2->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
+ *schema2.mutable_types(0)->add_properties() =
+ PropertyConfigBuilder()
+ .SetName("property2")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
SchemaProto schema3 = SchemaProto(schema2);
- auto new_property3 = schema3.mutable_types(0)->add_properties();
- new_property3->set_property_name("property3");
- new_property3->set_data_type(PropertyConfigProto::DataType::STRING);
- new_property3->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- new_property3->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- new_property3->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
+ *schema3.mutable_types(0)->add_properties() =
+ PropertyConfigBuilder()
+ .SetName("property3")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
{
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
@@ -644,7 +748,29 @@ TEST_F(IcingSearchEngineOptimizeTest, SetSchemaShouldWorkAfterOptimization) {
}
TEST_F(IcingSearchEngineOptimizeTest, SearchShouldWorkAfterOptimization) {
- DocumentProto document = CreateMessageDocument("namespace", "uri");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
SearchSpecProto search_spec1;
search_spec1.set_term_match_type(TermMatchType::PREFIX);
@@ -664,7 +790,7 @@ TEST_F(IcingSearchEngineOptimizeTest, SearchShouldWorkAfterOptimization) {
{
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
@@ -703,14 +829,308 @@ TEST_F(IcingSearchEngineOptimizeTest, SearchShouldWorkAfterOptimization) {
}
TEST_F(IcingSearchEngineOptimizeTest,
+ JoinShouldWorkAfterOptimizationDeleteParent) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person two")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ DocumentProto message1 =
+ DocumentBuilder()
+ .SetKey("namespace", "message1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .AddStringProperty("senderQualifiedId", "namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message2 =
+ DocumentBuilder()
+ .SetKey("namespace", "message2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .AddStringProperty("senderQualifiedId", "namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message3 =
+ DocumentBuilder()
+ .SetKey("namespace", "message3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body three")
+ .AddStringProperty("senderQualifiedId", "namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // Prepare join search spec to join a query for `name:person` with a child
+ // query for `body:message` based on the child's `senderQualifiedId` field.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_max_joined_child_count(100);
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:message");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ // Person1 is going to be deleted below. Only person2 which is joined with
+ // message3 should match the query.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person2;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message3;
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message3).status(), ProtoIsOk());
+ // Delete parent document: person1
+ ASSERT_THAT(icing.Delete("namespace", "person1").status(), ProtoIsOk());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+
+ // Validates that join search query works right after Optimize()
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest,
+ JoinShouldWorkAfterOptimizationDeleteChild) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person two")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ DocumentProto message1 =
+ DocumentBuilder()
+ .SetKey("namespace", "message1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .AddStringProperty("senderQualifiedId", "namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message2 =
+ DocumentBuilder()
+ .SetKey("namespace", "message2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .AddStringProperty("senderQualifiedId", "namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message3 =
+ DocumentBuilder()
+ .SetKey("namespace", "message3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body three")
+ .AddStringProperty("senderQualifiedId", "namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // Prepare join search spec to join a query for `name:person` with a child
+ // query for `body:message` based on the child's `senderQualifiedId` field.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_max_joined_child_count(100);
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:message");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ // Message1 and message3 are going to be deleted below. Both person1 and
+ // person2 should be included even though person2 has no child (since we're
+ // doing left join).
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto1 =
+ expected_search_result_proto.mutable_results()->Add();
+ *result_proto1->mutable_document() = person1;
+ *result_proto1->mutable_joined_results()->Add()->mutable_document() =
+ message2;
+  SearchResultProto::ResultProto* result_proto2 =
+      expected_search_result_proto.mutable_results()->Add();
+  *result_proto2->mutable_document() = person2;
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message3).status(), ProtoIsOk());
+ // Delete child documents: message1 and message3
+ ASSERT_THAT(icing.Delete("namespace", "message1").status(), ProtoIsOk());
+ ASSERT_THAT(icing.Delete("namespace", "message3").status(), ProtoIsOk());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+
+ // Validates that join search query works right after Optimize()
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest,
IcingShouldWorkFineIfOptimizationIsAborted) {
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ DocumentProto message1 =
+ DocumentBuilder()
+ .SetKey("namespace", "message1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
{
// Initializes a normal icing to create files needed
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message1).status(), ProtoIsOk());
}
// Creates a mock filesystem in which DeleteDirectoryRecursively() always
@@ -733,25 +1153,33 @@ TEST_F(IcingSearchEngineOptimizeTest,
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
+ *expected_get_result_proto.mutable_document() = message1;
+ EXPECT_THAT(icing.Get("namespace", "message1",
+ GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ DocumentProto message2 =
+ DocumentBuilder()
+ .SetKey("namespace", "message2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
- EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message2).status(), ProtoIsOk());
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
+ message2;
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
+ message1;
// Verify term search
SearchSpecProto search_spec1;
- search_spec1.set_query("m");
+ search_spec1.set_query("body:m");
search_spec1.set_term_match_type(TermMatchType::PREFIX);
SearchResultProto search_result_proto1 =
@@ -772,10 +1200,68 @@ TEST_F(IcingSearchEngineOptimizeTest,
ResultSpecProto::default_instance());
  EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
                                        expected_search_result_proto));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `body:message` based on the child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_max_joined_child_count(100);
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:message");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ SearchResultProto expected_join_search_result_proto;
+ expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_join_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message2;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message1;
+
+ SearchResultProto search_result_proto3 =
+ icing.Search(search_spec3, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto3, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_search_result_proto));
}
TEST_F(IcingSearchEngineOptimizeTest,
OptimizationShouldRecoverIfFileDirectoriesAreMissing) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
// Creates a mock filesystem in which SwapFiles() always fails and deletes the
// directories. This will fail IcingSearchEngine::OptimizeDocumentStore().
auto mock_filesystem = std::make_unique<MockFilesystem>();
@@ -793,9 +1279,8 @@ TEST_F(IcingSearchEngineOptimizeTest,
std::make_unique<FakeClock>(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
// Optimize() fails due to filesystem error
OptimizeResultProto result = icing.Optimize();
@@ -873,6 +1358,30 @@ TEST_F(IcingSearchEngineOptimizeTest,
TEST_F(IcingSearchEngineOptimizeTest,
OptimizationShouldRecoverIfDataFilesAreMissing) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
// Creates a mock filesystem in which SwapFiles() always fails and empties the
// directories. This will fail IcingSearchEngine::OptimizeDocumentStore().
auto mock_filesystem = std::make_unique<MockFilesystem>();
@@ -892,9 +1401,8 @@ TEST_F(IcingSearchEngineOptimizeTest,
std::make_unique<FakeClock>(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
// Optimize() fails due to filesystem error
OptimizeResultProto result = icing.Optimize();
@@ -969,23 +1477,61 @@ TEST_F(IcingSearchEngineOptimizeTest,
expected_search_result_proto));
}
-TEST_F(IcingSearchEngineOptimizeTest, OptimizeStatsProtoTest) {
+TEST_F(IcingSearchEngineOptimizeTest, OptimizeThresholdTest) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .AddInt64Property("indexableInteger", 1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .AddInt64Property("indexableInteger", 2)
+ .SetCreationTimestampMs(9000)
+ .SetTtlMs(500)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body three")
+ .AddInt64Property("indexableInteger", 3)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
auto fake_clock = std::make_unique<FakeClock>();
fake_clock->SetTimerElapsedMilliseconds(5);
fake_clock->SetSystemTimeMilliseconds(10000);
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ // Set the threshold to 0.9 to test that the threshold works.
+ options.set_optimize_rebuild_index_threshold(0.9);
auto icing = std::make_unique<TestIcingSearchEngine>(
- GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ options, std::make_unique<Filesystem>(),
std::make_unique<IcingFilesystem>(), std::move(fake_clock),
GetTestJniCache());
ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing->SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Create three documents.
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- document2.set_creation_timestamp_ms(9000);
- document2.set_ttl_ms(500);
- DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ // Add three documents.
ASSERT_THAT(icing->Put(document1).status(), ProtoIsOk());
ASSERT_THAT(icing->Put(document2).status(), ProtoIsOk());
ASSERT_THAT(icing->Put(document3).status(), ProtoIsOk());
@@ -1022,7 +1568,7 @@ TEST_F(IcingSearchEngineOptimizeTest, OptimizeStatsProtoTest) {
fake_clock->SetTimerElapsedMilliseconds(5);
fake_clock->SetSystemTimeMilliseconds(20000);
icing = std::make_unique<TestIcingSearchEngine>(
- GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ options, std::make_unique<Filesystem>(),
std::make_unique<IcingFilesystem>(), std::move(fake_clock),
GetTestJniCache());
ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
@@ -1069,6 +1615,144 @@ TEST_F(IcingSearchEngineOptimizeTest, OptimizeStatsProtoTest) {
EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
}
+TEST_F(IcingSearchEngineOptimizeTest, OptimizeStatsProtoTest) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .AddInt64Property("indexableInteger", 1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .AddInt64Property("indexableInteger", 2)
+ .SetCreationTimestampMs(9000)
+ .SetTtlMs(500)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body three")
+ .AddInt64Property("indexableInteger", 3)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(5);
+ fake_clock->SetSystemTimeMilliseconds(10000);
+ // Use the default Icing options, so that a change to the default value will
+ // require updating this test.
+ auto icing = std::make_unique<TestIcingSearchEngine>(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::move(fake_clock),
+ GetTestJniCache());
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ // Add three documents.
+ ASSERT_THAT(icing->Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing->Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing->Put(document3).status(), ProtoIsOk());
+
+ // Delete the first document.
+ ASSERT_THAT(icing->Delete(document1.namespace_(), document1.uri()).status(),
+ ProtoIsOk());
+ ASSERT_THAT(icing->PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
+
+ OptimizeStatsProto expected;
+ expected.set_latency_ms(5);
+ expected.set_document_store_optimize_latency_ms(5);
+ expected.set_index_restoration_latency_ms(5);
+ expected.set_num_original_documents(3);
+ expected.set_num_deleted_documents(1);
+ expected.set_num_expired_documents(1);
+ expected.set_index_restoration_mode(OptimizeStatsProto::FULL_INDEX_REBUILD);
+
+ // Run Optimize
+ OptimizeResultProto result = icing->Optimize();
+ // Depending on how many blocks the documents end up spread across, it's
+ // possible that Optimize can remove documents without shrinking storage. The
+ // first Optimize call will also write the OptimizeStatusProto for the first
+ // time which will take up 1 block. So make sure that before_size is no less
+ // than after_size - 1 block.
+ uint32_t page_size = getpagesize();
+ EXPECT_THAT(result.optimize_stats().storage_size_before(),
+ Ge(result.optimize_stats().storage_size_after() - page_size));
+ result.mutable_optimize_stats()->clear_storage_size_before();
+ result.mutable_optimize_stats()->clear_storage_size_after();
+ EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
+
+ fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(5);
+ fake_clock->SetSystemTimeMilliseconds(20000);
+ // Use the default Icing options, so that a change to the default value will
+ // require updating this test.
+ icing = std::make_unique<TestIcingSearchEngine>(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::move(fake_clock),
+ GetTestJniCache());
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+
+ expected = OptimizeStatsProto();
+ expected.set_latency_ms(5);
+ expected.set_document_store_optimize_latency_ms(5);
+ expected.set_index_restoration_latency_ms(5);
+ expected.set_num_original_documents(1);
+ expected.set_num_deleted_documents(0);
+ expected.set_num_expired_documents(0);
+ expected.set_time_since_last_optimize_ms(10000);
+ expected.set_index_restoration_mode(OptimizeStatsProto::FULL_INDEX_REBUILD);
+
+ // Run Optimize
+ result = icing->Optimize();
+ EXPECT_THAT(result.optimize_stats().storage_size_before(),
+ Eq(result.optimize_stats().storage_size_after()));
+ result.mutable_optimize_stats()->clear_storage_size_before();
+ result.mutable_optimize_stats()->clear_storage_size_after();
+ EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
+
+ // Delete the last document.
+ ASSERT_THAT(icing->Delete(document3.namespace_(), document3.uri()).status(),
+ ProtoIsOk());
+
+ expected = OptimizeStatsProto();
+ expected.set_latency_ms(5);
+ expected.set_document_store_optimize_latency_ms(5);
+ expected.set_index_restoration_latency_ms(5);
+ expected.set_num_original_documents(1);
+ expected.set_num_deleted_documents(1);
+ expected.set_num_expired_documents(0);
+ expected.set_time_since_last_optimize_ms(0);
+ expected.set_index_restoration_mode(OptimizeStatsProto::FULL_INDEX_REBUILD);
+
+ // Run Optimize
+ result = icing->Optimize();
+ EXPECT_THAT(result.optimize_stats().storage_size_before(),
+ Ge(result.optimize_stats().storage_size_after()));
+ result.mutable_optimize_stats()->clear_storage_size_before();
+ result.mutable_optimize_stats()->clear_storage_size_after();
+ EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
+}
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/icing-search-engine_schema_test.cc b/icing/icing-search-engine_schema_test.cc
index 38a0464..7081ba2 100644
--- a/icing/icing-search-engine_schema_test.cc
+++ b/icing/icing-search-engine_schema_test.cc
@@ -26,6 +26,7 @@
#include "icing/file/mock-filesystem.h"
#include "icing/icing-search-engine.h"
#include "icing/jni/jni-cache.h"
+#include "icing/join/join-processor.h"
#include "icing/portable/endian.h"
#include "icing/portable/equals-proto.h"
#include "icing/portable/platform.h"
@@ -153,6 +154,10 @@ ScoringSpecProto GetDefaultScoringSpec() {
return scoring_spec;
}
+// TODO(b/272145329): create SearchSpecBuilder, JoinSpecBuilder,
+// SearchResultProtoBuilder and ResultProtoBuilder for unit tests and build all
+// instances by them.
+
TEST_F(IcingSearchEngineSchemaTest,
CircularReferenceCreateSectionManagerReturnsInvalidArgument) {
// Create a type config with a circular reference.
@@ -1077,7 +1082,203 @@ TEST_F(IcingSearchEngineSchemaTest,
}
TEST_F(IcingSearchEngineSchemaTest,
- ForceSetSchemaPropertyDeletionTriggersIndexRestorationAndReturnsOk) {
+ SetSchemaNewJoinablePropertyTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create "Message" schema with 3 properties:
+ // - "subject": string type, non-joinable. No joinable property id assigned.
+ // It is indexed and used for searching only.
+ // - "receiverQualifiedId": string type, non-joinable. No joinable property id
+ // assigned.
+ // - "senderQualifiedId": string type, Qualified Id type joinable. Joinable
+ // property id = 0.
+ SchemaProto schema_one =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("receiverQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_NONE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result = icing.SetSchema(schema_one);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_new_schema_types()->Add("Message");
+ expected_set_schema_result.mutable_new_schema_types()->Add("Person");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person two")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message1")
+ .SetSchema("Message")
+ .AddStringProperty("subject", "message")
+ .AddStringProperty("receiverQualifiedId", "namespace#person1")
+ .AddStringProperty("senderQualifiedId", "namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ EXPECT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `subject:message` based on the child's `receiverQualifiedId` field.
+ // Since "receiverQualifiedId" is not JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ // joining on that property should only return the "left-side" (`name:person`)
+ // of the join.
+ SearchSpecProto search_spec_join_by_receiver;
+ search_spec_join_by_receiver.set_query("name:person");
+ search_spec_join_by_receiver.set_term_match_type(TermMatchType::EXACT_ONLY);
+ JoinSpecProto* join_spec = search_spec_join_by_receiver.mutable_join_spec();
+ join_spec->set_max_joined_child_count(100);
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("receiverQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("subject:message");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ SearchResultProto expected_empty_child_search_result_proto;
+ expected_empty_child_search_result_proto.mutable_status()->set_code(
+ StatusProto::OK);
+ *expected_empty_child_search_result_proto.mutable_results()
+ ->Add()
+ ->mutable_document() = person2;
+ *expected_empty_child_search_result_proto.mutable_results()
+ ->Add()
+ ->mutable_document() = person1;
+ SearchResultProto actual_results =
+ icing.Search(search_spec_join_by_receiver, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_empty_child_search_result_proto));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `subject:message` based on the child's `senderQualifiedId` field.
+ // Since "senderQualifiedId" is JOINABLE_VALUE_TYPE_QUALIFIED_ID, joining on
+ // that property should return both "left-side" (`name:person`) and
+ // "right-side" (`subject:message`) of the join.
+ SearchSpecProto search_spec_join_by_sender = search_spec_join_by_receiver;
+ join_spec = search_spec_join_by_sender.mutable_join_spec();
+ join_spec->set_child_property_expression("senderQualifiedId");
+
+ SearchResultProto expected_join_by_sender_search_result_proto;
+ expected_join_by_sender_search_result_proto.mutable_status()->set_code(
+ StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_join_by_sender_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person2;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message;
+ *expected_join_by_sender_search_result_proto.mutable_results()
+ ->Add()
+ ->mutable_document() = person1;
+ actual_results =
+ icing.Search(search_spec_join_by_sender, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_by_sender_search_result_proto));
+
+ // Change "Message" schema to:
+ // - "subject": string type, non-joinable. No joinable property id assigned.
+ // - "receiverQualifiedId": string type, Qualified Id joinable. Joinable
+ // property id = 0.
+ // - "senderQualifiedId": string type, Qualified Id joinable. Joinable
+ // property id = 1.
+ SchemaProto schema_two = schema_one;
+ schema_two.mutable_types(1)
+ ->mutable_properties(1)
+ ->mutable_joinable_config()
+ ->set_value_type(JOINABLE_VALUE_TYPE_QUALIFIED_ID);
+ // Index restoration should be triggered here because new schema requires more
+ // joinable properties. Also new joinable property ids will be reassigned and
+ // index restoration should use new joinable property ids to rebuild.
+ set_schema_result = icing.SetSchema(schema_two);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_join_incompatible_changed_schema_types()
+ ->Add("Message");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `subject:message` based on the child's `receiverQualifiedId` field.
+ // Since we've changed "receiverQualifiedId" to be
+ // JOINABLE_VALUE_TYPE_QUALIFIED_ID, joining on that property should now
+ // return both "left-side" (`name:person`) and "right-side"
+ // (`subject:message`) of the join.
+ SearchResultProto expected_join_by_receiver_search_result_proto;
+ expected_join_by_receiver_search_result_proto.mutable_status()->set_code(
+ StatusProto::OK);
+ result_proto =
+ expected_join_by_receiver_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person1;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message;
+ *expected_join_by_receiver_search_result_proto.mutable_results()
+ ->Add()
+ ->mutable_document() = person2;
+ actual_results =
+ icing.Search(search_spec_join_by_receiver, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_by_receiver_search_result_proto));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `subject:message` based on the child's `senderQualifiedId` field. We should
+ // get the same set of result since `senderQualifiedId` is unchanged.
+ actual_results =
+ icing.Search(search_spec_join_by_sender, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_by_sender_search_result_proto));
+}
+
+TEST_F(
+ IcingSearchEngineSchemaTest,
+ ForceSetSchemaIndexedPropertyDeletionTriggersIndexRestorationAndReturnsOk) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
@@ -1221,9 +1422,161 @@ TEST_F(IcingSearchEngineSchemaTest,
expected_search_result_proto));
}
+TEST_F(IcingSearchEngineSchemaTest,
+ ForceSetSchemaJoinablePropertyDeletionTriggersIndexRestoration) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create "Email" schema with 2 joinable properties:
+ // - "receiverQualifiedId": qualified id joinable. Joinable property id = 0.
+ // - "senderQualifiedId": qualified id joinable. Joinable property id = 1.
+ SchemaProto email_with_receiver_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("receiverQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result =
+ icing.SetSchema(email_with_receiver_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_new_schema_types()->Add("Person");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ DocumentProto person = DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("name", "person")
+ .Build();
+ // Create an email document with only "senderQualifiedId" joinable property.
+ DocumentProto email =
+ DocumentBuilder()
+ .SetKey("namespace", "email")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .Build();
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(email).status(), ProtoIsOk());
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `subject:tps` based on the child's `senderQualifiedId` field. We should be
+ // able to join person and email documents by this property.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = email;
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("name:person");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_max_joined_child_count(100);
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("subject:tps");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Now update the schema to remove "receiverQualifiedId" fields. This is
+ // backwards incompatible, but document should be preserved because it doesn't
+ // contain "receiverQualifiedId" field. Also since it is join incompatible, we
+ // have to rebuild join index.
+ // - "senderQualifiedId": qualified id joinable. Joinable property id = 0.
+ //
+ // If the index is not correctly rebuilt, then the joinable data of
+ // "senderQualifiedId" in the joinable index will still have old joinable
+ // property id of 1 and therefore won't take effect for join search query.
+ SchemaProto email_without_receiver_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Although we've just deleted an existing property "receiverQualifiedId" from
+ // schema "Email", some email documents will still be preserved because they
+ // don't have "receiverQualifiedId" property.
+ set_schema_result =
+ icing.SetSchema(email_without_receiver_schema,
+ /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_join_incompatible_changed_schema_types()
+ ->Add("Email");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `subject:tps` based on the child's `senderQualifiedId` field. We should
+ // still be able to join person and email documents by this property.
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
TEST_F(
IcingSearchEngineSchemaTest,
- ForceSetSchemaPropertyDeletionAndAdditionTriggersIndexRestorationAndReturnsOk) {
+ ForceSetSchemaIndexedPropertyDeletionAndAdditionTriggersIndexRestorationAndReturnsOk) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
@@ -1368,6 +1721,161 @@ TEST_F(
expected_search_result_proto));
}
+TEST_F(
+ IcingSearchEngineSchemaTest,
+ ForceSetSchemaJoinablePropertyDeletionAndAdditionTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create "Email" schema with 2 joinable properties:
+ // - "receiverQualifiedId": qualified id joinable. Joinable property id = 0.
+ // - "senderQualifiedId": qualified id joinable. Joinable property id = 1.
+ SchemaProto email_with_body_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("receiverQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result =
+ icing.SetSchema(email_with_body_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_new_schema_types()->Add("Person");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ DocumentProto person = DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("name", "person")
+ .Build();
+ // Create an email document with subject and "senderQualifiedId" properties.
+ DocumentProto email =
+ DocumentBuilder()
+ .SetKey("namespace", "email")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .Build();
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(email).status(), ProtoIsOk());
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `subject:tps` based on the child's `senderQualifiedId` field. We should be
+ // able to join person and email documents by this property.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = email;
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("name:person");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_max_joined_child_count(100);
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("subject:tps");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Now update the schema to remove the "receiverQualifiedId" field and add
+ // "zQualifiedId". This is backwards incompatible, but the document should
+ // be preserved because it doesn't contain a "receiverQualifiedId" field and
+ // "zQualifiedId" is optional.
+ // - "senderQualifiedId": qualified id joinable. Joinable property id = 0.
+ // - "zQualifiedId": qualified id joinable. Joinable property id = 1.
+ //
+ // If the index is not correctly rebuilt, then the joinable data of
+ // "senderQualifiedId" in the joinable index will still have old joinable
+ // property id of 1 and therefore won't take effect for join search query.
+ SchemaProto email_no_body_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("zQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ set_schema_result = icing.SetSchema(
+ email_no_body_schema, /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_join_incompatible_changed_schema_types()
+ ->Add("Email");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `subject:tps` based on the child's `senderQualifiedId` field. We should
+ // still be able to join person and email documents by this property.
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
TEST_F(IcingSearchEngineSchemaTest,
ForceSetSchemaIncompatibleNestedDocsAreDeleted) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
@@ -1485,9 +1993,6 @@ TEST_F(IcingSearchEngineSchemaTest,
EXPECT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
}
-// TODO(b/256022027): add unit tests for join incompatible schema change to make
-// sure the joinable cache is rebuilt correctly.
-
TEST_F(IcingSearchEngineSchemaTest, SetSchemaRevalidatesDocumentsAndReturnsOk) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
diff --git a/icing/icing-search-engine_search_test.cc b/icing/icing-search-engine_search_test.cc
index 5648184..e953d71 100644
--- a/icing/icing-search-engine_search_test.cc
+++ b/icing/icing-search-engine_search_test.cc
@@ -4388,6 +4388,96 @@ TEST_P(IcingSearchEngineSearchTest, LatinSnippetTest) {
ASSERT_THAT(match, Eq("ḞÖÖ"));
}
+TEST_P(IcingSearchEngineSearchTest,
+ DocumentStoreNamespaceIdFingerprintCompatible) {
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
+
+ // Initialize with some documents with document_store_namespace_id_fingerprint
+ // being false.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_document_store_namespace_id_fingerprint(false);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates and inserts 3 documents
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ }
+
+ // Reinitialize with document_store_namespace_id_fingerprint being true,
+ // and test that we are still able to read/query docs.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_document_store_namespace_id_fingerprint(true);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ ASSERT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoIsOk());
+ ASSERT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoIsOk());
+ ASSERT_THAT(
+ icing.Get("namespace", "uri3", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(3));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document3));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document2));
+ EXPECT_THAT(results.results(2).document(), EqualsProto(document1));
+ }
+
+ // Reinitialize with document_store_namespace_id_fingerprint being false,
+ // and test that we are still able to read/query docs.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_document_store_namespace_id_fingerprint(false);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ ASSERT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoIsOk());
+ ASSERT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoIsOk());
+ ASSERT_THAT(
+ icing.Get("namespace", "uri3", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(3));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document3));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document2));
+ EXPECT_THAT(results.results(2).document(), EqualsProto(document1));
+ }
+}
+
INSTANTIATE_TEST_SUITE_P(
IcingSearchEngineSearchTest, IcingSearchEngineSearchTest,
testing::Values(
diff --git a/icing/icing-search-engine_suggest_test.cc b/icing/icing-search-engine_suggest_test.cc
index 6973ad0..b3aeafc 100644
--- a/icing/icing-search-engine_suggest_test.cc
+++ b/icing/icing-search-engine_suggest_test.cc
@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/icing-search-engine.h"
-
#include <cstdint>
#include <limits>
#include <memory>
@@ -25,6 +23,7 @@
#include "gtest/gtest.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
+#include "icing/icing-search-engine.h"
#include "icing/jni/jni-cache.h"
#include "icing/portable/endian.h"
#include "icing/portable/equals-proto.h"
@@ -1508,6 +1507,95 @@ TEST_F(IcingSearchEngineSuggestTest,
UnorderedElementsAre(EqualsProto(suggestionBarCatSubjectFoo)));
}
+TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_InvalidPrefixTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "bar fo") // "bar fo"
+ .AddStringProperty("body", "fool")
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "bar cat foo") // "bar cat foo"
+ .AddStringProperty("body", "fool")
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace1", "uri3")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool") // "fool"
+ .AddStringProperty("body", "fool")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ // Search for "f OR"
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f OR");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ if (SearchSpecProto::default_instance().search_type() ==
+ SearchSpecProto::SearchType::ICING_RAW_QUERY) {
+ EXPECT_THAT(response.status(), ProtoIsOk());
+ EXPECT_THAT(response.suggestions(), IsEmpty());
+ } else {
+ EXPECT_THAT(response.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+ EXPECT_THAT(response.suggestions(), IsEmpty());
+ }
+
+ // TODO(b/208654892): Update handling for hyphens to only consider it a hyphen
+ // within a TEXT token (rather than a MINUS token) when surrounded on both
+ // sides by TEXT rather than just preceded by TEXT.
+ // Search for "f-"
+ suggestion_spec.set_prefix("f-");
+ response = icing.SearchSuggestions(suggestion_spec);
+ EXPECT_THAT(response.status(), ProtoIsOk());
+ EXPECT_THAT(response.suggestions(), IsEmpty());
+
+ // Search for "f:"
+ suggestion_spec.set_prefix("f:");
+ response = icing.SearchSuggestions(suggestion_spec);
+ if (SearchSpecProto::default_instance().search_type() ==
+ SearchSpecProto::SearchType::ICING_RAW_QUERY) {
+ EXPECT_THAT(response.status(), ProtoIsOk());
+ EXPECT_THAT(response.suggestions(), IsEmpty());
+ } else {
+ EXPECT_THAT(response.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+ EXPECT_THAT(response.suggestions(), IsEmpty());
+ }
+
+ // Search for "OR OR - :"
+ suggestion_spec.set_prefix("OR OR - :");
+ response = icing.SearchSuggestions(suggestion_spec);
+ if (SearchSpecProto::default_instance().search_type() ==
+ SearchSpecProto::SearchType::ICING_RAW_QUERY) {
+ EXPECT_THAT(response.status(), ProtoIsOk());
+ EXPECT_THAT(response.suggestions(), IsEmpty());
+ } else {
+ EXPECT_THAT(response.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+ EXPECT_THAT(response.suggestions(), IsEmpty());
+ }
+}
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc
index 3a9b4ee..47baabe 100644
--- a/icing/index/index-processor_test.cc
+++ b/icing/index/index-processor_test.cc
@@ -40,6 +40,8 @@
#include "icing/index/numeric/numeric-index.h"
#include "icing/index/string-section-indexing-handler.h"
#include "icing/index/term-property-id.h"
+#include "icing/join/qualified-id-joinable-property-indexing-handler.h"
+#include "icing/join/qualified-id-type-joinable-index.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/legacy/index/icing-mock-filesystem.h"
#include "icing/portable/platform.h"
@@ -51,6 +53,7 @@
#include "icing/schema/schema-util.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/icu-data-file-helper.h"
@@ -160,7 +163,9 @@ class IndexProcessorTest : public Test {
index_dir_ = base_dir_ + "/index";
integer_index_dir_ = base_dir_ + "/integer_index";
+ qualified_id_join_index_dir_ = base_dir_ + "/qualified_id_join_index";
schema_store_dir_ = base_dir_ + "/schema_store";
+ doc_store_dir_ = base_dir_ + "/doc_store";
Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -169,6 +174,10 @@ class IndexProcessorTest : public Test {
ICING_ASSERT_OK_AND_ASSIGN(
integer_index_, IntegerIndex::Create(filesystem_, integer_index_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(qualified_id_join_index_,
+ QualifiedIdTypeJoinableIndex::Create(
+ filesystem_, qualified_id_join_index_dir_));
+
language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
ICING_ASSERT_OK_AND_ASSIGN(
lang_segmenter_,
@@ -260,6 +269,13 @@ class IndexProcessorTest : public Test {
.Build();
ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ doc_store_ = std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<StringSectionIndexingHandler>
string_section_indexing_handler,
@@ -269,9 +285,16 @@ class IndexProcessorTest : public Test {
integer_section_indexing_handler,
IntegerSectionIndexingHandler::Create(
&fake_clock_, integer_index_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler>
+ qualified_id_joinable_property_indexing_handler,
+ QualifiedIdJoinablePropertyIndexingHandler::Create(
+ &fake_clock_, qualified_id_join_index_.get()));
std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
handlers.push_back(std::move(string_section_indexing_handler));
handlers.push_back(std::move(integer_section_indexing_handler));
+ handlers.push_back(
+ std::move(qualified_id_joinable_property_indexing_handler));
index_processor_ =
std::make_unique<IndexProcessor>(std::move(handlers), &fake_clock_);
@@ -281,9 +304,11 @@ class IndexProcessorTest : public Test {
void TearDown() override {
index_processor_.reset();
+ doc_store_.reset();
schema_store_.reset();
normalizer_.reset();
lang_segmenter_.reset();
+ qualified_id_join_index_.reset();
integer_index_.reset();
index_.reset();
@@ -298,13 +323,17 @@ class IndexProcessorTest : public Test {
std::string base_dir_;
std::string index_dir_;
std::string integer_index_dir_;
+ std::string qualified_id_join_index_dir_;
std::string schema_store_dir_;
+ std::string doc_store_dir_;
std::unique_ptr<Index> index_;
std::unique_ptr<NumericIndex<int64_t>> integer_index_;
+ std::unique_ptr<QualifiedIdTypeJoinableIndex> qualified_id_join_index_;
std::unique_ptr<LanguageSegmenter> lang_segmenter_;
std::unique_ptr<Normalizer> normalizer_;
std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> doc_store_;
std::unique_ptr<IndexProcessor> index_processor_;
};
@@ -788,9 +817,16 @@ TEST_F(IndexProcessorTest, OutOfOrderDocumentIdsInRecoveryMode) {
integer_section_indexing_handler,
IntegerSectionIndexingHandler::Create(
&fake_clock_, integer_index_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler>
+ qualified_id_joinable_property_indexing_handler,
+ QualifiedIdJoinablePropertyIndexingHandler::Create(
+ &fake_clock_, qualified_id_join_index_.get()));
std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
handlers.push_back(std::move(string_section_indexing_handler));
handlers.push_back(std::move(integer_section_indexing_handler));
+ handlers.push_back(
+ std::move(qualified_id_joinable_property_indexing_handler));
IndexProcessor index_processor(std::move(handlers), &fake_clock_,
/*recovery_mode=*/true);
@@ -1506,10 +1542,10 @@ TEST_F(IndexProcessorTest, IndexableIntegerProperty) {
EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
IsOk());
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocHitInfoIterator> itr,
- integer_index_->GetIterator(kIndexableIntegerProperty, /*key_lower=*/1,
- /*key_upper=*/5));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
+ integer_index_->GetIterator(
+ kIndexableIntegerProperty, /*key_lower=*/1,
+ /*key_upper=*/5, *doc_store_, *schema_store_));
EXPECT_THAT(
GetHits(std::move(itr)),
@@ -1535,10 +1571,10 @@ TEST_F(IndexProcessorTest, IndexableIntegerPropertyNoMatch) {
EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
IsOk());
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocHitInfoIterator> itr,
- integer_index_->GetIterator(kIndexableIntegerProperty, /*key_lower=*/-1,
- /*key_upper=*/0));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
+ integer_index_->GetIterator(
+ kIndexableIntegerProperty, /*key_lower=*/-1,
+ /*key_upper=*/0, *doc_store_, *schema_store_));
EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
}
diff --git a/icing/index/iterator/doc-hit-info-iterator-not.cc b/icing/index/iterator/doc-hit-info-iterator-not.cc
index 1818f08..38b1ded 100644
--- a/icing/index/iterator/doc-hit-info-iterator-not.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-not.cc
@@ -63,8 +63,8 @@ libtextclassifier3::Status DocHitInfoIteratorNot::Advance() {
libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
DocHitInfoIteratorNot::TrimRightMostNode() && {
// Don't generate suggestion if the last operator is NOT.
- return absl_ports::UnimplementedError(
- "Cannot trim right most node in NOT operator.");
+ return absl_ports::InvalidArgumentError(
+ "Cannot generate suggestion if the last term is NOT operator.");
}
int32_t DocHitInfoIteratorNot::GetNumBlocksInspected() const {
diff --git a/icing/index/iterator/doc-hit-info-iterator-not_test.cc b/icing/index/iterator/doc-hit-info-iterator-not_test.cc
index 54d6c36..5a8ce2c 100644
--- a/icing/index/iterator/doc-hit-info-iterator-not_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-not_test.cc
@@ -163,7 +163,7 @@ TEST(DocHitInfoIteratorNotTest, TrimNotIterator) {
DocHitInfoIteratorNot not_iterator(std::move(to_be_excluded_iterator),
/*document_id_limit=*/5);
EXPECT_THAT(std::move(not_iterator).TrimRightMostNode(),
- StatusIs(libtextclassifier3::StatusCode::UNIMPLEMENTED));
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
} // namespace
diff --git a/icing/index/numeric/doc-hit-info-iterator-numeric.h b/icing/index/numeric/doc-hit-info-iterator-numeric.h
index bf990d1..fc66a1d 100644
--- a/icing/index/numeric/doc-hit-info-iterator-numeric.h
+++ b/icing/index/numeric/doc-hit-info-iterator-numeric.h
@@ -49,8 +49,8 @@ class DocHitInfoIteratorNumeric : public DocHitInfoIterator {
}
libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override {
- return absl_ports::UnimplementedError(
- "Cannot trim right most node in numeric operator.");
+ return absl_ports::InvalidArgumentError(
+ "Cannot generate suggestion if the last term is numeric operator.");
}
int32_t GetNumBlocksInspected() const override { return 0; }
diff --git a/icing/index/numeric/dummy-numeric-index.h b/icing/index/numeric/dummy-numeric-index.h
index 164866c..7cfb102 100644
--- a/icing/index/numeric/dummy-numeric-index.h
+++ b/icing/index/numeric/dummy-numeric-index.h
@@ -70,7 +70,8 @@ class DummyNumericIndex : public NumericIndex<T> {
}
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> GetIterator(
- std::string_view property_path, T key_lower, T key_upper) const override;
+ std::string_view property_path, T key_lower, T key_upper,
+ const DocumentStore&, const SchemaStore&) const override;
libtextclassifier3::Status Optimize(
const std::vector<DocumentId>& document_id_old_to_new,
@@ -93,6 +94,8 @@ class DummyNumericIndex : public NumericIndex<T> {
}
}
+ int num_property_indices() const override { return storage_.size(); }
+
private:
class Editor : public NumericIndex<T>::Editor {
public:
@@ -176,7 +179,6 @@ class DummyNumericIndex : public NumericIndex<T> {
DocHitInfo doc_hit_info_;
};
- private:
explicit DummyNumericIndex(const Filesystem& filesystem,
std::string&& working_path)
: NumericIndex<T>(filesystem, std::move(working_path),
@@ -265,7 +267,8 @@ libtextclassifier3::Status DummyNumericIndex<T>::Iterator::Advance() {
template <typename T>
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
DummyNumericIndex<T>::GetIterator(std::string_view property_path, T key_lower,
- T key_upper) const {
+ T key_upper, const DocumentStore&,
+ const SchemaStore&) const {
if (key_lower > key_upper) {
return absl_ports::InvalidArgumentError(
"key_lower should not be greater than key_upper");
diff --git a/icing/index/numeric/integer-index-bucket-util.cc b/icing/index/numeric/integer-index-bucket-util.cc
new file mode 100644
index 0000000..a05baab
--- /dev/null
+++ b/icing/index/numeric/integer-index-bucket-util.cc
@@ -0,0 +1,205 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/numeric/integer-index-bucket-util.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <utility>
+#include <vector>
+
+#include "icing/index/numeric/integer-index-data.h"
+
+namespace icing {
+namespace lib {
+
+namespace integer_index_bucket_util {
+
+namespace {
+
+// Helper function to determine if data slice [start, end) forms a "full
+// single-range bucket".
+//
+// Full single-range bucket: keys of all data are identical and # of them exceed
+// num_data_threshold.
+//
+// REQUIRES: data slice [start, end) are sorted by key.
+inline bool WouldBeFullSingleRangeBucket(
+    const std::vector<IntegerIndexData>::iterator& start,
+    const std::vector<IntegerIndexData>::iterator& end,
+    int32_t num_data_threshold) {
+  // "Full": strictly more than num_data_threshold elements in [start, end).
+  // "Single-range": the slice is sorted by key (see REQUIRES), so equal first
+  // and last keys imply every key in between is identical as well.
+  return std::distance(start, end) > num_data_threshold &&
+         start->key() == (end - 1)->key();
+}
+
+// Helper function to determine if a bucket is full single-range.
+//
+// REQUIRES:
+// bucket.key_lower <= [bucket.start, bucket.end)->key() <= bucket.key_upper
+inline bool IsFullSingleRangeBucket(const DataRangeAndBucketInfo& bucket,
+                                    int32_t num_data_threshold) {
+  // key_lower == key_upper means the bucket covers exactly one key; combined
+  // with the REQUIRES (all data keys lie in [key_lower, key_upper]), every
+  // datum in [start, end) has that key, so only the count check remains.
+  return bucket.key_lower == bucket.key_upper &&
+         WouldBeFullSingleRangeBucket(bucket.start, bucket.end,
+                                      num_data_threshold);
+}
+
+// Helper function to append new bucket(s) with corresponding data slice for
+// range [curr_key_lower, last_key] where last_key = (it_end - 1)->key().
+//
+// Also it handles an edge case:
+// If data slice [it_start, it_end) forms a "full single-range bucket" (see
+// WouldBeFullSingleRangeBucket for definition), then we have to put them into a
+// single range bucket [last_key, last_key] instead of [curr_key_lower,
+// last_key]. Also we have to deal with range [curr_key_lower, last_key - 1]:
+// - If the previous bucket exists and it is not a "full single-range bucket",
+// then merge [curr_key_lower, last_key - 1] into the previous bucket, i.e.
+// change the previous bucket's key_upper to (last_key - 1). Then we will end
+// up having:
+// - [prev_bucket.key_lower, last_key - 1]
+// - [last_key, last_key]
+// - Otherwise, we have to create [curr_key_lower, last_key - 1] with
+// empty data. Then we will end up having (Note: prev_bucket.key_upper ==
+// curr_key_lower - 1):
+// - [prev_bucket.key_lower, curr_key_lower - 1]
+// - [curr_key_lower, last_key - 1]
+// - [last_key, last_key]
+// This will avoid split bucket being called too frequently.
+// For example, original_key_lower = 0, original_key_upper = 50. If we have
+// (num_data_threshold + 1) data with key = 20 and another data with key = 40:
+// - Without this part, we will split them into [[0, 20], [21, 50]]. Then when
+// adding data with key = 10 next round, we will invoke split again and split
+// [0, 20] to [[0, 10], [11, 20]].
+// - With this part, we will split them into [[0, 19], [20, 20], [21, 50]],
+// which will avoid splitting in the next round for key = 20.
+//
+// REQUIRES: it_start < it_end
+void AppendNewBuckets(const std::vector<IntegerIndexData>::iterator& it_start,
+                      const std::vector<IntegerIndexData>::iterator& it_end,
+                      int64_t curr_key_lower, int32_t num_data_threshold,
+                      std::vector<DataRangeAndBucketInfo>& results) {
+  // Safe to dereference: REQUIRES it_start < it_end, so the slice is
+  // non-empty and (it_end - 1) is the largest-key datum (slice is sorted).
+  int64_t last_key = (it_end - 1)->key();
+  if (curr_key_lower < last_key &&
+      WouldBeFullSingleRangeBucket(it_start, it_end, num_data_threshold)) {
+    if (!results.empty() &&
+        !IsFullSingleRangeBucket(results.back(), num_data_threshold)) {
+      // Previous bucket is not full single-range, so merge it to now hold the
+      // range [prev_bucket.key_lower, last_key - 1].
+      results.back().key_upper = last_key - 1;
+    } else {
+      // There is either no previous bucket or the previous bucket is full
+      // single-range. So add an empty bucket for the range [curr_key_lower,
+      // last_key - 1].
+      results.push_back(DataRangeAndBucketInfo(it_start, it_start,
+                                               curr_key_lower, last_key - 1));
+    }
+    // All of [it_start, it_end) will now land in the single-range bucket
+    // [last_key, last_key] appended below.
+    curr_key_lower = last_key;
+  }
+  // Common path: append the bucket [curr_key_lower, last_key] holding the
+  // whole data slice [it_start, it_end).
+  results.push_back(
+      DataRangeAndBucketInfo(it_start, it_end, curr_key_lower, last_key));
+}
+
+} // namespace
+
+std::vector<DataRangeAndBucketInfo> Split(std::vector<IntegerIndexData>& data,
+                                          int64_t original_key_lower,
+                                          int64_t original_key_upper,
+                                          int32_t num_data_threshold) {
+  // Early return if there is no need to split. Note: in this case `data` is
+  // left in its original (unsorted) order, as documented in the header.
+  if (data.size() <= num_data_threshold) {
+    return {DataRangeAndBucketInfo(data.begin(), data.end(), original_key_lower,
+                                   original_key_upper)};
+  }
+
+  // Sort data by key. std::sort is not stable: the relative order of data
+  // sharing the same key is unspecified after this point.
+  std::sort(
+      data.begin(), data.end(),
+      [](const IntegerIndexData& lhs, const IntegerIndexData& rhs) -> bool {
+        return lhs.key() < rhs.key();
+      });
+
+  std::vector<DataRangeAndBucketInfo> results;
+  int64_t curr_key_lower = original_key_lower;
+  // Sliding window [it_start, it_end) to separate data into different buckets.
+  auto it_start = data.begin();
+  auto it_end = data.begin();
+  while (it_end != data.end()) {
+    // Attempt to extend it_end by 1, but we have to include all data with the
+    // same key since they cannot be separated into different buckets. Also use
+    // extend_it_end to avoid modifying it_end directly. For some edge cases,
+    // the extension in a single round is extremely large (i.e. a lot of data
+    // have the same key), and we want to separate them. For example:
+    // - key = 0: 5 data
+    // - key = 1: num_data_threshold - 1 data
+    // In the second round, # of data in the sliding window will exceed the
+    // threshold. We want to separate all data with key = 0 into a single bucket
+    // instead of putting key = 0 and key = 1 together. Therefore, using
+    // extend_it_end allow us to preserve it_end of the previous round and be
+    // able to deal with this case.
+    auto extend_it_end = it_end + 1;
+    while (extend_it_end != data.end() &&
+           it_end->key() == extend_it_end->key()) {
+      ++extend_it_end;
+    }
+
+    // Close the current bucket at it_end (not extend_it_end) once absorbing
+    // the next key group would push the window past the threshold — unless
+    // the window is still empty, in which case the group must stay together.
+    if (std::distance(it_start, extend_it_end) > num_data_threshold &&
+        it_start != it_end) {
+      // Split data between [it_start, it_end) into range [curr_key_lower,
+      // (it_end - 1)->key()].
+      AppendNewBuckets(it_start, it_end, curr_key_lower, num_data_threshold,
+                       results);
+
+      // it_end at this moment won't be data.end(), so the last element of the
+      // new bucket can't have key == INT64_MAX. Therefore, it is safe to set
+      // curr_key_lower as ((it_end - 1)->key() + 1).
+      curr_key_lower = (it_end - 1)->key() + 1;
+      it_start = it_end;
+    }
+    it_end = extend_it_end;
+  }
+
+  // Handle the final range [curr_key_lower, original_key_upper].
+  if (curr_key_lower <= original_key_upper) {
+    if (it_start != it_end) {
+      AppendNewBuckets(it_start, it_end, curr_key_lower, num_data_threshold,
+                       results);
+
+      // AppendNewBuckets only handles range [curr_key_lower, (it_end -
+      // 1)->key()], so we have to handle range [(it_end - 1)->key() + 1,
+      // original_key_upper] if needed.
+      int64_t last_key = (it_end - 1)->key();
+      if (last_key != std::numeric_limits<int64_t>::max() &&
+          last_key + 1 <= original_key_upper) {
+        if (!results.empty() &&
+            !IsFullSingleRangeBucket(results.back(), num_data_threshold)) {
+          results.back().key_upper = original_key_upper;
+        } else {
+          results.push_back(DataRangeAndBucketInfo(
+              it_start, it_start, last_key + 1, original_key_upper));
+        }
+      }
+    } else {
+      // No leftover data: one (possibly empty-slice) bucket covers the tail
+      // of the original key range.
+      results.push_back(DataRangeAndBucketInfo(it_start, it_end, curr_key_lower,
+                                               original_key_upper));
+    }
+  }
+
+  return results;
+}
+
+} // namespace integer_index_bucket_util
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/numeric/integer-index-bucket-util.h b/icing/index/numeric/integer-index-bucket-util.h
new file mode 100644
index 0000000..863bd01
--- /dev/null
+++ b/icing/index/numeric/integer-index-bucket-util.h
@@ -0,0 +1,81 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_NUMERIC_INTEGER_INDEX_BUCKET_UTIL_H_
+#define ICING_INDEX_NUMERIC_INTEGER_INDEX_BUCKET_UTIL_H_
+
+#include <cstdint>
+#include <utility>
+#include <vector>
+
+#include "icing/index/numeric/integer-index-data.h"
+
+namespace icing {
+namespace lib {
+
+namespace integer_index_bucket_util {
+
+// A wrapper struct that contains information of a bucket.
+// - The bucket contains data within the iterator [start, end).
+// - Bucket range is [key_lower, key_upper], and all data within [start, end)
+// should have keys in the bucket range.
+//
+// Note: the caller should make sure the lifecycle of data vector is longer than
+// instances of this wrapper struct.
+struct DataRangeAndBucketInfo {
+  // Data slice [start, end); iterators into the caller-owned vector, so they
+  // are invalidated if that vector reallocates or is destroyed.
+  std::vector<IntegerIndexData>::iterator start;
+  std::vector<IntegerIndexData>::iterator end;
+  // Inclusive key range [key_lower, key_upper] covered by this bucket.
+  int64_t key_lower;
+  int64_t key_upper;
+
+  explicit DataRangeAndBucketInfo(
+      std::vector<IntegerIndexData>::iterator start_in,
+      std::vector<IntegerIndexData>::iterator end_in, int64_t key_lower_in,
+      int64_t key_upper_in)
+      : start(std::move(start_in)),
+        end(std::move(end_in)),
+        key_lower(key_lower_in),
+        key_upper(key_upper_in) {}
+};
+
+// Helper function to split data (that are originally in a bucket with range
+// [original_key_lower, original_key_upper]) into different buckets according to
+// num_data_threshold.
+// - The input vector `data` will be sorted by key in ascending order (unless
+// there's no need to split in which case data is returned unmodified)
+// - Data with the same key will be in the same bucket even if # of them exceed
+// num_data_threshold.
+// - Range of all buckets will be disjoint, and the range union will be
+// [original_key_lower, original_key_upper].
+// - Data slice (i.e. [start, end)) can be empty.
+//
+// REQUIRES:
+// - original_key_lower <= original_key_upper
+// - num_data_threshold > 0
+// - Keys of all data are in range [original_key_lower, original_key_upper]
+//
+// Returns: a vector of DataRangeAndBucketInfo that contain all bucket info
+// after splitting. Also the returned vector should contain at least one
+// bucket, otherwise it is considered an error.
+std::vector<DataRangeAndBucketInfo> Split(std::vector<IntegerIndexData>& data,
+ int64_t original_key_lower,
+ int64_t original_key_upper,
+ int32_t num_data_threshold);
+
+} // namespace integer_index_bucket_util
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_NUMERIC_INTEGER_INDEX_BUCKET_UTIL_H_
diff --git a/icing/index/numeric/integer-index-bucket-util_test.cc b/icing/index/numeric/integer-index-bucket-util_test.cc
new file mode 100644
index 0000000..82c593e
--- /dev/null
+++ b/icing/index/numeric/integer-index-bucket-util_test.cc
@@ -0,0 +1,1112 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/numeric/integer-index-bucket-util.h"
+
+#include <limits>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/index/numeric/integer-index-data.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+namespace integer_index_bucket_util {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::Ne;
+using ::testing::SizeIs;
+
+static constexpr DocumentId kDefaultDocumentId = 123;
+static constexpr SectionId kDefaultSectionId = 31;
+
+TEST(IntegerIndexBucketUtilTest, Split_numDataNotDivisibleByThreshold) {
+  // 7 data with threshold 3: expect full buckets of 3 plus a final bucket
+  // holding the 1 leftover datum.
+  std::vector<IntegerIndexData> data = {
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 1),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)};
+  int64_t key_lower = -10;
+  int64_t key_upper = 10;
+  int32_t num_data_threshold = 3;
+  ASSERT_THAT(data.size() % num_data_threshold, Ne(0));
+
+  // Keys = [-10, -3, -2, 0, 1, 2, 10].
+  std::vector<DataRangeAndBucketInfo> results =
+      Split(data, key_lower, key_upper, num_data_threshold);
+  ASSERT_THAT(results, SizeIs(3));
+  // Bucket 0: key lower = -10, key upper = -2, keys = [-10, -3, -2].
+  EXPECT_THAT(results[0].key_lower, Eq(-10));
+  EXPECT_THAT(results[0].key_upper, Eq(-2));
+  EXPECT_THAT(
+      std::vector<IntegerIndexData>(results[0].start, results[0].end),
+      ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+                  IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+                  IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)));
+  // Bucket 1: key lower = -1, key upper = 2, keys = [0, 1, 2].
+  EXPECT_THAT(results[1].key_lower, Eq(-1));
+  EXPECT_THAT(results[1].key_upper, Eq(2));
+  EXPECT_THAT(
+      std::vector<IntegerIndexData>(results[1].start, results[1].end),
+      ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+                  IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 1),
+                  IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+  // Bucket 2: key lower = 3, key upper = 10, keys = [10].
+  EXPECT_THAT(results[2].key_lower, Eq(3));
+  EXPECT_THAT(results[2].key_upper, Eq(10));
+  EXPECT_THAT(
+      std::vector<IntegerIndexData>(results[2].start, results[2].end),
+      ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest, Split_numDataDivisibleByThreshold) {
+  // 6 data with threshold 3: the data split evenly into two full buckets,
+  // and the second bucket's upper bound stretches to cover the rest.
+  std::vector<IntegerIndexData> data = {
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)};
+  int64_t key_lower = -10;
+  int64_t key_upper = 10;
+  int32_t num_data_threshold = 3;
+  ASSERT_THAT(data.size() % num_data_threshold, Eq(0));
+
+  // Keys = [-10, -3, -2, 0, 2, 10].
+  std::vector<DataRangeAndBucketInfo> results =
+      Split(data, key_lower, key_upper, num_data_threshold);
+  ASSERT_THAT(results, SizeIs(2));
+  // Bucket 0: key lower = -10, key upper = -2, keys = [-10, -3, -2].
+  EXPECT_THAT(results[0].key_lower, Eq(-10));
+  EXPECT_THAT(results[0].key_upper, Eq(-2));
+  EXPECT_THAT(
+      std::vector<IntegerIndexData>(results[0].start, results[0].end),
+      ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+                  IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+                  IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)));
+  // Bucket 1: key lower = -1, key upper = 10, keys = [0, 2, 10].
+  EXPECT_THAT(results[1].key_lower, Eq(-1));
+  EXPECT_THAT(results[1].key_upper, Eq(10));
+  EXPECT_THAT(
+      std::vector<IntegerIndexData>(results[1].start, results[1].end),
+      ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+                  IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+                  IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest, Split_shouldIncludeOriginalKeyRange) {
+  // The union of the resulting bucket ranges must equal the full original
+  // range, even when no datum sits on either boundary.
+  std::vector<IntegerIndexData> data = {
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 1),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)};
+  int64_t key_lower = -1000;
+  int64_t key_upper = 1000;
+  int32_t num_data_threshold = 3;
+
+  // Keys = [-10, -3, -2, 0, 1, 2, 10].
+  // Split should include the original key_lower and key_upper even if there is
+  // no key at boundary.
+  std::vector<DataRangeAndBucketInfo> results =
+      Split(data, key_lower, key_upper, num_data_threshold);
+  ASSERT_THAT(results, SizeIs(3));
+  // Bucket 0: key lower = -1000, key upper = -2, keys = [-10, -3, -2].
+  EXPECT_THAT(results[0].key_lower, Eq(-1000));
+  EXPECT_THAT(results[0].key_upper, Eq(-2));
+  EXPECT_THAT(
+      std::vector<IntegerIndexData>(results[0].start, results[0].end),
+      ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+                  IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+                  IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)));
+  // Bucket 1: key lower = -1, key upper = 2, keys = [0, 1, 2].
+  EXPECT_THAT(results[1].key_lower, Eq(-1));
+  EXPECT_THAT(results[1].key_upper, Eq(2));
+  EXPECT_THAT(
+      std::vector<IntegerIndexData>(results[1].start, results[1].end),
+      ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+                  IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 1),
+                  IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+  // Bucket 2: key lower = 3, key upper = 1000, keys = [10].
+  EXPECT_THAT(results[2].key_lower, Eq(3));
+  EXPECT_THAT(results[2].key_upper, Eq(1000));
+  EXPECT_THAT(
+      std::vector<IntegerIndexData>(results[2].start, results[2].end),
+      ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest, Split_singleBucketWithoutSplitting) {
+  // When # of data <= threshold, Split takes its early-return path: one
+  // bucket over the whole range and the data vector left unsorted.
+  std::vector<IntegerIndexData> data = {
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 1),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)};
+  int64_t key_lower = -1000;
+  int64_t key_upper = 1000;
+  int32_t num_data_threshold = 100;
+
+  // Keys = [-10, -3, -2, 0, 1, 2, 10].
+  std::vector<DataRangeAndBucketInfo> results =
+      Split(data, key_lower, key_upper, num_data_threshold);
+  ASSERT_THAT(results, SizeIs(1));
+  // Bucket 0: key lower = -1000, key upper = 1000, keys = [-10, -3, -2, 0, 1,
+  // 2, 10]. Since # of data <= threshold, data vector won't be sorted and thus
+  // [start, end) will have data with the original order.
+  EXPECT_THAT(results[0].key_lower, Eq(-1000));
+  EXPECT_THAT(results[0].key_upper, Eq(1000));
+  EXPECT_THAT(
+      std::vector<IntegerIndexData>(results[0].start, results[0].end),
+      ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+                  IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+                  IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+                  IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+                  IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+                  IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 1),
+                  IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)));
+}
+
+TEST(IntegerIndexBucketUtilTest, Split_emptyData) {
+  // An empty input still yields exactly one (empty) bucket covering the
+  // whole original range, per the "at least one bucket" contract.
+  std::vector<IntegerIndexData> empty_data;
+  std::vector<DataRangeAndBucketInfo> results =
+      Split(empty_data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+            /*num_data_threshold=*/3);
+  ASSERT_THAT(results, SizeIs(1));
+  // Bucket 0: key lower = -10, key upper = 10, keys = [].
+  EXPECT_THAT(results[0].key_lower, Eq(-10));
+  EXPECT_THAT(results[0].key_upper, Eq(10));
+  EXPECT_THAT(std::vector<IntegerIndexData>(results[0].start, results[0].end),
+              IsEmpty());
+}
+
+TEST(IntegerIndexBucketUtilTest,
+     Split_sameKeysExceedingThreshold_firstBucket_keyEqualsKeyLower) {
+  // 5 data share key -10 (== key_lower), exceeding threshold 3: they must
+  // all stay together in one single-range bucket [-10, -10].
+  std::vector<IntegerIndexData> data = {
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+      IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+  // Keys = [-10, -10, -10, -10, -10, 0, 3, 5, 10].
+  std::vector<DataRangeAndBucketInfo> results =
+      Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+            /*num_data_threshold=*/3);
+  // - Even though # of data with key = -10 exceeds the threshold, they should
+  //   still be in the same bucket.
+  // - They should be separated from key = 0, 3, ....
+  ASSERT_THAT(results, SizeIs(3));
+  // Bucket 0: key lower = -10, key upper = -10, keys = [-10, -10, -10, -10,
+  // -10].
+  EXPECT_THAT(results[0].key_lower, Eq(-10));
+  EXPECT_THAT(results[0].key_upper, Eq(-10));
+  EXPECT_THAT(
+      std::vector<IntegerIndexData>(results[0].start, results[0].end),
+      ElementsAre(
+          IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+          IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+          IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+          IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+          IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10)));
+  // Bucket 1: key lower = -9, key upper = 5, keys = [0, 3, 5].
+  EXPECT_THAT(results[1].key_lower, Eq(-9));
+  EXPECT_THAT(results[1].key_upper, Eq(5));
+  EXPECT_THAT(
+      std::vector<IntegerIndexData>(results[1].start, results[1].end),
+      ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+                  IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+                  IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5)));
+  // Bucket 2: key lower = 6, key upper = 10, keys = [10].
+  EXPECT_THAT(results[2].key_lower, Eq(6));
+  EXPECT_THAT(results[2].key_upper, Eq(10));
+  EXPECT_THAT(
+      std::vector<IntegerIndexData>(results[2].start, results[2].end),
+      ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+// Over-threshold duplicates of the smallest key (-7), which is strictly
+// greater than the original key lower bound (-10): the duplicates get their
+// own single-key bucket [-7, -7], and an empty bucket [-10, -8] is created
+// before it to keep the original range fully covered.
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_firstBucket_keyGreaterThanKeyLower) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [-7, -7, -7, -7, -7, 0, 3, 5, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = -7 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be separated from key = 0, 3, ....
+ // - They should be in a single range bucket [-7, -7], and another bucket
+ // [-10, -8] with empty data should be created before it.
+ ASSERT_THAT(results, SizeIs(4));
+ // Bucket 0: key lower = -10, key upper = -8, keys = [].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-8));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ IsEmpty());
+ // Bucket 1: key lower = -7, key upper = -7, keys = [-7, -7, -7, -7, -7].
+ EXPECT_THAT(results[1].key_lower, Eq(-7));
+ EXPECT_THAT(results[1].key_upper, Eq(-7));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7)));
+ // Bucket 2: key lower = -6, key upper = 5, keys = [0, 3, 5].
+ EXPECT_THAT(results[2].key_lower, Eq(-6));
+ EXPECT_THAT(results[2].key_upper, Eq(5));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5)));
+ // Bucket 3: key lower = 6, key upper = 10, keys = [10].
+ EXPECT_THAT(results[3].key_lower, Eq(6));
+ EXPECT_THAT(results[3].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[3].start, results[3].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+// Over-threshold duplicates of a middle key (-4) that immediately follows the
+// previous bucket's upper bound: the duplicates occupy a single-key bucket
+// [-4, -4] with no gap bucket needed before it.
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_midBucket_keyEqualsKeyLower) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [-10, -5, -4, -4, -4, -4, -4, 5, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = -4 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be separated from key = -10, -5, 5, 10.
+ ASSERT_THAT(results, SizeIs(3));
+ // Bucket 0: key lower = -10, key upper = -5, keys = [-10, -5].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-5));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -5)));
+ // Bucket 1: key lower = -4, key upper = -4, keys = [-4, -4, -4, -4, -4].
+ EXPECT_THAT(results[1].key_lower, Eq(-4));
+ EXPECT_THAT(results[1].key_upper, Eq(-4));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4)));
+ // Bucket 2: key lower = -3, key upper = 10, keys = [5, 10].
+ EXPECT_THAT(results[2].key_lower, Eq(-3));
+ EXPECT_THAT(results[2].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+// Over-threshold duplicates of a middle key (-1) with a gap after the
+// previous bucket: the gap range [-4, -2] is absorbed into the previous
+// bucket (extending its upper bound to -2) rather than creating an extra
+// empty bucket.
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_midBucket_keyGreaterThanKeyLower) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [-10, -5, -1, -1, -1, -1, -1, 5, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = -1 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be separated from key = -10, -5, 5, 10.
+ // - They should be in a single range bucket [-1, -1], and range [-4, -2]
+ // should be merged into the previous bucket.
+ ASSERT_THAT(results, SizeIs(3));
+ // Bucket 0: key lower = -10, key upper = -2, keys = [-10, -5].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -5)));
+ // Bucket 1: key lower = -1, key upper = -1, keys = [-1, -1, -1, -1, -1].
+ EXPECT_THAT(results[1].key_lower, Eq(-1));
+ EXPECT_THAT(results[1].key_upper, Eq(-1));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1)));
+ // Bucket 2: key lower = 0, key upper = 10, keys = [5, 10].
+ EXPECT_THAT(results[2].key_lower, Eq(0));
+ EXPECT_THAT(results[2].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+// Over-threshold duplicates of the largest key (3): they form a single-key
+// bucket [3, 3], and an empty trailing bucket [4, 10] covers the rest of the
+// original range.
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_lastBucket_keyEqualsKeyLower) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3)};
+
+ // Keys = [-10, -3, 0, 2, 3, 3, 3, 3, 3].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = 3 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be separated from key = -10, -3, 0, 2.
+ // - They should be in a single range bucket [3, 3], and another bucket
+ // [4, 10] with empty data should be created after it.
+ ASSERT_THAT(results, SizeIs(4));
+ // Bucket 0: key lower = -10, key upper = 0, keys = [-10, -3, 0].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(0));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0)));
+ // Bucket 1: key lower = 1, key upper = 2, keys = [2].
+ EXPECT_THAT(results[1].key_lower, Eq(1));
+ EXPECT_THAT(results[1].key_upper, Eq(2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 2: key lower = 3, key upper = 3, keys = [3, 3, 3, 3, 3].
+ EXPECT_THAT(results[2].key_lower, Eq(3));
+ EXPECT_THAT(results[2].key_upper, Eq(3));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3)));
+ // Bucket 3: key lower = 4, key upper = 10, keys = [].
+ EXPECT_THAT(results[3].key_lower, Eq(4));
+ EXPECT_THAT(results[3].key_upper, Eq(10));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[3].start, results[3].end),
+ IsEmpty());
+}
+
+// Over-threshold duplicates of the largest key (6), strictly inside the
+// original range: the gap [3, 5] before the duplicates is merged into the
+// previous bucket, and an empty trailing bucket [7, 10] is appended.
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_lastBucket_keyWithinKeyLowerAndUpper) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6)};
+
+ // Keys = [-10, -3, 0, 2, 6, 6, 6, 6, 6].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = 6 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be separated from key = -10, -3, 0, 2.
+ // - They should be in a single range bucket [6, 6]. Range [3, 5] should be
+ // merged into the previous bucket. and another bucket [7, 10] with empty
+ // data should be created after it.
+ ASSERT_THAT(results, SizeIs(4));
+ // Bucket 0: key lower = -10, key upper = 0, keys = [-10, -3, 0].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(0));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0)));
+ // Bucket 1: key lower = 1, key upper = 5, keys = [2].
+ EXPECT_THAT(results[1].key_lower, Eq(1));
+ EXPECT_THAT(results[1].key_upper, Eq(5));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 2: key lower = 6, key upper = 6, keys = [6, 6, 6, 6, 6].
+ EXPECT_THAT(results[2].key_lower, Eq(6));
+ EXPECT_THAT(results[2].key_upper, Eq(6));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6)));
+ // Bucket 3: key lower = 7, key upper = 10, keys = [].
+ EXPECT_THAT(results[3].key_lower, Eq(7));
+ EXPECT_THAT(results[3].key_upper, Eq(10));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[3].start, results[3].end),
+ IsEmpty());
+}
+
+// Over-threshold duplicates of the largest key (10), which equals the
+// original key upper bound: the gap [3, 9] is merged into the previous
+// bucket and no trailing empty bucket is needed.
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_lastBucket_keyEqualsKeyUpper) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [-10, -3, 0, 2, 10, 10, 10, 10, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = 10 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be separated from key = -10, -3, 0, 2.
+ // - They should be in a single range bucket [10, 10], and range [3, 9] should
+ // be merged into the previous bucket.
+ ASSERT_THAT(results, SizeIs(3));
+ // Bucket 0: key lower = -10, key upper = 0, keys = [-10, -3, 0].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(0));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0)));
+ // Bucket 1: key lower = 1, key upper = 9, keys = [2].
+ EXPECT_THAT(results[1].key_lower, Eq(1));
+ EXPECT_THAT(results[1].key_upper, Eq(9));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 2: key lower = 10, key upper = 10, keys = [10, 10, 10, 10, 10].
+ EXPECT_THAT(results[2].key_lower, Eq(10));
+ EXPECT_THAT(results[2].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+// Two consecutive over-threshold single-key groups (-2 and 5): the gap
+// between them must NOT be merged into the previous single-key bucket
+// (which is itself over threshold); instead an empty gap bucket [-1, 4] is
+// created between them.
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_shouldNotMergeIntoPreviousBucket) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [-10, -2, -2, -2, -2, -2, 5, 5, 5, 5, 5, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Data with key = -2 and 5 should be put into a single bucket respectively.
+ // - When dealing with key = 5, range [-1, 4] should not be merged into the
+ // previous bucket [-2, -2] because [-2, -2] also contains single key data
+ // exceeding the threshold. Instead, we should create bucket [-1, 4] with
+ // empty data.
+ ASSERT_THAT(results, SizeIs(5));
+ // Bucket 0: key lower = -10, key upper = -3, keys = [-10].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-3));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId,
+ kDefaultDocumentId, -10)));
+ // Bucket 1: key lower = -2, key upper = -2, keys = [-2, -2, -2, -2, -2].
+ EXPECT_THAT(results[1].key_lower, Eq(-2));
+ EXPECT_THAT(results[1].key_upper, Eq(-2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)));
+ // Bucket 2: key lower = -1, key upper = 4, keys = [].
+ EXPECT_THAT(results[2].key_lower, Eq(-1));
+ EXPECT_THAT(results[2].key_upper, Eq(4));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ IsEmpty());
+ // Bucket 3: key lower = 5, key upper = 5, keys = [5, 5, 5, 5, 5].
+ EXPECT_THAT(results[3].key_lower, Eq(5));
+ EXPECT_THAT(results[3].key_upper, Eq(5));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[3].start, results[3].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5)));
+ // Bucket 4: key lower = 6, key upper = 10, keys = [10].
+ EXPECT_THAT(results[4].key_lower, Eq(6));
+ EXPECT_THAT(results[4].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[4].start, results[4].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+// Counterpart to the previous test: when the bucket preceding an
+// over-threshold key group is itself under the threshold, the gap range is
+// merged into that previous bucket instead of becoming an empty bucket.
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_shouldMergeIntoPreviousBucket) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -8),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [-10, -8, -3, -2, -2, -2, 5, 5, 5, 5, 5, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Data with key = 5 should be put into a single bucket.
+ // - When dealing with key = 5, range [-1, 4] should be merged into the
+ // previous bucket [-2, -2] because # of data in [-2, -2] doesn't exceed the
+ // threshold.
+ ASSERT_THAT(results, SizeIs(4));
+ // Bucket 0: key lower = -10, key upper = -3, keys = [-10, -8, -3].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-3));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -8),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3)));
+ // Bucket 1: key lower = -2, key upper = 4, keys = [-2, -2, -2].
+ EXPECT_THAT(results[1].key_lower, Eq(-2));
+ EXPECT_THAT(results[1].key_upper, Eq(4));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)));
+ // Bucket 2: key lower = 5, key upper = 5, keys = [5, 5, 5, 5, 5].
+ EXPECT_THAT(results[2].key_lower, Eq(5));
+ EXPECT_THAT(results[2].key_upper, Eq(5));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5)));
+ // Bucket 3: key lower = 6, key upper = 10, keys = [10].
+ EXPECT_THAT(results[3].key_lower, Eq(6));
+ EXPECT_THAT(results[3].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[3].start, results[3].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+// All data share one over-threshold key equal to the original lower bound:
+// the result is the single-key bucket [-10, -10] plus an empty bucket
+// [-9, 10] covering the remainder of the range.
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_singleBucket_keyEqualsKeyLower) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10)};
+
+ // Keys = [-10, -10, -10, -10, -10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = -10 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be in a single range bucket [-10, -10], and another bucket
+ // [-9, 10] with empty data should be created after it.
+ ASSERT_THAT(results, SizeIs(2));
+ // Bucket 0: key lower = -10, key upper = -10, keys = [-10, -10, -10, -10,
+ // -10].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10)));
+ // Bucket 1: key lower = -9, key upper = 10, keys = [].
+ EXPECT_THAT(results[1].key_lower, Eq(-9));
+ EXPECT_THAT(results[1].key_upper, Eq(10));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ IsEmpty());
+}
+
+// All data share one over-threshold key (0) strictly inside the original
+// range: empty buckets [-10, -1] and [1, 10] are created on both sides of the
+// single-key bucket [0, 0].
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_singleBucket_keyWithinKeyLowerAndUpper) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0)};
+
+ // Keys = [0, 0, 0, 0, 0].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = 0 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be in a single range bucket [0, 0]. Another bucket [-10, -1]
+ // with empty data should be created before it, and another bucket [1, 10]
+ // with empty data should be created after it.
+ ASSERT_THAT(results, SizeIs(3));
+ // Bucket 0: key lower = -10, key upper = -1, keys = [].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-1));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ IsEmpty());
+ // Bucket 1: key lower = 0, key upper = 0, keys = [0, 0, 0, 0, 0].
+ EXPECT_THAT(results[1].key_lower, Eq(0));
+ EXPECT_THAT(results[1].key_upper, Eq(0));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0)));
+ // Bucket 2: key lower = 1, key upper = 10, keys = [].
+ EXPECT_THAT(results[2].key_lower, Eq(1));
+ EXPECT_THAT(results[2].key_upper, Eq(10));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ IsEmpty());
+}
+
+// All data share one over-threshold key equal to the original upper bound
+// (10): an empty bucket [-10, 9] precedes the single-key bucket [10, 10].
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_singleBucket_keyEqualsKeyUpper) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [10, 10, 10, 10, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = 10 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be in a single range bucket [10, 10], and another bucket
+ // [-10, 9] with empty data should be created before it.
+ ASSERT_THAT(results, SizeIs(2));
+ // Bucket 0: key lower = -10, key upper = 9, keys = [].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(9));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ IsEmpty());
+ // Bucket 1: key lower = 10, key upper = 10, keys = [10, 10, 10, 10, 10].
+ EXPECT_THAT(results[1].key_lower, Eq(10));
+ EXPECT_THAT(results[1].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+// Each key appears only twice (under the threshold of 3), but any two
+// adjacent key groups together would exceed it, so every key group lands in
+// its own bucket.
+TEST(IntegerIndexBucketUtilTest,
+ Split_adjacentKeysTotalNumDataExceedThreshold) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [-10, -10, -1, -1, 2, 2, 10, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // Even though # of data with the same key is within the threshold, since
+ // total # of data of adjacent keys exceed the threshold, they should be
+ // separated into different buckets.
+ ASSERT_THAT(results, SizeIs(4));
+ // Bucket 0: key lower = -10, key upper = -10, keys = [-10, -10].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10)));
+ // Bucket 1: key lower = -9, key upper = -1, keys = [-1, -1].
+ EXPECT_THAT(results[1].key_lower, Eq(-9));
+ EXPECT_THAT(results[1].key_upper, Eq(-1));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1)));
+ // Bucket 2: key lower = 0, key upper = 2, keys = [2, 2].
+ EXPECT_THAT(results[2].key_lower, Eq(0));
+ EXPECT_THAT(results[2].key_upper, Eq(2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 3: key lower = 3, key upper = 10, keys = [10, 10].
+ EXPECT_THAT(results[3].key_lower, Eq(3));
+ EXPECT_THAT(results[3].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[3].start, results[3].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+// Extreme-range case: original bounds are [INT64_MIN, INT64_MAX] and the
+// smallest key is INT64_MIN + 1 (strictly above the lower bound). Exercises
+// boundary arithmetic near the int64 limits without overflow.
+TEST(IntegerIndexBucketUtilTest,
+ Split_keyLowerEqualsIntMin_smallestKeyGreaterThanKeyLower) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min() + 1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [INT64_MIN + 1, -10, -1, 2, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*original_key_upper=*/std::numeric_limits<int64_t>::max(),
+ /*num_data_threshold=*/3);
+ ASSERT_THAT(results, SizeIs(2));
+ // Bucket 0: key lower = INT64_MIN, key upper = -1, keys = [INT64_MIN + 1,
+ // -10, -1].
+ EXPECT_THAT(results[0].key_lower, Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(results[0].key_upper, Eq(-1));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min() + 1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1)));
+ // Bucket 1: key lower = 0, key upper = INT64_MAX, keys = [2, 10].
+ EXPECT_THAT(results[1].key_lower, Eq(0));
+ EXPECT_THAT(results[1].key_upper, Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_keyLowerEqualsIntMin_smallestKeyEqualsKeyLower) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [INT64_MIN, -10, -1, 2, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*original_key_upper=*/std::numeric_limits<int64_t>::max(),
+ /*num_data_threshold=*/3);
+ ASSERT_THAT(results, SizeIs(2));
+ // Bucket 0: key lower = INT64_MIN, key upper = -1, keys = [INT64_MIN, -10,
+ // -1].
+ EXPECT_THAT(results[0].key_lower, Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(results[0].key_upper, Eq(-1));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1)));
+ // Bucket 1: key lower = 0, key upper = INT64_MAX, keys = [2, 10].
+ EXPECT_THAT(results[1].key_lower, Eq(0));
+ EXPECT_THAT(results[1].key_upper, Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_keyLowerEqualsIntMin_keyIntMinExceedingThreshold) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [INT64_MIN, INT64_MIN, INT64_MIN, INT64_MIN, INT64_MIN, -10, -1, 2,
+ // 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*original_key_upper=*/std::numeric_limits<int64_t>::max(),
+ /*num_data_threshold=*/3);
+ ASSERT_THAT(results, SizeIs(3));
+ // Bucket 0: key lower = INT64_MIN, key upper = INT64_MIN, keys = [INT64_MIN,
+ // INT64_MIN, INT64_MIN, INT64_MIN, INT64_MIN].
+ EXPECT_THAT(results[0].key_lower, Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(results[0].key_upper, Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min())));
+ // Bucket 1: key lower = INT64_MIN + 1, key upper = 2, keys = [-10, -1, 2].
+ EXPECT_THAT(results[1].key_lower,
+ Eq(std::numeric_limits<int64_t>::min() + 1));
+ EXPECT_THAT(results[1].key_upper, Eq(2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 2: key lower = 3, key upper = INT64_MAX, keys = [10].
+ EXPECT_THAT(results[2].key_lower, Eq(3));
+ EXPECT_THAT(results[2].key_upper, Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_keyUpperEqualsIntMax_largestKeySmallerThanKeyUpper) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max() - 1),
+ };
+
+ // Keys = [-10, -1, 2, 10, INT64_MAX - 1].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*original_key_upper=*/std::numeric_limits<int64_t>::max(),
+ /*num_data_threshold=*/3);
+ ASSERT_THAT(results, SizeIs(2));
+ // Bucket 0: key lower = INT64_MIN, key upper = 2, keys = [-10, -1, 2].
+ EXPECT_THAT(results[0].key_lower, Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(results[0].key_upper, Eq(2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 1: key lower = 3, key upper = INT64_MAX, keys = [10, INT64_MAX - 1].
+ EXPECT_THAT(results[1].key_lower, Eq(3));
+ EXPECT_THAT(results[1].key_upper, Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max() - 1)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_keyUpperEqualsIntMax_largestKeyEqualsKeyUpper) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ };
+
+ // Keys = [-10, -1, 2, 10, INT64_MAX].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*original_key_upper=*/std::numeric_limits<int64_t>::max(),
+ /*num_data_threshold=*/3);
+ ASSERT_THAT(results, SizeIs(2));
+ // Bucket 0: key lower = INT64_MIN, key upper = 2, keys = [-10, -1, 2].
+ EXPECT_THAT(results[0].key_lower, Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(results[0].key_upper, Eq(2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 1: key lower = 3, key upper = INT64_MAX, keys = [10, INT64_MAX].
+ EXPECT_THAT(results[1].key_lower, Eq(3));
+ EXPECT_THAT(results[1].key_upper, Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max())));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_keyUpperEqualsIntMax_keyIntMaxExceedingThreshold) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max())};
+
+ // Keys = [-10, -1, 2, 10, INT64_MAX, INT64_MAX, INT64_MAX, INT64_MAX,
+ // INT64_MAX].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*original_key_upper=*/std::numeric_limits<int64_t>::max(),
+ /*num_data_threshold=*/3);
+ ASSERT_THAT(results, SizeIs(3));
+ // Bucket 0: key lower = INT64_MIN, key upper = 2, keys = [-10, -1, 2].
+ EXPECT_THAT(results[0].key_lower, Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(results[0].key_upper, Eq(2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 1: key lower = 3, key upper = INT64_MAX - 1, keys = [10].
+ EXPECT_THAT(results[1].key_lower, Eq(3));
+ EXPECT_THAT(results[1].key_upper,
+ Eq(std::numeric_limits<int64_t>::max() - 1));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+ // Bucket 2: key lower = INT64_MAX, key upper = INT64_MAX, keys = [INT64_MAX,
+ // INT64_MAX, INT64_MAX, INT64_MAX, INT64_MAX].
+ EXPECT_THAT(results[2].key_lower, Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(results[2].key_upper, Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max())));
+}
+
+} // namespace
+
+} // namespace integer_index_bucket_util
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/numeric/integer-index-storage.cc b/icing/index/numeric/integer-index-storage.cc
index 22ef8bd..db1983c 100644
--- a/icing/index/numeric/integer-index-storage.cc
+++ b/icing/index/numeric/integer-index-storage.cc
@@ -17,6 +17,7 @@
#include <algorithm>
#include <cstdint>
#include <functional>
+#include <iterator>
#include <limits>
#include <memory>
#include <queue>
@@ -37,6 +38,7 @@
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/numeric/doc-hit-info-iterator-numeric.h"
+#include "icing/index/numeric/integer-index-bucket-util.h"
#include "icing/index/numeric/integer-index-data.h"
#include "icing/index/numeric/numeric-index.h"
#include "icing/index/numeric/posting-list-integer-index-accessor.h"
@@ -50,6 +52,41 @@ namespace lib {
namespace {
+// Helper function to flush data between [it_start, it_end) into posting list(s)
+// and return posting list id.
+// Note: it will sort data between [it_start, it_end) by basic hit value, so the
+// caller should be aware that the data order will be changed after calling this
+// function.
+libtextclassifier3::StatusOr<PostingListIdentifier> FlushDataIntoPostingLists(
+ FlashIndexStorage* flash_index_storage,
+ PostingListIntegerIndexSerializer* posting_list_serializer,
+ const std::vector<IntegerIndexData>::iterator& it_start,
+ const std::vector<IntegerIndexData>::iterator& it_end) {
+ if (it_start == it_end) {
+ return PostingListIdentifier::kInvalid;
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> new_pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage,
+ posting_list_serializer));
+
+ std::sort(it_start, it_end);
+ for (auto it = it_end - 1; it >= it_start; --it) {
+ ICING_RETURN_IF_ERROR(new_pl_accessor->PrependData(*it));
+ }
+
+ PostingListAccessor::FinalizeResult result =
+ std::move(*new_pl_accessor).Finalize();
+ if (!result.status.ok()) {
+ return result.status;
+ }
+ if (!result.id.is_valid()) {
+ return absl_ports::InternalError("Fail to flush data into posting list(s)");
+ }
+ return result.id;
+}
+
// The following 4 methods are helper functions to get the correct file path of
// metadata/sorted_buckets/unsorted_buckets/flash_index_storage, according to
// the given working directory.
@@ -510,9 +547,12 @@ libtextclassifier3::Status IntegerIndexStorage::AddKeys(
mutable_new_arr.SetArray(/*idx=*/0, new_buckets.data(), new_buckets.size());
}
- // Step 4: merge the unsorted bucket array into the sorted bucket array if the
- // length of the unsorted bucket array exceeds the threshold.
- // TODO(b/259743562): [Optimization 1] implement merge
+ // Step 4: sort and merge the unsorted bucket array into the sorted bucket
+ // array if the length of the unsorted bucket array exceeds the
+ // threshold.
+ if (unsorted_buckets_->num_elements() > kUnsortedBucketsLengthThreshold) {
+ ICING_RETURN_IF_ERROR(SortBuckets());
+ }
info().num_data += new_keys.size();
@@ -679,29 +719,23 @@ IntegerIndexStorage::InitializeNewFiles(
absl_ports::StrCat("Failed to create directory: ", working_path));
}
- // TODO(b/259743562): [Optimization 1] decide max # buckets, unsorted buckets
- // threshold
// Initialize sorted_buckets
int32_t pre_mapping_mmap_size = sizeof(Bucket) * (1 << 10);
- int32_t max_file_size =
- pre_mapping_mmap_size + FileBackedVector<Bucket>::Header::kHeaderSize;
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
FileBackedVector<Bucket>::Create(
filesystem, GetSortedBucketsFilePath(working_path),
- MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
- pre_mapping_mmap_size));
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ FileBackedVector<Bucket>::kMaxFileSize, pre_mapping_mmap_size));
// Initialize unsorted_buckets
- pre_mapping_mmap_size = sizeof(Bucket) * 100;
- max_file_size =
- pre_mapping_mmap_size + FileBackedVector<Bucket>::Header::kHeaderSize;
+ pre_mapping_mmap_size = sizeof(Bucket) * kUnsortedBucketsLengthThreshold;
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
FileBackedVector<Bucket>::Create(
filesystem, GetUnsortedBucketsFilePath(working_path),
- MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
- pre_mapping_mmap_size));
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ FileBackedVector<Bucket>::kMaxFileSize, pre_mapping_mmap_size));
// Initialize flash_index_storage
ICING_ASSIGN_OR_RETURN(
@@ -785,29 +819,23 @@ IntegerIndexStorage::InitializeExistingFiles(
/*pre_mapping_file_offset=*/0,
/*pre_mapping_mmap_size=*/kMetadataFileSize));
- // TODO(b/259743562): [Optimization 1] decide max # buckets, unsorted buckets
- // threshold
// Initialize sorted_buckets
int32_t pre_mapping_mmap_size = sizeof(Bucket) * (1 << 10);
- int32_t max_file_size =
- pre_mapping_mmap_size + FileBackedVector<Bucket>::Header::kHeaderSize;
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
FileBackedVector<Bucket>::Create(
filesystem, GetSortedBucketsFilePath(working_path),
- MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
- pre_mapping_mmap_size));
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ FileBackedVector<Bucket>::kMaxFileSize, pre_mapping_mmap_size));
// Initialize unsorted_buckets
- pre_mapping_mmap_size = sizeof(Bucket) * 100;
- max_file_size =
- pre_mapping_mmap_size + FileBackedVector<Bucket>::Header::kHeaderSize;
+ pre_mapping_mmap_size = sizeof(Bucket) * kUnsortedBucketsLengthThreshold;
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
FileBackedVector<Bucket>::Create(
filesystem, GetUnsortedBucketsFilePath(working_path),
- MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
- pre_mapping_mmap_size));
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ FileBackedVector<Bucket>::kMaxFileSize, pre_mapping_mmap_size));
// Initialize flash_index_storage
ICING_ASSIGN_OR_RETURN(
@@ -845,28 +873,13 @@ IntegerIndexStorage::FlushDataIntoNewSortedBucket(
}
ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<PostingListIntegerIndexAccessor> new_pl_accessor,
- PostingListIntegerIndexAccessor::Create(
- storage->flash_index_storage_.get(),
- storage->posting_list_serializer_));
-
- std::sort(data.begin(), data.end());
- for (auto itr = data.rbegin(); itr != data.rend(); ++itr) {
- ICING_RETURN_IF_ERROR(new_pl_accessor->PrependData(*itr));
- }
-
- PostingListAccessor::FinalizeResult result =
- std::move(*new_pl_accessor).Finalize();
- if (!result.status.ok()) {
- return result.status;
- }
- if (!result.id.is_valid()) {
- return absl_ports::InternalError("Fail to flush data into posting list");
- }
+ PostingListIdentifier pl_id,
+ FlushDataIntoPostingLists(storage->flash_index_storage_.get(),
+ storage->posting_list_serializer_, data.begin(),
+ data.end()));
storage->info().num_data += data.size();
- return storage->sorted_buckets_->Append(
- Bucket(key_lower, key_upper, result.id));
+ return storage->sorted_buckets_->Append(Bucket(key_lower, key_upper, pl_id));
}
libtextclassifier3::Status IntegerIndexStorage::PersistStoragesToDisk() {
@@ -921,21 +934,80 @@ IntegerIndexStorage::AddKeysIntoBucketAndSplitIfNecessary(
}
for (auto it = it_start; it != it_end; ++it) {
- // TODO(b/259743562): [Optimization 1] implement split bucket if pl is full
- // and the bucket is splittable
+ if (mutable_bucket.Get().key_lower() < mutable_bucket.Get().key_upper() &&
+ pl_accessor->WantsSplit()) {
+ // If the bucket needs split (max size and full) and is splittable, then
+ // we perform bucket splitting.
+
+ // 1. Finalize the current posting list accessor.
+ PostingListAccessor::FinalizeResult result =
+ std::move(*pl_accessor).Finalize();
+ if (!result.status.ok()) {
+ return result.status;
+ }
+
+ // 2. Create another posting list accessor instance. Read all data and
+ // free all posting lists.
+ ICING_ASSIGN_OR_RETURN(
+ pl_accessor,
+ PostingListIntegerIndexAccessor::CreateFromExisting(
+ flash_index_storage_.get(), posting_list_serializer_, result.id));
+ ICING_ASSIGN_OR_RETURN(std::vector<IntegerIndexData> all_data,
+ pl_accessor->GetAllDataAndFree());
+
+ // 3. Append all remaining new data.
+ all_data.reserve(all_data.size() + std::distance(it, it_end));
+ for (; it != it_end; ++it) {
+ all_data.push_back(IntegerIndexData(section_id, document_id, *it));
+ }
+
+ // 4. Run bucket splitting algorithm to decide new buckets and dispatch
+ // data.
+ std::vector<integer_index_bucket_util::DataRangeAndBucketInfo>
+ new_bucket_infos = integer_index_bucket_util::Split(
+ all_data, mutable_bucket.Get().key_lower(),
+ mutable_bucket.Get().key_upper(),
+ kNumDataThresholdForBucketSplit);
+ if (new_bucket_infos.empty()) {
+ ICING_LOG(WARNING)
+ << "No buckets after splitting. This should not happen.";
+ return absl_ports::InternalError("Split error");
+ }
+
+ // 5. Flush data.
+ std::vector<Bucket> new_buckets;
+ for (int i = 0; i < new_bucket_infos.size(); ++i) {
+ ICING_ASSIGN_OR_RETURN(
+ PostingListIdentifier pl_id,
+ FlushDataIntoPostingLists(
+ flash_index_storage_.get(), posting_list_serializer_,
+ new_bucket_infos[i].start, new_bucket_infos[i].end));
+ if (i == 0) {
+ // Reuse mutable_bucket
+ mutable_bucket.Get().set_key_lower(new_bucket_infos[i].key_lower);
+ mutable_bucket.Get().set_key_upper(new_bucket_infos[i].key_upper);
+ mutable_bucket.Get().set_posting_list_identifier(pl_id);
+ } else {
+ new_buckets.push_back(Bucket(new_bucket_infos[i].key_lower,
+ new_bucket_infos[i].key_upper, pl_id));
+ }
+ }
+
+ return new_buckets;
+ }
+
ICING_RETURN_IF_ERROR(pl_accessor->PrependData(
IntegerIndexData(section_id, document_id, *it)));
}
- // TODO(b/259743562): [Optimization 1] implement split and return new buckets.
- // We will change the original bucket (mutable_bucket)
- // in-place to one of the new buckets, and the rest will
- // be returned and added into unsorted buckets in AddKeys.
PostingListAccessor::FinalizeResult result =
std::move(*pl_accessor).Finalize();
if (!result.status.ok()) {
return result.status;
}
+ if (!result.id.is_valid()) {
+ return absl_ports::InternalError("Fail to flush data into posting list(s)");
+ }
mutable_bucket.Get().set_posting_list_identifier(result.id);
diff --git a/icing/index/numeric/integer-index-storage.h b/icing/index/numeric/integer-index-storage.h
index be0add9..ddd9231 100644
--- a/icing/index/numeric/integer-index-storage.h
+++ b/icing/index/numeric/integer-index-storage.h
@@ -30,6 +30,7 @@
#include "icing/file/posting_list/flash-index-storage.h"
#include "icing/file/posting_list/posting-list-identifier.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/integer-index-data.h"
#include "icing/index/numeric/posting-list-integer-index-serializer.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
@@ -117,6 +118,10 @@ class IntegerIndexStorage : public PersistentStorage {
int64_t key_upper() const { return key_upper_; }
+ void set_key_lower(int64_t key_lower) { key_lower_ = key_lower; }
+
+ void set_key_upper(int64_t key_upper) { key_upper_ = key_upper; }
+
PostingListIdentifier posting_list_identifier() const {
return posting_list_identifier_;
}
@@ -176,14 +181,29 @@ class IntegerIndexStorage : public PersistentStorage {
WorkingPathType::kDirectory;
static constexpr std::string_view kFilePrefix = "integer_index_storage";
- // # of data threshold for bucket merging. If total # data of adjacent buckets
- // exceed this value, then flush the accumulated data. Otherwise merge
- // buckets and their data.
+ // # of data threshold for bucket merging during optimization (TransferIndex).
+ // If total # data of adjacent buckets exceeds this value, then flush the
+ // accumulated data. Otherwise merge buckets and their data.
//
// Calculated by: 0.7 * (kMaxPostingListSize / sizeof(IntegerIndexData)),
// where kMaxPostingListSize = (kPageSize - sizeof(IndexBlock::BlockHeader)).
static constexpr int32_t kNumDataThresholdForBucketMerge = 240;
+ // # of data threshold for bucket splitting during indexing (AddKeys).
+ // When the posting list of a bucket is full, we will try to split data into
+ // multiple buckets according to their keys. In order to achieve good
+ // (amortized) time complexity, we want # of data in new buckets to be at most
+ // half # of elements in a full posting list.
+ //
+ // Calculated by: 0.5 * (kMaxPostingListSize / sizeof(IntegerIndexData)),
+ // where kMaxPostingListSize = (kPageSize - sizeof(IndexBlock::BlockHeader)).
+ static constexpr int32_t kNumDataThresholdForBucketSplit = 170;
+
+ // Length threshold to sort and merge unsorted buckets into sorted buckets. If
+ // the length of unsorted_buckets exceeds the threshold, then call
+ // SortBuckets().
+ static constexpr int32_t kUnsortedBucketsLengthThreshold = 50;
+
// Creates a new IntegerIndexStorage instance to index integers (for a single
// property). If any of the underlying file is missing, then delete the whole
// working_path and (re)initialize with new ones. Otherwise initialize and
@@ -370,7 +390,6 @@ class IntegerIndexStorage : public PersistentStorage {
// into several new buckets with new ranges, and split the data (according
// to their keys and the range of new buckets) of the original posting
// list into several new posting lists.
- // TODO(b/259743562): [Optimization 1] implement split
// - Otherwise, just simply add a new key into it, and PostingListAccessor
// mechanism will automatically create a new max size posting list and
// chain them.
diff --git a/icing/index/numeric/integer-index-storage_benchmark.cc b/icing/index/numeric/integer-index-storage_benchmark.cc
index d150f2d..54b19c3 100644
--- a/icing/index/numeric/integer-index-storage_benchmark.cc
+++ b/icing/index/numeric/integer-index-storage_benchmark.cc
@@ -57,6 +57,7 @@ namespace lib {
namespace {
using ::testing::Eq;
+using ::testing::IsEmpty;
using ::testing::SizeIs;
static constexpr SectionId kDefaultSectionId = 12;
@@ -237,18 +238,24 @@ void BM_ExactQuery(benchmark::State& state) {
std::unique_ptr<DocHitInfoIterator> iterator,
storage->GetIterator(/*query_key_lower=*/exact_query_key,
/*query_key_upper=*/exact_query_key));
- int cnt = 0;
+ std::vector<DocHitInfo> data;
while (iterator->Advance().ok()) {
- benchmark::DoNotOptimize(iterator->doc_hit_info());
- ++cnt;
+ data.push_back(iterator->doc_hit_info());
}
+ state.PauseTiming();
const auto it = keys.find(exact_query_key);
if (it == keys.end()) {
- ASSERT_THAT(cnt, Eq(0));
+ ASSERT_THAT(data, IsEmpty());
} else {
- ASSERT_THAT(it->second, SizeIs(cnt));
+ ASSERT_THAT(data, SizeIs(it->second.size()));
+ std::reverse(data.begin(), data.end());
+ for (int i = 0; i < data.size(); ++i) {
+ ASSERT_THAT(data[i].document_id(), Eq(it->second[i]));
+ ASSERT_THAT(data[i].hit_section_ids_mask(), Eq(1 << kDefaultSectionId));
+ }
}
+ state.ResumeTiming();
}
}
BENCHMARK(BM_ExactQuery)
diff --git a/icing/index/numeric/integer-index-storage_test.cc b/icing/index/numeric/integer-index-storage_test.cc
index 9d6864c..ed7d5db 100644
--- a/icing/index/numeric/integer-index-storage_test.cc
+++ b/icing/index/numeric/integer-index-storage_test.cc
@@ -14,6 +14,8 @@
#include "icing/index/numeric/integer-index-storage.h"
+#include <unistd.h>
+
#include <cstdint>
#include <limits>
#include <memory>
@@ -26,7 +28,10 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/file/file-backed-vector.h"
+#include "icing/file/filesystem.h"
#include "icing/file/persistent-storage.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/index-block.h"
#include "icing/file/posting_list/posting-list-identifier.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
@@ -42,14 +47,17 @@ namespace lib {
namespace {
+using ::testing::Contains;
using ::testing::ElementsAre;
using ::testing::ElementsAreArray;
using ::testing::Eq;
+using ::testing::Ge;
using ::testing::Gt;
using ::testing::HasSubstr;
using ::testing::IsEmpty;
using ::testing::IsFalse;
using ::testing::IsTrue;
+using ::testing::Key;
using ::testing::Le;
using ::testing::Ne;
using ::testing::Not;
@@ -1186,6 +1194,150 @@ TEST_F(IntegerIndexStorageTest,
EqualsDocHitInfo(kDefaultDocumentId, expected_sections))));
}
+TEST_F(IntegerIndexStorageTest, SplitBuckets) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(filesystem_, working_path_, Options(),
+ serializer_.get()));
+
+ uint32_t block_size = FlashIndexStorage::SelectBlockSize();
+ uint32_t max_posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
+ block_size, serializer_->GetDataTypeBytes());
+ uint32_t max_num_data_before_split =
+ max_posting_list_bytes / serializer_->GetDataTypeBytes();
+
+ // Add max_num_data_before_split + 1 keys to invoke bucket splitting.
+ // Keys: max_num_data_before_split to 0
+ // Document ids: 0 to max_num_data_before_split
+ std::unordered_map<int64_t, DocumentId> data;
+ int64_t key = max_num_data_before_split;
+ DocumentId document_id = 0;
+ for (int i = 0; i < max_num_data_before_split + 1; ++i) {
+ data[key] = document_id;
+ ICING_ASSERT_OK(
+ storage->AddKeys(document_id, kDefaultSectionId, /*new_keys=*/{key}));
+ ++document_id;
+ --key;
+ }
+ ICING_ASSERT_OK(storage->PersistToDisk());
+
+ // Manually check sorted and unsorted buckets.
+ {
+ // Check sorted buckets.
+ const std::string sorted_buckets_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".s");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, sorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ EXPECT_THAT(sorted_buckets->num_elements(), Eq(1));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* bucket1,
+ sorted_buckets->Get(/*idx=*/0));
+ EXPECT_THAT(bucket1->key_lower(), Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(bucket1->key_upper(), Ne(std::numeric_limits<int64_t>::max()));
+
+ int64_t sorted_bucket_key_upper = bucket1->key_upper();
+
+ // Check unsorted buckets.
+ const std::string unsorted_buckets_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".u");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, unsorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ EXPECT_THAT(unsorted_buckets->num_elements(), Ge(1));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* bucket2,
+ unsorted_buckets->Get(/*idx=*/0));
+ EXPECT_THAT(bucket2->key_lower(), Eq(sorted_bucket_key_upper + 1));
+ }
+
+ // Ensure that search works normally.
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ for (int64_t key = max_num_data_before_split; key >= 0; key--) {
+ ASSERT_THAT(data, Contains(Key(key)));
+ DocumentId expected_document_id = data[key];
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/key, /*key_upper=*/key),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(expected_document_id,
+ expected_sections))));
+ }
+}
+
+TEST_F(IntegerIndexStorageTest, SplitBucketsTriggerSortBuckets) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(filesystem_, working_path_, Options(),
+ serializer_.get()));
+
+ uint32_t block_size = FlashIndexStorage::SelectBlockSize();
+ uint32_t max_posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
+ block_size, serializer_->GetDataTypeBytes());
+ uint32_t max_num_data_before_split =
+ max_posting_list_bytes / serializer_->GetDataTypeBytes();
+
+ // Add IntegerIndexStorage::kUnsortedBucketsLengthThreshold keys. For each
+ // key, add max_num_data_before_split + 1 data. Then we will get:
+ // - Bucket splitting will create kUnsortedBucketsLengthThreshold + 1 unsorted
+ // buckets [[50, 50], [49, 49], ..., [1, 1], [51, INT64_MAX]].
+ // - Since there are kUnsortedBucketsLengthThreshold + 1 unsorted buckets, we
+ // should sort and merge buckets.
+ std::unordered_map<int64_t, std::vector<DocumentId>> data;
+ int64_t key = IntegerIndexStorage::kUnsortedBucketsLengthThreshold;
+ DocumentId document_id = 0;
+ for (int i = 0; i < IntegerIndexStorage::kUnsortedBucketsLengthThreshold;
+ ++i) {
+ for (int j = 0; j < max_num_data_before_split + 1; ++j) {
+ data[key].push_back(document_id);
+ ICING_ASSERT_OK(
+ storage->AddKeys(document_id, kDefaultSectionId, /*new_keys=*/{key}));
+ ++document_id;
+ }
+ --key;
+ }
+ ICING_ASSERT_OK(storage->PersistToDisk());
+
+ // Manually check sorted and unsorted buckets.
+ {
+ // Check unsorted buckets.
+ const std::string unsorted_buckets_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".u");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, unsorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ EXPECT_THAT(unsorted_buckets->num_elements(), Eq(0));
+
+ // Check sorted buckets.
+ const std::string sorted_buckets_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".s");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, sorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ EXPECT_THAT(sorted_buckets->num_elements(), Gt(1));
+ }
+
+ // Ensure that search works normally.
+ for (key = 1; key <= IntegerIndexStorage::kUnsortedBucketsLengthThreshold;
+ ++key) {
+ ASSERT_THAT(data, Contains(Key(key)));
+
+ std::vector<DocHitInfo> expected_doc_hit_infos;
+ for (DocumentId doc_id : data[key]) {
+ expected_doc_hit_infos.push_back(DocHitInfo(
+ doc_id, /*hit_section_ids_mask=*/UINT64_C(1) << kDefaultSectionId));
+ }
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/key, /*key_upper=*/key),
+ IsOkAndHolds(ElementsAreArray(expected_doc_hit_infos.rbegin(),
+ expected_doc_hit_infos.rend())));
+ }
+}
+
TEST_F(IntegerIndexStorageTest, TransferIndex) {
// We use predefined custom buckets to initialize new integer index storage
// and create some test keys accordingly.
diff --git a/icing/index/numeric/integer-index.cc b/icing/index/numeric/integer-index.cc
index a2d40f1..2f876e4 100644
--- a/icing/index/numeric/integer-index.cc
+++ b/icing/index/numeric/integer-index.cc
@@ -14,10 +14,12 @@
#include "icing/index/numeric/integer-index.h"
+#include <algorithm>
#include <cstdint>
#include <memory>
#include <string>
#include <string_view>
+#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
@@ -27,6 +29,7 @@
#include "icing/file/destructible-directory.h"
#include "icing/file/filesystem.h"
#include "icing/file/memory-mapped-file.h"
+#include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
#include "icing/index/numeric/doc-hit-info-iterator-numeric.h"
#include "icing/index/numeric/integer-index-storage.h"
#include "icing/index/numeric/posting-list-integer-index-serializer.h"
@@ -50,6 +53,17 @@ std::string GetMetadataFilePath(std::string_view working_path) {
return absl_ports::StrCat(working_path, "/", GetMetadataFileName());
}
+constexpr std::string_view kWildcardPropertyIndexFileName =
+ "wildcard_property_index";
+
+constexpr std::string_view kWildcardPropertyStorageFileName =
+ "wildcard_property_storage";
+
+std::string GetWildcardPropertyStorageFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/",
+ kWildcardPropertyStorageFileName);
+}
+
// Helper function to get the sub working (directory) path of
// IntegerIndexStorage according to the given working directory and property
// path.
@@ -64,8 +78,9 @@ libtextclassifier3::StatusOr<std::vector<std::string>>
GetAllExistingPropertyPaths(const Filesystem& filesystem,
const std::string& working_path) {
std::vector<std::string> property_paths;
- if (!filesystem.ListDirectory(working_path.c_str(),
- /*exclude=*/{GetMetadataFileName()},
+ std::unordered_set<std::string> excludes = {
+ GetMetadataFileName(), std::string(kWildcardPropertyStorageFileName)};
+ if (!filesystem.ListDirectory(working_path.c_str(), excludes,
/*recursive=*/false, &property_paths)) {
return absl_ports::InternalError("Failed to list directory");
}
@@ -81,6 +96,9 @@ GetPropertyIntegerIndexStorageMap(
IntegerIndex::PropertyToStorageMapType property_to_storage_map;
for (const std::string& property_path : property_paths) {
+ if (property_path == kWildcardPropertyIndexFileName) {
+ continue;
+ }
std::string storage_working_path =
GetPropertyIndexStoragePath(working_path, property_path);
ICING_ASSIGN_OR_RETURN(
@@ -95,16 +113,61 @@ GetPropertyIntegerIndexStorageMap(
return property_to_storage_map;
}
+// RETURNS:
+// - On success, an unordered_set representing the list of property paths
+// stored in the WildcardPropertyStorage managed by property_storage
+// - INTERNAL_ERROR on any failure to successfully read the underlying proto.
+libtextclassifier3::StatusOr<std::unordered_set<std::string>> CreatePropertySet(
+ const FileBackedProto<WildcardPropertyStorage>& property_storage) {
+ std::unordered_set<std::string> wildcard_properties_set;
+ auto wildcard_properties_or = property_storage.Read();
+ if (!wildcard_properties_or.ok()) {
+ if (absl_ports::IsNotFound(wildcard_properties_or.status())) {
+ return wildcard_properties_set;
+ }
+ return wildcard_properties_or.status();
+ }
+
+ const WildcardPropertyStorage* wildcard_properties =
+ wildcard_properties_or.ValueOrDie();
+ wildcard_properties_set.reserve(wildcard_properties->property_entries_size());
+ for (const std::string& property : wildcard_properties->property_entries()) {
+ wildcard_properties_set.insert(property);
+ }
+ return wildcard_properties_set;
+}
+
} // namespace
libtextclassifier3::Status IntegerIndex::Editor::IndexAllBufferedKeys() && {
auto iter = integer_index_.property_to_storage_map_.find(property_path_);
IntegerIndexStorage* target_storage = nullptr;
+ // 1. Check if this property already has its own individual index.
if (iter != integer_index_.property_to_storage_map_.end()) {
target_storage = iter->second.get();
+ // 2. Check if this property was added to wildcard storage.
+ } else if (integer_index_.wildcard_properties_set_.find(property_path_) !=
+ integer_index_.wildcard_properties_set_.end()) {
+ target_storage = integer_index_.wildcard_index_storage_.get();
+ // 3. Check if we've reached the limit of individual property storages.
+ } else if (integer_index_.property_to_storage_map_.size() >=
+ kMaxPropertyStorages) {
+ // 3a. Create the wildcard storage if it doesn't exist.
+ if (integer_index_.wildcard_index_storage_ == nullptr) {
+ ICING_ASSIGN_OR_RETURN(
+ integer_index_.wildcard_index_storage_,
+ IntegerIndexStorage::Create(
+ integer_index_.filesystem_,
+ GetPropertyIndexStoragePath(integer_index_.working_path_,
+ kWildcardPropertyIndexFileName),
+ IntegerIndexStorage::Options(),
+ integer_index_.posting_list_serializer_.get()));
+ }
+ ICING_RETURN_IF_ERROR(
+ integer_index_.AddPropertyToWildcardStorage(property_path_));
+ target_storage = integer_index_.wildcard_index_storage_.get();
+ // 4. Create a new individual storage for this new property.
} else {
- // A new property path. Create a new storage instance and insert into the
- // map.
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<IntegerIndexStorage> new_storage,
IntegerIndexStorage::Create(
@@ -144,15 +207,45 @@ IntegerIndex::~IntegerIndex() {
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
IntegerIndex::GetIterator(std::string_view property_path, int64_t key_lower,
- int64_t key_upper) const {
- auto iter = property_to_storage_map_.find(std::string(property_path));
- if (iter == property_to_storage_map_.end()) {
- // Return an empty iterator.
- return std::make_unique<DocHitInfoIteratorNumeric<int64_t>>(
- /*numeric_index_iter=*/nullptr);
+ int64_t key_upper,
+ const DocumentStore& document_store,
+ const SchemaStore& schema_store) const {
+ std::string property_path_str(property_path);
+ auto iter = property_to_storage_map_.find(property_path_str);
+ if (iter != property_to_storage_map_.end()) {
+ return iter->second->GetIterator(key_lower, key_upper);
+ }
+
+ if (wildcard_properties_set_.find(property_path_str) !=
+ wildcard_properties_set_.end()) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<DocHitInfoIterator> delegate,
+ wildcard_index_storage_->GetIterator(key_lower, key_upper));
+ std::set<std::string> property_paths = {std::move(property_path_str)};
+ return std::make_unique<DocHitInfoIteratorSectionRestrict>(
+ std::move(delegate), &document_store, &schema_store,
+ std::move(property_paths));
+ }
+
+ // Return an empty iterator.
+ return std::make_unique<DocHitInfoIteratorNumeric<int64_t>>(
+ /*numeric_index_iter=*/nullptr);
+}
+
+libtextclassifier3::Status IntegerIndex::AddPropertyToWildcardStorage(
+ const std::string& property_path) {
+ WildcardPropertyStorage wildcard_properties;
+ wildcard_properties.mutable_property_entries()->Reserve(
+ wildcard_properties_set_.size());
+ for (const std::string& property_path : wildcard_properties_set_) {
+ wildcard_properties.add_property_entries(property_path);
}
+ ICING_RETURN_IF_ERROR(wildcard_property_storage_->Write(
+ std::make_unique<WildcardPropertyStorage>(
+ std::move(wildcard_properties))));
- return iter->second->GetIterator(key_lower, key_upper);
+ wildcard_properties_set_.insert(property_path);
+ return libtextclassifier3::Status::OK;
}
libtextclassifier3::Status IntegerIndex::Optimize(
@@ -183,6 +276,8 @@ libtextclassifier3::Status IntegerIndex::Optimize(
// Destruct current storage instances to safely swap directories.
metadata_mmapped_file_.reset();
property_to_storage_map_.clear();
+ wildcard_index_storage_.reset();
+ wildcard_property_storage_.reset();
if (!filesystem_.SwapFiles(temp_working_path_ddir.dir().c_str(),
working_path_.c_str())) {
return absl_ports::InternalError(
@@ -190,9 +285,10 @@ libtextclassifier3::Status IntegerIndex::Optimize(
}
// Reinitialize the integer index.
+ std::string metadata_file_path = GetMetadataFilePath(working_path_);
ICING_ASSIGN_OR_RETURN(
MemoryMappedFile metadata_mmapped_file,
- MemoryMappedFile::Create(filesystem_, GetMetadataFilePath(working_path_),
+ MemoryMappedFile::Create(filesystem_, metadata_file_path,
MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
/*max_file_size=*/kMetadataFileSize,
/*pre_mapping_file_offset=*/0,
@@ -200,6 +296,25 @@ libtextclassifier3::Status IntegerIndex::Optimize(
metadata_mmapped_file_ =
std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file));
+ // Recreate all of the data structures tracking the wildcard storage.
+ std::string wildcard_property_path =
+ GetWildcardPropertyStorageFilePath(working_path_);
+ wildcard_property_storage_ =
+ std::make_unique<FileBackedProto<WildcardPropertyStorage>>(
+ filesystem_, wildcard_property_path);
+
+ ICING_ASSIGN_OR_RETURN(wildcard_properties_set_,
+ CreatePropertySet(*wildcard_property_storage_));
+ if (!wildcard_properties_set_.empty()) {
+ ICING_ASSIGN_OR_RETURN(
+ wildcard_index_storage_,
+ IntegerIndexStorage::Create(
+ filesystem_,
+ GetPropertyIndexStoragePath(working_path_,
+ kWildcardPropertyIndexFileName),
+ IntegerIndexStorage::Options(), posting_list_serializer_.get()));
+ }
+
// Initialize all existing integer index storages.
ICING_ASSIGN_OR_RETURN(
property_to_storage_map_,
@@ -212,6 +327,7 @@ libtextclassifier3::Status IntegerIndex::Optimize(
libtextclassifier3::Status IntegerIndex::Clear() {
// Step 1: clear property_to_storage_map_.
property_to_storage_map_.clear();
+ wildcard_index_storage_.reset();
// Step 2: delete all IntegerIndexStorages. It is safe because there is no
// active IntegerIndexStorage after clearing the map.
@@ -224,6 +340,15 @@ libtextclassifier3::Status IntegerIndex::Clear() {
GetPropertyIndexStoragePath(working_path_, property_path)));
}
+ // Step 3: Delete the wildcard property storage
+ std::string wildcard_property_path =
+ GetWildcardPropertyStorageFilePath(working_path_);
+ if (filesystem_.FileExists(wildcard_property_path.c_str()) ||
+ !filesystem_.DeleteFile(wildcard_property_path.c_str())) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Unable to delete file at path ", wildcard_property_path));
+ }
+
info().last_added_document_id = kInvalidDocumentId;
return libtextclassifier3::Status::OK;
}
@@ -249,12 +374,20 @@ IntegerIndex::InitializeNewFiles(const Filesystem& filesystem,
ICING_RETURN_IF_ERROR(metadata_mmapped_file.GrowAndRemapIfNecessary(
/*file_offset=*/0, /*mmap_size=*/kMetadataFileSize));
+ std::string wildcard_property_path =
+ GetWildcardPropertyStorageFilePath(working_path);
+ auto wildcard_property_storage =
+ std::make_unique<FileBackedProto<WildcardPropertyStorage>>(
+ filesystem, wildcard_property_path);
+
// Create instance.
auto new_integer_index = std::unique_ptr<IntegerIndex>(new IntegerIndex(
filesystem, std::move(working_path),
std::make_unique<PostingListIntegerIndexSerializer>(),
std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
- /*property_to_storage_map=*/{}));
+ /*property_to_storage_map=*/{}, std::move(wildcard_property_storage),
+ /*wildcard_properties_set=*/{}, /*wildcard_index_storage=*/nullptr));
+
// Initialize info content by writing mapped memory directly.
Info& info_ref = new_integer_index->info();
info_ref.magic = Info::kMagic;
@@ -287,11 +420,33 @@ IntegerIndex::InitializeExistingFiles(const Filesystem& filesystem,
GetPropertyIntegerIndexStorageMap(filesystem, working_path,
posting_list_serializer.get()));
+ std::string wildcard_property_path =
+ GetWildcardPropertyStorageFilePath(working_path);
+ auto wildcard_property_storage =
+ std::make_unique<FileBackedProto<WildcardPropertyStorage>>(
+ filesystem, wildcard_property_path);
+
+ ICING_ASSIGN_OR_RETURN(
+ std::unordered_set<std::string> wildcard_properties_set,
+ CreatePropertySet(*wildcard_property_storage));
+
+ std::unique_ptr<IntegerIndexStorage> wildcard_index_storage;
+ if (!wildcard_properties_set.empty()) {
+ ICING_ASSIGN_OR_RETURN(
+ wildcard_index_storage,
+ IntegerIndexStorage::Create(
+ filesystem,
+ GetPropertyIndexStoragePath(working_path,
+ kWildcardPropertyIndexFileName),
+ IntegerIndexStorage::Options(), posting_list_serializer.get()));
+ }
+
// Create instance.
auto integer_index = std::unique_ptr<IntegerIndex>(new IntegerIndex(
filesystem, std::move(working_path), std::move(posting_list_serializer),
std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
- std::move(property_to_storage_map)));
+ std::move(property_to_storage_map), std::move(wildcard_property_storage),
+ std::move(wildcard_properties_set), std::move(wildcard_index_storage)));
// Initialize existing PersistentStorage. Checksums will be validated.
ICING_RETURN_IF_ERROR(integer_index->InitializeExistingStorage());
@@ -303,31 +458,78 @@ IntegerIndex::InitializeExistingFiles(const Filesystem& filesystem,
return integer_index;
}
-libtextclassifier3::Status IntegerIndex::TransferIndex(
+libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+IntegerIndex::TransferIntegerIndexStorage(
const std::vector<DocumentId>& document_id_old_to_new,
+ const IntegerIndexStorage* old_storage, const std::string& property_path,
IntegerIndex* new_integer_index) const {
- for (const auto& [property_path, old_storage] : property_to_storage_map_) {
- std::string new_storage_working_path = GetPropertyIndexStoragePath(
- new_integer_index->working_path_, property_path);
- ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<IntegerIndexStorage> new_storage,
- IntegerIndexStorage::Create(
- new_integer_index->filesystem_, new_storage_working_path,
- IntegerIndexStorage::Options(),
- new_integer_index->posting_list_serializer_.get()));
+ std::string new_storage_working_path = GetPropertyIndexStoragePath(
+ new_integer_index->working_path_, property_path);
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(
+ new_integer_index->filesystem_, new_storage_working_path,
+ IntegerIndexStorage::Options(),
+ new_integer_index->posting_list_serializer_.get()));
+
+ ICING_RETURN_IF_ERROR(
+ old_storage->TransferIndex(document_id_old_to_new, new_storage.get()));
+ if (new_storage->num_data() == 0) {
+ new_storage.reset();
ICING_RETURN_IF_ERROR(
- old_storage->TransferIndex(document_id_old_to_new, new_storage.get()));
+ IntegerIndexStorage::Discard(filesystem_, new_storage_working_path));
+ }
+ return new_storage;
+}
+
+libtextclassifier3::Status IntegerIndex::TransferWildcardStorage(
+ IntegerIndex* new_integer_index) const {
+ auto property_storage = std::make_unique<WildcardPropertyStorage>();
+ property_storage->mutable_property_entries()->Reserve(
+ wildcard_properties_set_.size());
+ for (const std::string& property : wildcard_properties_set_) {
+ property_storage->add_property_entries(property);
+ }
+
+ ICING_RETURN_IF_ERROR(new_integer_index->wildcard_property_storage_->Write(
+ std::move(property_storage)));
+ new_integer_index->wildcard_properties_set_ = wildcard_properties_set_;
+ return libtextclassifier3::Status::OK;
+}
- if (new_storage->num_data() == 0) {
- new_storage.reset();
- ICING_RETURN_IF_ERROR(
- IntegerIndexStorage::Discard(filesystem_, new_storage_working_path));
- } else {
+libtextclassifier3::Status IntegerIndex::TransferIndex(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ IntegerIndex* new_integer_index) const {
+ // Transfer over the integer index storages
+ std::unique_ptr<IntegerIndexStorage> new_storage;
+ for (const auto& [property_path, old_storage] : property_to_storage_map_) {
+ ICING_ASSIGN_OR_RETURN(
+ new_storage,
+ TransferIntegerIndexStorage(document_id_old_to_new, old_storage.get(),
+ property_path, new_integer_index));
+ if (new_storage != nullptr) {
new_integer_index->property_to_storage_map_.insert(
- std::make_pair(property_path, std::move(new_storage)));
+ {property_path, std::move(new_storage)});
}
}
+ if (wildcard_index_storage_ != nullptr) {
+ ICING_ASSIGN_OR_RETURN(
+ new_storage,
+ TransferIntegerIndexStorage(
+ document_id_old_to_new, wildcard_index_storage_.get(),
+ std::string(kWildcardPropertyIndexFileName), new_integer_index));
+ if (new_storage != nullptr) {
+ new_integer_index->wildcard_index_storage_ = std::move(new_storage);
+
+ // The only time we need to copy over the list of properties using
+ // wildcard storage is if wildcard_index_storage and new_storage are both
+ // non-null. Otherwise, the new wildcard index storage won't have any
+ // data.
+ ICING_RETURN_IF_ERROR(TransferWildcardStorage(new_integer_index));
+ }
+ }
+
return libtextclassifier3::Status::OK;
}
@@ -335,6 +537,11 @@ libtextclassifier3::Status IntegerIndex::PersistStoragesToDisk() {
for (auto& [_, storage] : property_to_storage_map_) {
ICING_RETURN_IF_ERROR(storage->PersistToDisk());
}
+ // No need to persist wildcard_property_storage_. All calls to
+ // FileBackedProto::Write are fully written through at the time of the call.
+ if (wildcard_index_storage_) {
+ ICING_RETURN_IF_ERROR(wildcard_index_storage_->PersistToDisk());
+ }
return libtextclassifier3::Status::OK;
}
@@ -350,8 +557,8 @@ libtextclassifier3::StatusOr<Crc32> IntegerIndex::ComputeInfoChecksum() {
}
libtextclassifier3::StatusOr<Crc32> IntegerIndex::ComputeStoragesChecksum() {
- // XOR all crcs of all storages. Since XOR is commutative and associative, the
- // order doesn't matter.
+ // XOR all crcs of all storages. Since XOR is commutative and associative,
+ // the order doesn't matter.
uint32_t storages_checksum = 0;
for (auto& [property_path, storage] : property_to_storage_map_) {
ICING_ASSIGN_OR_RETURN(Crc32 storage_crc, storage->UpdateChecksums());
@@ -359,6 +566,17 @@ libtextclassifier3::StatusOr<Crc32> IntegerIndex::ComputeStoragesChecksum() {
storages_checksum ^= storage_crc.Get();
}
+
+ if (wildcard_index_storage_ != nullptr) {
+ ICING_ASSIGN_OR_RETURN(Crc32 storage_crc,
+ wildcard_index_storage_->UpdateChecksums());
+ storages_checksum ^= storage_crc.Get();
+ }
+
+ ICING_ASSIGN_OR_RETURN(Crc32 wildcard_properties_crc,
+ wildcard_property_storage_->ComputeChecksum());
+ storages_checksum ^= wildcard_properties_crc.Get();
+
return Crc32(storages_checksum);
}
diff --git a/icing/index/numeric/integer-index.h b/icing/index/numeric/integer-index.h
index 050a143..303bb41 100644
--- a/icing/index/numeric/integer-index.h
+++ b/icing/index/numeric/integer-index.h
@@ -23,12 +23,16 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/file-backed-proto.h"
#include "icing/file/filesystem.h"
#include "icing/file/memory-mapped-file.h"
#include "icing/index/numeric/integer-index-storage.h"
#include "icing/index/numeric/numeric-index.h"
#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/index/numeric/wildcard-property-storage.pb.h"
+#include "icing/schema/schema-store.h"
#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
#include "icing/util/crc32.h"
namespace icing {
@@ -46,6 +50,11 @@ class IntegerIndex : public NumericIndex<int64_t> {
using PropertyToStorageMapType =
std::unordered_map<std::string, std::unique_ptr<IntegerIndexStorage>>;
+ // Maximum number of individual property storages that this index will allow
+ // before falling back to placing hits for any new properties into the
+ // 'wildcard' storage.
+ static constexpr int kMaxPropertyStorages = 32;
+
struct Info {
static constexpr int32_t kMagic = 0x238a3dcb;
@@ -125,8 +134,9 @@ class IntegerIndex : public NumericIndex<int64_t> {
// - NOT_FOUND_ERROR if the given property_path doesn't exist
// - Any IntegerIndexStorage errors
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> GetIterator(
- std::string_view property_path, int64_t key_lower,
- int64_t key_upper) const override;
+ std::string_view property_path, int64_t key_lower, int64_t key_upper,
+ const DocumentStore& document_store,
+ const SchemaStore& schema_store) const override;
// Reduces internal file sizes by reclaiming space and ids of deleted
// documents. Integer index will convert all data (hits) to the new document
@@ -165,6 +175,11 @@ class IntegerIndex : public NumericIndex<int64_t> {
}
}
+ int num_property_indices() const override {
+ return property_to_storage_map_.size() +
+ ((wildcard_index_storage_ == nullptr) ? 0 : 1);
+ }
+
private:
class Editor : public NumericIndex<int64_t>::Editor {
public:
@@ -191,17 +206,24 @@ class IntegerIndex : public NumericIndex<int64_t> {
IntegerIndex& integer_index_; // Does not own.
};
- explicit IntegerIndex(const Filesystem& filesystem,
- std::string&& working_path,
- std::unique_ptr<PostingListIntegerIndexSerializer>
- posting_list_serializer,
- std::unique_ptr<MemoryMappedFile> metadata_mmapped_file,
- PropertyToStorageMapType&& property_to_storage_map)
+ explicit IntegerIndex(
+ const Filesystem& filesystem, std::string&& working_path,
+ std::unique_ptr<PostingListIntegerIndexSerializer>
+ posting_list_serializer,
+ std::unique_ptr<MemoryMappedFile> metadata_mmapped_file,
+ PropertyToStorageMapType&& property_to_storage_map,
+ std::unique_ptr<FileBackedProto<WildcardPropertyStorage>>
+ wildcard_property_storage,
+ std::unordered_set<std::string> wildcard_properties_set,
+ std::unique_ptr<icing::lib::IntegerIndexStorage> wildcard_index_storage)
: NumericIndex<int64_t>(filesystem, std::move(working_path),
kWorkingPathType),
posting_list_serializer_(std::move(posting_list_serializer)),
metadata_mmapped_file_(std::move(metadata_mmapped_file)),
- property_to_storage_map_(std::move(property_to_storage_map)) {}
+ property_to_storage_map_(std::move(property_to_storage_map)),
+ wildcard_property_storage_(std::move(wildcard_property_storage)),
+ wildcard_properties_set_(std::move(wildcard_properties_set)),
+ wildcard_index_storage_(std::move(wildcard_index_storage)) {}
static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path);
@@ -210,6 +232,17 @@ class IntegerIndex : public NumericIndex<int64_t> {
InitializeExistingFiles(const Filesystem& filesystem,
std::string&& working_path);
+ // Adds the property path to the list of properties using wildcard storage.
+ // This will both update the in-memory list (wildcard_properties_set_) and
+ // the persistent list (wildcard_property_storage_).
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR if unable to successfully persist updated properties
+ // list in wildcard_property_storage_.
+ libtextclassifier3::Status AddPropertyToWildcardStorage(
+ const std::string& property_path);
+
// Transfers integer index data from the current integer index to
// new_integer_index.
//
@@ -222,6 +255,29 @@ class IntegerIndex : public NumericIndex<int64_t> {
const std::vector<DocumentId>& document_id_old_to_new,
IntegerIndex* new_integer_index) const;
+ // Transfers integer index data from old_storage to new_integer_index.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error. This could potentially leave the storages
+ // in an invalid state and the caller should handle it properly (e.g.
+ // discard and rebuild)
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ TransferIntegerIndexStorage(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ const IntegerIndexStorage* old_storage, const std::string& property_path,
+ IntegerIndex* new_integer_index) const;
+
+ // Transfers the persistent and in-memory list of properties using the
+ // wildcard storage from this integer index to new_integer_index.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR if unable to successfully persist updated properties
+ // list in new_integer_index.
+ libtextclassifier3::Status TransferWildcardStorage(
+ IntegerIndex* new_integer_index) const;
+
// Flushes contents of all storages to underlying files.
//
// Returns:
@@ -277,6 +333,19 @@ class IntegerIndex : public NumericIndex<int64_t> {
// Property path to integer index storage map.
PropertyToStorageMapType property_to_storage_map_;
+
+ // Persistent list of properties that have added content to
+ // wildcard_index_storage_.
+ std::unique_ptr<FileBackedProto<WildcardPropertyStorage>>
+ wildcard_property_storage_;
+
+ // In-memory list of properties that have added content to
+ // wildcard_index_storage_.
+ std::unordered_set<std::string> wildcard_properties_set_;
+
+ // The index storage that is used once we have already created
+ // kMaxPropertyStorages in property_to_storage_map.
+ std::unique_ptr<icing::lib::IntegerIndexStorage> wildcard_index_storage_;
};
} // namespace lib
diff --git a/icing/index/numeric/integer-index_test.cc b/icing/index/numeric/integer-index_test.cc
index c6cf855..c4dacb8 100644
--- a/icing/index/numeric/integer-index_test.cc
+++ b/icing/index/numeric/integer-index_test.cc
@@ -25,6 +25,7 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
@@ -32,6 +33,9 @@
#include "icing/index/numeric/integer-index-storage.h"
#include "icing/index/numeric/numeric-index.h"
#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/testing/common-matchers.h"
@@ -68,9 +72,25 @@ class NumericIndexIntegerTest : public ::testing::Test {
IsTrue());
working_path_ = base_dir_ + "/numeric_index_integer_test";
+ std::string schema_dir = base_dir_ + "/schema_test";
+
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(schema_dir.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_, SchemaStore::Create(&filesystem_, schema_dir, &clock_));
+
+ std::string document_store_dir = base_dir_ + "/doc_store_test";
+ ASSERT_TRUE(
+ filesystem_.CreateDirectoryRecursively(document_store_dir.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult doc_store_create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir, &clock_,
+ schema_store_.get()));
+ doc_store_ = std::move(doc_store_create_result.document_store);
}
void TearDown() override {
+ doc_store_.reset();
+ schema_store_.reset();
filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
}
@@ -92,9 +112,67 @@ class NumericIndexIntegerTest : public ::testing::Test {
return IntegerIndex::Create(filesystem_, working_path_);
}
+ template <typename NotIntegerIndexType>
+ bool is_integer_index() const {
+ return false;
+ }
+
+ template <>
+ bool is_integer_index<IntegerIndex>() const {
+ return true;
+ }
+
+ libtextclassifier3::StatusOr<std::vector<DocumentId>> CompactDocStore() {
+ std::string document_store_dir = base_dir_ + "/doc_store_test";
+ std::string document_store_compact_dir =
+ base_dir_ + "/doc_store_compact_test";
+ if (!filesystem_.CreateDirectoryRecursively(
+ document_store_compact_dir.c_str())) {
+ return absl_ports::InternalError("Unable to create compact directory");
+ }
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<DocumentId> docid_map,
+ doc_store_->OptimizeInto(document_store_compact_dir, nullptr));
+
+ doc_store_.reset();
+ if (!filesystem_.SwapFiles(document_store_dir.c_str(),
+ document_store_compact_dir.c_str())) {
+ return absl_ports::InternalError("Unable to swap directories.");
+ }
+ if (!filesystem_.DeleteDirectoryRecursively(
+ document_store_compact_dir.c_str())) {
+ return absl_ports::InternalError("Unable to delete compact directory");
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ DocumentStore::CreateResult doc_store_create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir, &clock_,
+ schema_store_.get()));
+ doc_store_ = std::move(doc_store_create_result.document_store);
+ return docid_map;
+ }
+
+ libtextclassifier3::StatusOr<std::vector<DocHitInfo>> Query(
+ const NumericIndex<int64_t>* integer_index,
+ std::string_view property_path, int64_t key_lower, int64_t key_upper) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<DocHitInfoIterator> iter,
+ integer_index->GetIterator(property_path, key_lower, key_upper,
+ *doc_store_, *schema_store_));
+
+ std::vector<DocHitInfo> result;
+ while (iter->Advance().ok()) {
+ result.push_back(iter->doc_hit_info());
+ }
+ return result;
+ }
+
Filesystem filesystem_;
std::string base_dir_;
std::string working_path_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> doc_store_;
+ Clock clock_;
};
void Index(NumericIndex<int64_t>* integer_index, std::string_view property_path,
@@ -109,20 +187,6 @@ void Index(NumericIndex<int64_t>* integer_index, std::string_view property_path,
ICING_EXPECT_OK(std::move(*editor).IndexAllBufferedKeys());
}
-libtextclassifier3::StatusOr<std::vector<DocHitInfo>> Query(
- const NumericIndex<int64_t>* integer_index, std::string_view property_path,
- int64_t key_lower, int64_t key_upper) {
- ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<DocHitInfoIterator> iter,
- integer_index->GetIterator(property_path, key_lower, key_upper));
-
- std::vector<DocHitInfo> result;
- while (iter->Advance().ok()) {
- result.push_back(iter->doc_hit_info());
- }
- return result;
-}
-
using TestTypes = ::testing::Types<DummyNumericIndex<int64_t>, IntegerIndex>;
TYPED_TEST_SUITE(NumericIndexIntegerTest, TestTypes);
@@ -180,8 +244,8 @@ TYPED_TEST(NumericIndexIntegerTest, SingleKeyExactQuery) {
int64_t query_key = 2;
std::vector<SectionId> expected_sections = {kDefaultSectionId};
- EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
- /*key_lower=*/query_key, /*key_upper=*/query_key),
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/query_key, /*key_upper=*/query_key),
IsOkAndHolds(ElementsAre(
EqualsDocHitInfo(/*document_id=*/5, expected_sections),
EqualsDocHitInfo(/*document_id=*/2, expected_sections))));
@@ -206,8 +270,8 @@ TYPED_TEST(NumericIndexIntegerTest, SingleKeyRangeQuery) {
kDefaultSectionId, /*keys=*/{2});
std::vector<SectionId> expected_sections = {kDefaultSectionId};
- EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
- /*key_lower=*/1, /*key_upper=*/3),
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/1, /*key_upper=*/3),
IsOkAndHolds(ElementsAre(
EqualsDocHitInfo(/*document_id=*/5, expected_sections),
EqualsDocHitInfo(/*document_id=*/2, expected_sections),
@@ -215,6 +279,258 @@ TYPED_TEST(NumericIndexIntegerTest, SingleKeyRangeQuery) {
EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
}
+TYPED_TEST(NumericIndexIntegerTest, WildcardStorageQuery) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ // This test sets its schema assuming that max property storages == 32.
+ ASSERT_THAT(IntegerIndex::kMaxPropertyStorages, Eq(32));
+
+ PropertyConfigProto int_property_config =
+ PropertyConfigBuilder()
+ .SetName("otherProperty1")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .Build();
+ // Create a schema with two types:
+ // - TypeA has 34 properties:
+ // 'desiredProperty', 'otherProperty'*, 'undesiredProperty'
+ // - TypeB has 2 properties: 'anotherProperty', 'desiredProperty'
+ // 1. The 32 'otherProperty's will consume all of the individual storages
+ // 2. TypeA.desiredProperty and TypeB.anotherProperty will both be assigned
+ // SectionId = 0 for their respective types.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(int_property_config)
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty2"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty3"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty4"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty5"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty6"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty7"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty8"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty9"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty10"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty11"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty12"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty13"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty14"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty15"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty16"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty17"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty18"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty19"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty20"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty21"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty22"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty23"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty24"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty25"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty26"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty27"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty28"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty29"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty30"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty31"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty32"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("desiredProperty"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("undesiredProperty")))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeB")
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("anotherProperty"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("desiredProperty")))
+ .Build();
+ ICING_ASSERT_OK(this->schema_store_->SetSchema(schema));
+
+ // Put 11 docs of "TypeA" into the document store.
+ DocumentProto doc =
+ DocumentBuilder().SetKey("ns1", "uri0").SetSchema("TypeA").Build();
+ ICING_ASSERT_OK(this->doc_store_->Put(doc));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri1").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri2").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri3").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri4").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri5").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri6").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri7").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri8").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri9").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri10").Build()));
+
+ // Put 10 docs of "TypeB" into the document store.
+ doc = DocumentBuilder(doc).SetUri("uri11").SetSchema("TypeB").Build();
+ ICING_ASSERT_OK(this->doc_store_->Put(doc));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri12").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri13").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri14").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri15").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri16").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri17").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri18").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri19").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri20").Build()));
+
+ // Ids are assigned alphabetically, so the property ids are:
+ // TypeA.desiredProperty = 0
+ // TypeA.otherPropertyN = N
+ // TypeA.undesiredProperty = 33
+ // TypeB.anotherProperty = 0
+ // TypeB.desiredProperty = 1
+ SectionId typea_desired_prop_id = 0;
+ SectionId typea_undesired_prop_id = 33;
+ SectionId typeb_another_prop_id = 0;
+ SectionId typeb_desired_prop_id = 1;
+
+ // Index numeric content for other properties to force our property into the
+ // wildcard storage.
+ std::string other_property_path = "otherProperty";
+ for (int i = 1; i <= IntegerIndex::kMaxPropertyStorages; ++i) {
+ Index(integer_index.get(),
+ absl_ports::StrCat(other_property_path, std::to_string(i)),
+ /*document_id=*/0, /*section_id=*/i, /*keys=*/{i});
+ }
+
+ // Index numeric content for TypeA.desiredProperty
+ std::string desired_property = "desiredProperty";
+ Index(integer_index.get(), desired_property, /*document_id=*/0,
+ typea_desired_prop_id, /*keys=*/{1});
+ Index(integer_index.get(), desired_property, /*document_id=*/1,
+ typea_desired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), desired_property, /*document_id=*/2,
+ typea_desired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), desired_property, /*document_id=*/3,
+ typea_desired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), desired_property, /*document_id=*/4,
+ typea_desired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), desired_property, /*document_id=*/5,
+ typea_desired_prop_id, /*keys=*/{2});
+
+ // Index the same numeric content for TypeA.undesiredProperty
+ std::string undesired_property = "undesiredProperty";
+ Index(integer_index.get(), undesired_property, /*document_id=*/6,
+ typea_undesired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), undesired_property, /*document_id=*/7,
+ typea_undesired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), undesired_property, /*document_id=*/8,
+ typea_undesired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), undesired_property, /*document_id=*/9,
+ typea_undesired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), undesired_property, /*document_id=*/10,
+ typea_undesired_prop_id, /*keys=*/{2});
+
+ // Index the same numeric content for TypeB.anotherProperty
+ std::string another_property = "anotherProperty";
+ Index(integer_index.get(), another_property, /*document_id=*/11,
+ typeb_another_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), another_property, /*document_id=*/12,
+ typeb_another_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), another_property, /*document_id=*/13,
+ typeb_another_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), another_property, /*document_id=*/14,
+ typeb_another_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), another_property, /*document_id=*/15,
+ typeb_another_prop_id, /*keys=*/{2});
+
+ // Finally, index the same numeric content for TypeB.desiredProperty
+ Index(integer_index.get(), desired_property, /*document_id=*/16,
+ typeb_desired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), desired_property, /*document_id=*/17,
+ typeb_desired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), desired_property, /*document_id=*/18,
+ typeb_desired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), desired_property, /*document_id=*/19,
+ typeb_desired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), desired_property, /*document_id=*/20,
+ typeb_desired_prop_id, /*keys=*/{2});
+
+ if (this->template is_integer_index<TypeParam>()) {
+ EXPECT_THAT(integer_index->num_property_indices(), Eq(33));
+ } else {
+ EXPECT_THAT(integer_index->num_property_indices(), Eq(35));
+ }
+
+ // Only the hits for the 'desiredProperty' sections should be returned.
+ std::vector<SectionId> expected_sections_typea = {typea_desired_prop_id};
+ std::vector<SectionId> expected_sections_typeb = {typeb_desired_prop_id};
+ EXPECT_THAT(
+ this->Query(integer_index.get(), desired_property,
+ /*key_lower=*/2, /*key_upper=*/2),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/20, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/17, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections_typea))));
+
+ EXPECT_THAT(
+ this->Query(integer_index.get(), desired_property,
+ /*key_lower=*/1, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/20, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/17, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/16, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections_typea))));
+}
+
TYPED_TEST(NumericIndexIntegerTest, EmptyResult) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<NumericIndex<int64_t>> integer_index,
@@ -233,11 +549,11 @@ TYPED_TEST(NumericIndexIntegerTest, EmptyResult) {
Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
kDefaultSectionId, /*keys=*/{2});
- EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
- /*key_lower=*/10, /*key_upper=*/10),
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/10, /*key_upper=*/10),
IsOkAndHolds(IsEmpty()));
- EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
- /*key_lower=*/100, /*key_upper=*/200),
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/100, /*key_upper=*/200),
IsOkAndHolds(IsEmpty()));
}
@@ -252,8 +568,8 @@ TYPED_TEST(NumericIndexIntegerTest,
Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
kDefaultSectionId, /*keys=*/{1});
- EXPECT_THAT(Query(integer_index.get(), kAnotherPropertyPath,
- /*key_lower=*/100, /*key_upper=*/200),
+ EXPECT_THAT(this->Query(integer_index.get(), kAnotherPropertyPath,
+ /*key_lower=*/100, /*key_upper=*/200),
IsOkAndHolds(IsEmpty()));
}
@@ -286,8 +602,8 @@ TYPED_TEST(NumericIndexIntegerTest,
kDefaultSectionId, /*keys=*/{4, -1000});
std::vector<SectionId> expected_sections = {kDefaultSectionId};
- EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
- /*key_lower=*/1, /*key_upper=*/3),
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/1, /*key_upper=*/3),
IsOkAndHolds(ElementsAre(
EqualsDocHitInfo(/*document_id=*/6, expected_sections),
EqualsDocHitInfo(/*document_id=*/5, expected_sections),
@@ -326,39 +642,39 @@ TYPED_TEST(NumericIndexIntegerTest, EdgeNumericValues) {
std::vector<SectionId> expected_sections = {kDefaultSectionId};
// Negative key
- EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
- /*key_lower=*/-100, /*key_upper=*/-70),
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/-100, /*key_upper=*/-70),
IsOkAndHolds(ElementsAre(
EqualsDocHitInfo(/*document_id=*/2, expected_sections),
EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
// INT64_MAX key
- EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
- /*key_lower=*/std::numeric_limits<int64_t>::max(),
- /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::max(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
IsOkAndHolds(ElementsAre(
EqualsDocHitInfo(/*document_id=*/7, expected_sections),
EqualsDocHitInfo(/*document_id=*/3, expected_sections))));
// INT64_MIN key
- EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
- /*key_lower=*/std::numeric_limits<int64_t>::min(),
- /*key_upper=*/std::numeric_limits<int64_t>::min()),
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::min()),
IsOkAndHolds(ElementsAre(
EqualsDocHitInfo(/*document_id=*/9, expected_sections),
EqualsDocHitInfo(/*document_id=*/4, expected_sections))));
// Key = 0
- EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
- /*key_lower=*/0, /*key_upper=*/0),
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/0, /*key_upper=*/0),
IsOkAndHolds(ElementsAre(
EqualsDocHitInfo(/*document_id=*/8, expected_sections),
EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
// All keys from INT64_MIN to INT64_MAX
- EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
- /*key_lower=*/std::numeric_limits<int64_t>::min(),
- /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
IsOkAndHolds(ElementsAre(
EqualsDocHitInfo(/*document_id=*/9, expected_sections),
EqualsDocHitInfo(/*document_id=*/8, expected_sections),
@@ -404,8 +720,9 @@ TYPED_TEST(NumericIndexIntegerTest,
/*section_id=*/3, /*keys=*/{5});
EXPECT_THAT(
- Query(integer_index.get(), kDefaultTestPropertyPath, /*key_lower=*/1,
- /*key_upper=*/3),
+ this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/1,
+ /*key_upper=*/3),
IsOkAndHolds(ElementsAre(
EqualsDocHitInfo(/*document_id=*/2, std::vector<SectionId>{4, 5}),
EqualsDocHitInfo(/*document_id=*/1, std::vector<SectionId>{1, 2}),
@@ -433,8 +750,8 @@ TYPED_TEST(NumericIndexIntegerTest, NonRelevantPropertyShouldNotBeIncluded) {
kDefaultSectionId, /*keys=*/{2});
std::vector<SectionId> expected_sections = {kDefaultSectionId};
- EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
- /*key_lower=*/1, /*key_upper=*/3),
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/1, /*key_upper=*/3),
IsOkAndHolds(ElementsAre(
EqualsDocHitInfo(/*document_id=*/5, expected_sections),
EqualsDocHitInfo(/*document_id=*/1, expected_sections),
@@ -460,8 +777,8 @@ TYPED_TEST(NumericIndexIntegerTest,
Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
kDefaultSectionId, /*keys=*/{2});
- EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
- /*key_lower=*/3, /*key_upper=*/1),
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/3, /*key_upper=*/1),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
@@ -499,30 +816,30 @@ TYPED_TEST(NumericIndexIntegerTest, Optimize) {
// Verify index and query API still work normally after Optimize().
std::vector<SectionId> expected_sections = {kDefaultSectionId};
- EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
- /*key_lower=*/1, /*key_upper=*/1),
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/1, /*key_upper=*/1),
IsOkAndHolds(ElementsAre(
EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
- EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
- /*key_lower=*/3, /*key_upper=*/3),
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/3, /*key_upper=*/3),
IsOkAndHolds(ElementsAre(
EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
- EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
- /*key_lower=*/0, /*key_upper=*/0),
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/0, /*key_upper=*/0),
IsOkAndHolds(IsEmpty()));
- EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
- /*key_lower=*/4, /*key_upper=*/4),
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/4, /*key_upper=*/4),
IsOkAndHolds(ElementsAre(
EqualsDocHitInfo(/*document_id=*/2, expected_sections))));
- EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
- /*key_lower=*/2, /*key_upper=*/2),
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/2, /*key_upper=*/2),
IsOkAndHolds(ElementsAre(
EqualsDocHitInfo(/*document_id=*/3, expected_sections))));
Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
kDefaultSectionId, /*keys=*/{123});
- EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
- /*key_lower=*/123, /*key_upper=*/123),
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/123, /*key_upper=*/123),
IsOkAndHolds(ElementsAre(
EqualsDocHitInfo(/*document_id=*/5, expected_sections))));
}
@@ -581,40 +898,40 @@ TYPED_TEST(NumericIndexIntegerTest, OptimizeMultiplePropertyPaths) {
// Verify index and query API still work normally after Optimize().
// Key = 1
- EXPECT_THAT(Query(integer_index.get(), kPropertyPath1, /*key_lower=*/1,
- /*key_upper=*/1),
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath1, /*key_lower=*/1,
+ /*key_upper=*/1),
IsOkAndHolds(IsEmpty()));
- EXPECT_THAT(Query(integer_index.get(), kPropertyPath2, /*key_lower=*/1,
- /*key_upper=*/1),
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath2, /*key_lower=*/1,
+ /*key_upper=*/1),
IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
/*document_id=*/0, std::vector<SectionId>{kSectionId2}))));
// key = 2
- EXPECT_THAT(Query(integer_index.get(), kPropertyPath1, /*key_lower=*/2,
- /*key_upper=*/2),
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath1, /*key_lower=*/2,
+ /*key_upper=*/2),
IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
/*document_id=*/0, std::vector<SectionId>{kSectionId1}))));
- EXPECT_THAT(Query(integer_index.get(), kPropertyPath2, /*key_lower=*/2,
- /*key_upper=*/2),
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath2, /*key_lower=*/2,
+ /*key_upper=*/2),
IsOkAndHolds(IsEmpty()));
// key = 3
- EXPECT_THAT(Query(integer_index.get(), kPropertyPath1, /*key_lower=*/3,
- /*key_upper=*/3),
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath1, /*key_lower=*/3,
+ /*key_upper=*/3),
IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
/*document_id=*/1, std::vector<SectionId>{kSectionId1}))));
- EXPECT_THAT(Query(integer_index.get(), kPropertyPath2, /*key_lower=*/3,
- /*key_upper=*/3),
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath2, /*key_lower=*/3,
+ /*key_upper=*/3),
IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
/*document_id=*/2, std::vector<SectionId>{kSectionId2}))));
// key = 4
- EXPECT_THAT(Query(integer_index.get(), kPropertyPath1, /*key_lower=*/4,
- /*key_upper=*/4),
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath1, /*key_lower=*/4,
+ /*key_upper=*/4),
IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
/*document_id=*/3, std::vector<SectionId>{kSectionId1}))));
- EXPECT_THAT(Query(integer_index.get(), kPropertyPath2, /*key_lower=*/4,
- /*key_upper=*/4),
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath2, /*key_lower=*/4,
+ /*key_upper=*/4),
IsOkAndHolds(IsEmpty()));
}
@@ -655,9 +972,9 @@ TYPED_TEST(NumericIndexIntegerTest, OptimizeShouldDiscardEmptyPropertyStorage) {
// All data in "prop2" as well as the underlying storage should be deleted, so
// when querying "prop2", we should get empty result.
- EXPECT_THAT(Query(integer_index.get(), kPropertyPath2,
- /*key_lower=*/std::numeric_limits<int64_t>::min(),
- /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath2,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
IsOkAndHolds(IsEmpty()));
if (std::is_same_v<IntegerIndex, TypeParam>) {
std::string prop2_storage_working_path =
@@ -670,8 +987,8 @@ TYPED_TEST(NumericIndexIntegerTest, OptimizeShouldDiscardEmptyPropertyStorage) {
// Verify we can still index and query for "prop2".
Index(integer_index.get(), kPropertyPath2, /*document_id=*/100, kSectionId2,
/*keys=*/{123});
- EXPECT_THAT(Query(integer_index.get(), kPropertyPath2,
- /*key_lower=*/123, /*key_upper=*/123),
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath2,
+ /*key_lower=*/123, /*key_upper=*/123),
IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
/*document_id=*/100, std::vector<SectionId>{kSectionId2}))));
}
@@ -697,9 +1014,9 @@ TYPED_TEST(NumericIndexIntegerTest, OptimizeOutOfRangeDocumentId) {
EXPECT_THAT(integer_index->last_added_document_id(), Eq(kInvalidDocumentId));
// Verify all data are discarded after Optimize().
- EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
- /*key_lower=*/std::numeric_limits<int64_t>::min(),
- /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
IsOkAndHolds(IsEmpty()));
}
@@ -731,9 +1048,9 @@ TYPED_TEST(NumericIndexIntegerTest, OptimizeDeleteAll) {
EXPECT_THAT(integer_index->last_added_document_id(), Eq(kInvalidDocumentId));
// Verify all data are discarded after Optimize().
- EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
- /*key_lower=*/std::numeric_limits<int64_t>::min(),
- /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
IsOkAndHolds(IsEmpty()));
}
@@ -750,13 +1067,13 @@ TYPED_TEST(NumericIndexIntegerTest, Clear) {
ASSERT_THAT(integer_index->last_added_document_id(), Eq(1));
ASSERT_THAT(
- Query(integer_index.get(), /*property_path=*/"A", /*key_lower=*/1,
- /*key_upper=*/1),
+ this->Query(integer_index.get(), /*property_path=*/"A", /*key_lower=*/1,
+ /*key_upper=*/1),
IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
/*document_id=*/0, std::vector<SectionId>{kDefaultSectionId}))));
ASSERT_THAT(
- Query(integer_index.get(), /*property_path=*/"B", /*key_lower=*/3,
- /*key_upper=*/3),
+ this->Query(integer_index.get(), /*property_path=*/"B", /*key_lower=*/3,
+ /*key_upper=*/3),
IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
/*document_id=*/1, std::vector<SectionId>{kDefaultSectionId}))));
@@ -764,12 +1081,14 @@ TYPED_TEST(NumericIndexIntegerTest, Clear) {
// kInvalidDocumentId, and the previous added keys should be deleted.
ICING_ASSERT_OK(integer_index->Clear());
EXPECT_THAT(integer_index->last_added_document_id(), Eq(kInvalidDocumentId));
- EXPECT_THAT(Query(integer_index.get(), /*property_path=*/"A", /*key_lower=*/1,
- /*key_upper=*/1),
- IsOkAndHolds(IsEmpty()));
- EXPECT_THAT(Query(integer_index.get(), /*property_path=*/"B", /*key_lower=*/3,
- /*key_upper=*/3),
- IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ this->Query(integer_index.get(), /*property_path=*/"A", /*key_lower=*/1,
+ /*key_upper=*/1),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ this->Query(integer_index.get(), /*property_path=*/"B", /*key_lower=*/3,
+ /*key_upper=*/3),
+ IsOkAndHolds(IsEmpty()));
// Integer index should be able to work normally after Clear().
Index(integer_index.get(), /*property_path=*/"A", /*document_id=*/3,
@@ -780,13 +1099,13 @@ TYPED_TEST(NumericIndexIntegerTest, Clear) {
EXPECT_THAT(integer_index->last_added_document_id(), Eq(4));
EXPECT_THAT(
- Query(integer_index.get(), /*property_path=*/"A", /*key_lower=*/123,
- /*key_upper=*/123),
+ this->Query(integer_index.get(), /*property_path=*/"A", /*key_lower=*/123,
+ /*key_upper=*/123),
IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
/*document_id=*/3, std::vector<SectionId>{kDefaultSectionId}))));
EXPECT_THAT(
- Query(integer_index.get(), /*property_path=*/"B", /*key_lower=*/456,
- /*key_upper=*/456),
+ this->Query(integer_index.get(), /*property_path=*/"B", /*key_lower=*/456,
+ /*key_upper=*/456),
IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
/*document_id=*/4, std::vector<SectionId>{kDefaultSectionId}))));
}
@@ -1066,6 +1385,260 @@ TEST_F(IntegerIndexTest,
HasSubstr("Invalid storages crc"));
}
}
+
+TEST_F(IntegerIndexTest, WildcardStoragePersistenceQuery) {
+ // This test sets its schema assuming that max property storages == 32.
+ ASSERT_THAT(IntegerIndex::kMaxPropertyStorages, Eq(32));
+
+ PropertyConfigProto int_property_config =
+ PropertyConfigBuilder()
+ .SetName("otherProperty1")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .Build();
+ // Create a schema with two types:
+ // - TypeA has 34 properties:
+ // 'desiredProperty', 'otherProperty'*, 'undesiredProperty'
+ // - TypeB has 2 properties: 'anotherProperty', 'desiredProperty'
+ // 1. The 32 'otherProperty's will consume all of the individual storages
+ // 2. TypeA.desiredProperty and TypeB.anotherProperty will both be assigned
+ // SectionId = 0 for their respective types.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(int_property_config)
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty2"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty3"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty4"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty5"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty6"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty7"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty8"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty9"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty10"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty11"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty12"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty13"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty14"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty15"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty16"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty17"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty18"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty19"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty20"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty21"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty22"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty23"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty24"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty25"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty26"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty27"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty28"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty29"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty30"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty31"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty32"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("desiredProperty"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("undesiredProperty")))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeB")
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("anotherProperty"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("desiredProperty")))
+ .Build();
+ ICING_ASSERT_OK(this->schema_store_->SetSchema(schema));
+
+ // Ids are assigned alphabetically, so the property ids are:
+ // TypeA.desiredProperty = 0
+ // TypeA.otherPropertyN = N
+ // TypeA.undesiredProperty = 33
+ // TypeB.anotherProperty = 0
+ // TypeB.desiredProperty = 1
+ SectionId typea_desired_prop_id = 0;
+ SectionId typea_undesired_prop_id = 33;
+ SectionId typeb_another_prop_id = 0;
+ SectionId typeb_desired_prop_id = 1;
+ std::string desired_property = "desiredProperty";
+ std::string undesired_property = "undesiredProperty";
+ std::string another_property = "anotherProperty";
+
+ // Put 11 docs of "TypeA" into the document store.
+ DocumentProto doc =
+ DocumentBuilder().SetKey("ns1", "uri0").SetSchema("TypeA").Build();
+ ICING_ASSERT_OK(this->doc_store_->Put(doc));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri1").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri2").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri3").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri4").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri5").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri6").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri7").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri8").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri9").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri10").Build()));
+
+ // Put 10 docs of "TypeB" into the document store.
+ doc = DocumentBuilder(doc).SetUri("uri11").SetSchema("TypeB").Build();
+ ICING_ASSERT_OK(this->doc_store_->Put(doc));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri12").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri13").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri14").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri15").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri16").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri17").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri18").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri19").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri20").Build()));
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_));
+
+ // Index numeric content for other properties to force our property into the
+ // wildcard storage.
+ std::string other_property_path = "otherProperty";
+ for (int i = 1; i <= IntegerIndex::kMaxPropertyStorages; ++i) {
+ Index(integer_index.get(),
+ absl_ports::StrCat(other_property_path, std::to_string(i)),
+ /*document_id=*/0, /*section_id=*/i, /*keys=*/{i});
+ }
+
+ // Index numeric content for TypeA.desiredProperty
+ Index(integer_index.get(), desired_property, /*document_id=*/0,
+ typea_desired_prop_id, /*keys=*/{1});
+ Index(integer_index.get(), desired_property, /*document_id=*/1,
+ typea_desired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), desired_property, /*document_id=*/2,
+ typea_desired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), desired_property, /*document_id=*/3,
+ typea_desired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), desired_property, /*document_id=*/4,
+ typea_desired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), desired_property, /*document_id=*/5,
+ typea_desired_prop_id, /*keys=*/{2});
+
+ // Index the same numeric content for TypeA.undesiredProperty
+ Index(integer_index.get(), undesired_property, /*document_id=*/6,
+ typea_undesired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), undesired_property, /*document_id=*/7,
+ typea_undesired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), undesired_property, /*document_id=*/8,
+ typea_undesired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), undesired_property, /*document_id=*/9,
+ typea_undesired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), undesired_property, /*document_id=*/10,
+ typea_undesired_prop_id, /*keys=*/{2});
+
+    // Index the same numeric content for TypeB.anotherProperty
+ Index(integer_index.get(), another_property, /*document_id=*/11,
+ typeb_another_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), another_property, /*document_id=*/12,
+ typeb_another_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), another_property, /*document_id=*/13,
+ typeb_another_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), another_property, /*document_id=*/14,
+ typeb_another_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), another_property, /*document_id=*/15,
+ typeb_another_prop_id, /*keys=*/{2});
+
+ // Finally, index the same numeric content for TypeB.desiredProperty
+ Index(integer_index.get(), desired_property, /*document_id=*/16,
+ typeb_desired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), desired_property, /*document_id=*/17,
+ typeb_desired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), desired_property, /*document_id=*/18,
+ typeb_desired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), desired_property, /*document_id=*/19,
+ typeb_desired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), desired_property, /*document_id=*/20,
+ typeb_desired_prop_id, /*keys=*/{2});
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_));
+
+ EXPECT_THAT(integer_index->num_property_indices(), Eq(33));
+
+ // Only the hits for 'desired_prop_id' should be returned.
+ std::vector<SectionId> expected_sections_typea = {typea_desired_prop_id};
+ std::vector<SectionId> expected_sections_typeb = {typeb_desired_prop_id};
+ EXPECT_THAT(
+ Query(integer_index.get(), desired_property,
+ /*key_lower=*/2, /*key_upper=*/2),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/20, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/17, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections_typea))));
+
+ EXPECT_THAT(
+ Query(integer_index.get(), desired_property,
+ /*key_lower=*/1, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/20, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/17, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/16, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections_typea))));
+}
+
TEST_F(IntegerIndexTest,
IntegerIndexShouldWorkAfterOptimizeAndReinitialization) {
constexpr std::string_view kPropertyPath1 = "prop1";
@@ -1183,6 +1756,550 @@ TEST_F(IntegerIndexTest,
}
}
+TEST_F(IntegerIndexTest, WildcardStorageWorksAfterOptimize) {
+ // This test sets its schema assuming that max property storages == 32.
+ ASSERT_THAT(IntegerIndex::kMaxPropertyStorages, Eq(32));
+
+ PropertyConfigProto int_property_config =
+ PropertyConfigBuilder()
+ .SetName("otherProperty1")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .Build();
+ // Create a schema with two types:
+ // - TypeA has 34 properties:
+ // 'desiredProperty', 'otherProperty'*, 'undesiredProperty'
+ // - TypeB has 2 properties: 'anotherProperty', 'desiredProperty'
+ // 1. The 32 'otherProperty's will consume all of the individual storages
+ // 2. TypeA.desiredProperty and TypeB.anotherProperty will both be assigned
+ // SectionId = 0 for their respective types.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(int_property_config)
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty2"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty3"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty4"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty5"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty6"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty7"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty8"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty9"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty10"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty11"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty12"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty13"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty14"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty15"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty16"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty17"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty18"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty19"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty20"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty21"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty22"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty23"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty24"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty25"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty26"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty27"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty28"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty29"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty30"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty31"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty32"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("desiredProperty"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("undesiredProperty")))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeB")
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("anotherProperty"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("desiredProperty")))
+ .Build();
+ ICING_ASSERT_OK(this->schema_store_->SetSchema(schema));
+
+ // Ids are assigned alphabetically, so the property ids are:
+ // TypeA.desiredProperty = 0
+ // TypeA.otherPropertyN = N
+ // TypeA.undesiredProperty = 33
+ // TypeB.anotherProperty = 0
+ // TypeB.desiredProperty = 1
+ SectionId typea_desired_prop_id = 0;
+ SectionId typea_undesired_prop_id = 33;
+ SectionId typeb_another_prop_id = 0;
+ SectionId typeb_desired_prop_id = 1;
+ std::string desired_property = "desiredProperty";
+ std::string undesired_property = "undesiredProperty";
+ std::string another_property = "anotherProperty";
+
+ // Only the hits for 'desired_prop_id' should be returned.
+ std::vector<SectionId> expected_sections_typea = {typea_desired_prop_id};
+ std::vector<SectionId> expected_sections_typeb = {typeb_desired_prop_id};
+
+ // Put 11 docs of "TypeA" into the document store.
+ DocumentProto doc =
+ DocumentBuilder().SetKey("ns1", "uri0").SetSchema("TypeA").Build();
+ ICING_ASSERT_OK(this->doc_store_->Put(doc));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri1").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri2").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri3").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri4").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri5").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri6").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri7").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri8").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri9").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri10").Build()));
+
+ // Put 10 docs of "TypeB" into the document store.
+ doc = DocumentBuilder(doc).SetUri("uri11").SetSchema("TypeB").Build();
+ ICING_ASSERT_OK(this->doc_store_->Put(doc));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri12").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri13").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri14").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri15").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri16").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri17").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri18").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri19").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri20").Build()));
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_));
+
+ // Index numeric content for other properties to force our property into the
+ // wildcard storage.
+ std::string other_property_path = "otherProperty";
+ for (int i = 1; i <= IntegerIndex::kMaxPropertyStorages; ++i) {
+ Index(integer_index.get(),
+ absl_ports::StrCat(other_property_path, std::to_string(i)),
+ /*document_id=*/0, /*section_id=*/i, /*keys=*/{i});
+ }
+
+ // Index numeric content for TypeA.desiredProperty
+ Index(integer_index.get(), desired_property, /*document_id=*/0,
+ typea_desired_prop_id, /*keys=*/{1});
+ Index(integer_index.get(), desired_property, /*document_id=*/1,
+ typea_desired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), desired_property, /*document_id=*/2,
+ typea_desired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), desired_property, /*document_id=*/3,
+ typea_desired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), desired_property, /*document_id=*/4,
+ typea_desired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), desired_property, /*document_id=*/5,
+ typea_desired_prop_id, /*keys=*/{2});
+
+ // Index the same numeric content for TypeA.undesiredProperty
+ Index(integer_index.get(), undesired_property, /*document_id=*/6,
+ typea_undesired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), undesired_property, /*document_id=*/7,
+ typea_undesired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), undesired_property, /*document_id=*/8,
+ typea_undesired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), undesired_property, /*document_id=*/9,
+ typea_undesired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), undesired_property, /*document_id=*/10,
+ typea_undesired_prop_id, /*keys=*/{2});
+
+    // Index the same numeric content for TypeB.anotherProperty
+ Index(integer_index.get(), another_property, /*document_id=*/11,
+ typeb_another_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), another_property, /*document_id=*/12,
+ typeb_another_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), another_property, /*document_id=*/13,
+ typeb_another_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), another_property, /*document_id=*/14,
+ typeb_another_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), another_property, /*document_id=*/15,
+ typeb_another_prop_id, /*keys=*/{2});
+
+ // Finally, index the same numeric content for TypeB.desiredProperty
+ Index(integer_index.get(), desired_property, /*document_id=*/16,
+ typeb_desired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), desired_property, /*document_id=*/17,
+ typeb_desired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), desired_property, /*document_id=*/18,
+ typeb_desired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), desired_property, /*document_id=*/19,
+ typeb_desired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), desired_property, /*document_id=*/20,
+ typeb_desired_prop_id, /*keys=*/{2});
+
+ ICING_ASSERT_OK(doc_store_->Delete(/*document_id=*/3));
+ ICING_ASSERT_OK(doc_store_->Delete(/*document_id=*/5));
+ // Delete doc id = 3, 5, compress and keep the rest.
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<DocumentId> document_id_old_to_new,
+ CompactDocStore());
+
+ DocumentId new_last_added_document_id = 18;
+ EXPECT_THAT(integer_index->Optimize(document_id_old_to_new,
+ new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(integer_index->last_added_document_id(),
+ Eq(new_last_added_document_id));
+
+ EXPECT_THAT(
+ Query(integer_index.get(), desired_property,
+ /*key_lower=*/2, /*key_upper=*/2),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/20 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/17 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections_typea))));
+
+ EXPECT_THAT(
+ Query(integer_index.get(), desired_property,
+ /*key_lower=*/1, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/20 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/17 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/16 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections_typea))));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_));
+
+ EXPECT_THAT(integer_index->num_property_indices(), Eq(33));
+
+ EXPECT_THAT(
+ Query(integer_index.get(), desired_property,
+ /*key_lower=*/2, /*key_upper=*/2),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/20 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/17 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections_typea))));
+
+ EXPECT_THAT(
+ Query(integer_index.get(), desired_property,
+ /*key_lower=*/1, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/20 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/17 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/16 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections_typea))));
+}
+
+// This test covers the situation where Optimize causes us to throw out some of
+// the individual index storages (because they don't have any hits anymore).
+// In this case, any properties that added content to the wildcard storage (even
+// if all of their content was also deleted) should still be placed in the
+// wildcard storage.
+TEST_F(IntegerIndexTest, WildcardStorageAvailableIndicesAfterOptimize) {
+ // This test sets its schema assuming that max property storages == 32.
+ ASSERT_THAT(IntegerIndex::kMaxPropertyStorages, Eq(32));
+
+ PropertyConfigProto int_property_config =
+ PropertyConfigBuilder()
+ .SetName("otherProperty1")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .Build();
+  // Create a schema with a single type:
+  // - TypeA has 34 properties:
+  //   'desiredProperty', 'otherProperty'*, 'undesiredProperty'
+  //   (unlike the test above, no TypeB is defined in this test)
+  // 1. The 32 'otherProperty's will consume all of the individual storages
+  // 2. TypeA.desiredProperty will be assigned SectionId = 0 and
+  //    TypeA.undesiredProperty will be assigned SectionId = 33.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(int_property_config)
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty2"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty3"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty4"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty5"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty6"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty7"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty8"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty9"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty10"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty11"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty12"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty13"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty14"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty15"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty16"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty17"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty18"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty19"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty20"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty21"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty22"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty23"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty24"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty25"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty26"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty27"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty28"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty29"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty30"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty31"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty32"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("desiredProperty"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("undesiredProperty")))
+ .Build();
+ ICING_ASSERT_OK(this->schema_store_->SetSchema(schema));
+
+  // Ids are assigned alphabetically, so the property ids are:
+  // TypeA.desiredProperty = 0
+  // TypeA.otherPropertyN = N
+  // TypeA.undesiredProperty = 33
+  // (This schema defines no TypeB, so there are no TypeB property ids; the
+  // 'another_property' string below is unused in this test.)
+ SectionId typea_desired_prop_id = 0;
+ SectionId typea_undesired_prop_id = 33;
+ SectionId typea_other1_prop_id = 1;
+ std::string desired_property = "desiredProperty";
+ std::string undesired_property = "undesiredProperty";
+ std::string another_property = "anotherProperty";
+ std::string other_property_1 = "otherProperty1";
+
+ // Only the hits for 'desired_prop_id' should be returned.
+ std::vector<SectionId> expected_sections_typea = {typea_desired_prop_id};
+
+ // Put 11 docs of "TypeA" into the document store.
+ DocumentProto doc =
+ DocumentBuilder().SetKey("ns1", "uri0").SetSchema("TypeA").Build();
+ ICING_ASSERT_OK(this->doc_store_->Put(doc));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri1").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri2").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri3").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri4").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri5").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri6").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri7").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri8").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri9").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri10").Build()));
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_));
+
+ // Index numeric content for other properties to force our property into the
+ // wildcard storage.
+ std::string other_property_path = "otherProperty";
+ for (int i = 1; i <= IntegerIndex::kMaxPropertyStorages; ++i) {
+ Index(integer_index.get(),
+ absl_ports::StrCat(other_property_path, std::to_string(i)),
+ /*document_id=*/0, /*section_id=*/i, /*keys=*/{i});
+ }
+
+ // Index numeric content for TypeA.desiredProperty
+ Index(integer_index.get(), desired_property, /*document_id=*/0,
+ typea_desired_prop_id, /*keys=*/{1});
+ Index(integer_index.get(), desired_property, /*document_id=*/1,
+ typea_desired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), desired_property, /*document_id=*/2,
+ typea_desired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), desired_property, /*document_id=*/3,
+ typea_desired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), desired_property, /*document_id=*/4,
+ typea_desired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), desired_property, /*document_id=*/5,
+ typea_desired_prop_id, /*keys=*/{2});
+
+ // Index the same numeric content for TypeA.undesiredProperty
+ Index(integer_index.get(), undesired_property, /*document_id=*/6,
+ typea_undesired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), undesired_property, /*document_id=*/7,
+ typea_undesired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), undesired_property, /*document_id=*/8,
+ typea_undesired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), undesired_property, /*document_id=*/9,
+ typea_undesired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), undesired_property, /*document_id=*/10,
+ typea_undesired_prop_id, /*keys=*/{2});
+
+ // Delete all the docs that had hits in otherProperty* and
+ // undesiredProperty.
+ ICING_ASSERT_OK(doc_store_->Delete(/*document_id=*/0));
+ ICING_ASSERT_OK(doc_store_->Delete(/*document_id=*/6));
+ ICING_ASSERT_OK(doc_store_->Delete(/*document_id=*/7));
+ ICING_ASSERT_OK(doc_store_->Delete(/*document_id=*/8));
+ ICING_ASSERT_OK(doc_store_->Delete(/*document_id=*/9));
+ ICING_ASSERT_OK(doc_store_->Delete(/*document_id=*/10));
+ // Delete doc id = 0, 6, 7, 8, 9, 10. Compress and keep the rest.
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<DocumentId> document_id_old_to_new,
+ CompactDocStore());
+
+ DocumentId new_last_added_document_id = 5 - 1;
+ EXPECT_THAT(integer_index->Optimize(document_id_old_to_new,
+ new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(integer_index->last_added_document_id(),
+ Eq(new_last_added_document_id));
+
+ EXPECT_THAT(
+ Query(integer_index.get(), desired_property,
+ /*key_lower=*/2, /*key_upper=*/2),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/5 - 1, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/2 - 1, expected_sections_typea))));
+
+ EXPECT_THAT(
+ Query(integer_index.get(), desired_property,
+ /*key_lower=*/1, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/5 - 1, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/2 - 1, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/1 - 1, expected_sections_typea))));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_));
+
+ EXPECT_THAT(integer_index->num_property_indices(), Eq(1));
+
+ // Add a new doc (docid==5) and a hit for desiredProperty. This should still
+ // be placed into the wildcard integer storage.
+ doc = DocumentBuilder().SetKey("ns1", "uri11").SetSchema("TypeA").Build();
+ ICING_ASSERT_OK(this->doc_store_->Put(doc));
+ Index(integer_index.get(), desired_property, /*document_id=*/5,
+ typea_desired_prop_id, /*keys=*/{12});
+ EXPECT_THAT(integer_index->num_property_indices(), Eq(1));
+
+ EXPECT_THAT(Query(integer_index.get(), desired_property,
+ /*key_lower=*/12, /*key_upper=*/12),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/5, expected_sections_typea))));
+
+ // Add a new doc (docid==6) and a hit for undesiredProperty. This should still
+ // be placed into the wildcard integer storage.
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri12").Build()));
+ Index(integer_index.get(), undesired_property, /*document_id=*/6,
+ typea_undesired_prop_id, /*keys=*/{3});
+ EXPECT_THAT(integer_index->num_property_indices(), Eq(1));
+
+ expected_sections_typea = {typea_undesired_prop_id};
+ EXPECT_THAT(Query(integer_index.get(), undesired_property,
+ /*key_lower=*/3, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/6, expected_sections_typea))));
+
+ // Add a new doc (docid==7) and a hit for otherProperty1. This should be given
+ // its own individual storage.
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri13").Build()));
+ Index(integer_index.get(), other_property_1, /*document_id=*/7,
+ typea_other1_prop_id, /*keys=*/{3});
+ EXPECT_THAT(integer_index->num_property_indices(), Eq(2));
+
+ expected_sections_typea = {typea_other1_prop_id};
+ EXPECT_THAT(Query(integer_index.get(), other_property_1,
+ /*key_lower=*/3, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/7, expected_sections_typea))));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/index/numeric/numeric-index.h b/icing/index/numeric/numeric-index.h
index 347260a..28640ca 100644
--- a/icing/index/numeric/numeric-index.h
+++ b/icing/index/numeric/numeric-index.h
@@ -23,8 +23,10 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/file/persistent-storage.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
namespace icing {
namespace lib {
@@ -126,8 +128,9 @@ class NumericIndex : public PersistentStorage {
// - INVALID_ARGUMENT_ERROR if key_lower > key_upper
// - Any other errors, depending on the actual implementation
virtual libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
- GetIterator(std::string_view property_path, T key_lower,
- T key_upper) const = 0;
+ GetIterator(std::string_view property_path, T key_lower, T key_upper,
+ const DocumentStore& document_store,
+ const SchemaStore& schema_store) const = 0;
// Reduces internal file sizes by reclaiming space and ids of deleted
// documents. Numeric index will convert all data (hits) to the new document
@@ -162,6 +165,10 @@ class NumericIndex : public PersistentStorage {
// last_added_document_id() or last_added_document_id() is invalid.
virtual void set_last_added_document_id(DocumentId document_id) = 0;
+ // The number of individual indices that the NumericIndex has created to
+ // search over all indexed properties thus far.
+ virtual int num_property_indices() const = 0;
+
protected:
explicit NumericIndex(const Filesystem& filesystem,
std::string&& working_path,
diff --git a/icing/index/numeric/posting-list-integer-index-accessor.cc b/icing/index/numeric/posting-list-integer-index-accessor.cc
index 220b240..af2aea4 100644
--- a/icing/index/numeric/posting-list-integer-index-accessor.cc
+++ b/icing/index/numeric/posting-list-integer-index-accessor.cc
@@ -64,6 +64,58 @@ PostingListIntegerIndexAccessor::CreateFromExisting(
// Returns the next batch of integer index data for the provided posting list.
libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
PostingListIntegerIndexAccessor::GetNextDataBatch() {
+ return GetNextDataBatchImpl(/*free_posting_list=*/false);
+}
+
+libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
+PostingListIntegerIndexAccessor::GetAllDataAndFree() {
+ if (preexisting_posting_list_ == nullptr) {
+ return absl_ports::FailedPreconditionError(
+ "Cannot retrieve data from a PostingListIntegerIndexAccessor that "
+ "was not created from a preexisting posting list.");
+ }
+
+ std::vector<IntegerIndexData> all_data;
+ while (true) {
+ ICING_ASSIGN_OR_RETURN(std::vector<IntegerIndexData> batch,
+ GetNextDataBatchImpl(/*free_posting_list=*/true));
+ if (batch.empty()) {
+ break;
+ }
+ std::move(batch.begin(), batch.end(), std::back_inserter(all_data));
+ }
+
+ return all_data;
+}
+
+libtextclassifier3::Status PostingListIntegerIndexAccessor::PrependData(
+ const IntegerIndexData& data) {
+ PostingListUsed& active_pl = (preexisting_posting_list_ != nullptr)
+ ? preexisting_posting_list_->posting_list
+ : in_memory_posting_list_;
+ libtextclassifier3::Status status =
+ serializer_->PrependData(&active_pl, data);
+ if (!absl_ports::IsResourceExhausted(status)) {
+ return status;
+ }
+ // There is no more room to add data to this current posting list! Therefore,
+ // we need to either move those data to a larger posting list or flush this
+ // posting list and create another max-sized posting list in the chain.
+ if (preexisting_posting_list_ != nullptr) {
+ ICING_RETURN_IF_ERROR(FlushPreexistingPostingList());
+ } else {
+ ICING_RETURN_IF_ERROR(FlushInMemoryPostingList());
+ }
+
+ // Re-add data. Should always fit since we just cleared
+ // in_memory_posting_list_. It's fine to explicitly reference
+ // in_memory_posting_list_ here because there's no way of reaching this line
+ // while preexisting_posting_list_ is still in use.
+ return serializer_->PrependData(&in_memory_posting_list_, data);
+}
+
+libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
+PostingListIntegerIndexAccessor::GetNextDataBatchImpl(bool free_posting_list) {
if (preexisting_posting_list_ == nullptr) {
if (has_reached_posting_list_chain_end_) {
return std::vector<IntegerIndexData>();
@@ -85,6 +137,11 @@ PostingListIntegerIndexAccessor::GetNextDataBatch() {
next_block_index = preexisting_posting_list_->next_block_index;
}
+ if (free_posting_list) {
+ ICING_RETURN_IF_ERROR(
+ storage_->FreePostingList(std::move(*preexisting_posting_list_)));
+ }
+
if (next_block_index != kInvalidBlockIndex) {
// Since we only have to deal with next block for max-sized posting list
// block, max_num_posting_lists is 1 and posting_list_index_bits is
@@ -103,31 +160,5 @@ PostingListIntegerIndexAccessor::GetNextDataBatch() {
return batch;
}
-libtextclassifier3::Status PostingListIntegerIndexAccessor::PrependData(
- const IntegerIndexData& data) {
- PostingListUsed& active_pl = (preexisting_posting_list_ != nullptr)
- ? preexisting_posting_list_->posting_list
- : in_memory_posting_list_;
- libtextclassifier3::Status status =
- serializer_->PrependData(&active_pl, data);
- if (!absl_ports::IsResourceExhausted(status)) {
- return status;
- }
- // There is no more room to add data to this current posting list! Therefore,
- // we need to either move those data to a larger posting list or flush this
- // posting list and create another max-sized posting list in the chain.
- if (preexisting_posting_list_ != nullptr) {
- ICING_RETURN_IF_ERROR(FlushPreexistingPostingList());
- } else {
- ICING_RETURN_IF_ERROR(FlushInMemoryPostingList());
- }
-
- // Re-add data. Should always fit since we just cleared
- // in_memory_posting_list_. It's fine to explicitly reference
- // in_memory_posting_list_ here because there's no way of reaching this line
- // while preexisting_posting_list_ is still in use.
- return serializer_->PrependData(&in_memory_posting_list_, data);
-}
-
} // namespace lib
} // namespace icing
diff --git a/icing/index/numeric/posting-list-integer-index-accessor.h b/icing/index/numeric/posting-list-integer-index-accessor.h
index 4c1eced..f0d3d25 100644
--- a/icing/index/numeric/posting-list-integer-index-accessor.h
+++ b/icing/index/numeric/posting-list-integer-index-accessor.h
@@ -50,7 +50,7 @@ class PostingListIntegerIndexAccessor : public PostingListAccessor {
Create(FlashIndexStorage* storage,
PostingListIntegerIndexSerializer* serializer);
- // Create a PostingListIntegerIndexAccessor with an existing posting list
+ // Creates a PostingListIntegerIndexAccessor with an existing posting list
// identified by existing_posting_list_id.
//
// RETURNS:
@@ -64,17 +64,30 @@ class PostingListIntegerIndexAccessor : public PostingListAccessor {
PostingListSerializer* GetSerializer() override { return serializer_; }
- // Retrieve the next batch of data in the posting list chain
+ // Retrieves the next batch of data in the posting list chain.
//
// RETURNS:
// - On success, a vector of integer index data in the posting list chain
- // - INTERNAL if called on an instance that was created via Create, if
- // unable to read the next posting list in the chain or if the posting
- // list has been corrupted somehow.
+ // - FAILED_PRECONDITION_ERROR if called on an instance that was created via
+ // Create.
+ // - INTERNAL_ERROR if unable to read the next posting list in the chain or
+ // if the posting list has been corrupted somehow.
libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
GetNextDataBatch();
- // Prepend one data. This may result in flushing the posting list to disk (if
+ // Retrieves all data from the posting list chain and frees all posting
+ // list(s).
+ //
+ // RETURNS:
+ // - On success, a vector of integer index data in the posting list chain
+ // - FAILED_PRECONDITION_ERROR if called on an instance that was created via
+ // Create.
+ // - INTERNAL_ERROR if unable to read the next posting list in the chain or
+ // if the posting list has been corrupted somehow.
+ libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
+ GetAllDataAndFree();
+
+ // Prepends one data. This may result in flushing the posting list to disk (if
// the PostingListIntegerIndexAccessor holds a max-sized posting list that
// is full) or freeing a pre-existing posting list if it is too small to fit
// all data necessary.
@@ -87,7 +100,15 @@ class PostingListIntegerIndexAccessor : public PostingListAccessor {
// posting list.
libtextclassifier3::Status PrependData(const IntegerIndexData& data);
- // TODO(b/259743562): [Optimization 1] add GetAndClear, IsFull for split
+ bool WantsSplit() const {
+ const PostingListUsed* current_pl =
+ preexisting_posting_list_ != nullptr
+ ? &preexisting_posting_list_->posting_list
+ : &in_memory_posting_list_;
+ // Only max-sized PLs get split. Smaller PLs just get copied to larger PLs.
+ return current_pl->size_in_bytes() == storage_->max_posting_list_bytes() &&
+ serializer_->IsFull(current_pl);
+ }
private:
explicit PostingListIntegerIndexAccessor(
@@ -96,6 +117,20 @@ class PostingListIntegerIndexAccessor : public PostingListAccessor {
: PostingListAccessor(storage, std::move(in_memory_posting_list)),
serializer_(serializer) {}
+ // Retrieves the next batch of data in the posting list chain.
+ //
+ // - free_posting_list: a boolean flag indicating whether to free all posting
+ //   lists after retrieving batch data.
+ //
+ // RETURNS:
+ // - On success, a vector of integer index data in the posting list chain
+ // - FAILED_PRECONDITION_ERROR if called on an instance that was created via
+ // Create.
+ // - INTERNAL_ERROR if unable to read the next posting list in the chain or
+ // if the posting list has been corrupted somehow.
+ libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
+ GetNextDataBatchImpl(bool free_posting_list);
+
PostingListIntegerIndexSerializer* serializer_; // Does not own.
};
diff --git a/icing/index/numeric/posting-list-integer-index-accessor_test.cc b/icing/index/numeric/posting-list-integer-index-accessor_test.cc
index 48221b9..f655fea 100644
--- a/icing/index/numeric/posting-list-integer-index-accessor_test.cc
+++ b/icing/index/numeric/posting-list-integer-index-accessor_test.cc
@@ -25,6 +25,7 @@
#include "gtest/gtest.h"
#include "icing/file/filesystem.h"
#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/posting-list-common.h"
#include "icing/file/posting_list/posting-list-identifier.h"
#include "icing/index/numeric/integer-index-data.h"
#include "icing/index/numeric/posting-list-integer-index-serializer.h"
@@ -42,6 +43,7 @@ using ::testing::ElementsAre;
using ::testing::ElementsAreArray;
using ::testing::Eq;
using ::testing::Lt;
+using ::testing::Ne;
using ::testing::SizeIs;
class PostingListIntegerIndexAccessorTest : public ::testing::Test {
@@ -402,6 +404,131 @@ TEST_F(PostingListIntegerIndexAccessorTest,
EXPECT_THAT(result2.status, IsOk());
}
+TEST_F(PostingListIntegerIndexAccessorTest, GetAllDataAndFree) {
+ IntegerIndexData data1(/*section_id=*/3, /*document_id=*/1, /*key=*/123);
+ IntegerIndexData data2(/*section_id=*/3, /*document_id=*/2, /*key=*/456);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor1,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ // Add 2 data.
+ ICING_ASSERT_OK(pl_accessor1->PrependData(data1));
+ ICING_ASSERT_OK(pl_accessor1->PrependData(data2));
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor1).Finalize();
+ ICING_ASSERT_OK(result1.status);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor2,
+ PostingListIntegerIndexAccessor::CreateFromExisting(
+ flash_index_storage_.get(), serializer_.get(), result1.id));
+ EXPECT_THAT(pl_accessor2->GetAllDataAndFree(),
+ IsOkAndHolds(ElementsAre(data2, data1)));
+
+ // Allocate a new posting list with same size again.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor3,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ // Add 2 data.
+ ICING_ASSERT_OK(pl_accessor3->PrependData(data1));
+ ICING_ASSERT_OK(pl_accessor3->PrependData(data2));
+ PostingListAccessor::FinalizeResult result3 =
+ std::move(*pl_accessor3).Finalize();
+ ICING_ASSERT_OK(result3.status);
+ // We should get the same id if the previous one has been freed correctly by
+ // GetAllDataAndFree.
+ EXPECT_THAT(result3.id, Eq(result1.id));
+}
+
+TEST_F(PostingListIntegerIndexAccessorTest, GetAllDataAndFreePostingListChain) {
+ uint32_t block_size = FlashIndexStorage::SelectBlockSize();
+ uint32_t max_posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
+ block_size, serializer_->GetDataTypeBytes());
+ uint32_t max_num_data_single_posting_list =
+ max_posting_list_bytes / serializer_->GetDataTypeBytes();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor1,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+
+ // Prepend max_num_data_single_posting_list + 1 data.
+ std::vector<IntegerIndexData> data_vec;
+ for (uint32_t i = 0; i < max_num_data_single_posting_list + 1; ++i) {
+ IntegerIndexData data(/*section_id=*/3, static_cast<DocumentId>(i),
+ /*key=*/i);
+ ICING_ASSERT_OK(pl_accessor1->PrependData(data));
+ data_vec.push_back(data);
+ }
+
+ // This will cause:
+ // - Allocate the first max-sized posting list at block index = 1, storing
+ // max_num_data_single_posting_list data.
+ // - Allocate the second max-sized posting list at block index = 2, storing 1
+ // data. Also its next_block_index is 1.
+ // - IOW, we will get 2 -> 1 and result1.id points to 2.
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor1).Finalize();
+ ICING_ASSERT_OK(result1.status);
+
+ uint32_t first_pl_block_index = kInvalidBlockIndex;
+ {
+ // result1.id points at the second (max-sized) PL, and next_block_index of
+ // the second PL points to the first PL's block. Fetch the first PL's block
+ // index manually.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(result1.id));
+ first_pl_block_index = pl_holder.next_block_index;
+ }
+ ASSERT_THAT(first_pl_block_index, Ne(kInvalidBlockIndex));
+
+ // Call GetAllDataAndFree. This will free block 2 and block 1.
+ // Free block list: 1 -> 2 (since free block list is LIFO).
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor2,
+ PostingListIntegerIndexAccessor::CreateFromExisting(
+ flash_index_storage_.get(), serializer_.get(), result1.id));
+ EXPECT_THAT(
+ pl_accessor2->GetAllDataAndFree(),
+ IsOkAndHolds(ElementsAreArray(data_vec.rbegin(), data_vec.rend())));
+ pl_accessor2.reset();
+
+ // Allocate a new posting list with same size again.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor3,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ // Add same set of data.
+ for (uint32_t i = 0; i < max_num_data_single_posting_list + 1; ++i) {
+ ICING_ASSERT_OK(pl_accessor3->PrependData(data_vec[i]));
+ }
+
+ // This will cause:
+ // - Allocate the first max-sized posting list from the free block list, which
+ // is block index = 1, storing max_num_data_single_posting_list data.
+ // - Allocate the second max-sized posting list from the next block in free
+ // block list, which is block index = 2, storing 1 data. Also its
+ // next_block_index should be 1.
+ PostingListAccessor::FinalizeResult result3 =
+ std::move(*pl_accessor3).Finalize();
+ ICING_ASSERT_OK(result3.status);
+ // We should get the same id if the previous one has been freed correctly by
+ // GetAllDataAndFree.
+ EXPECT_THAT(result3.id, Eq(result1.id));
+ // Also the first PL should be the same if it has been freed correctly by
+ // GetAllDataAndFree. Since it is a max-sized posting list, we just need to
+ // verify the block index.
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(result3.id));
+ EXPECT_THAT(pl_holder.next_block_index, Eq(first_pl_block_index));
+ }
+}
+
} // namespace
} // namespace lib
diff --git a/icing/index/numeric/posting-list-integer-index-serializer.h b/icing/index/numeric/posting-list-integer-index-serializer.h
index 9cfdb7a..ea2f2da 100644
--- a/icing/index/numeric/posting-list-integer-index-serializer.h
+++ b/icing/index/numeric/posting-list-integer-index-serializer.h
@@ -111,6 +111,12 @@ class PostingListIntegerIndexSerializer : public PostingListSerializer {
libtextclassifier3::Status PopFrontData(PostingListUsed* posting_list_used,
uint32_t num_data) const;
+ // Helper function to determine if posting list is full.
+ bool IsFull(const PostingListUsed* posting_list_used) const {
+ return GetSpecialData(posting_list_used, /*index=*/0).data().is_valid() &&
+ GetSpecialData(posting_list_used, /*index=*/1).data().is_valid();
+ }
+
private:
// Posting list layout formats:
//
@@ -228,11 +234,6 @@ class PostingListIntegerIndexSerializer : public PostingListSerializer {
// +-----------------+-----------------+---+--------+-----+--------+--------+
// Helpers to determine what state the posting list is in.
- bool IsFull(const PostingListUsed* posting_list_used) const {
- return GetSpecialData(posting_list_used, /*index=*/0).data().is_valid() &&
- GetSpecialData(posting_list_used, /*index=*/1).data().is_valid();
- }
-
bool IsAlmostFull(const PostingListUsed* posting_list_used) const {
return !GetSpecialData(posting_list_used, /*index=*/0).data().is_valid() &&
GetSpecialData(posting_list_used, /*index=*/1).data().is_valid();
diff --git a/icing/join/join-processor.cc b/icing/join/join-processor.cc
index ab32850..da0e5d2 100644
--- a/icing/join/join-processor.cc
+++ b/icing/join/join-processor.cc
@@ -27,9 +27,10 @@
#include "icing/join/qualified-id.h"
#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
+#include "icing/schema/joinable-property.h"
#include "icing/scoring/scored-document-hit.h"
#include "icing/store/document-id.h"
-#include "icing/util/snippet-helpers.h"
+#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
@@ -67,34 +68,20 @@ JoinProcessor::GetChildrenFetcher(
std::unordered_map<DocumentId, std::vector<ScoredDocumentHit>>
map_joinable_qualified_id;
for (const ScoredDocumentHit& child : child_scored_document_hits) {
- std::string property_content = FetchPropertyExpressionValue(
- child.document_id(), join_spec.child_property_expression());
-
- // Parse qualified id.
- libtextclassifier3::StatusOr<QualifiedId> qualified_id_or =
- QualifiedId::Parse(property_content);
- if (!qualified_id_or.ok()) {
- ICING_VLOG(2) << "Skip content with invalid format of QualifiedId";
- continue;
- }
- QualifiedId qualified_id = std::move(qualified_id_or).ValueOrDie();
-
- // Lookup parent DocumentId.
- libtextclassifier3::StatusOr<DocumentId> parent_doc_id_or =
- doc_store_->GetDocumentId(qualified_id.name_space(),
- qualified_id.uri());
- if (!parent_doc_id_or.ok()) {
- // Skip the document if getting errors.
+ ICING_ASSIGN_OR_RETURN(
+ DocumentId ref_doc_id,
+ FetchReferencedQualifiedId(child.document_id(),
+ join_spec.child_property_expression()));
+ if (ref_doc_id == kInvalidDocumentId) {
continue;
}
- DocumentId parent_doc_id = std::move(parent_doc_id_or).ValueOrDie();
// Since we've already sorted child_scored_document_hits, just simply omit
// if the map_joinable_qualified_id[parent_doc_id].size() has reached max
// joined child count.
- if (map_joinable_qualified_id[parent_doc_id].size() <
+ if (map_joinable_qualified_id[ref_doc_id].size() <
join_spec.max_joined_child_count()) {
- map_joinable_qualified_id[parent_doc_id].push_back(child);
+ map_joinable_qualified_id[ref_doc_id].push_back(child);
}
}
return JoinChildrenFetcher(join_spec, std::move(map_joinable_qualified_id));
@@ -127,20 +114,49 @@ JoinProcessor::Join(
return joined_scored_document_hits;
}
-std::string JoinProcessor::FetchPropertyExpressionValue(
- const DocumentId& document_id,
- const std::string& property_expression) const {
- // TODO(b/256022027): Add caching of document_id -> {expression -> value}
- libtextclassifier3::StatusOr<DocumentProto> document_or =
- doc_store_->Get(document_id);
- if (!document_or.ok()) {
- // Skip the document if getting errors.
- return "";
+libtextclassifier3::StatusOr<DocumentId>
+JoinProcessor::FetchReferencedQualifiedId(
+ const DocumentId& document_id, const std::string& property_path) const {
+ std::optional<DocumentFilterData> filter_data =
+ doc_store_->GetAliveDocumentFilterData(document_id);
+ if (!filter_data) {
+ return kInvalidDocumentId;
+ }
+
+ ICING_ASSIGN_OR_RETURN(const JoinablePropertyMetadata* metadata,
+ schema_store_->GetJoinablePropertyMetadata(
+ filter_data->schema_type_id(), property_path));
+ if (metadata == nullptr ||
+ metadata->value_type != JoinableConfig::ValueType::QUALIFIED_ID) {
+ // Currently we only support qualified id.
+ return kInvalidDocumentId;
}
- DocumentProto document = std::move(document_or).ValueOrDie();
+ DocJoinInfo info(document_id, metadata->id);
+ libtextclassifier3::StatusOr<std::string_view> ref_qualified_id_str_or =
+ qualified_id_join_index_->Get(info);
+ if (!ref_qualified_id_str_or.ok()) {
+ if (absl_ports::IsNotFound(ref_qualified_id_str_or.status())) {
+ return kInvalidDocumentId;
+ }
+ return std::move(ref_qualified_id_str_or).status();
+ }
- return std::string(GetString(&document, property_expression));
+ libtextclassifier3::StatusOr<QualifiedId> ref_qualified_id_or =
+ QualifiedId::Parse(std::move(ref_qualified_id_str_or).ValueOrDie());
+ if (!ref_qualified_id_or.ok()) {
+ // This shouldn't happen because we've validated it during indexing and only
+ // put valid qualified id strings into qualified id join index.
+ return kInvalidDocumentId;
+ }
+ QualifiedId qualified_id = std::move(ref_qualified_id_or).ValueOrDie();
+
+ libtextclassifier3::StatusOr<DocumentId> ref_document_id_or =
+ doc_store_->GetDocumentId(qualified_id.name_space(), qualified_id.uri());
+ if (!ref_document_id_or.ok()) {
+ return kInvalidDocumentId;
+ }
+ return std::move(ref_document_id_or).ValueOrDie();
}
} // namespace lib
diff --git a/icing/join/join-processor.h b/icing/join/join-processor.h
index 9d5ee11..497787f 100644
--- a/icing/join/join-processor.h
+++ b/icing/join/join-processor.h
@@ -21,7 +21,9 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/join/join-children-fetcher.h"
+#include "icing/join/qualified-id-type-joinable-index.h"
#include "icing/proto/search.pb.h"
+#include "icing/schema/schema-store.h"
#include "icing/scoring/scored-document-hit.h"
#include "icing/store/document-store.h"
@@ -32,8 +34,12 @@ class JoinProcessor {
public:
static constexpr std::string_view kQualifiedIdExpr = "this.qualifiedId()";
- explicit JoinProcessor(const DocumentStore* doc_store)
- : doc_store_(doc_store) {}
+ explicit JoinProcessor(
+ const DocumentStore* doc_store, const SchemaStore* schema_store,
+ const QualifiedIdTypeJoinableIndex* qualified_id_join_index)
+ : doc_store_(doc_store),
+ schema_store_(schema_store),
+ qualified_id_join_index_(qualified_id_join_index) {}
// Get a JoinChildrenFetcher used to fetch all children documents by a parent
// document id.
@@ -52,23 +58,25 @@ class JoinProcessor {
const JoinChildrenFetcher& join_children_fetcher);
private:
- // Loads a document and uses a property expression to fetch the value of the
- // property from the document. The property expression may refer to nested
- // document properties.
- // Note: currently we only support single joining, so we use the first element
- // (index 0) for any repeated values.
+ // Fetches referenced document id of the given document under the given
+ // property path.
//
// TODO(b/256022027): validate joinable property (and its upper-level) should
// not have REPEATED cardinality.
//
// Returns:
- // "" on document load error.
- // "" if the property path is not found in the document.
- std::string FetchPropertyExpressionValue(
- const DocumentId& document_id,
- const std::string& property_expression) const;
+ // - A valid referenced document id on success
+ // - kInvalidDocumentId if the given document is not found, doesn't have
+ // qualified id joinable type for the given property_path, or doesn't have
+ // joinable value (an optional property)
+ // - Any other QualifiedIdTypeJoinableIndex errors
+ libtextclassifier3::StatusOr<DocumentId> FetchReferencedQualifiedId(
+ const DocumentId& document_id, const std::string& property_path) const;
const DocumentStore* doc_store_; // Does not own.
+ const SchemaStore* schema_store_; // Does not own.
+ const QualifiedIdTypeJoinableIndex*
+ qualified_id_join_index_; // Does not own.
};
} // namespace lib
diff --git a/icing/join/join-processor_test.cc b/icing/join/join-processor_test.cc
index 00f2b1c..25d4cfc 100644
--- a/icing/join/join-processor_test.cc
+++ b/icing/join/join-processor_test.cc
@@ -22,6 +22,8 @@
#include "gtest/gtest.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
+#include "icing/join/qualified-id-joinable-property-indexing-handler.h"
+#include "icing/join/qualified-id-type-joinable-index.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
@@ -33,7 +35,14 @@
#include "icing/store/document-id.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/util/status-macros.h"
+#include "icing/util/tokenized-document.h"
+#include "unicode/uloc.h"
namespace icing {
namespace lib {
@@ -41,16 +50,37 @@ namespace lib {
namespace {
using ::testing::ElementsAre;
+using ::testing::IsTrue;
class JoinProcessorTest : public ::testing::Test {
protected:
void SetUp() override {
test_dir_ = GetTestTempDir() + "/icing_join_processor_test";
- filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(test_dir_.c_str()),
+ IsTrue());
+ schema_store_dir_ = test_dir_ + "/schema_store";
+ doc_store_dir_ = test_dir_ + "/doc_store";
+ qualified_id_join_index_dir_ = test_dir_ + "/qualified_id_join_index";
+
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ lang_segmenter_,
+ language_segmenter_factory::Create(std::move(options)));
+
+ ASSERT_THAT(
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()),
+ IsTrue());
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
SchemaProto schema =
SchemaBuilder()
@@ -75,24 +105,51 @@ class JoinProcessorTest : public ::testing::Test {
.Build();
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str()),
+ IsTrue());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
schema_store_.get()));
doc_store_ = std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(qualified_id_join_index_,
+ QualifiedIdTypeJoinableIndex::Create(
+ filesystem_, qualified_id_join_index_dir_));
}
void TearDown() override {
+ qualified_id_join_index_.reset();
doc_store_.reset();
schema_store_.reset();
+ lang_segmenter_.reset();
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
}
+ libtextclassifier3::StatusOr<DocumentId> PutAndIndexDocument(
+ const DocumentProto& document) {
+ ICING_ASSIGN_OR_RETURN(DocumentId document_id, doc_store_->Put(document));
+ ICING_ASSIGN_OR_RETURN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler,
+ QualifiedIdJoinablePropertyIndexingHandler::Create(
+ &fake_clock_, qualified_id_join_index_.get()));
+ ICING_RETURN_IF_ERROR(handler->Handle(tokenized_document, document_id,
+ /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr));
+ return document_id;
+ }
+
libtextclassifier3::StatusOr<std::vector<JoinedScoredDocumentHit>> Join(
const JoinSpecProto& join_spec,
std::vector<ScoredDocumentHit>&& parent_scored_document_hits,
std::vector<ScoredDocumentHit>&& child_scored_document_hits) {
- JoinProcessor join_processor(doc_store_.get());
+ JoinProcessor join_processor(doc_store_.get(), schema_store_.get(),
+ qualified_id_join_index_.get());
ICING_ASSIGN_OR_RETURN(
JoinChildrenFetcher join_children_fetcher,
join_processor.GetChildrenFetcher(
@@ -104,8 +161,15 @@ class JoinProcessorTest : public ::testing::Test {
Filesystem filesystem_;
std::string test_dir_;
+ std::string schema_store_dir_;
+ std::string doc_store_dir_;
+ std::string qualified_id_join_index_dir_;
+
+ std::unique_ptr<LanguageSegmenter> lang_segmenter_;
std::unique_ptr<SchemaStore> schema_store_;
std::unique_ptr<DocumentStore> doc_store_;
+ std::unique_ptr<QualifiedIdTypeJoinableIndex> qualified_id_join_index_;
+
FakeClock fake_clock_;
};
@@ -144,11 +208,16 @@ TEST_F(JoinProcessorTest, JoinByQualifiedId) {
.AddStringProperty("sender", "pkg$db/namespace#person1")
.Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, doc_store_->Put(person1));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, doc_store_->Put(person2));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, doc_store_->Put(email1));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4, doc_store_->Put(email2));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5, doc_store_->Put(email3));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ PutAndIndexDocument(person1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ PutAndIndexDocument(person2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ PutAndIndexDocument(email1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ PutAndIndexDocument(email2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
+ PutAndIndexDocument(email3));
ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
/*score=*/0.0);
@@ -216,9 +285,12 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithoutJoiningProperty) {
.AddStringProperty("subject", "test subject 2")
.Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, doc_store_->Put(person1));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, doc_store_->Put(email1));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, doc_store_->Put(email2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ PutAndIndexDocument(person1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ PutAndIndexDocument(email1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ PutAndIndexDocument(email2));
ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
/*score=*/0.0);
@@ -290,10 +362,14 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithInvalidQualifiedId) {
R"(pkg$db/namespace\#person1)") // invalid format
.Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, doc_store_->Put(person1));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, doc_store_->Put(email1));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, doc_store_->Put(email2));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4, doc_store_->Put(email3));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ PutAndIndexDocument(person1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ PutAndIndexDocument(email1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ PutAndIndexDocument(email2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ PutAndIndexDocument(email3));
ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
/*score=*/0.0);
@@ -356,9 +432,12 @@ TEST_F(JoinProcessorTest, LeftJoinShouldReturnParentWithoutChildren) {
R"(pkg$db/name\#space\\\\#person2)") // escaped
.Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, doc_store_->Put(person1));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, doc_store_->Put(person2));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, doc_store_->Put(email1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ PutAndIndexDocument(person1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ PutAndIndexDocument(person2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ PutAndIndexDocument(email1));
ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
/*score=*/0.0);
@@ -430,10 +509,14 @@ TEST_F(JoinProcessorTest, ShouldSortChildDocumentsByRankingStrategy) {
.AddStringProperty("sender", "pkg$db/namespace#person1")
.Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, doc_store_->Put(person1));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, doc_store_->Put(email1));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, doc_store_->Put(email2));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4, doc_store_->Put(email3));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ PutAndIndexDocument(person1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ PutAndIndexDocument(email1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ PutAndIndexDocument(email2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ PutAndIndexDocument(email3));
ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
/*score=*/0.0);
@@ -519,12 +602,18 @@ TEST_F(JoinProcessorTest,
R"(pkg$db/name\#space\\\\#person2)") // escaped
.Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, doc_store_->Put(person1));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, doc_store_->Put(person2));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, doc_store_->Put(email1));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4, doc_store_->Put(email2));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5, doc_store_->Put(email3));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id6, doc_store_->Put(email4));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ PutAndIndexDocument(person1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ PutAndIndexDocument(person2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ PutAndIndexDocument(email1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ PutAndIndexDocument(email2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
+ PutAndIndexDocument(email3));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id6,
+ PutAndIndexDocument(email4));
ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
/*score=*/0.0);
@@ -587,7 +676,8 @@ TEST_F(JoinProcessorTest, ShouldAllowSelfJoining) {
.AddStringProperty("sender", "pkg$db/namespace#email1")
.Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, doc_store_->Put(email1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ PutAndIndexDocument(email1));
ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
/*score=*/0.0);
diff --git a/icing/join/qualified-id-joinable-property-indexing-handler.cc b/icing/join/qualified-id-joinable-property-indexing-handler.cc
new file mode 100644
index 0000000..0b28444
--- /dev/null
+++ b/icing/join/qualified-id-joinable-property-indexing-handler.cc
@@ -0,0 +1,96 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/qualified-id-joinable-property-indexing-handler.h"
+
+#include <memory>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/join/doc-join-info.h"
+#include "icing/join/qualified-id-type-joinable-index.h"
+#include "icing/join/qualified-id.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/store/document-id.h"
+#include "icing/util/logging.h"
+#include "icing/util/tokenized-document.h"
+
+namespace icing {
+namespace lib {
+
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler>>
+QualifiedIdJoinablePropertyIndexingHandler::Create(
+ const Clock* clock, QualifiedIdTypeJoinableIndex* qualified_id_join_index) {
+ ICING_RETURN_ERROR_IF_NULL(clock);
+ ICING_RETURN_ERROR_IF_NULL(qualified_id_join_index);
+
+ return std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler>(
+ new QualifiedIdJoinablePropertyIndexingHandler(clock,
+ qualified_id_join_index));
+}
+
+libtextclassifier3::Status QualifiedIdJoinablePropertyIndexingHandler::Handle(
+ const TokenizedDocument& tokenized_document, DocumentId document_id,
+ bool recovery_mode, PutDocumentStatsProto* put_document_stats) {
+ // TODO(b/263890397): set qualified id join index processing latency and other
+ // stats.
+
+ if (qualified_id_join_index_.last_added_document_id() != kInvalidDocumentId &&
+ document_id <= qualified_id_join_index_.last_added_document_id()) {
+ if (recovery_mode) {
+ // Skip the document if document_id <= last_added_document_id in recovery
+ // mode without returning an error.
+ return libtextclassifier3::Status::OK;
+ }
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "DocumentId %d must be greater than last added document_id %d",
+ document_id, qualified_id_join_index_.last_added_document_id()));
+ }
+ qualified_id_join_index_.set_last_added_document_id(document_id);
+
+ for (const JoinableProperty<std::string_view>& qualified_id_property :
+ tokenized_document.qualified_id_join_properties()) {
+ if (qualified_id_property.values.empty()) {
+ continue;
+ }
+
+ DocJoinInfo info(document_id, qualified_id_property.metadata.id);
+ // Currently we only support single (non-repeated) joinable value under a
+ // property.
+ std::string_view ref_qualified_id_str = qualified_id_property.values[0];
+
+ // Attempt to parse qualified id string to make sure the format is correct.
+ if (!QualifiedId::Parse(ref_qualified_id_str).ok()) {
+ // Skip incorrect format of qualified id string to save disk space.
+ continue;
+ }
+
+ libtextclassifier3::Status status =
+ qualified_id_join_index_.Put(info, ref_qualified_id_str);
+ if (!status.ok()) {
+ ICING_LOG(WARNING)
+ << "Failed to add data into qualified id join index due to: "
+ << status.error_message();
+ return status;
+ }
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/qualified-id-joinable-property-indexing-handler.h b/icing/join/qualified-id-joinable-property-indexing-handler.h
new file mode 100644
index 0000000..111526e
--- /dev/null
+++ b/icing/join/qualified-id-joinable-property-indexing-handler.h
@@ -0,0 +1,71 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_QUALIFIED_ID_JOINABLE_PROPERTY_INDEXING_HANDLER_H_
+#define ICING_JOIN_QUALIFIED_ID_JOINABLE_PROPERTY_INDEXING_HANDLER_H_
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/index/data-indexing-handler.h"
+#include "icing/join/qualified-id-type-joinable-index.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/store/document-id.h"
+#include "icing/util/clock.h"
+#include "icing/util/tokenized-document.h"
+
+namespace icing {
+namespace lib {
+
+class QualifiedIdJoinablePropertyIndexingHandler : public DataIndexingHandler {
+ public:
+ // Creates a QualifiedIdJoinablePropertyIndexingHandler instance which does
+ // not take ownership of any input components. All pointers must refer to
+ // valid objects that outlive the created
+ // QualifiedIdJoinablePropertyIndexingHandler instance.
+ //
+ // Returns:
+ // - A QualifiedIdJoinablePropertyIndexingHandler instance on success
+ // - FAILED_PRECONDITION_ERROR if any of the input pointers is null
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler>>
+ Create(const Clock* clock,
+ QualifiedIdTypeJoinableIndex* qualified_id_join_index);
+
+ ~QualifiedIdJoinablePropertyIndexingHandler() override = default;
+
+ // Handles the joinable qualified id data indexing process: add data into the
+ // qualified id type joinable cache.
+ //
+ // Returns:
+ // - OK on success
+ // - INVALID_ARGUMENT_ERROR if document_id is less than or equal to the
+ // document_id of a previously indexed document in non recovery mode.
+ // - INTERNAL_ERROR if any other errors occur.
+ // - Any QualifiedIdTypeJoinableIndex errors.
+ libtextclassifier3::Status Handle(
+ const TokenizedDocument& tokenized_document, DocumentId document_id,
+ bool recovery_mode, PutDocumentStatsProto* put_document_stats) override;
+
+ private:
+ explicit QualifiedIdJoinablePropertyIndexingHandler(
+ const Clock* clock, QualifiedIdTypeJoinableIndex* qualified_id_join_index)
+ : DataIndexingHandler(clock),
+ qualified_id_join_index_(*qualified_id_join_index) {}
+
+ QualifiedIdTypeJoinableIndex& qualified_id_join_index_; // Does not own.
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JOIN_QUALIFIED_ID_JOINABLE_PROPERTY_INDEXING_HANDLER_H_
diff --git a/icing/join/qualified-id-joinable-property-indexing-handler_test.cc b/icing/join/qualified-id-joinable-property-indexing-handler_test.cc
new file mode 100644
index 0000000..aa5624c
--- /dev/null
+++ b/icing/join/qualified-id-joinable-property-indexing-handler_test.cc
@@ -0,0 +1,332 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/qualified-id-joinable-property-indexing-handler.h"
+
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/join/qualified-id-type-joinable-index.h"
+#include "icing/join/qualified-id.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/util/tokenized-document.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::IsTrue;
+
+// Schema type for referenced documents: ReferencedType
+static constexpr std::string_view kReferencedType = "ReferencedType";
+static constexpr std::string_view kPropertyName = "name";
+
+// Joinable properties and joinable property id. Joinable property id is
+// determined by the lexicographical order of joinable property path.
+// Schema type with joinable property: FakeType
+static constexpr std::string_view kFakeType = "FakeType";
+static constexpr std::string_view kPropertyQualifiedId = "qualifiedId";
+
+static constexpr JoinablePropertyId kQualifiedIdJoinablePropertyId = 0;
+
+// Schema type with nested joinable properties: NestedType
+static constexpr std::string_view kNestedType = "NestedType";
+static constexpr std::string_view kPropertyNestedDoc = "nested";
+static constexpr std::string_view kPropertyQualifiedId2 = "qualifiedId2";
+
+static constexpr JoinablePropertyId kNestedQualifiedIdJoinablePropertyId = 0;
+static constexpr JoinablePropertyId kQualifiedId2JoinablePropertyId = 1;
+
+static constexpr DocumentId kDefaultDocumentId = 3;
+
+class QualifiedIdJoinablePropertyIndexingHandlerTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ base_dir_ = GetTestTempDir() + "/icing_test";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ qualified_id_join_index_dir_ = base_dir_ + "/qualified_id_join_index";
+ schema_store_dir_ = base_dir_ + "/schema_store";
+
+ ICING_ASSERT_OK_AND_ASSIGN(qualified_id_join_index_,
+ QualifiedIdTypeJoinableIndex::Create(
+ filesystem_, qualified_id_join_index_dir_));
+
+ language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ lang_segmenter_,
+ language_segmenter_factory::Create(std::move(segmenter_options)));
+
+ ASSERT_THAT(
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()),
+ IsTrue());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kReferencedType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType(kFakeType).AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyQualifiedId)
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kNestedType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyNestedDoc)
+ .SetDataTypeDocument(
+ kFakeType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyQualifiedId2)
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ }
+
+ void TearDown() override {
+ schema_store_.reset();
+ lang_segmenter_.reset();
+ qualified_id_join_index_.reset();
+
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ FakeClock fake_clock_;
+ std::string base_dir_;
+ std::string qualified_id_join_index_dir_;
+ std::string schema_store_dir_;
+
+ std::unique_ptr<QualifiedIdTypeJoinableIndex> qualified_id_join_index_;
+ std::unique_ptr<LanguageSegmenter> lang_segmenter_;
+ std::unique_ptr<SchemaStore> schema_store_;
+};
+
+TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest,
+ CreationWithNullPointerShouldFail) {
+ EXPECT_THAT(QualifiedIdJoinablePropertyIndexingHandler::Create(
+ /*clock=*/nullptr, qualified_id_join_index_.get()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+
+ EXPECT_THAT(QualifiedIdJoinablePropertyIndexingHandler::Create(
+ &fake_clock_, /*qualified_id_join_index=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, HandleJoinableProperty) {
+ DocumentProto referenced_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/1")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "one")
+ .Build();
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ "pkg$db/ns#ref_type/1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ // Handle document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler,
+ QualifiedIdJoinablePropertyIndexingHandler::Create(
+ &fake_clock_, qualified_id_join_index_.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::OK));
+
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+ IsOkAndHolds("pkg$db/ns#ref_type/1"));
+}
+
+TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest,
+ HandleNestedJoinableProperty) {
+ DocumentProto referenced_document1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/1")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "one")
+ .Build();
+ DocumentProto referenced_document2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/2")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "two")
+ .Build();
+
+ DocumentProto nested_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "nested_type/1")
+ .SetSchema(std::string(kNestedType))
+ .AddDocumentProperty(
+ std::string(kPropertyNestedDoc),
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "nested_fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ "pkg$db/ns#ref_type/2")
+ .Build())
+ .AddStringProperty(std::string(kPropertyQualifiedId2),
+ "pkg$db/ns#ref_type/1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ nested_document));
+
+ // Handle nested_document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler,
+ QualifiedIdJoinablePropertyIndexingHandler::Create(
+ &fake_clock_, qualified_id_join_index_.get()));
+ EXPECT_THAT(handler->Handle(tokenized_document, kDefaultDocumentId,
+ /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::OK));
+
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kNestedQualifiedIdJoinablePropertyId)),
+ IsOkAndHolds("pkg$db/ns#ref_type/2"));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedId2JoinablePropertyId)),
+ IsOkAndHolds("pkg$db/ns#ref_type/1"));
+}
+
+TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest,
+ HandleShouldSkipInvalidFormatQualifiedId) {
+ static constexpr std::string_view kInvalidFormatQualifiedId =
+ "invalid_format_qualified_id";
+ ASSERT_THAT(QualifiedId::Parse(kInvalidFormatQualifiedId),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ std::string(kInvalidFormatQualifiedId))
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ // Handle document. Handle() should ignore invalid format qualified id.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler,
+ QualifiedIdJoinablePropertyIndexingHandler::Create(
+ &fake_clock_, qualified_id_join_index_.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::OK));
+
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest,
+ HandleShouldSkipEmptyQualifiedId) {
+ // Create a document without any qualified id.
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ ASSERT_THAT(tokenized_document.qualified_id_join_properties(), IsEmpty());
+
+ // Handle document. Handle() should skip documents with no qualified id
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler,
+ QualifiedIdJoinablePropertyIndexingHandler::Create(
+ &fake_clock_, qualified_id_join_index_.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::OK));
+
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/qualified-id-type-joinable-index.cc b/icing/join/qualified-id-type-joinable-index.cc
index 231e78a..9c25e62 100644
--- a/icing/join/qualified-id-type-joinable-index.cc
+++ b/icing/join/qualified-id-type-joinable-index.cc
@@ -14,6 +14,7 @@
#include "icing/join/qualified-id-type-joinable-index.h"
+#include <cstring>
#include <memory>
#include <string>
#include <string_view>
@@ -24,7 +25,9 @@
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/file/destructible-directory.h"
+#include "icing/file/file-backed-vector.h"
#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
#include "icing/join/doc-join-info.h"
#include "icing/store/document-id.h"
#include "icing/store/key-mapper.h"
@@ -49,13 +52,15 @@ DocumentId GetNewDocumentId(
}
std::string GetMetadataFilePath(std::string_view working_path) {
- return absl_ports::StrCat(working_path, "/",
- QualifiedIdTypeJoinableIndex::kFilePrefix, ".m");
+ return absl_ports::StrCat(working_path, "/metadata");
}
-std::string GetDocumentToQualifiedIdMapperPath(std::string_view working_path) {
- return absl_ports::StrCat(
- working_path, "/", QualifiedIdTypeJoinableIndex::kFilePrefix, "_mapper");
+std::string GetDocJoinInfoMapperPath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/doc_join_info_mapper");
+}
+
+std::string GetQualifiedIdStoragePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/qualified_id_storage");
}
} // namespace
@@ -66,9 +71,12 @@ QualifiedIdTypeJoinableIndex::Create(const Filesystem& filesystem,
std::string working_path) {
if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str()) ||
!filesystem.DirectoryExists(
- GetDocumentToQualifiedIdMapperPath(working_path).c_str())) {
+ GetDocJoinInfoMapperPath(working_path).c_str()) ||
+ !filesystem.FileExists(GetQualifiedIdStoragePath(working_path).c_str())) {
// Discard working_path if any file/directory is missing, and reinitialize.
- ICING_RETURN_IF_ERROR(Discard(filesystem, working_path));
+ if (filesystem.DirectoryExists(working_path.c_str())) {
+ ICING_RETURN_IF_ERROR(Discard(filesystem, working_path));
+ }
return InitializeNewFiles(filesystem, std::move(working_path));
}
return InitializeExistingFiles(filesystem, std::move(working_path));
@@ -83,29 +91,44 @@ QualifiedIdTypeJoinableIndex::~QualifiedIdTypeJoinableIndex() {
}
libtextclassifier3::Status QualifiedIdTypeJoinableIndex::Put(
- const DocJoinInfo& doc_join_info, DocumentId ref_document_id) {
+ const DocJoinInfo& doc_join_info, std::string_view ref_qualified_id_str) {
if (!doc_join_info.is_valid()) {
return absl_ports::InvalidArgumentError(
"Cannot put data for an invalid DocJoinInfo");
}
- ICING_RETURN_IF_ERROR(document_to_qualified_id_mapper_->Put(
- encode_util::EncodeIntToCString(doc_join_info.value()), ref_document_id));
+ int32_t qualified_id_index = qualified_id_storage_->num_elements();
+ ICING_ASSIGN_OR_RETURN(
+ FileBackedVector<char>::MutableArrayView mutable_arr,
+ qualified_id_storage_->Allocate(ref_qualified_id_str.size() + 1));
+ mutable_arr.SetArray(/*idx=*/0, ref_qualified_id_str.data(),
+ ref_qualified_id_str.size());
+ mutable_arr.SetArray(/*idx=*/ref_qualified_id_str.size(), /*arr=*/"\0",
+ /*arr_len=*/1);
+
+ ICING_RETURN_IF_ERROR(doc_join_info_mapper_->Put(
+ encode_util::EncodeIntToCString(doc_join_info.value()),
+ qualified_id_index));
// TODO(b/268521214): add data into delete propagation storage
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::StatusOr<DocumentId> QualifiedIdTypeJoinableIndex::Get(
- const DocJoinInfo& doc_join_info) const {
+libtextclassifier3::StatusOr<std::string_view>
+QualifiedIdTypeJoinableIndex::Get(const DocJoinInfo& doc_join_info) const {
if (!doc_join_info.is_valid()) {
return absl_ports::InvalidArgumentError(
"Cannot get data for an invalid DocJoinInfo");
}
- return document_to_qualified_id_mapper_->Get(
- encode_util::EncodeIntToCString(doc_join_info.value()));
+ ICING_ASSIGN_OR_RETURN(
+ int32_t qualified_id_index,
+ doc_join_info_mapper_->Get(
+ encode_util::EncodeIntToCString(doc_join_info.value())));
+
+ const char* data = qualified_id_storage_->array() + qualified_id_index;
+ return std::string_view(data, strlen(data));
}
libtextclassifier3::Status QualifiedIdTypeJoinableIndex::Optimize(
@@ -137,7 +160,8 @@ libtextclassifier3::Status QualifiedIdTypeJoinableIndex::Optimize(
// Destruct current index's storage instances to safely swap directories.
// TODO(b/268521214): handle delete propagation storage
- document_to_qualified_id_mapper_.reset();
+ doc_join_info_mapper_.reset();
+ qualified_id_storage_.reset();
if (!filesystem_.SwapFiles(temp_working_path_ddir.dir().c_str(),
working_path_.c_str())) {
@@ -153,24 +177,37 @@ libtextclassifier3::Status QualifiedIdTypeJoinableIndex::Optimize(
return absl_ports::InternalError("Fail to read metadata file");
}
ICING_ASSIGN_OR_RETURN(
- document_to_qualified_id_mapper_,
- PersistentHashMapKeyMapper<DocumentId>::Create(
- filesystem_, GetDocumentToQualifiedIdMapperPath(working_path_)));
+ doc_join_info_mapper_,
+ PersistentHashMapKeyMapper<int32_t>::Create(
+ filesystem_, GetDocJoinInfoMapperPath(working_path_)));
+
+ ICING_ASSIGN_OR_RETURN(
+ qualified_id_storage_,
+ FileBackedVector<char>::Create(
+ filesystem_, GetQualifiedIdStoragePath(working_path_),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ FileBackedVector<char>::kMaxFileSize,
+ /*pre_mapping_mmap_size=*/1024 * 1024));
return libtextclassifier3::Status::OK;
}
libtextclassifier3::Status QualifiedIdTypeJoinableIndex::Clear() {
- document_to_qualified_id_mapper_.reset();
- // Discard and reinitialize document to qualified id mapper.
- std::string document_to_qualified_id_mapper_path =
- GetDocumentToQualifiedIdMapperPath(working_path_);
- ICING_RETURN_IF_ERROR(PersistentHashMapKeyMapper<DocumentId>::Delete(
- filesystem_, document_to_qualified_id_mapper_path));
+ doc_join_info_mapper_.reset();
+ // Discard and reinitialize doc join info mapper.
+ std::string doc_join_info_mapper_path =
+ GetDocJoinInfoMapperPath(working_path_);
+ ICING_RETURN_IF_ERROR(PersistentHashMapKeyMapper<int32_t>::Delete(
+ filesystem_, doc_join_info_mapper_path));
ICING_ASSIGN_OR_RETURN(
- document_to_qualified_id_mapper_,
- PersistentHashMapKeyMapper<DocumentId>::Create(
- filesystem_, std::move(document_to_qualified_id_mapper_path)));
+ doc_join_info_mapper_,
+ PersistentHashMapKeyMapper<int32_t>::Create(
+ filesystem_, std::move(doc_join_info_mapper_path)));
+
+ // Clear qualified_id_storage_.
+ if (qualified_id_storage_->num_elements() > 0) {
+ ICING_RETURN_IF_ERROR(qualified_id_storage_->TruncateTo(0));
+ }
// TODO(b/268521214): clear delete propagation storage
@@ -188,26 +225,34 @@ QualifiedIdTypeJoinableIndex::InitializeNewFiles(const Filesystem& filesystem,
absl_ports::StrCat("Failed to create directory: ", working_path));
}
- // Initialize document_to_qualified_id_mapper
+ // Initialize doc_join_info_mapper
// TODO(b/263890397): decide PersistentHashMapKeyMapper size
ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<KeyMapper<DocumentId>> document_to_qualified_id_mapper,
- PersistentHashMapKeyMapper<DocumentId>::Create(
- filesystem, GetDocumentToQualifiedIdMapperPath(working_path)));
+ std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper,
+ PersistentHashMapKeyMapper<int32_t>::Create(
+ filesystem, GetDocJoinInfoMapperPath(working_path)));
+
+ // Initialize qualified_id_storage
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<FileBackedVector<char>> qualified_id_storage,
+ FileBackedVector<char>::Create(
+ filesystem, GetQualifiedIdStoragePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ FileBackedVector<char>::kMaxFileSize,
+ /*pre_mapping_mmap_size=*/1024 * 1024));
// Create instance.
auto new_index = std::unique_ptr<QualifiedIdTypeJoinableIndex>(
new QualifiedIdTypeJoinableIndex(
filesystem, std::move(working_path),
/*metadata_buffer=*/std::make_unique<uint8_t[]>(kMetadataFileSize),
- std::move(document_to_qualified_id_mapper)));
+ std::move(doc_join_info_mapper), std::move(qualified_id_storage)));
// Initialize info content.
new_index->info().magic = Info::kMagic;
new_index->info().last_added_document_id = kInvalidDocumentId;
// Initialize new PersistentStorage. The initial checksums will be computed
- // and set via InitializeNewStorage. Also write them into disk as well.
+ // and set via InitializeNewStorage.
ICING_RETURN_IF_ERROR(new_index->InitializeNewStorage());
- ICING_RETURN_IF_ERROR(new_index->PersistMetadataToDisk());
return new_index;
}
@@ -224,17 +269,26 @@ QualifiedIdTypeJoinableIndex::InitializeExistingFiles(
return absl_ports::InternalError("Fail to read metadata file");
}
- // Initialize document_to_qualified_id_mapper
+ // Initialize doc_join_info_mapper
ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<KeyMapper<DocumentId>> document_to_qualified_id_mapper,
- PersistentHashMapKeyMapper<DocumentId>::Create(
- filesystem, GetDocumentToQualifiedIdMapperPath(working_path)));
+ std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper,
+ PersistentHashMapKeyMapper<int32_t>::Create(
+ filesystem, GetDocJoinInfoMapperPath(working_path)));
+
+ // Initialize qualified_id_storage
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<FileBackedVector<char>> qualified_id_storage,
+ FileBackedVector<char>::Create(
+ filesystem, GetQualifiedIdStoragePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ FileBackedVector<char>::kMaxFileSize,
+ /*pre_mapping_mmap_size=*/1024 * 1024));
// Create instance.
auto type_joinable_index = std::unique_ptr<QualifiedIdTypeJoinableIndex>(
new QualifiedIdTypeJoinableIndex(
filesystem, std::move(working_path), std::move(metadata_buffer),
- std::move(document_to_qualified_id_mapper)));
+ std::move(doc_join_info_mapper), std::move(qualified_id_storage)));
// Initialize existing PersistentStorage. Checksums will be validated.
ICING_RETURN_IF_ERROR(type_joinable_index->InitializeExistingStorage());
@@ -249,25 +303,25 @@ QualifiedIdTypeJoinableIndex::InitializeExistingFiles(
libtextclassifier3::Status QualifiedIdTypeJoinableIndex::TransferIndex(
const std::vector<DocumentId>& document_id_old_to_new,
QualifiedIdTypeJoinableIndex* new_index) const {
- std::unique_ptr<KeyMapper<DocumentId>::Iterator> iter =
- document_to_qualified_id_mapper_->GetIterator();
+ std::unique_ptr<KeyMapper<int32_t>::Iterator> iter =
+ doc_join_info_mapper_->GetIterator();
while (iter->Advance()) {
DocJoinInfo old_doc_join_info(
encode_util::DecodeIntFromCString(iter->GetKey()));
- DocumentId old_ref_document_id = iter->GetValue();
+ int32_t qualified_id_index = iter->GetValue();
+
+ const char* data = qualified_id_storage_->array() + qualified_id_index;
+ std::string_view ref_qualified_id_str(data, strlen(data));
- // Translate to new doc ids.
+ // Translate to new doc id.
DocumentId new_document_id = GetNewDocumentId(
document_id_old_to_new, old_doc_join_info.document_id());
- DocumentId new_ref_document_id =
- GetNewDocumentId(document_id_old_to_new, old_ref_document_id);
- if (new_document_id != kInvalidDocumentId &&
- new_ref_document_id != kInvalidDocumentId) {
+ if (new_document_id != kInvalidDocumentId) {
ICING_RETURN_IF_ERROR(
new_index->Put(DocJoinInfo(new_document_id,
old_doc_join_info.joinable_property_id()),
- new_ref_document_id));
+ ref_qualified_id_str));
}
}
@@ -299,7 +353,9 @@ QualifiedIdTypeJoinableIndex::PersistMetadataToDisk() {
libtextclassifier3::Status
QualifiedIdTypeJoinableIndex::PersistStoragesToDisk() {
- return document_to_qualified_id_mapper_->PersistToDisk();
+ ICING_RETURN_IF_ERROR(doc_join_info_mapper_->PersistToDisk());
+ ICING_RETURN_IF_ERROR(qualified_id_storage_->PersistToDisk());
+ return libtextclassifier3::Status::OK;
}
libtextclassifier3::StatusOr<Crc32>
@@ -309,7 +365,12 @@ QualifiedIdTypeJoinableIndex::ComputeInfoChecksum() {
libtextclassifier3::StatusOr<Crc32>
QualifiedIdTypeJoinableIndex::ComputeStoragesChecksum() {
- return document_to_qualified_id_mapper_->ComputeChecksum();
+ ICING_ASSIGN_OR_RETURN(Crc32 doc_join_info_mapper_crc,
+ doc_join_info_mapper_->ComputeChecksum());
+ ICING_ASSIGN_OR_RETURN(Crc32 qualified_id_storage_crc,
+ qualified_id_storage_->ComputeChecksum());
+
+ return Crc32(doc_join_info_mapper_crc.Get() ^ qualified_id_storage_crc.Get());
}
} // namespace lib
diff --git a/icing/join/qualified-id-type-joinable-index.h b/icing/join/qualified-id-type-joinable-index.h
index 794f33f..1127641 100644
--- a/icing/join/qualified-id-type-joinable-index.h
+++ b/icing/join/qualified-id-type-joinable-index.h
@@ -23,6 +23,7 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/file-backed-vector.h"
#include "icing/file/filesystem.h"
#include "icing/file/persistent-storage.h"
#include "icing/join/doc-join-info.h"
@@ -59,8 +60,6 @@ class QualifiedIdTypeJoinableIndex : public PersistentStorage {
static constexpr WorkingPathType kWorkingPathType =
WorkingPathType::kDirectory;
- static constexpr std::string_view kFilePrefix =
- "qualified_id_type_joinable_index";
// Creates a QualifiedIdTypeJoinableIndex instance to store qualified ids for
// future joining search. If any of the underlying file is missing, then
@@ -112,24 +111,26 @@ class QualifiedIdTypeJoinableIndex : public PersistentStorage {
~QualifiedIdTypeJoinableIndex() override;
// Puts a new data into index: DocJoinInfo (DocumentId, JoinablePropertyId)
- // references to ref_document_id.
+ // refers to ref_qualified_id_str (the identifier of another document).
+ //
+ // REQUIRES: ref_qualified_id_str contains no '\0'.
//
// Returns:
// - OK on success
// - INVALID_ARGUMENT_ERROR if doc_join_info is invalid
// - Any KeyMapper errors
libtextclassifier3::Status Put(const DocJoinInfo& doc_join_info,
- DocumentId ref_document_id);
+ std::string_view ref_qualified_id_str);
- // Gets the referenced DocumentId by DocJoinInfo.
+ // Gets the referenced document's qualified id string by DocJoinInfo.
//
// Returns:
- // - DocumentId referenced by the given DocJoinInfo (DocumentId,
+ // - A qualified id string referenced by the given DocJoinInfo (DocumentId,
// JoinablePropertyId) on success
// - INVALID_ARGUMENT_ERROR if doc_join_info is invalid
// - NOT_FOUND_ERROR if doc_join_info doesn't exist
// - Any KeyMapper errors
- libtextclassifier3::StatusOr<DocumentId> Get(
+ libtextclassifier3::StatusOr<std::string_view> Get(
const DocJoinInfo& doc_join_info) const;
// Reduces internal file sizes by reclaiming space and ids of deleted
@@ -158,7 +159,7 @@ class QualifiedIdTypeJoinableIndex : public PersistentStorage {
// - INTERNAL_ERROR on I/O error
libtextclassifier3::Status Clear();
- int32_t size() const { return document_to_qualified_id_mapper_->num_keys(); }
+ int32_t size() const { return doc_join_info_mapper_->num_keys(); }
bool empty() const { return size() == 0; }
@@ -178,11 +179,13 @@ class QualifiedIdTypeJoinableIndex : public PersistentStorage {
explicit QualifiedIdTypeJoinableIndex(
const Filesystem& filesystem, std::string&& working_path,
std::unique_ptr<uint8_t[]> metadata_buffer,
- std::unique_ptr<KeyMapper<DocumentId>> key_mapper)
+ std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper,
+ std::unique_ptr<FileBackedVector<char>> qualified_id_storage)
: PersistentStorage(filesystem, std::move(working_path),
kWorkingPathType),
metadata_buffer_(std::move(metadata_buffer)),
- document_to_qualified_id_mapper_(std::move(key_mapper)) {}
+ doc_join_info_mapper_(std::move(doc_join_info_mapper)),
+ qualified_id_storage_(std::move(qualified_id_storage)) {}
static libtextclassifier3::StatusOr<
std::unique_ptr<QualifiedIdTypeJoinableIndex>>
@@ -255,9 +258,12 @@ class QualifiedIdTypeJoinableIndex : public PersistentStorage {
std::unique_ptr<uint8_t[]> metadata_buffer_;
// Persistent KeyMapper for mapping (encoded) DocJoinInfo (DocumentId,
- // JoinablePropertyId) to another referenced DocumentId (converted from
- // qualified id string).
- std::unique_ptr<KeyMapper<DocumentId>> document_to_qualified_id_mapper_;
+ // JoinablePropertyId) to another referenced document's qualified id string
+ // index in qualified_id_storage_.
+ std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper_;
+
+ // Storage for qualified id strings.
+ std::unique_ptr<FileBackedVector<char>> qualified_id_storage_;
// TODO(b/268521214): add delete propagation storage
};
diff --git a/icing/join/qualified-id-type-joinable-index_test.cc b/icing/join/qualified-id-type-joinable-index_test.cc
index 6cbc9e4..745b0c1 100644
--- a/icing/join/qualified-id-type-joinable-index_test.cc
+++ b/icing/join/qualified-id-type-joinable-index_test.cc
@@ -16,10 +16,12 @@
#include <memory>
#include <string>
+#include <string_view>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "icing/file/file-backed-vector.h"
#include "icing/file/filesystem.h"
#include "icing/file/persistent-storage.h"
#include "icing/join/doc-join-info.h"
@@ -89,8 +91,8 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, InitializeNewFiles) {
// Metadata file should be initialized correctly for both info and crcs
// sections.
- const std::string metadata_file_path = absl_ports::StrCat(
- working_path_, "/", QualifiedIdTypeJoinableIndex::kFilePrefix, ".m");
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/metadata");
auto metadata_buffer = std::make_unique<uint8_t[]>(
QualifiedIdTypeJoinableIndex::kMetadataFileSize);
ASSERT_THAT(
@@ -134,13 +136,13 @@ TEST_F(QualifiedIdTypeJoinableIndexTest,
// Insert some data.
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
- /*ref_document_id=*/0));
+ /*ref_qualified_id_str=*/"namespace#uriA"));
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/20),
- /*ref_document_id=*/2));
+ /*ref_qualified_id_str=*/"namespace#uriB"));
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/5, /*joinable_property_id=*/20),
- /*ref_document_id=*/4));
+ /*ref_qualified_id_str=*/"namespace#uriC"));
// Without calling PersistToDisk, checksums will not be recomputed or synced
// to disk, so initializing another instance on the same files should fail.
@@ -158,13 +160,13 @@ TEST_F(QualifiedIdTypeJoinableIndexTest,
// Insert some data.
ICING_ASSERT_OK(
index1->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
- /*ref_document_id=*/0));
+ /*ref_qualified_id_str=*/"namespace#uriA"));
ICING_ASSERT_OK(
index1->Put(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/20),
- /*ref_document_id=*/2));
+ /*ref_qualified_id_str=*/"namespace#uriB"));
ICING_ASSERT_OK(
index1->Put(DocJoinInfo(/*document_id=*/5, /*joinable_property_id=*/20),
- /*ref_document_id=*/4));
+ /*ref_qualified_id_str=*/"namespace#uriC"));
ASSERT_THAT(index1, Pointee(SizeIs(3)));
// After calling PersistToDisk, all checksums should be recomputed and synced
@@ -178,13 +180,13 @@ TEST_F(QualifiedIdTypeJoinableIndexTest,
EXPECT_THAT(index2, Pointee(SizeIs(3)));
EXPECT_THAT(
index2->Get(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20)),
- IsOkAndHolds(0));
+ IsOkAndHolds(/*ref_qualified_id_str=*/"namespace#uriA"));
EXPECT_THAT(
index2->Get(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/20)),
- IsOkAndHolds(2));
+ IsOkAndHolds(/*ref_qualified_id_str=*/"namespace#uriB"));
EXPECT_THAT(
index2->Get(DocJoinInfo(/*document_id=*/5, /*joinable_property_id=*/20)),
- IsOkAndHolds(4));
+ IsOkAndHolds(/*ref_qualified_id_str=*/"namespace#uriC"));
}
TEST_F(QualifiedIdTypeJoinableIndexTest,
@@ -198,13 +200,13 @@ TEST_F(QualifiedIdTypeJoinableIndexTest,
// Insert some data.
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
- /*ref_document_id=*/0));
+ /*ref_qualified_id_str=*/"namespace#uriA"));
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/20),
- /*ref_document_id=*/2));
+ /*ref_qualified_id_str=*/"namespace#uriB"));
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/5, /*joinable_property_id=*/20),
- /*ref_document_id=*/4));
+ /*ref_qualified_id_str=*/"namespace#uriC"));
ASSERT_THAT(index, Pointee(SizeIs(3)));
}
@@ -219,13 +221,13 @@ TEST_F(QualifiedIdTypeJoinableIndexTest,
EXPECT_THAT(index, Pointee(SizeIs(3)));
EXPECT_THAT(index->Get(DocJoinInfo(/*document_id=*/1,
/*joinable_property_id=*/20)),
- IsOkAndHolds(0));
+ IsOkAndHolds("namespace#uriA"));
EXPECT_THAT(index->Get(DocJoinInfo(/*document_id=*/3,
/*joinable_property_id=*/20)),
- IsOkAndHolds(2));
+ IsOkAndHolds("namespace#uriB"));
EXPECT_THAT(index->Get(DocJoinInfo(/*document_id=*/5,
/*joinable_property_id=*/20)),
- IsOkAndHolds(4));
+ IsOkAndHolds("namespace#uriC"));
}
}
@@ -238,15 +240,15 @@ TEST_F(QualifiedIdTypeJoinableIndexTest,
QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_));
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
- /*ref_document_id=*/0));
+ /*ref_qualified_id_str=*/"namespace#uriA"));
ICING_ASSERT_OK(index->PersistToDisk());
}
{
// Manually change magic and update checksum
- const std::string metadata_file_path = absl_ports::StrCat(
- working_path_, "/", QualifiedIdTypeJoinableIndex::kFilePrefix, ".m");
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/metadata");
ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
@@ -290,14 +292,14 @@ TEST_F(QualifiedIdTypeJoinableIndexTest,
QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_));
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
- /*ref_document_id=*/0));
+ /*ref_qualified_id_str=*/"namespace#uriA"));
ICING_ASSERT_OK(index->PersistToDisk());
}
{
- const std::string metadata_file_path = absl_ports::StrCat(
- working_path_, "/", QualifiedIdTypeJoinableIndex::kFilePrefix, ".m");
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/metadata");
ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
@@ -337,14 +339,14 @@ TEST_F(QualifiedIdTypeJoinableIndexTest,
QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_));
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
- /*ref_document_id=*/0));
+ /*ref_qualified_id_str=*/"namespace#uriA"));
ICING_ASSERT_OK(index->PersistToDisk());
}
{
- const std::string metadata_file_path = absl_ports::StrCat(
- working_path_, "/", QualifiedIdTypeJoinableIndex::kFilePrefix, ".m");
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/metadata");
ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
@@ -376,9 +378,8 @@ TEST_F(QualifiedIdTypeJoinableIndexTest,
HasSubstr("Invalid info crc")));
}
-TEST_F(
- QualifiedIdTypeJoinableIndexTest,
- InitializeExistingFilesWithCorruptedDocumentToQualifiedIdMapperShouldFail) {
+TEST_F(QualifiedIdTypeJoinableIndexTest,
+ InitializeExistingFilesWithCorruptedDocJoinInfoMapperShouldFail) {
{
// Create new qualified id type joinable index
ICING_ASSERT_OK_AND_ASSIGN(
@@ -386,19 +387,18 @@ TEST_F(
QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_));
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
- /*ref_document_id=*/0));
+ /*ref_qualified_id_str=*/"namespace#uriA"));
ICING_ASSERT_OK(index->PersistToDisk());
}
{
- // Corrupt document_to_qualified_id_mapper manually.
- std::string mapper_working_path = absl_ports::StrCat(
- working_path_, "/", QualifiedIdTypeJoinableIndex::kFilePrefix,
- "_mapper");
+ // Corrupt doc_join_info_mapper manually.
+ std::string mapper_working_path =
+ absl_ports::StrCat(working_path_, "/doc_join_info_mapper");
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<PersistentHashMapKeyMapper<DocumentId>> mapper,
- PersistentHashMapKeyMapper<DocumentId>::Create(
+ std::unique_ptr<PersistentHashMapKeyMapper<int32_t>> mapper,
+ PersistentHashMapKeyMapper<int32_t>::Create(
filesystem_, std::move(mapper_working_path)));
ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc, mapper->ComputeChecksum());
ICING_ASSERT_OK(mapper->Put("foo", 12345));
@@ -408,7 +408,47 @@ TEST_F(
}
// Attempt to create the qualified id type joinable index with corrupted
- // document_to_qualified_id_mapper. This should fail.
+ // doc_join_info_mapper. This should fail.
+ EXPECT_THAT(QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+ HasSubstr("Invalid storages crc")));
+}
+
+TEST_F(QualifiedIdTypeJoinableIndexTest,
+ InitializeExistingFilesWithCorruptedQualifiedIdStorageShouldFail) {
+ {
+ // Create new qualified id type joinable index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdTypeJoinableIndex> index,
+ QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ {
+ // Corrupt qualified_id_storage manually.
+ std::string qualified_id_storage_path =
+ absl_ports::StrCat(working_path_, "/qualified_id_storage");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> qualified_id_storage,
+ FileBackedVector<char>::Create(
+ filesystem_, qualified_id_storage_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc,
+ qualified_id_storage->ComputeChecksum());
+ ICING_ASSERT_OK(qualified_id_storage->Append('a'));
+ ICING_ASSERT_OK(qualified_id_storage->Append('b'));
+ ICING_ASSERT_OK(qualified_id_storage->PersistToDisk());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc,
+ qualified_id_storage->ComputeChecksum());
+ ASSERT_THAT(old_crc, Not(Eq(new_crc)));
+ }
+
+ // Attempt to create the qualified id type joinable index with corrupted
+ // qualified_id_storage. This should fail.
EXPECT_THAT(QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
HasSubstr("Invalid storages crc")));
@@ -421,8 +461,9 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, InvalidPut) {
QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_));
DocJoinInfo default_invalid;
- EXPECT_THAT(index->Put(default_invalid, /*ref_document_id=*/0),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(
+ index->Put(default_invalid, /*ref_qualified_id_str=*/"namespace#uriA"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST_F(QualifiedIdTypeJoinableIndexTest, InvalidGet) {
@@ -438,13 +479,13 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, InvalidGet) {
TEST_F(QualifiedIdTypeJoinableIndexTest, PutAndGet) {
DocJoinInfo target_info1(/*document_id=*/1, /*joinable_property_id=*/20);
- DocumentId ref_document1 = 0;
+ std::string_view ref_qualified_id_str_a = "namespace#uriA";
DocJoinInfo target_info2(/*document_id=*/3, /*joinable_property_id=*/13);
- DocumentId ref_document2 = 2;
+ std::string_view ref_qualified_id_str_b = "namespace#uriB";
DocJoinInfo target_info3(/*document_id=*/4, /*joinable_property_id=*/4);
- DocumentId ref_document3 = ref_document1;
+ std::string_view ref_qualified_id_str_c = "namespace#uriC";
{
// Create new qualified id type joinable index
@@ -452,17 +493,14 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, PutAndGet) {
std::unique_ptr<QualifiedIdTypeJoinableIndex> index,
QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_));
- EXPECT_THAT(index->Put(target_info1, /*ref_document_id=*/ref_document1),
- IsOk());
- EXPECT_THAT(index->Put(target_info2, /*ref_document_id=*/ref_document2),
- IsOk());
- EXPECT_THAT(index->Put(target_info3, /*ref_document_id=*/ref_document3),
- IsOk());
+ EXPECT_THAT(index->Put(target_info1, ref_qualified_id_str_a), IsOk());
+ EXPECT_THAT(index->Put(target_info2, ref_qualified_id_str_b), IsOk());
+ EXPECT_THAT(index->Put(target_info3, ref_qualified_id_str_c), IsOk());
EXPECT_THAT(index, Pointee(SizeIs(3)));
- EXPECT_THAT(index->Get(target_info1), IsOkAndHolds(ref_document1));
- EXPECT_THAT(index->Get(target_info2), IsOkAndHolds(ref_document2));
- EXPECT_THAT(index->Get(target_info3), IsOkAndHolds(ref_document3));
+ EXPECT_THAT(index->Get(target_info1), IsOkAndHolds(ref_qualified_id_str_a));
+ EXPECT_THAT(index->Get(target_info2), IsOkAndHolds(ref_qualified_id_str_b));
+ EXPECT_THAT(index->Get(target_info3), IsOkAndHolds(ref_qualified_id_str_c));
ICING_ASSERT_OK(index->PersistToDisk());
}
@@ -472,15 +510,15 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, PutAndGet) {
std::unique_ptr<QualifiedIdTypeJoinableIndex> index,
QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_));
EXPECT_THAT(index, Pointee(SizeIs(3)));
- EXPECT_THAT(index->Get(target_info1), IsOkAndHolds(ref_document1));
- EXPECT_THAT(index->Get(target_info2), IsOkAndHolds(ref_document2));
- EXPECT_THAT(index->Get(target_info3), IsOkAndHolds(ref_document3));
+ EXPECT_THAT(index->Get(target_info1), IsOkAndHolds(ref_qualified_id_str_a));
+ EXPECT_THAT(index->Get(target_info2), IsOkAndHolds(ref_qualified_id_str_b));
+ EXPECT_THAT(index->Get(target_info3), IsOkAndHolds(ref_qualified_id_str_c));
}
TEST_F(QualifiedIdTypeJoinableIndexTest,
GetShouldReturnNotFoundErrorIfNotExist) {
DocJoinInfo target_info(/*document_id=*/1, /*joinable_property_id=*/20);
- DocumentId ref_document = 0;
+ std::string_view ref_qualified_id_str = "namespace#uriA";
// Create new qualified id type joinable index
ICING_ASSERT_OK_AND_ASSIGN(
@@ -491,8 +529,8 @@ TEST_F(QualifiedIdTypeJoinableIndexTest,
EXPECT_THAT(index->Get(target_info),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- ICING_ASSERT_OK(index->Put(target_info, /*ref_document_id=*/ref_document));
- ASSERT_THAT(index->Get(target_info), IsOkAndHolds(ref_document));
+ ICING_ASSERT_OK(index->Put(target_info, ref_qualified_id_str));
+ ASSERT_THAT(index->Get(target_info), IsOkAndHolds(ref_qualified_id_str));
// Get another non-existing entry. This should get NOT_FOUND_ERROR.
DocJoinInfo another_target_info(/*document_id=*/2,
@@ -542,34 +580,30 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, Optimize) {
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/10),
- /*ref_document_id=*/0));
+ /*ref_qualified_id_str=*/"namespace#uriA"));
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/5, /*joinable_property_id=*/3),
- /*ref_document_id=*/0));
+ /*ref_qualified_id_str=*/"namespace#uriA"));
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/8, /*joinable_property_id=*/9),
- /*ref_document_id=*/2));
+ /*ref_qualified_id_str=*/"namespace#uriB"));
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/13, /*joinable_property_id=*/4),
- /*ref_document_id=*/12));
+ /*ref_qualified_id_str=*/"namespace#uriC"));
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/21, /*joinable_property_id=*/12),
- /*ref_document_id=*/12));
+ /*ref_qualified_id_str=*/"namespace#uriC"));
index->set_last_added_document_id(21);
ASSERT_THAT(index, Pointee(SizeIs(5)));
- // Used doc id: 0, 2, 3, 5, 8, 12, 13, 21.
- // Delete doc id = 2, 5, compress and keep the rest.
+ // Delete doc id = 5, 8, compress and keep the rest.
std::vector<DocumentId> document_id_old_to_new(22, kInvalidDocumentId);
- document_id_old_to_new[0] = 0;
- document_id_old_to_new[3] = 1;
- document_id_old_to_new[8] = 2;
- document_id_old_to_new[12] = 3;
- document_id_old_to_new[13] = 4;
- document_id_old_to_new[21] = 5;
-
- DocumentId new_last_added_document_id = 5;
+ document_id_old_to_new[3] = 0;
+ document_id_old_to_new[13] = 1;
+ document_id_old_to_new[21] = 2;
+
+ DocumentId new_last_added_document_id = 2;
EXPECT_THAT(
index->Optimize(document_id_old_to_new, new_last_added_document_id),
IsOk());
@@ -577,48 +611,43 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, Optimize) {
EXPECT_THAT(index->last_added_document_id(), Eq(new_last_added_document_id));
// Verify Put and Get API still work normally after Optimize().
- // (old_doc_id=3, joinable_property_id=10) had old referenced doc_id = 0,
- // which is now (doc_id=1, joinable_property_id=10) and referenced doc_id = 0.
+ // (old_doc_id=3, joinable_property_id=10), which is now (doc_id=0,
+ // joinable_property_id=10), has referenced qualified id str =
+ // "namespace#uriA".
EXPECT_THAT(
- index->Get(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/10)),
- IsOkAndHolds(0));
-
- // (old_doc_id=5, joinable_property_id=3) had old referenced doc_id = 0,
- // which is now not found since we've deleted old_doc_id = 5. It is not
- // testable via Get() because there is no valid doc_id mapping for old_doc_id
- // = 5 and we cannot generate a valid DocJoinInfo for it.
-
- // (old_doc_id=8, joinable_property_id=9) had old referenced doc_id = 2,
- // which is now (doc_id=2, joinable_property_id=9), but since we've deleted
- // old referenced doc_id = 2, this data should not be found after
- // optimization.
- EXPECT_THAT(
- index->Get(DocJoinInfo(/*document_id=*/2, /*joinable_property_id=*/9)),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ index->Get(DocJoinInfo(/*document_id=*/0, /*joinable_property_id=*/10)),
+ IsOkAndHolds("namespace#uriA"));
+
+ // (old_doc_id=5, joinable_property_id=3) and (old_doc_id=8,
+ // joinable_property_id=9) are now not found since we've deleted old_doc_id =
+ // 5, 8. It is not testable via Get() because there is no valid doc_id mapping
+ // for old_doc_id = 5, 8 and we cannot generate a valid DocJoinInfo for them.
- // (old_doc_id=13, joinable_property_id=4) had old referenced doc_id = 12,
- // which is now (doc_id=4, joinable_property_id=4) and referenced doc_id = 3.
+ // (old_doc_id=13, joinable_property_id=4), which is now (doc_id=1,
+ // joinable_property_id=4), has referenced qualified id str =
+ // "namespace#uriC".
EXPECT_THAT(
- index->Get(DocJoinInfo(/*document_id=*/4, /*joinable_property_id=*/4)),
- IsOkAndHolds(3));
+ index->Get(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/4)),
+ IsOkAndHolds("namespace#uriC"));
- // (old_doc_id=21, joinable_property_id=12) had old referenced doc_id = 12,
- // which is now (doc_id=5, joinable_property_id=12) and referenced doc_id = 3.
+ // (old_doc_id=21, joinable_property_id=12), which is now (doc_id=2,
+ // joinable_property_id=12), has referenced qualified id str =
+ // "namespace#uriC".
EXPECT_THAT(
- index->Get(DocJoinInfo(/*document_id=*/5, /*joinable_property_id=*/12)),
- IsOkAndHolds(3));
+ index->Get(DocJoinInfo(/*document_id=*/2, /*joinable_property_id=*/12)),
+ IsOkAndHolds("namespace#uriC"));
// Joinable index should be able to work normally after Optimize().
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/99, /*joinable_property_id=*/2),
- /*ref_document_id=*/90));
+ /*ref_qualified_id_str=*/"namespace#uriD"));
index->set_last_added_document_id(99);
EXPECT_THAT(index, Pointee(SizeIs(4)));
EXPECT_THAT(index->last_added_document_id(), Eq(99));
EXPECT_THAT(index->Get(DocJoinInfo(/*document_id=*/99,
/*joinable_property_id=*/2)),
- IsOkAndHolds(90));
+ IsOkAndHolds("namespace#uriD"));
}
TEST_F(QualifiedIdTypeJoinableIndexTest, OptimizeOutOfRangeDocumentId) {
@@ -628,7 +657,7 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, OptimizeOutOfRangeDocumentId) {
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/99, /*joinable_property_id=*/10),
- /*ref_document_id=*/91));
+ /*ref_qualified_id_str=*/"namespace#uriA"));
index->set_last_added_document_id(99);
// Create document_id_old_to_new with size = 1. Optimize should handle out of
@@ -653,19 +682,19 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, OptimizeDeleteAll) {
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/10),
- /*ref_document_id=*/0));
+ /*ref_qualified_id_str=*/"namespace#uriA"));
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/5, /*joinable_property_id=*/3),
- /*ref_document_id=*/0));
+ /*ref_qualified_id_str=*/"namespace#uriA"));
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/8, /*joinable_property_id=*/9),
- /*ref_document_id=*/2));
+ /*ref_qualified_id_str=*/"namespace#uriB"));
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/13, /*joinable_property_id=*/4),
- /*ref_document_id=*/12));
+ /*ref_qualified_id_str=*/"namespace#uriC"));
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/21, /*joinable_property_id=*/12),
- /*ref_document_id=*/12));
+ /*ref_qualified_id_str=*/"namespace#uriC"));
index->set_last_added_document_id(21);
// Delete all documents.
@@ -690,9 +719,12 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, Clear) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<QualifiedIdTypeJoinableIndex> index,
QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_));
- ICING_ASSERT_OK(index->Put(target_info1, /*ref_document_id=*/0));
- ICING_ASSERT_OK(index->Put(target_info2, /*ref_document_id=*/2));
- ICING_ASSERT_OK(index->Put(target_info3, /*ref_document_id=*/4));
+ ICING_ASSERT_OK(
+ index->Put(target_info1, /*ref_qualified_id_str=*/"namespace#uriA"));
+ ICING_ASSERT_OK(
+ index->Put(target_info2, /*ref_qualified_id_str=*/"namespace#uriB"));
+ ICING_ASSERT_OK(
+ index->Put(target_info3, /*ref_qualified_id_str=*/"namespace#uriC"));
ASSERT_THAT(index, Pointee(SizeIs(3)));
index->set_last_added_document_id(6);
ASSERT_THAT(index->last_added_document_id(), Eq(6));
@@ -711,11 +743,12 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, Clear) {
// Joinable index should be able to work normally after Clear().
DocJoinInfo target_info4(/*document_id=*/2, /*joinable_property_id=*/19);
- ICING_ASSERT_OK(index->Put(target_info4, /*ref_document_id=*/0));
+ ICING_ASSERT_OK(
+ index->Put(target_info4, /*ref_qualified_id_str=*/"namespace#uriD"));
index->set_last_added_document_id(2);
EXPECT_THAT(index->last_added_document_id(), Eq(2));
- EXPECT_THAT(index->Get(target_info4), IsOkAndHolds(0));
+ EXPECT_THAT(index->Get(target_info4), IsOkAndHolds("namespace#uriD"));
ICING_ASSERT_OK(index->PersistToDisk());
index.reset();
@@ -730,7 +763,7 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, Clear) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(index->Get(target_info3),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(index->Get(target_info4), IsOkAndHolds(0));
+ EXPECT_THAT(index->Get(target_info4), IsOkAndHolds("namespace#uriD"));
}
} // namespace
diff --git a/icing/join/qualified-id.cc b/icing/join/qualified-id.cc
index 2a30c44..42e080c 100644
--- a/icing/join/qualified-id.cc
+++ b/icing/join/qualified-id.cc
@@ -40,9 +40,14 @@ bool IsSpecialCharacter(char c) {
// A valid index of the separator on success.
// std::string::npos if the escape format of content is incorrect.
// std::string::npos if the content contains 0 or more than 1 separators.
+// std::string::npos if the content contains '\0'.
size_t VerifyFormatAndGetSeparatorPosition(std::string_view content) {
size_t separator_pos = std::string::npos;
for (size_t i = 0; i < content.length(); ++i) {
+ if (content[i] == '\0') {
+ return std::string::npos;
+ }
+
if (content[i] == QualifiedId::kEscapeChar) {
// Advance to the next character.
++i;
diff --git a/icing/join/qualified-id_test.cc b/icing/join/qualified-id_test.cc
index 0c3750a..92bf63e 100644
--- a/icing/join/qualified-id_test.cc
+++ b/icing/join/qualified-id_test.cc
@@ -135,6 +135,24 @@ TEST(QualifiedIdTest, InvalidQualifiedIdWithWrongNumberOfSeparators) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
+TEST(QualifiedIdTest, InvalidQualifiedIdWithStringTerminator) {
+ const char invalid_qualified_id1[] = "names\0pace#uri";
+ EXPECT_THAT(QualifiedId::Parse(std::string_view(invalid_qualified_id1, 14)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ const char invalid_qualified_id2[] = "namespace#ur\0i";
+ EXPECT_THAT(QualifiedId::Parse(std::string_view(invalid_qualified_id2, 14)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ const char invalid_qualified_id3[] = "\0namespace#uri";
+ EXPECT_THAT(QualifiedId::Parse(std::string_view(invalid_qualified_id3, 14)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ const char invalid_qualified_id4[] = "namespace#uri\0";
+ EXPECT_THAT(QualifiedId::Parse(std::string_view(invalid_qualified_id4, 14)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/monkey_test/icing-monkey-test-runner.cc b/icing/monkey_test/icing-monkey-test-runner.cc
index db518bd..89b8e89 100644
--- a/icing/monkey_test/icing-monkey-test-runner.cc
+++ b/icing/monkey_test/icing-monkey-test-runner.cc
@@ -453,9 +453,15 @@ void IcingMonkeyTestRunner::DoOptimize() {
}
void IcingMonkeyTestRunner::CreateIcingSearchEngine() {
+ std::uniform_int_distribution<> dist(0, 1);
+
IcingSearchEngineOptions icing_options;
icing_options.set_index_merge_size(config_.index_merge_size);
icing_options.set_base_dir(icing_dir_->dir());
+ // This method is called every time we ReloadFromDisk(), so randomly
+ // flip this flag to test the document store's compatibility.
+ icing_options.set_document_store_namespace_id_fingerprint(
+ (bool)dist(random_));
icing_ = std::make_unique<IcingSearchEngine>(icing_options);
ASSERT_THAT(icing_->Initialize().status(), ProtoIsOk());
}
diff --git a/icing/query/advanced_query_parser/abstract-syntax-tree.h b/icing/query/advanced_query_parser/abstract-syntax-tree.h
index d18f6ea..67049ad 100644
--- a/icing/query/advanced_query_parser/abstract-syntax-tree.h
+++ b/icing/query/advanced_query_parser/abstract-syntax-tree.h
@@ -17,6 +17,7 @@
#include <memory>
#include <string>
+#include <string_view>
#include <utility>
#include <vector>
@@ -52,24 +53,29 @@ class Node {
class TerminalNode : public Node {
public:
- explicit TerminalNode(std::string value, bool is_prefix)
- : value_(std::move(value)), is_prefix_(is_prefix) {}
+ explicit TerminalNode(std::string value, std::string_view raw_value,
+ bool is_prefix)
+ : value_(std::move(value)),
+ raw_value_(raw_value),
+ is_prefix_(is_prefix) {}
const std::string& value() const& { return value_; }
std::string value() && { return std::move(value_); }
bool is_prefix() const { return is_prefix_; }
+ std::string_view raw_value() const { return raw_value_; }
+
private:
std::string value_;
+ std::string_view raw_value_;
bool is_prefix_;
};
class FunctionNameNode : public TerminalNode {
public:
explicit FunctionNameNode(std::string value)
- : TerminalNode(std::move(value), /*is_prefix=*/false) {}
-
+ : TerminalNode(std::move(value), /*raw_value=*/"", /*is_prefix=*/false) {}
void Accept(AbstractSyntaxTreeVisitor* visitor) const override {
visitor->VisitFunctionName(this);
}
@@ -77,9 +83,9 @@ class FunctionNameNode : public TerminalNode {
class StringNode : public TerminalNode {
public:
- explicit StringNode(std::string value, bool is_prefix = false)
- : TerminalNode(std::move(value), is_prefix) {}
-
+ explicit StringNode(std::string value, std::string_view raw_value,
+ bool is_prefix = false)
+ : TerminalNode(std::move(value), raw_value, is_prefix) {}
void Accept(AbstractSyntaxTreeVisitor* visitor) const override {
visitor->VisitString(this);
}
@@ -87,9 +93,9 @@ class StringNode : public TerminalNode {
class TextNode : public TerminalNode {
public:
- explicit TextNode(std::string value, bool is_prefix = false)
- : TerminalNode(std::move(value), is_prefix) {}
-
+ explicit TextNode(std::string value, std::string_view raw_value,
+ bool is_prefix = false)
+ : TerminalNode(std::move(value), raw_value, is_prefix) {}
void Accept(AbstractSyntaxTreeVisitor* visitor) const override {
visitor->VisitText(this);
}
diff --git a/icing/query/advanced_query_parser/abstract-syntax-tree_test.cc b/icing/query/advanced_query_parser/abstract-syntax-tree_test.cc
index a8599fd..5e28278 100644
--- a/icing/query/advanced_query_parser/abstract-syntax-tree_test.cc
+++ b/icing/query/advanced_query_parser/abstract-syntax-tree_test.cc
@@ -27,8 +27,8 @@ namespace {
using ::testing::ElementsAre;
TEST(AbstractSyntaxTreeTest, Simple) {
- // foo
- std::unique_ptr<Node> root = std::make_unique<TextNode>("foo");
+ std::string_view query = "foo";
+ std::unique_ptr<Node> root = std::make_unique<TextNode>("foo", query);
SimpleVisitor visitor;
root->Accept(&visitor);
@@ -37,16 +37,16 @@ TEST(AbstractSyntaxTreeTest, Simple) {
}
TEST(AbstractSyntaxTreeTest, Composite) {
- // (foo bar) OR baz
+ std::string_view query = "(foo bar) OR baz";
std::vector<std::unique_ptr<Node>> and_args;
- and_args.push_back(std::make_unique<TextNode>("foo"));
- and_args.push_back(std::make_unique<TextNode>("bar"));
+ and_args.push_back(std::make_unique<TextNode>("foo", query.substr(1, 3)));
+ and_args.push_back(std::make_unique<TextNode>("bar", query.substr(5, 3)));
auto and_node =
std::make_unique<NaryOperatorNode>("AND", std::move(and_args));
std::vector<std::unique_ptr<Node>> or_args;
or_args.push_back(std::move(and_node));
- or_args.push_back(std::make_unique<TextNode>("baz"));
+ or_args.push_back(std::make_unique<TextNode>("baz", query.substr(13, 3)));
std::unique_ptr<Node> root =
std::make_unique<NaryOperatorNode>("OR", std::move(or_args));
@@ -72,9 +72,9 @@ TEST(AbstractSyntaxTreeTest, Function) {
ElementsAre(EqualsNodeInfo("foo", NodeType::kFunctionName),
EqualsNodeInfo("", NodeType::kFunction)));
- // foo("bar")
+ std::string_view query = "foo(\"bar\")";
std::vector<std::unique_ptr<Node>> args;
- args.push_back(std::make_unique<StringNode>("bar"));
+ args.push_back(std::make_unique<StringNode>("bar", query.substr(5, 3)));
root = std::make_unique<FunctionNode>(
std::make_unique<FunctionNameNode>("foo"), std::move(args));
visitor = SimpleVisitor();
@@ -85,9 +85,9 @@ TEST(AbstractSyntaxTreeTest, Function) {
EqualsNodeInfo("bar", NodeType::kString),
EqualsNodeInfo("", NodeType::kFunction)));
- // foo(bar("baz"))
+ query = "foo(bar(\"baz\"))";
std::vector<std::unique_ptr<Node>> inner_args;
- inner_args.push_back(std::make_unique<StringNode>("baz"));
+ inner_args.push_back(std::make_unique<StringNode>("baz", query.substr(9, 3)));
args.clear();
args.push_back(std::make_unique<FunctionNode>(
std::make_unique<FunctionNameNode>("bar"), std::move(inner_args)));
@@ -105,14 +105,16 @@ TEST(AbstractSyntaxTreeTest, Function) {
}
TEST(AbstractSyntaxTreeTest, Restriction) {
- // sender.name:(IMPORTANT OR URGENT)
+ std::string_view query = "sender.name:(IMPORTANT OR URGENT)";
std::vector<std::unique_ptr<TextNode>> member_args;
- member_args.push_back(std::make_unique<TextNode>("sender"));
- member_args.push_back(std::make_unique<TextNode>("name"));
+ member_args.push_back(
+ std::make_unique<TextNode>("sender", query.substr(0, 6)));
+ member_args.push_back(std::make_unique<TextNode>("name", query.substr(7, 4)));
std::vector<std::unique_ptr<Node>> or_args;
- or_args.push_back(std::make_unique<TextNode>("IMPORTANT"));
- or_args.push_back(std::make_unique<TextNode>("URGENT"));
+ or_args.push_back(
+ std::make_unique<TextNode>("IMPORTANT", query.substr(13, 9)));
+ or_args.push_back(std::make_unique<TextNode>("URGENT", query.substr(26, 6)));
std::vector<std::unique_ptr<Node>> has_args;
has_args.push_back(std::make_unique<MemberNode>(std::move(member_args),
diff --git a/icing/query/advanced_query_parser/function_test.cc b/icing/query/advanced_query_parser/function_test.cc
index 3b3ca40..afd4e04 100644
--- a/icing/query/advanced_query_parser/function_test.cc
+++ b/icing/query/advanced_query_parser/function_test.cc
@@ -63,10 +63,10 @@ TEST(FunctionTest, ParamNotWrongTypeFails) {
Function function,
Function::Create(/*return_type=*/DataType::kString, "foo",
/*params=*/{Param(DataType::kString)}, TrivialEval()));
- // foo(bar)
+ std::string_view query = "foo(bar)";
std::vector<PendingValue> args;
args.push_back(PendingValue::CreateTextPendingValue(
- QueryTerm{"bar", /*is_prefix_val=*/false}));
+ QueryTerm{"bar", query.substr(4, 3), /*is_prefix_val=*/false}));
EXPECT_THAT(function.Eval(std::move(args)),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
@@ -77,10 +77,10 @@ TEST(FunctionTest, ParamRequiredArgSucceeds) {
Function::Create(/*return_type=*/DataType::kString, "foo",
/*params=*/{Param(DataType::kString)}, TrivialEval()));
- // foo("bar")
+ std::string_view query = R"(foo("bar"))";
std::vector<PendingValue> args;
args.push_back(PendingValue::CreateStringPendingValue(
- QueryTerm{"bar", /*is_prefix_val=*/false}));
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
ICING_ASSERT_OK_AND_ASSIGN(PendingValue val, function.Eval(std::move(args)));
EXPECT_THAT(val.is_placeholder(), IsTrue());
}
@@ -136,19 +136,19 @@ TEST(FunctionTest, MultipleArgsTrailingOptionalSucceeds) {
Param(DataType::kString, Cardinality::kOptional)},
TrivialEval()));
- // foo("bar")
+ std::string_view query = R"(foo("bar"))";
std::vector<PendingValue> args;
args.push_back(PendingValue::CreateStringPendingValue(
- QueryTerm{"bar", /*is_prefix_val=*/false}));
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
ICING_ASSERT_OK_AND_ASSIGN(PendingValue val, function.Eval(std::move(args)));
EXPECT_THAT(val.is_placeholder(), IsTrue());
- // foo("bar", "baz")
+ query = R"(foo("bar", "baz"))";
args = std::vector<PendingValue>();
args.push_back(PendingValue::CreateStringPendingValue(
- QueryTerm{"bar", /*is_prefix_val=*/false}));
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
args.push_back(PendingValue::CreateStringPendingValue(
- QueryTerm{"baz", /*is_prefix_val=*/false}));
+ QueryTerm{"baz", query.substr(12, 3), /*is_prefix_val=*/false}));
ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
EXPECT_THAT(val.is_placeholder(), IsTrue());
}
@@ -162,30 +162,30 @@ TEST(FunctionTest, MultipleArgsTrailingVariableSucceeds) {
Param(DataType::kString, Cardinality::kVariable)},
TrivialEval()));
- // foo("bar")
+ std::string_view query = R"(foo("bar"))";
std::vector<PendingValue> args;
args.push_back(PendingValue::CreateStringPendingValue(
- QueryTerm{"bar", /*is_prefix_val=*/false}));
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
ICING_ASSERT_OK_AND_ASSIGN(PendingValue val, function.Eval(std::move(args)));
EXPECT_THAT(val.is_placeholder(), IsTrue());
- // foo("bar", "baz")
+ query = R"(foo("bar", "baz"))";
args = std::vector<PendingValue>();
args.push_back(PendingValue::CreateStringPendingValue(
- QueryTerm{"bar", /*is_prefix_val=*/false}));
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
args.push_back(PendingValue::CreateStringPendingValue(
- QueryTerm{"baz", /*is_prefix_val=*/false}));
+ QueryTerm{"baz", query.substr(12, 3), /*is_prefix_val=*/false}));
ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
EXPECT_THAT(val.is_placeholder(), IsTrue());
- // foo("bar", "baz", "bat")
+ query = R"(foo("bar", "baz", "bat"))";
args = std::vector<PendingValue>();
args.push_back(PendingValue::CreateStringPendingValue(
- QueryTerm{"bar", /*is_prefix_val=*/false}));
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
args.push_back(PendingValue::CreateStringPendingValue(
- QueryTerm{"baz", /*is_prefix_val=*/false}));
+ QueryTerm{"baz", query.substr(12, 3), /*is_prefix_val=*/false}));
args.push_back(PendingValue::CreateStringPendingValue(
- QueryTerm{"bat", /*is_prefix_val=*/false}));
+ QueryTerm{"bat", query.substr(19, 3), /*is_prefix_val=*/false}));
ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
EXPECT_THAT(val.is_placeholder(), IsTrue());
}
@@ -214,26 +214,26 @@ TEST(FunctionTest, MultipleArgsOptionalBeforeOptionalSucceeds) {
ICING_ASSERT_OK_AND_ASSIGN(PendingValue val, function.Eval(std::move(args)));
EXPECT_THAT(val.is_placeholder(), IsTrue());
- // foo("bar")
+ std::string_view query = R"(foo("bar"))";
args = std::vector<PendingValue>();
args.push_back(PendingValue::CreateStringPendingValue(
- QueryTerm{"bar", /*is_prefix_val=*/false}));
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
EXPECT_THAT(val.is_placeholder(), IsTrue());
- // foo("bar", baz)
+ query = R"(foo("bar", baz))";
args = std::vector<PendingValue>();
args.push_back(PendingValue::CreateStringPendingValue(
- QueryTerm{"bar", /*is_prefix_val=*/false}));
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
args.push_back(PendingValue::CreateTextPendingValue(
- QueryTerm{"baz", /*is_prefix_val=*/false}));
+ QueryTerm{"baz", query.substr(11, 3), /*is_prefix_val=*/false}));
ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
EXPECT_THAT(val.is_placeholder(), IsTrue());
- // foo(baz)
+ query = R"(foo(baz))";
args = std::vector<PendingValue>();
args.push_back(PendingValue::CreateTextPendingValue(
- QueryTerm{"baz", /*is_prefix_val=*/false}));
+ QueryTerm{"baz", query.substr(4, 3), /*is_prefix_val=*/false}));
EXPECT_THAT(function.Eval(std::move(args)),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
@@ -252,46 +252,46 @@ TEST(FunctionTest, MultipleArgsOptionalBeforeVariableSucceeds) {
ICING_ASSERT_OK_AND_ASSIGN(PendingValue val, function.Eval(std::move(args)));
EXPECT_THAT(val.is_placeholder(), IsTrue());
- // foo("bar")
+ std::string_view query = R"(foo("bar"))";
args = std::vector<PendingValue>();
args.push_back(PendingValue::CreateStringPendingValue(
- QueryTerm{"bar", /*is_prefix_val=*/false}));
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
EXPECT_THAT(val.is_placeholder(), IsTrue());
- // foo("bar", baz)
+ query = R"(foo("bar", baz))";
args = std::vector<PendingValue>();
args.push_back(PendingValue::CreateStringPendingValue(
- QueryTerm{"bar", /*is_prefix_val=*/false}));
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
args.push_back(PendingValue::CreateTextPendingValue(
- QueryTerm{"baz", /*is_prefix_val=*/false}));
+ QueryTerm{"baz", query.substr(11, 3), /*is_prefix_val=*/false}));
ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
EXPECT_THAT(val.is_placeholder(), IsTrue());
- // foo("bar", baz, bat)
+ query = R"(foo("bar", baz, bat))";
args = std::vector<PendingValue>();
args.push_back(PendingValue::CreateStringPendingValue(
- QueryTerm{"bar", /*is_prefix_val=*/false}));
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
args.push_back(PendingValue::CreateTextPendingValue(
- QueryTerm{"baz", /*is_prefix_val=*/false}));
+ QueryTerm{"baz", query.substr(11, 3), /*is_prefix_val=*/false}));
args.push_back(PendingValue::CreateTextPendingValue(
- QueryTerm{"bat", /*is_prefix_val=*/false}));
+ QueryTerm{"bat", query.substr(16, 3), /*is_prefix_val=*/false}));
ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
EXPECT_THAT(val.is_placeholder(), IsTrue());
- // foo(baz)
+ query = R"(foo(baz))";
args = std::vector<PendingValue>();
args.push_back(PendingValue::CreateTextPendingValue(
- QueryTerm{"baz", /*is_prefix_val=*/false}));
+ QueryTerm{"baz", query.substr(4, 3), /*is_prefix_val=*/false}));
EXPECT_THAT(function.Eval(std::move(args)),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
- // foo(baz, bat)
+ query = R"(foo(baz, bat))";
args = std::vector<PendingValue>();
args.push_back(PendingValue::CreateTextPendingValue(
- QueryTerm{"baz", /*is_prefix_val=*/false}));
+ QueryTerm{"baz", query.substr(4, 3), /*is_prefix_val=*/false}));
args.push_back(PendingValue::CreateTextPendingValue(
- QueryTerm{"bat", /*is_prefix_val=*/false}));
+ QueryTerm{"bat", query.substr(9, 3), /*is_prefix_val=*/false}));
EXPECT_THAT(function.Eval(std::move(args)),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
diff --git a/icing/query/advanced_query_parser/lexer.cc b/icing/query/advanced_query_parser/lexer.cc
index 6cddd96..0dd0bb0 100644
--- a/icing/query/advanced_query_parser/lexer.cc
+++ b/icing/query/advanced_query_parser/lexer.cc
@@ -38,12 +38,13 @@ bool Lexer::ConsumeWhitespace() {
}
bool Lexer::ConsumeQuerySingleChar() {
+ std::string_view original_text = query_.substr(current_index_, 1);
switch (current_char_) {
case ':':
- tokens_.push_back({":", TokenType::COMPARATOR});
+ tokens_.push_back({":", original_text, TokenType::COMPARATOR});
break;
case '*':
- tokens_.push_back({"", TokenType::STAR});
+ tokens_.push_back({"", original_text, TokenType::STAR});
break;
case '-':
if (in_text_) {
@@ -51,7 +52,7 @@ bool Lexer::ConsumeQuerySingleChar() {
// in the middle of a TEXT segment (ex. `foo-bar`).
return false;
}
- tokens_.push_back({"", TokenType::MINUS});
+ tokens_.push_back({"", original_text, TokenType::MINUS});
break;
default:
return false;
@@ -61,18 +62,19 @@ bool Lexer::ConsumeQuerySingleChar() {
}
bool Lexer::ConsumeScoringSingleChar() {
+ std::string_view original_text = query_.substr(current_index_, 1);
switch (current_char_) {
case '+':
- tokens_.push_back({"", TokenType::PLUS});
+ tokens_.push_back({"", original_text, TokenType::PLUS});
break;
case '*':
- tokens_.push_back({"", TokenType::TIMES});
+ tokens_.push_back({"", original_text, TokenType::TIMES});
break;
case '/':
- tokens_.push_back({"", TokenType::DIV});
+ tokens_.push_back({"", original_text, TokenType::DIV});
break;
case '-':
- tokens_.push_back({"", TokenType::MINUS});
+ tokens_.push_back({"", original_text, TokenType::MINUS});
break;
default:
return false;
@@ -82,18 +84,19 @@ bool Lexer::ConsumeScoringSingleChar() {
}
bool Lexer::ConsumeGeneralSingleChar() {
+ std::string_view original_text = query_.substr(current_index_, 1);
switch (current_char_) {
case ',':
- tokens_.push_back({"", TokenType::COMMA});
+ tokens_.push_back({"", original_text, TokenType::COMMA});
break;
case '.':
- tokens_.push_back({"", TokenType::DOT});
+ tokens_.push_back({"", original_text, TokenType::DOT});
break;
case '(':
- tokens_.push_back({"", TokenType::LPAREN});
+ tokens_.push_back({"", original_text, TokenType::LPAREN});
break;
case ')':
- tokens_.push_back({"", TokenType::RPAREN});
+ tokens_.push_back({"", original_text, TokenType::RPAREN});
break;
default:
return false;
@@ -124,13 +127,17 @@ bool Lexer::ConsumeComparator() {
// Matching for '<=', '>=', '!=', or '=='.
char next_char = PeekNext(1);
if (next_char == '=') {
- tokens_.push_back({{current_char_, next_char}, TokenType::COMPARATOR});
+ tokens_.push_back({{current_char_, next_char},
+ query_.substr(current_index_, 2),
+ TokenType::COMPARATOR});
Advance(2);
return true;
}
// Now, next_char must not be '='. Let's match for '<' and '>'.
if (current_char_ == '<' || current_char_ == '>') {
- tokens_.push_back({{current_char_}, TokenType::COMPARATOR});
+ tokens_.push_back({{current_char_},
+ query_.substr(current_index_, 1),
+ TokenType::COMPARATOR});
Advance();
return true;
}
@@ -145,10 +152,11 @@ bool Lexer::ConsumeAndOr() {
if (current_char_ != next_char) {
return false;
}
+ std::string_view original_text = query_.substr(current_index_, 2);
if (current_char_ == '&') {
- tokens_.push_back({"", TokenType::AND});
+ tokens_.push_back({"", original_text, TokenType::AND});
} else {
- tokens_.push_back({"", TokenType::OR});
+ tokens_.push_back({"", original_text, TokenType::OR});
}
Advance(2);
return true;
@@ -158,37 +166,42 @@ bool Lexer::ConsumeStringLiteral() {
if (current_char_ != '"') {
return false;
}
- std::string text;
Advance();
+ int32_t unnormalized_start_pos = current_index_;
while (current_char_ != '\0' && current_char_ != '"') {
// When getting a backslash, we will always match the next character, even
// if the next character is a quotation mark
if (current_char_ == '\\') {
- text.push_back(current_char_);
Advance();
if (current_char_ == '\0') {
// In this case, we are missing a terminating quotation mark.
break;
}
}
- text.push_back(current_char_);
Advance();
}
if (current_char_ == '\0') {
SyntaxError("missing terminating \" character");
return false;
}
- tokens_.push_back({text, TokenType::STRING});
+ int32_t unnormalized_length = current_index_ - unnormalized_start_pos;
+ std::string_view raw_token_text =
+ query_.substr(unnormalized_start_pos, unnormalized_length);
+ std::string token_text(raw_token_text);
+ tokens_.push_back({std::move(token_text), raw_token_text, TokenType::STRING});
Advance();
return true;
}
-bool Lexer::Text() {
+bool Lexer::ConsumeText() {
if (current_char_ == '\0') {
return false;
}
- tokens_.push_back({"", TokenType::TEXT});
+ tokens_.push_back({"", query_.substr(current_index_, 0), TokenType::TEXT});
int token_index = tokens_.size() - 1;
+
+ int32_t unnormalized_start_pos = current_index_;
+ int32_t unnormalized_end_pos = current_index_;
while (!ConsumeNonText() && current_char_ != '\0') {
in_text_ = true;
// When getting a backslash in TEXT, unescape it by accepting its following
@@ -203,14 +216,18 @@ bool Lexer::Text() {
}
tokens_[token_index].text.push_back(current_char_);
Advance();
- if (current_char_ == '(') {
- // A TEXT followed by a LPAREN is a FUNCTION_NAME.
- tokens_.back().type = TokenType::FUNCTION_NAME;
- // No need to break, since NonText() must be true at this point.
- }
+ unnormalized_end_pos = current_index_;
}
in_text_ = false;
+ tokens_[token_index].original_text = query_.substr(
+ unnormalized_start_pos, unnormalized_end_pos - unnormalized_start_pos);
+ if (unnormalized_end_pos < query_.length() &&
+ query_[unnormalized_end_pos] == '(') {
+ // A TEXT followed by a LPAREN is a FUNCTION_NAME.
+ tokens_[token_index].type = TokenType::FUNCTION_NAME;
+ }
+
if (language_ == Lexer::Language::QUERY) {
std::string &text = tokens_[token_index].text;
TokenType &type = tokens_[token_index].type;
@@ -234,7 +251,7 @@ Lexer::ExtractTokens() {
// Clear out any non-text before matching a Text.
while (ConsumeNonText()) {
}
- Text();
+ ConsumeText();
}
if (!error_.empty()) {
return absl_ports::InvalidArgumentError(
diff --git a/icing/query/advanced_query_parser/lexer.h b/icing/query/advanced_query_parser/lexer.h
index f7f06dc..b313fa7 100644
--- a/icing/query/advanced_query_parser/lexer.h
+++ b/icing/query/advanced_query_parser/lexer.h
@@ -48,7 +48,9 @@ class Lexer {
AND, // 'AND' | '&&' Not allowed in SCORING language.
OR, // 'OR' | '||' Not allowed in SCORING language.
NOT, // 'NOT' Not allowed in SCORING language.
- STRING, // String literal surrounded by quotation marks
+ STRING, // String literal surrounded by quotation marks. The
+ // original_text of a STRING token will not include quotation
+ // marks.
TEXT, // A sequence of chars that are not any above-listed operator
FUNCTION_NAME, // A TEXT followed by LPAREN.
// Whitespaces not inside a string literal will be skipped.
@@ -69,6 +71,10 @@ class Lexer {
// For other types, this field will be empty.
std::string text;
+ // Lifecycle is dependent on the lifecycle of the string pointed to by
+ // query_.
+ std::string_view original_text;
+
// The type of the token.
TokenType type;
};
@@ -141,8 +147,9 @@ class Lexer {
}
// Try to match TEXT, FUNCTION_NAME, 'AND', 'OR' and 'NOT'.
- // Should make sure that NonText() is false before calling into this method.
- bool Text();
+ // REQUIRES: ConsumeNonText() must be called immediately before calling this
+ // function.
+ bool ConsumeText();
std::string_view query_;
std::string error_;
diff --git a/icing/query/advanced_query_parser/parser.cc b/icing/query/advanced_query_parser/parser.cc
index 0e4c78d..fd74561 100644
--- a/icing/query/advanced_query_parser/parser.cc
+++ b/icing/query/advanced_query_parser/parser.cc
@@ -55,7 +55,8 @@ libtextclassifier3::StatusOr<std::unique_ptr<TextNode>> Parser::ConsumeText() {
if (!Match(Lexer::TokenType::TEXT)) {
return absl_ports::InvalidArgumentError("Unable to consume token as TEXT.");
}
- auto text_node = std::make_unique<TextNode>(std::move(current_token_->text));
+ auto text_node = std::make_unique<TextNode>(std::move(current_token_->text),
+ current_token_->original_text);
++current_token_;
return text_node;
}
@@ -81,6 +82,7 @@ Parser::ConsumeStringElement() {
"Unable to consume token as STRING.");
}
std::string text = std::move(current_token_->text);
+ std::string_view raw_text = current_token_->original_text;
++current_token_;
bool is_prefix = false;
@@ -89,7 +91,7 @@ Parser::ConsumeStringElement() {
++current_token_;
}
- return std::make_unique<StringNode>(std::move(text), is_prefix);
+ return std::make_unique<StringNode>(std::move(text), raw_text, is_prefix);
}
libtextclassifier3::StatusOr<std::string> Parser::ConsumeComparator() {
@@ -115,7 +117,9 @@ Parser::ConsumeMember() {
// at this point. So check for 'STAR' to differentiate the two cases.
if (Match(Lexer::TokenType::STAR)) {
Consume(Lexer::TokenType::STAR);
- text_node = std::make_unique<TextNode>(std::move(*text_node).value(),
+ std::string_view raw_text = text_node->raw_value();
+ std::string text = std::move(*text_node).value();
+ text_node = std::make_unique<TextNode>(std::move(text), raw_text,
/*is_prefix=*/true);
children.push_back(std::move(text_node));
} else {
diff --git a/icing/query/advanced_query_parser/parser_test.cc b/icing/query/advanced_query_parser/parser_test.cc
index 502dbd3..824c2ce 100644
--- a/icing/query/advanced_query_parser/parser_test.cc
+++ b/icing/query/advanced_query_parser/parser_test.cc
@@ -46,9 +46,9 @@ TEST(ParserTest, EmptyScoring) {
}
TEST(ParserTest, SingleTerm) {
- // Query: "foo"
+ std::string_view query = "foo";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::TEXT}};
+ {"foo", query, Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -67,9 +67,10 @@ TEST(ParserTest, SingleTerm) {
}
TEST(ParserTest, ImplicitAnd) {
- // Query: "foo bar"
+ std::string_view query = "foo bar";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::TEXT}, {"bar", Lexer::TokenType::TEXT}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {"bar", query.substr(4, 3), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -93,11 +94,11 @@ TEST(ParserTest, ImplicitAnd) {
}
TEST(ParserTest, Or) {
- // Query: "foo OR bar"
+ std::string_view query = "foo OR bar";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::OR},
- {"bar", Lexer::TokenType::TEXT}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(4, 2), Lexer::TokenType::OR},
+ {"bar", query.substr(7, 3), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -121,11 +122,11 @@ TEST(ParserTest, Or) {
}
TEST(ParserTest, And) {
- // Query: "foo AND bar"
+ std::string_view query = "foo AND bar";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::AND},
- {"bar", Lexer::TokenType::TEXT}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(4, 3), Lexer::TokenType::AND},
+ {"bar", query.substr(8, 4), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -149,9 +150,10 @@ TEST(ParserTest, And) {
}
TEST(ParserTest, Not) {
- // Query: "NOT foo"
+ std::string_view query = "NOT foo";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"", Lexer::TokenType::NOT}, {"foo", Lexer::TokenType::TEXT}};
+ {"", query.substr(0, 3), Lexer::TokenType::NOT},
+ {"foo", query.substr(4, 3), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -173,9 +175,10 @@ TEST(ParserTest, Not) {
}
TEST(ParserTest, Minus) {
- // Query: "-foo"
+ std::string_view query = "-foo";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"", Lexer::TokenType::MINUS}, {"foo", Lexer::TokenType::TEXT}};
+ {"", query.substr(0, 1), Lexer::TokenType::MINUS},
+ {"foo", query.substr(1, 3), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -197,11 +200,11 @@ TEST(ParserTest, Minus) {
}
TEST(ParserTest, Has) {
- // Query: "subject:foo"
+ std::string_view query = "subject:foo";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"subject", Lexer::TokenType::TEXT},
- {":", Lexer::TokenType::COMPARATOR},
- {"foo", Lexer::TokenType::TEXT}};
+ {"subject", query.substr(0, 7), Lexer::TokenType::TEXT},
+ {":", query.substr(7, 1), Lexer::TokenType::COMPARATOR},
+ {"foo", query.substr(8, 3), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -225,13 +228,13 @@ TEST(ParserTest, Has) {
}
TEST(ParserTest, HasNested) {
- // Query: "sender.name:foo"
+ std::string_view query = "sender.name:foo";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"sender", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::DOT},
- {"name", Lexer::TokenType::TEXT},
- {":", Lexer::TokenType::COMPARATOR},
- {"foo", Lexer::TokenType::TEXT}};
+ {"sender", query.substr(0, 6), Lexer::TokenType::TEXT},
+ {"", query.substr(6, 1), Lexer::TokenType::DOT},
+ {"name", query.substr(7, 4), Lexer::TokenType::TEXT},
+ {":", query.substr(11, 1), Lexer::TokenType::COMPARATOR},
+ {"foo", query.substr(12, 3), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -256,11 +259,11 @@ TEST(ParserTest, HasNested) {
}
TEST(ParserTest, EmptyFunction) {
- // Query: "foo()"
+ std::string_view query = "foo()";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::FUNCTION_NAME},
- {"", Lexer::TokenType::LPAREN},
- {"", Lexer::TokenType::RPAREN}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(3, 1), Lexer::TokenType::LPAREN},
+ {"", query.substr(4, 1), Lexer::TokenType::RPAREN}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -279,12 +282,12 @@ TEST(ParserTest, EmptyFunction) {
}
TEST(ParserTest, FunctionSingleArg) {
- // Query: "foo("bar")"
+ std::string_view query = "foo(\"bar\")";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::FUNCTION_NAME},
- {"", Lexer::TokenType::LPAREN},
- {"bar", Lexer::TokenType::STRING},
- {"", Lexer::TokenType::RPAREN}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(3, 1), Lexer::TokenType::LPAREN},
+ {"bar", query.substr(5, 3), Lexer::TokenType::STRING},
+ {"", query.substr(8, 1), Lexer::TokenType::RPAREN}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -304,11 +307,14 @@ TEST(ParserTest, FunctionSingleArg) {
}
TEST(ParserTest, FunctionMultiArg) {
- // Query: "foo("bar", "baz")"
+ std::string_view query = "foo(\"bar\", \"baz\")";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::FUNCTION_NAME}, {"", Lexer::TokenType::LPAREN},
- {"bar", Lexer::TokenType::STRING}, {"", Lexer::TokenType::COMMA},
- {"baz", Lexer::TokenType::STRING}, {"", Lexer::TokenType::RPAREN}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(3, 1), Lexer::TokenType::LPAREN},
+ {"bar", query.substr(5, 3), Lexer::TokenType::STRING},
+ {"", query.substr(9, 1), Lexer::TokenType::COMMA},
+ {"baz", query.substr(12, 3), Lexer::TokenType::STRING},
+ {"", query.substr(16, 1), Lexer::TokenType::RPAREN}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -329,11 +335,14 @@ TEST(ParserTest, FunctionMultiArg) {
}
TEST(ParserTest, FunctionNested) {
- // Query: "foo(bar())"
+ std::string_view query = "foo(bar())";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::FUNCTION_NAME}, {"", Lexer::TokenType::LPAREN},
- {"bar", Lexer::TokenType::FUNCTION_NAME}, {"", Lexer::TokenType::LPAREN},
- {"", Lexer::TokenType::RPAREN}, {"", Lexer::TokenType::RPAREN}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(3, 1), Lexer::TokenType::LPAREN},
+ {"bar", query.substr(4, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(7, 1), Lexer::TokenType::LPAREN},
+ {"", query.substr(8, 1), Lexer::TokenType::RPAREN},
+ {"", query.substr(9, 1), Lexer::TokenType::RPAREN}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -356,13 +365,13 @@ TEST(ParserTest, FunctionNested) {
}
TEST(ParserTest, FunctionWithTrailingSequence) {
- // Query: "foo() OR bar"
+ std::string_view query = "foo() OR bar";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::FUNCTION_NAME},
- {"", Lexer::TokenType::LPAREN},
- {"", Lexer::TokenType::RPAREN},
- {"", Lexer::TokenType::OR},
- {"bar", Lexer::TokenType::TEXT}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(3, 1), Lexer::TokenType::LPAREN},
+ {"", query.substr(4, 1), Lexer::TokenType::RPAREN},
+ {"", query.substr(6, 2), Lexer::TokenType::OR},
+ {"bar", query.substr(9, 3), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -386,11 +395,14 @@ TEST(ParserTest, FunctionWithTrailingSequence) {
}
TEST(ParserTest, Composite) {
- // Query: "foo OR (bar baz)"
+ std::string_view query = "foo OR (bar baz)";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::OR},
- {"", Lexer::TokenType::LPAREN}, {"bar", Lexer::TokenType::TEXT},
- {"baz", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::RPAREN}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(4, 2), Lexer::TokenType::OR},
+ {"", query.substr(7, 1), Lexer::TokenType::LPAREN},
+ {"bar", query.substr(8, 3), Lexer::TokenType::TEXT},
+ {"baz", query.substr(12, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(15, 1), Lexer::TokenType::RPAREN}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -419,11 +431,14 @@ TEST(ParserTest, Composite) {
}
TEST(ParserTest, CompositeWithTrailingSequence) {
- // Query: "(bar baz) OR foo"
+ std::string_view query = "(bar baz) OR foo";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"", Lexer::TokenType::LPAREN}, {"bar", Lexer::TokenType::TEXT},
- {"baz", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::RPAREN},
- {"", Lexer::TokenType::OR}, {"foo", Lexer::TokenType::TEXT}};
+ {"", query.substr(0, 1), Lexer::TokenType::LPAREN},
+ {"bar", query.substr(1, 3), Lexer::TokenType::TEXT},
+ {"baz", query.substr(5, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(8, 1), Lexer::TokenType::RPAREN},
+ {"", query.substr(10, 2), Lexer::TokenType::OR},
+ {"foo", query.substr(13, 3), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -452,17 +467,17 @@ TEST(ParserTest, CompositeWithTrailingSequence) {
}
TEST(ParserTest, Complex) {
- // Query: "foo bar:baz OR pal("bat")"
+ std::string_view query = R"(foo bar:baz OR pal("bat"))";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::TEXT},
- {"bar", Lexer::TokenType::TEXT},
- {":", Lexer::TokenType::COMPARATOR},
- {"baz", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::OR},
- {"pal", Lexer::TokenType::FUNCTION_NAME},
- {"", Lexer::TokenType::LPAREN},
- {"bat", Lexer::TokenType::STRING},
- {"", Lexer::TokenType::RPAREN}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {"bar", query.substr(4, 3), Lexer::TokenType::TEXT},
+ {":", query.substr(7, 1), Lexer::TokenType::COMPARATOR},
+ {"baz", query.substr(8, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(12, 2), Lexer::TokenType::OR},
+ {"pal", query.substr(15, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(18, 1), Lexer::TokenType::LPAREN},
+ {"bat", query.substr(20, 3), Lexer::TokenType::STRING},
+ {"", query.substr(24, 1), Lexer::TokenType::RPAREN}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -498,107 +513,116 @@ TEST(ParserTest, Complex) {
}
TEST(ParserTest, InvalidHas) {
- // Query: "foo:" No right hand operand to :
+ std::string_view query = "foo:"; // No right hand operand to :
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::TEXT}, {":", Lexer::TokenType::COMPARATOR}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {":", query.substr(3, 1), Lexer::TokenType::COMPARATOR}};
Parser parser = Parser::Create(std::move(lexer_tokens));
EXPECT_THAT(parser.ConsumeQuery(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST(ParserTest, InvalidComposite) {
- // Query: "(foo bar" No terminating RPAREN
+ std::string_view query = "(foo bar"; // No terminating RPAREN
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"", Lexer::TokenType::LPAREN},
- {"foo", Lexer::TokenType::TEXT},
- {"bar", Lexer::TokenType::TEXT}};
+ {"", query.substr(0, 1), Lexer::TokenType::LPAREN},
+ {"foo", query.substr(1, 3), Lexer::TokenType::TEXT},
+ {"bar", query.substr(5, 3), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
EXPECT_THAT(parser.ConsumeQuery(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST(ParserTest, InvalidMember) {
- // Query: "foo." DOT must have succeeding TEXT
+ std::string_view query = "foo."; // DOT must have succeeding TEXT
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::DOT}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(3, 1), Lexer::TokenType::DOT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
EXPECT_THAT(parser.ConsumeQuery(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST(ParserTest, InvalidOr) {
- // Query: "foo OR" No right hand operand to OR
+ std::string_view query = "foo OR"; // No right hand operand to OR
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::OR}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(4, 2), Lexer::TokenType::OR}};
Parser parser = Parser::Create(std::move(lexer_tokens));
EXPECT_THAT(parser.ConsumeQuery(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST(ParserTest, InvalidAnd) {
- // Query: "foo AND" No right hand operand to AND
+ std::string_view query = "foo AND"; // No right hand operand to AND
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::AND}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(4, 3), Lexer::TokenType::AND}};
Parser parser = Parser::Create(std::move(lexer_tokens));
EXPECT_THAT(parser.ConsumeQuery(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST(ParserTest, InvalidNot) {
- // Query: "NOT" No right hand operand to NOT
- std::vector<Lexer::LexerToken> lexer_tokens = {{"", Lexer::TokenType::NOT}};
+ std::string_view query = "NOT"; // No right hand operand to NOT
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"", query.substr(0, 3), Lexer::TokenType::NOT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
EXPECT_THAT(parser.ConsumeQuery(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST(ParserTest, InvalidMinus) {
- // Query: "-" No right hand operand to -
- std::vector<Lexer::LexerToken> lexer_tokens = {{"", Lexer::TokenType::MINUS}};
+ std::string_view query = "-"; // No right hand operand to -
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"", query.substr(0, 1), Lexer::TokenType::MINUS}};
Parser parser = Parser::Create(std::move(lexer_tokens));
EXPECT_THAT(parser.ConsumeQuery(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST(ParserTest, InvalidFunctionCallNoRparen) {
- // Query: "foo(" No terminating RPAREN
+ std::string_view query = "foo("; // No terminating RPAREN
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::FUNCTION_NAME}, {"", Lexer::TokenType::LPAREN}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(3, 1), Lexer::TokenType::LPAREN}};
Parser parser = Parser::Create(std::move(lexer_tokens));
EXPECT_THAT(parser.ConsumeQuery(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST(ParserTest, InvalidFunctionCallNoLparen) {
- // Query: "foo bar" foo labeled FUNCTION_NAME despite no LPAREN
+ std::string_view query =
+ "foo bar"; // foo labeled FUNCTION_NAME despite no LPAREN
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::FUNCTION_NAME},
- {"bar", Lexer::TokenType::FUNCTION_NAME}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"bar", query.substr(4, 3), Lexer::TokenType::FUNCTION_NAME}};
Parser parser = Parser::Create(std::move(lexer_tokens));
EXPECT_THAT(parser.ConsumeQuery(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST(ParserTest, InvalidFunctionArgsHangingComma) {
- // Query: "foo("bar",)" no valid arg following COMMA
+ std::string_view query = R"(foo("bar",))"; // no valid arg following COMMA
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"foo", Lexer::TokenType::FUNCTION_NAME},
- {"", Lexer::TokenType::LPAREN},
- {"bar", Lexer::TokenType::STRING},
- {"", Lexer::TokenType::COMMA},
- {"", Lexer::TokenType::RPAREN}};
+ {"foo", query.substr(0, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(3, 1), Lexer::TokenType::LPAREN},
+ {"bar", query.substr(5, 3), Lexer::TokenType::STRING},
+ {"", query.substr(9, 1), Lexer::TokenType::COMMA},
+ {"", query.substr(10, 1), Lexer::TokenType::RPAREN}};
Parser parser = Parser::Create(std::move(lexer_tokens));
EXPECT_THAT(parser.ConsumeQuery(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST(ParserTest, ScoringPlus) {
- // Scoring: "1 + 1 + 1"
- std::vector<Lexer::LexerToken> lexer_tokens = {{"1", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::PLUS},
- {"1", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::PLUS},
- {"1", Lexer::TokenType::TEXT}};
+ std::string_view scoring_exp = "1 + 1 + 1";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"1", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(2, 1), Lexer::TokenType::PLUS},
+ {"1", scoring_exp.substr(4, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(6, 1), Lexer::TokenType::PLUS},
+ {"1", scoring_exp.substr(8, 1), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeScoring());
@@ -622,12 +646,13 @@ TEST(ParserTest, ScoringPlus) {
}
TEST(ParserTest, ScoringMinus) {
- // Scoring: "1 - 1 - 1"
- std::vector<Lexer::LexerToken> lexer_tokens = {{"1", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::MINUS},
- {"1", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::MINUS},
- {"1", Lexer::TokenType::TEXT}};
+ std::string_view scoring_exp = "1 - 1 - 1";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"1", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(2, 1), Lexer::TokenType::MINUS},
+ {"1", scoring_exp.substr(4, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(6, 1), Lexer::TokenType::MINUS},
+ {"1", scoring_exp.substr(8, 1), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeScoring());
@@ -651,11 +676,14 @@ TEST(ParserTest, ScoringMinus) {
}
TEST(ParserTest, ScoringUnaryMinus) {
- // Scoring: "1 + -1 + 1"
+ std::string_view scoring_exp = "1 + -1 + 1";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"1", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::PLUS},
- {"", Lexer::TokenType::MINUS}, {"1", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::PLUS}, {"1", Lexer::TokenType::TEXT}};
+ {"1", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(2, 1), Lexer::TokenType::PLUS},
+ {"", scoring_exp.substr(4, 1), Lexer::TokenType::MINUS},
+ {"1", scoring_exp.substr(5, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(7, 1), Lexer::TokenType::PLUS},
+ {"1", scoring_exp.substr(9, 1), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeScoring());
@@ -682,12 +710,15 @@ TEST(ParserTest, ScoringUnaryMinus) {
}
TEST(ParserTest, ScoringPlusMinus) {
- // Scoring: "11 + 12 - 13 + 14"
+ std::string_view scoring_exp = "11 + 12 - 13 + 14";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"11", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::PLUS},
- {"12", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::MINUS},
- {"13", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::PLUS},
- {"14", Lexer::TokenType::TEXT}};
+ {"11", scoring_exp.substr(0, 2), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(3, 1), Lexer::TokenType::PLUS},
+ {"12", scoring_exp.substr(5, 2), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(8, 1), Lexer::TokenType::MINUS},
+ {"13", scoring_exp.substr(10, 2), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(13, 1), Lexer::TokenType::PLUS},
+ {"14", scoring_exp.substr(15, 2), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeScoring());
@@ -719,12 +750,13 @@ TEST(ParserTest, ScoringPlusMinus) {
}
TEST(ParserTest, ScoringTimes) {
- // Scoring: "1 * 1 * 1"
- std::vector<Lexer::LexerToken> lexer_tokens = {{"1", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::TIMES},
- {"1", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::TIMES},
- {"1", Lexer::TokenType::TEXT}};
+ std::string_view scoring_exp = "1 * 1 * 1";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"1", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(2, 1), Lexer::TokenType::TIMES},
+ {"1", scoring_exp.substr(4, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(6, 1), Lexer::TokenType::TIMES},
+ {"1", scoring_exp.substr(8, 1), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeScoring());
@@ -748,12 +780,13 @@ TEST(ParserTest, ScoringTimes) {
}
TEST(ParserTest, ScoringDiv) {
- // Scoring: "1 / 1 / 1"
- std::vector<Lexer::LexerToken> lexer_tokens = {{"1", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::DIV},
- {"1", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::DIV},
- {"1", Lexer::TokenType::TEXT}};
+ std::string_view scoring_exp = "1 / 1 / 1";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"1", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(2, 1), Lexer::TokenType::DIV},
+ {"1", scoring_exp.substr(4, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(6, 1), Lexer::TokenType::DIV},
+ {"1", scoring_exp.substr(8, 1), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeScoring());
@@ -777,13 +810,17 @@ TEST(ParserTest, ScoringDiv) {
}
TEST(ParserTest, ScoringTimesDiv) {
- // Scoring: "11 / 12 * 13 / 14 / 15"
+ std::string_view scoring_exp = "11 / 12 * 13 / 14 / 15";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"11", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::DIV},
- {"12", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::TIMES},
- {"13", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::DIV},
- {"14", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::DIV},
- {"15", Lexer::TokenType::TEXT}};
+ {"11", scoring_exp.substr(0, 2), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(3, 1), Lexer::TokenType::DIV},
+ {"12", scoring_exp.substr(5, 2), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(8, 1), Lexer::TokenType::TIMES},
+ {"13", scoring_exp.substr(10, 2), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(13, 1), Lexer::TokenType::DIV},
+ {"14", scoring_exp.substr(15, 2), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(18, 1), Lexer::TokenType::DIV},
+ {"15", scoring_exp.substr(20, 2), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeScoring());
@@ -817,29 +854,29 @@ TEST(ParserTest, ScoringTimesDiv) {
}
TEST(ParserTest, ComplexScoring) {
- // Scoring: "1 + pow((2 * sin(3)), 4) + -5 / 6"
+ std::string_view scoring_exp = "1 + pow((2 * sin(3)), 4) + -5 / 6";
// With parentheses in function arguments.
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"1", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::PLUS},
- {"pow", Lexer::TokenType::FUNCTION_NAME},
- {"", Lexer::TokenType::LPAREN},
- {"", Lexer::TokenType::LPAREN},
- {"2", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::TIMES},
- {"sin", Lexer::TokenType::FUNCTION_NAME},
- {"", Lexer::TokenType::LPAREN},
- {"3", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::RPAREN},
- {"", Lexer::TokenType::RPAREN},
- {"", Lexer::TokenType::COMMA},
- {"4", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::RPAREN},
- {"", Lexer::TokenType::PLUS},
- {"", Lexer::TokenType::MINUS},
- {"5", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::DIV},
- {"6", Lexer::TokenType::TEXT},
+ {"1", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(2, 1), Lexer::TokenType::PLUS},
+ {"pow", scoring_exp.substr(4, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", scoring_exp.substr(7, 1), Lexer::TokenType::LPAREN},
+ {"", scoring_exp.substr(8, 1), Lexer::TokenType::LPAREN},
+ {"2", scoring_exp.substr(9, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(11, 1), Lexer::TokenType::TIMES},
+ {"sin", scoring_exp.substr(13, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", scoring_exp.substr(16, 1), Lexer::TokenType::LPAREN},
+ {"3", scoring_exp.substr(17, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(18, 1), Lexer::TokenType::RPAREN},
+ {"", scoring_exp.substr(19, 1), Lexer::TokenType::RPAREN},
+ {"", scoring_exp.substr(20, 1), Lexer::TokenType::COMMA},
+ {"4", scoring_exp.substr(22, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(23, 1), Lexer::TokenType::RPAREN},
+ {"", scoring_exp.substr(25, 1), Lexer::TokenType::PLUS},
+ {"", scoring_exp.substr(27, 1), Lexer::TokenType::MINUS},
+ {"5", scoring_exp.substr(28, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(30, 1), Lexer::TokenType::DIV},
+ {"6", scoring_exp.substr(32, 1), Lexer::TokenType::TEXT},
};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
@@ -869,27 +906,27 @@ TEST(ParserTest, ComplexScoring) {
EqualsNodeInfo("DIV", NodeType::kNaryOperator),
EqualsNodeInfo("PLUS", NodeType::kNaryOperator)));
- // Scoring: "1 + pow(2 * sin(3), 4) + -5 / 6"
+ scoring_exp = "1 + pow(2 * sin(3), 4) + -5 / 6";
// Without parentheses in function arguments.
lexer_tokens = {
- {"1", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::PLUS},
- {"pow", Lexer::TokenType::FUNCTION_NAME},
- {"", Lexer::TokenType::LPAREN},
- {"2", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::TIMES},
- {"sin", Lexer::TokenType::FUNCTION_NAME},
- {"", Lexer::TokenType::LPAREN},
- {"3", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::RPAREN},
- {"", Lexer::TokenType::COMMA},
- {"4", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::RPAREN},
- {"", Lexer::TokenType::PLUS},
- {"", Lexer::TokenType::MINUS},
- {"5", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::DIV},
- {"6", Lexer::TokenType::TEXT},
+ {"1", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(2, 1), Lexer::TokenType::PLUS},
+ {"pow", scoring_exp.substr(4, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", scoring_exp.substr(7, 1), Lexer::TokenType::LPAREN},
+ {"2", scoring_exp.substr(8, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(10, 1), Lexer::TokenType::TIMES},
+ {"sin", scoring_exp.substr(12, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", scoring_exp.substr(15, 1), Lexer::TokenType::LPAREN},
+ {"3", scoring_exp.substr(16, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(17, 1), Lexer::TokenType::RPAREN},
+ {"", scoring_exp.substr(18, 1), Lexer::TokenType::COMMA},
+ {"4", scoring_exp.substr(20, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(21, 1), Lexer::TokenType::RPAREN},
+ {"", scoring_exp.substr(23, 1), Lexer::TokenType::PLUS},
+ {"", scoring_exp.substr(25, 1), Lexer::TokenType::MINUS},
+ {"5", scoring_exp.substr(26, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(28, 1), Lexer::TokenType::DIV},
+ {"6", scoring_exp.substr(30, 1), Lexer::TokenType::TEXT},
};
parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(tree_root, parser.ConsumeScoring());
@@ -899,13 +936,14 @@ TEST(ParserTest, ComplexScoring) {
}
TEST(ParserTest, ScoringMemberFunction) {
- // Scoring: this.CreationTimestamp()
+ std::string_view scoring_exp = "this.CreationTimestamp()";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"this", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::DOT},
- {"CreationTimestamp", Lexer::TokenType::FUNCTION_NAME},
- {"", Lexer::TokenType::LPAREN},
- {"", Lexer::TokenType::RPAREN}};
+ {"this", scoring_exp.substr(0, 4), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(4, 1), Lexer::TokenType::DOT},
+ {"CreationTimestamp", scoring_exp.substr(5, 17),
+ Lexer::TokenType::FUNCTION_NAME},
+ {"", scoring_exp.substr(22, 1), Lexer::TokenType::LPAREN},
+ {"", scoring_exp.substr(23, 1), Lexer::TokenType::RPAREN}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeScoring());
@@ -927,13 +965,13 @@ TEST(ParserTest, ScoringMemberFunction) {
}
TEST(ParserTest, QueryMemberFunction) {
- // Query: this.foo()
+ std::string_view query = "this.foo()";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"this", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::DOT},
- {"foo", Lexer::TokenType::FUNCTION_NAME},
- {"", Lexer::TokenType::LPAREN},
- {"", Lexer::TokenType::RPAREN}};
+ {"this", query.substr(0, 4), Lexer::TokenType::TEXT},
+ {"", query.substr(4, 1), Lexer::TokenType::DOT},
+ {"foo", query.substr(5, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(8, 1), Lexer::TokenType::LPAREN},
+ {"", query.substr(9, 1), Lexer::TokenType::RPAREN}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -954,18 +992,18 @@ TEST(ParserTest, QueryMemberFunction) {
}
TEST(ParserTest, ScoringComplexMemberFunction) {
- // Scoring: a.b.fun(c, d)
+ std::string_view scoring_exp = "a.b.fun(c, d)";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"a", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::DOT},
- {"b", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::DOT},
- {"fun", Lexer::TokenType::FUNCTION_NAME},
- {"", Lexer::TokenType::LPAREN},
- {"c", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::COMMA},
- {"d", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::RPAREN}};
+ {"a", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(1, 1), Lexer::TokenType::DOT},
+ {"b", scoring_exp.substr(2, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(3, 1), Lexer::TokenType::DOT},
+ {"fun", scoring_exp.substr(4, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", scoring_exp.substr(7, 1), Lexer::TokenType::LPAREN},
+ {"c", scoring_exp.substr(8, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(9, 1), Lexer::TokenType::COMMA},
+ {"d", scoring_exp.substr(11, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(12, 1), Lexer::TokenType::RPAREN}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeScoring());
@@ -993,13 +1031,18 @@ TEST(ParserTest, ScoringComplexMemberFunction) {
}
TEST(ParserTest, QueryComplexMemberFunction) {
- // Query: this.abc.fun(def, ghi)
+ std::string_view query = "this.abc.fun(def, ghi)";
std::vector<Lexer::LexerToken> lexer_tokens = {
- {"this", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::DOT},
- {"abc", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::DOT},
- {"fun", Lexer::TokenType::FUNCTION_NAME}, {"", Lexer::TokenType::LPAREN},
- {"def", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::COMMA},
- {"ghi", Lexer::TokenType::TEXT}, {"", Lexer::TokenType::RPAREN}};
+ {"this", query.substr(0, 4), Lexer::TokenType::TEXT},
+ {"", query.substr(4, 1), Lexer::TokenType::DOT},
+ {"abc", query.substr(5, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(8, 1), Lexer::TokenType::DOT},
+ {"fun", query.substr(9, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(12, 1), Lexer::TokenType::LPAREN},
+ {"def", query.substr(13, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(16, 1), Lexer::TokenType::COMMA},
+ {"ghi", query.substr(18, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(21, 1), Lexer::TokenType::RPAREN}};
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
@@ -1027,11 +1070,12 @@ TEST(ParserTest, QueryComplexMemberFunction) {
}
TEST(ParserTest, InvalidScoringToken) {
- // Scoring: "1 + NOT 1"
- std::vector<Lexer::LexerToken> lexer_tokens = {{"1", Lexer::TokenType::TEXT},
- {"", Lexer::TokenType::PLUS},
- {"", Lexer::TokenType::NOT},
- {"1", Lexer::TokenType::TEXT}};
+ std::string_view scoring_exp = "1 + NOT 1";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"1", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(2, 1), Lexer::TokenType::PLUS},
+ {"", scoring_exp.substr(4, 3), Lexer::TokenType::NOT},
+ {"1", scoring_exp.substr(8, 1), Lexer::TokenType::TEXT}};
Parser parser = Parser::Create(std::move(lexer_tokens));
EXPECT_THAT(parser.ConsumeScoring(),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
diff --git a/icing/query/advanced_query_parser/pending-value.cc b/icing/query/advanced_query_parser/pending-value.cc
index 903e12f..67bdc3a 100644
--- a/icing/query/advanced_query_parser/pending-value.cc
+++ b/icing/query/advanced_query_parser/pending-value.cc
@@ -36,7 +36,7 @@ libtextclassifier3::Status PendingValue::ParseInt() {
"Unable to parse \"", query_term_.term, "\" as number."));
}
data_type_ = DataType::kLong;
- query_term_ = {"", false};
+ query_term_ = {/*term=*/"", /*raw_term=*/"", /*is_prefix_val=*/false};
return libtextclassifier3::Status::OK;
}
diff --git a/icing/query/advanced_query_parser/pending-value.h b/icing/query/advanced_query_parser/pending-value.h
index d18789d..1a6717e 100644
--- a/icing/query/advanced_query_parser/pending-value.h
+++ b/icing/query/advanced_query_parser/pending-value.h
@@ -38,17 +38,18 @@ enum class DataType {
struct QueryTerm {
std::string term;
+ std::string_view raw_term;
bool is_prefix_val;
};
// A holder for intermediate results when processing child nodes.
struct PendingValue {
- static PendingValue CreateStringPendingValue(QueryTerm query_term) {
- return PendingValue(std::move(query_term), DataType::kString);
+ static PendingValue CreateStringPendingValue(QueryTerm str) {
+ return PendingValue(std::move(str), DataType::kString);
}
- static PendingValue CreateTextPendingValue(QueryTerm query_term) {
- return PendingValue(std::move(query_term), DataType::kText);
+ static PendingValue CreateTextPendingValue(QueryTerm text) {
+ return PendingValue(std::move(text), DataType::kText);
}
PendingValue() : data_type_(DataType::kNone) {}
@@ -125,7 +126,7 @@ struct PendingValue {
private:
explicit PendingValue(QueryTerm query_term, DataType data_type)
- : query_term_({std::move(query_term)}), data_type_(data_type) {}
+ : query_term_(std::move(query_term)), data_type_(data_type) {}
libtextclassifier3::Status CheckDataType(DataType required_data_type) const {
if (data_type_ == required_data_type) {
@@ -141,7 +142,7 @@ struct PendingValue {
// iterator_ will be populated when data_type_ is kDocumentIterator.
std::unique_ptr<DocHitInfoIterator> iterator_;
- // string_vals_ will be populated when data_type_ is kStringList.
+ // string_vals_ will be populated when data_type_ is kStringList.
std::vector<std::string> string_vals_;
// query_term_ will be populated when data_type_ is kString or kText
diff --git a/icing/query/advanced_query_parser/query-visitor.cc b/icing/query/advanced_query_parser/query-visitor.cc
index a1a9c38..31223a5 100644
--- a/icing/query/advanced_query_parser/query-visitor.cc
+++ b/icing/query/advanced_query_parser/query-visitor.cc
@@ -37,9 +37,12 @@
#include "icing/query/advanced_query_parser/lexer.h"
#include "icing/query/advanced_query_parser/param.h"
#include "icing/query/advanced_query_parser/parser.h"
+#include "icing/query/advanced_query_parser/pending-value.h"
+#include "icing/query/advanced_query_parser/util/string-util.h"
#include "icing/query/query-features.h"
#include "icing/schema/property-util.h"
#include "icing/schema/section.h"
+#include "icing/tokenization/token.h"
#include "icing/tokenization/tokenizer.h"
#include "icing/util/status-macros.h"
@@ -54,32 +57,13 @@ struct CreateList {
std::vector<std::string> values;
values.reserve(args.size());
for (PendingValue& arg : args) {
- QueryTerm val = std::move(arg).string_val().ValueOrDie();
- values.push_back(std::move(val.term));
+ QueryTerm string_val = std::move(arg).string_val().ValueOrDie();
+ values.push_back(std::move(string_val.term));
}
return PendingValue(std::move(values));
}
};
-libtextclassifier3::StatusOr<std::string> UnescapeStringValue(
- std::string_view value) {
- std::string result;
- bool in_escape = false;
- for (char c : value) {
- if (in_escape) {
- in_escape = false;
- } else if (c == '\\') {
- in_escape = true;
- continue;
- } else if (c == '"') {
- return absl_ports::InvalidArgumentError(
- "Encountered an unescaped quotation mark!");
- }
- result += c;
- }
- return result;
-}
-
bool IsNumericComparator(std::string_view operator_text) {
if (operator_text.length() < 1 || operator_text.length() > 2) {
return false;
@@ -168,8 +152,14 @@ void QueryVisitor::PendingPropertyRestricts::AddValidRestricts(
}
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
-QueryVisitor::CreateTermIterator(QueryTerm query_term) {
+QueryVisitor::CreateTermIterator(const QueryTerm& query_term) {
+ if (query_term.is_prefix_val) {
+ // '*' prefix operator was added in list filters
+ features_.insert(kListFilterQueryLanguageFeature);
+ }
TermMatchType::Code match_type = GetTermMatchType(query_term.is_prefix_val);
+ int unnormalized_term_start =
+ query_term.raw_term.data() - raw_query_text_.data();
if (!processing_not_) {
// 1. Add term to property_query_terms_map
if (pending_property_restricts_.has_active_property_restricts()) {
@@ -183,13 +173,11 @@ QueryVisitor::CreateTermIterator(QueryTerm query_term) {
// 2. If needed add term iterator to query_term_iterators_ map.
if (needs_term_frequency_info_) {
- // TODO(b/152934343) Save "term start index" into Node and PendingValue and
- // pass it into index.GetIterator
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<DocHitInfoIterator> term_iterator,
- index_.GetIterator(query_term.term, /*term_start_index=*/0,
- /*unnormalized_term_length=*/0, kSectionIdMaskAll,
- match_type, needs_term_frequency_info_));
+ index_.GetIterator(query_term.term, unnormalized_term_start,
+ query_term.raw_term.length(), kSectionIdMaskAll,
+ match_type, needs_term_frequency_info_));
query_term_iterators_[query_term.term] =
std::make_unique<DocHitInfoIteratorFilter>(
std::move(term_iterator), &document_store_, &schema_store_,
@@ -198,10 +186,8 @@ QueryVisitor::CreateTermIterator(QueryTerm query_term) {
}
// 3. Add the term iterator.
- // TODO(b/152934343) Save "term start index" into Node and PendingValue and
- // pass it into index.GetIterator
- return index_.GetIterator(query_term.term, /*term_start_index=*/0,
- /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ return index_.GetIterator(query_term.term, unnormalized_term_start,
+ query_term.raw_term.length(), kSectionIdMaskAll,
match_type, needs_term_frequency_info_);
}
@@ -266,7 +252,7 @@ libtextclassifier3::StatusOr<PendingValue> QueryVisitor::SearchFunction(
} else {
QueryVisitor query_visitor(&index_, &numeric_index_, &document_store_,
&schema_store_, &normalizer_, &tokenizer_,
- filter_options_, match_type_,
+ query->raw_term, filter_options_, match_type_,
needs_term_frequency_info_,
pending_property_restricts_, processing_not_);
tree_root->Accept(&query_visitor);
@@ -353,24 +339,44 @@ QueryVisitor::PopPendingIterator() {
// The tokenizer will produce 1+ tokens out of the text. The prefix operator
// only applies to the final token.
bool reached_final_token = !token_itr->Advance();
+ // raw_text is the portion of text_value.raw_term that hasn't yet been
+ // matched to any of the tokens that we've processed. raw_token will
+ // hold the portion of raw_text that corresponds to the current token that
+ // is being processed.
+ std::string_view raw_text = text_value.raw_term;
+ std::string_view raw_token;
while (!reached_final_token) {
std::vector<Token> tokens = token_itr->GetTokens();
- reached_final_token = !token_itr->Advance();
+ if (tokens.size() > 1) {
+ // The tokenizer iterator iterates between token groups. In practice,
+ // the tokenizer used with QueryVisitor (PlainTokenizer) will always
+ // only produce a single token per token group.
+ return absl_ports::InvalidArgumentError(
+ "Encountered unexpected token group with >1 tokens.");
+ }
- // The tokenizer iterator iterates between token groups. In practice, the
- // tokenizer used with QueryVisitor (PlainTokenizer) will always only
- // produce a single token per token group.
- // For simplicity, we will apply the prefix operator to *all* tokens
- // in the final token group.
- for (const Token& token : tokens) {
- normalized_term = normalizer_.NormalizeTerm(token.text);
- ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<DocHitInfoIterator> iterator,
- CreateTermIterator(
- QueryTerm{std::move(normalized_term),
- reached_final_token && text_value.is_prefix_val}));
- iterators.push_back(std::move(iterator));
+ reached_final_token = !token_itr->Advance();
+ const Token& token = tokens.at(0);
+ if (reached_final_token && token.text.length() == raw_text.length()) {
+ // Unescaped tokens are strictly smaller than their escaped counterparts
+ // This means that if we're at the final token and token.length equals
+ // raw_text, then all of raw_text must correspond to this token.
+ raw_token = raw_text;
+ } else {
+ ICING_ASSIGN_OR_RETURN(raw_token, string_util::FindEscapedToken(
+ raw_text, token.text));
}
+ normalized_term = normalizer_.NormalizeTerm(token.text);
+ QueryTerm term_value{std::move(normalized_term), raw_token,
+ reached_final_token && text_value.is_prefix_val};
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<DocHitInfoIterator> iterator,
+ CreateTermIterator(std::move(term_value)));
+ iterators.push_back(std::move(iterator));
+
+ // Remove raw_token from raw_text now that we've processed
+ // raw_token.
+ const char* escaped_token_end = raw_token.data() + raw_token.length();
+ raw_text = raw_text.substr(escaped_token_end - raw_text.data());
}
// Finally, create an And Iterator. If there's only a single term here, then
@@ -452,7 +458,8 @@ libtextclassifier3::Status QueryVisitor::ProcessNumericComparator(
GetInt64Range(node->operator_text(), int_value));
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<DocHitInfoIterator> iterator,
- numeric_index_.GetIterator(text_value.term, range.low, range.high));
+ numeric_index_.GetIterator(text_value.term, range.low, range.high,
+ document_store_, schema_store_));
features_.insert(kNumericSearchFeature);
pending_values_.push(PendingValue(std::move(iterator)));
@@ -618,22 +625,23 @@ void QueryVisitor::VisitFunctionName(const FunctionNameNode* node) {
void QueryVisitor::VisitString(const StringNode* node) {
// A STRING node can only be a term. Create the iterator now.
- auto unescaped_string_or = UnescapeStringValue(node->value());
+ auto unescaped_string_or = string_util::UnescapeStringValue(node->value());
if (!unescaped_string_or.ok()) {
pending_error_ = std::move(unescaped_string_or).status();
return;
}
std::string unescaped_string = std::move(unescaped_string_or).ValueOrDie();
- pending_values_.push(PendingValue::CreateStringPendingValue(
- QueryTerm{std::move(unescaped_string), node->is_prefix()}));
+ QueryTerm val{std::move(unescaped_string), node->raw_value(),
+ node->is_prefix()};
+ pending_values_.push(PendingValue::CreateStringPendingValue(std::move(val)));
}
void QueryVisitor::VisitText(const TextNode* node) {
// TEXT nodes could either be a term (and will become DocHitInfoIteratorTerm)
// or a property name. As such, we just push the TEXT value into pending
// values and determine which it is at a later point.
- pending_values_.push(PendingValue::CreateTextPendingValue(
- QueryTerm{std::move(node->value()), node->is_prefix()}));
+ QueryTerm val{std::move(node->value()), node->raw_value(), node->is_prefix()};
+ pending_values_.push(PendingValue::CreateTextPendingValue(std::move(val)));
}
void QueryVisitor::VisitMember(const MemberNode* node) {
@@ -668,6 +676,8 @@ void QueryVisitor::VisitMember(const MemberNode* node) {
libtextclassifier3::StatusOr<QueryTerm> member_or;
std::vector<std::string> members;
QueryTerm text_val;
+ const char* start = nullptr;
+ const char* end = nullptr;
while (!pending_values_.empty() &&
!pending_values_.top().is_placeholder()) {
member_or = PopPendingTextValue();
@@ -681,11 +691,19 @@ void QueryVisitor::VisitMember(const MemberNode* node) {
"Cannot use prefix operator '*' within a property name!");
return;
}
+ if (start == nullptr) {
+ start = text_val.raw_term.data();
+ end = text_val.raw_term.data() + text_val.raw_term.length();
+ } else {
+ start = std::min(start, text_val.raw_term.data());
+ end = std::max(end, text_val.raw_term.data() + text_val.raw_term.length());
+ }
members.push_back(std::move(text_val.term));
}
QueryTerm member;
member.term = absl_ports::StrJoin(members.rbegin(), members.rend(),
property_util::kPropertyPathSeparator);
+ member.raw_term = std::string_view(start, end - start);
member.is_prefix_val = false;
pending_value = PendingValue::CreateTextPendingValue(std::move(member));
}
@@ -739,6 +757,9 @@ void QueryVisitor::VisitFunction(const FunctionNode* node) {
// 5. Pop placeholder in pending_values and add the result of our function.
pending_values_.pop();
pending_values_.push(std::move(eval_result).ValueOrDie());
+
+ // Support for custom functions was added in list filters.
+ features_.insert(kListFilterQueryLanguageFeature);
}
// TODO(b/265312785) Clarify handling of the interaction between HAS and NOT.
@@ -771,6 +792,15 @@ void QueryVisitor::VisitUnaryOperator(const UnaryOperatorNode* node) {
if (!status.ok()) {
pending_error_ = std::move(status);
}
+
+ if (!is_minus ||
+ pending_property_restricts_.has_active_property_restricts() ||
+ processing_not_) {
+ // The 'NOT' operator was added in list filters.
+ // Likewise, mixing property restricts and NOTs was made valid in list
+ // filters.
+ features_.insert(kListFilterQueryLanguageFeature);
+ }
}
void QueryVisitor::VisitNaryOperator(const NaryOperatorNode* node) {
@@ -780,6 +810,13 @@ void QueryVisitor::VisitNaryOperator(const NaryOperatorNode* node) {
return;
}
+ if (pending_property_restricts_.has_active_property_restricts() ||
+ processing_not_) {
+ // Likewise, mixing property restricts and NOT with compound statements was
+ // added in list filters.
+ features_.insert(kListFilterQueryLanguageFeature);
+ }
+
if (node->operator_text() == ":") {
libtextclassifier3::Status status = ProcessHasOperator(node);
if (!status.ok()) {
diff --git a/icing/query/advanced_query_parser/query-visitor.h b/icing/query/advanced_query_parser/query-visitor.h
index 7498457..9fcaec0 100644
--- a/icing/query/advanced_query_parser/query-visitor.h
+++ b/icing/query/advanced_query_parser/query-visitor.h
@@ -49,12 +49,12 @@ class QueryVisitor : public AbstractSyntaxTreeVisitor {
Index* index, const NumericIndex<int64_t>* numeric_index,
const DocumentStore* document_store, const SchemaStore* schema_store,
const Normalizer* normalizer, const Tokenizer* tokenizer,
+ std::string_view raw_query_text,
DocHitInfoIteratorFilter::Options filter_options,
TermMatchType::Code match_type, bool needs_term_frequency_info)
: QueryVisitor(index, numeric_index, document_store, schema_store,
- normalizer, tokenizer, filter_options, match_type,
- needs_term_frequency_info,
-
+ normalizer, tokenizer, raw_query_text, filter_options,
+ match_type, needs_term_frequency_info,
PendingPropertyRestricts(),
/*processing_not=*/false) {}
@@ -105,9 +105,9 @@ class QueryVisitor : public AbstractSyntaxTreeVisitor {
Index* index, const NumericIndex<int64_t>* numeric_index,
const DocumentStore* document_store, const SchemaStore* schema_store,
const Normalizer* normalizer, const Tokenizer* tokenizer,
+ std::string_view raw_query_text,
DocHitInfoIteratorFilter::Options filter_options,
TermMatchType::Code match_type, bool needs_term_frequency_info,
-
PendingPropertyRestricts pending_property_restricts, bool processing_not)
: index_(*index),
numeric_index_(*numeric_index),
@@ -115,6 +115,7 @@ class QueryVisitor : public AbstractSyntaxTreeVisitor {
schema_store_(*schema_store),
normalizer_(*normalizer),
tokenizer_(*tokenizer),
+ raw_query_text_(raw_query_text),
filter_options_(std::move(filter_options)),
match_type_(match_type),
needs_term_frequency_info_(needs_term_frequency_info),
@@ -133,7 +134,7 @@ class QueryVisitor : public AbstractSyntaxTreeVisitor {
// - On success, a DocHitInfoIterator for the provided term
// - INVALID_ARGUMENT if unable to create an iterator for the term.
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
- CreateTermIterator(QueryTerm term);
+ CreateTermIterator(const QueryTerm& term);
// Processes the PendingValue at the top of pending_values_, parses it into a
// int64_t and pops the top.
@@ -279,6 +280,7 @@ class QueryVisitor : public AbstractSyntaxTreeVisitor {
const Normalizer& normalizer_; // Does not own!
const Tokenizer& tokenizer_; // Does not own!
+ std::string_view raw_query_text_;
DocHitInfoIteratorFilter::Options filter_options_;
TermMatchType::Code match_type_;
// Whether or not term_frequency information is needed. This affects:
diff --git a/icing/query/advanced_query_parser/query-visitor_test.cc b/icing/query/advanced_query_parser/query-visitor_test.cc
index 033e86b..b560d52 100644
--- a/icing/query/advanced_query_parser/query-visitor_test.cc
+++ b/icing/query/advanced_query_parser/query-visitor_test.cc
@@ -17,6 +17,7 @@
#include <cstdint>
#include <limits>
#include <memory>
+#include <string_view>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gmock/gmock.h"
@@ -225,14 +226,20 @@ TEST_P(QueryVisitorTest, SimpleLessThan) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kNumericSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
// "price" is a property restrict here and "2" isn't a "term" - its a numeric
// value. So QueryTermIterators should be empty.
EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
@@ -262,14 +269,20 @@ TEST_P(QueryVisitorTest, SimpleLessThanEq) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kNumericSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
// "price" is a property restrict here and "1" isn't a "term" - its a numeric
// value. So QueryTermIterators should be empty.
EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
@@ -299,14 +312,20 @@ TEST_P(QueryVisitorTest, SimpleEqual) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kNumericSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
// "price" is a property restrict here and "2" isn't a "term" - its a numeric
// value. So QueryTermIterators should be empty.
EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
@@ -336,14 +355,20 @@ TEST_P(QueryVisitorTest, SimpleGreaterThanEq) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kNumericSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
// "price" is a property restrict here and "1" isn't a "term" - its a numeric
// value. So QueryTermIterators should be empty.
EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
@@ -373,14 +398,20 @@ TEST_P(QueryVisitorTest, SimpleGreaterThan) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kNumericSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
// "price" is a property restrict here and "1" isn't a "term" - its a numeric
// value. So QueryTermIterators should be empty.
EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
@@ -411,14 +442,20 @@ TEST_P(QueryVisitorTest, IntMinLessThanEqual) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kNumericSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
// "price" is a property restrict here and int_min isn't a "term" - its a
// numeric value. So QueryTermIterators should be empty.
EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
@@ -449,14 +486,20 @@ TEST_P(QueryVisitorTest, IntMaxGreaterThanEqual) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kNumericSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
// "price" is a property restrict here and int_max isn't a "term" - its a
// numeric value. So QueryTermIterators should be empty.
EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
@@ -488,14 +531,20 @@ TEST_P(QueryVisitorTest, NestedPropertyLessThan) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kNumericSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
// "subscription.price" is a property restrict here and int_max isn't a "term"
// - its a numeric value. So QueryTermIterators should be empty.
EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
@@ -510,7 +559,7 @@ TEST_P(QueryVisitorTest, IntParsingError) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -524,7 +573,7 @@ TEST_P(QueryVisitorTest, NotEqualsUnsupported) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -552,16 +601,19 @@ TEST_P(QueryVisitorTest, LessThanTooManyOperandsInvalid) {
// Create an invalid AST for the query '3 < subscription.price 25' where '<'
// has three operands
- auto property_node = std::make_unique<TextNode>("subscription");
- auto subproperty_node = std::make_unique<TextNode>("price");
+ std::string_view query = "3 < subscription.price 25";
+ auto property_node =
+ std::make_unique<TextNode>("subscription", query.substr(4, 12));
+ auto subproperty_node =
+ std::make_unique<TextNode>("price", query.substr(17, 5));
std::vector<std::unique_ptr<TextNode>> member_args;
member_args.push_back(std::move(property_node));
member_args.push_back(std::move(subproperty_node));
auto member_node = std::make_unique<MemberNode>(std::move(member_args),
/*function=*/nullptr);
- auto value_node = std::make_unique<TextNode>("3");
- auto extra_value_node = std::make_unique<TextNode>("25");
+ auto value_node = std::make_unique<TextNode>("3", query.substr(0, 1));
+ auto extra_value_node = std::make_unique<TextNode>("25", query.substr(23, 2));
std::vector<std::unique_ptr<Node>> args;
args.push_back(std::move(value_node));
args.push_back(std::move(member_node));
@@ -569,7 +621,7 @@ TEST_P(QueryVisitorTest, LessThanTooManyOperandsInvalid) {
auto root_node = std::make_unique<NaryOperatorNode>("<", std::move(args));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -580,8 +632,11 @@ TEST_P(QueryVisitorTest, LessThanTooManyOperandsInvalid) {
TEST_P(QueryVisitorTest, LessThanTooFewOperandsInvalid) {
// Create an invalid AST for the query 'subscription.price <' where '<'
// has a single operand
- auto property_node = std::make_unique<TextNode>("subscription");
- auto subproperty_node = std::make_unique<TextNode>("price");
+ std::string_view query = "subscription.price <";
+ auto property_node =
+ std::make_unique<TextNode>("subscription", query.substr(0, 12));
+ auto subproperty_node =
+ std::make_unique<TextNode>("price", query.substr(13, 5));
std::vector<std::unique_ptr<TextNode>> member_args;
member_args.push_back(std::move(property_node));
member_args.push_back(std::move(subproperty_node));
@@ -593,7 +648,7 @@ TEST_P(QueryVisitorTest, LessThanTooFewOperandsInvalid) {
auto root_node = std::make_unique<NaryOperatorNode>("<", std::move(args));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -624,14 +679,20 @@ TEST_P(QueryVisitorTest, LessThanNonExistentPropertyNotFound) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kNumericSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
EXPECT_THAT(query_results.query_terms, IsEmpty());
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), IsEmpty());
@@ -640,7 +701,7 @@ TEST_P(QueryVisitorTest, LessThanNonExistentPropertyNotFound) {
TEST_P(QueryVisitorTest, NeverVisitedReturnsInvalid) {
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), "",
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
@@ -669,7 +730,7 @@ TEST_P(QueryVisitorTest, IntMinLessThanInvalid) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -699,7 +760,7 @@ TEST_P(QueryVisitorTest, IntMaxGreaterThanInvalid) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -714,7 +775,7 @@ TEST_P(QueryVisitorTest, NumericComparisonPropertyStringIsInvalid) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -776,14 +837,20 @@ TEST_P(QueryVisitorTest, NumericComparatorDoesntAffectLaterTerms) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kNumericSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
EXPECT_THAT(query_results.query_terms, IsEmpty());
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
@@ -813,7 +880,7 @@ TEST_P(QueryVisitorTest, SingleTermTermFrequencyEnabled) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -865,7 +932,7 @@ TEST_P(QueryVisitorTest, SingleTermTermFrequencyDisabled) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/false);
root_node->Accept(&query_visitor);
@@ -917,7 +984,7 @@ TEST_P(QueryVisitorTest, SingleTermPrefix) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_EXACT,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -933,7 +1000,7 @@ TEST_P(QueryVisitorTest, SingleTermPrefix) {
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_EXACT,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_two);
@@ -953,7 +1020,7 @@ TEST_P(QueryVisitorTest, PrefixOperatorAfterPropertyReturnsInvalid) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -967,7 +1034,7 @@ TEST_P(QueryVisitorTest, PrefixOperatorAfterNumericValueReturnsInvalid) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -981,7 +1048,7 @@ TEST_P(QueryVisitorTest, PrefixOperatorAfterPropertyRestrictReturnsInvalid) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -1019,7 +1086,7 @@ TEST_P(QueryVisitorTest, SegmentationWithPrefix) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_EXACT,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -1042,7 +1109,7 @@ TEST_P(QueryVisitorTest, SegmentationWithPrefix) {
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_EXACT,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_two);
@@ -1079,14 +1146,20 @@ TEST_P(QueryVisitorTest, SingleVerbatimTerm) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kVerbatimSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature));
+ }
EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
EXPECT_THAT(query_results.query_terms[""],
UnorderedElementsAre("foo:bar(baz)"));
@@ -1120,14 +1193,15 @@ TEST_P(QueryVisitorTest, SingleVerbatimTermPrefix) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_EXACT,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kVerbatimSearchFeature));
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
EXPECT_THAT(query_results.query_terms[""], UnorderedElementsAre("foo:bar("));
EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
@@ -1172,14 +1246,20 @@ TEST_P(QueryVisitorTest, VerbatimTermEscapingQuote) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kVerbatimSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature));
+ }
EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
EXPECT_THAT(query_results.query_terms[""],
UnorderedElementsAre(R"(foobar")"));
@@ -1218,14 +1298,20 @@ TEST_P(QueryVisitorTest, VerbatimTermEscapingEscape) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kVerbatimSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature));
+ }
EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
EXPECT_THAT(query_results.query_terms[""],
UnorderedElementsAre(R"(foobar\)"));
@@ -1266,14 +1352,20 @@ TEST_P(QueryVisitorTest, VerbatimTermEscapingNonSpecialChar) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kVerbatimSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature));
+ }
EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
EXPECT_THAT(query_results.query_terms[""],
UnorderedElementsAre(R"(foobary)"));
@@ -1287,14 +1379,20 @@ TEST_P(QueryVisitorTest, VerbatimTermEscapingNonSpecialChar) {
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_two);
ICING_ASSERT_OK_AND_ASSIGN(query_results,
std::move(query_visitor_two).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kVerbatimSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature));
+ }
EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
EXPECT_THAT(query_results.query_terms[""],
UnorderedElementsAre(R"(foobar\y)"));
@@ -1336,14 +1434,20 @@ TEST_P(QueryVisitorTest, VerbatimTermNewLine) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kVerbatimSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature));
+ }
EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
EXPECT_THAT(query_results.query_terms[""], UnorderedElementsAre("foobar\n"));
EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
@@ -1356,14 +1460,20 @@ TEST_P(QueryVisitorTest, VerbatimTermNewLine) {
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_two);
ICING_ASSERT_OK_AND_ASSIGN(query_results,
std::move(query_visitor_two).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kVerbatimSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature));
+ }
EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
EXPECT_THAT(query_results.query_terms[""],
UnorderedElementsAre(R"(foobar\n)"));
@@ -1399,14 +1509,20 @@ TEST_P(QueryVisitorTest, VerbatimTermEscapingComplex) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use,
- ElementsAre(kVerbatimSearchFeature));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature));
+ }
EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
EXPECT_THAT(query_results.query_terms[""],
UnorderedElementsAre(R"(foo\"bar\nbaz")"));
@@ -1450,7 +1566,7 @@ TEST_P(QueryVisitorTest, SingleMinusTerm) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -1458,7 +1574,12 @@ TEST_P(QueryVisitorTest, SingleMinusTerm) {
std::move(query_visitor).ConsumeResults());
EXPECT_THAT(ExtractKeys(query_results.query_terms), IsEmpty());
EXPECT_THAT(query_results.query_term_iterators, IsEmpty());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId2));
}
@@ -1497,14 +1618,15 @@ TEST_P(QueryVisitorTest, SingleNotTerm) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
EXPECT_THAT(query_results.query_terms, IsEmpty());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
EXPECT_THAT(query_results.query_term_iterators, IsEmpty());
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId2));
@@ -1549,13 +1671,14 @@ TEST_P(QueryVisitorTest, NestedNotTerms) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
EXPECT_THAT(query_results.query_terms[""],
UnorderedElementsAre("foo", "baz"));
@@ -1615,13 +1738,14 @@ TEST_P(QueryVisitorTest, DeeplyNestedNotTerms) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
EXPECT_THAT(query_results.query_terms[""],
UnorderedElementsAre("bar", "baz", "bat"));
@@ -1653,13 +1777,18 @@ TEST_P(QueryVisitorTest, ImplicitAndTerms) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
EXPECT_THAT(query_results.query_terms[""],
UnorderedElementsAre("foo", "bar"));
@@ -1691,13 +1820,18 @@ TEST_P(QueryVisitorTest, ExplicitAndTerms) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
EXPECT_THAT(query_results.query_terms[""],
UnorderedElementsAre("foo", "bar"));
@@ -1729,13 +1863,18 @@ TEST_P(QueryVisitorTest, OrTerms) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
EXPECT_THAT(query_results.query_terms[""],
UnorderedElementsAre("foo", "bar"));
@@ -1769,13 +1908,18 @@ TEST_P(QueryVisitorTest, AndOrTermPrecedence) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
EXPECT_THAT(query_results.query_terms[""],
UnorderedElementsAre("foo", "bar", "baz"));
@@ -1789,13 +1933,18 @@ TEST_P(QueryVisitorTest, AndOrTermPrecedence) {
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_two);
ICING_ASSERT_OK_AND_ASSIGN(query_results,
std::move(query_visitor_two).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
EXPECT_THAT(query_results.query_terms[""],
UnorderedElementsAre("foo", "bar", "baz"));
@@ -1808,13 +1957,18 @@ TEST_P(QueryVisitorTest, AndOrTermPrecedence) {
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_three(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_three);
ICING_ASSERT_OK_AND_ASSIGN(query_results,
std::move(query_visitor_three).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
EXPECT_THAT(query_results.query_terms[""],
UnorderedElementsAre("foo", "bar", "baz"));
@@ -1863,13 +2017,14 @@ TEST_P(QueryVisitorTest, AndOrNotPrecedence) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
EXPECT_THAT(query_results.query_terms[""],
UnorderedElementsAre("foo", "baz"));
@@ -1882,13 +2037,14 @@ TEST_P(QueryVisitorTest, AndOrNotPrecedence) {
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_two);
ICING_ASSERT_OK_AND_ASSIGN(query_results,
std::move(query_visitor_two).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
EXPECT_THAT(query_results.query_terms[""], UnorderedElementsAre("foo"));
EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
@@ -1943,7 +2099,7 @@ TEST_P(QueryVisitorTest, PropertyFilter) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -1954,7 +2110,12 @@ TEST_P(QueryVisitorTest, PropertyFilter) {
EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("foo"));
EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
UnorderedElementsAre("foo"));
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId1, kDocumentId0));
}
@@ -2011,7 +2172,7 @@ TEST_F(QueryVisitorTest, MultiPropertyFilter) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -2023,7 +2184,8 @@ TEST_F(QueryVisitorTest, MultiPropertyFilter) {
EXPECT_THAT(query_results.query_terms["prop2"], UnorderedElementsAre("foo"));
EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
UnorderedElementsAre("foo"));
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId1, kDocumentId0));
}
@@ -2051,7 +2213,7 @@ TEST_P(QueryVisitorTest, PropertyFilterStringIsInvalid) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -2105,7 +2267,7 @@ TEST_P(QueryVisitorTest, PropertyFilterNonNormalized) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -2116,7 +2278,12 @@ TEST_P(QueryVisitorTest, PropertyFilterNonNormalized) {
EXPECT_THAT(query_results.query_terms["PROP1"], UnorderedElementsAre("foo"));
EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
UnorderedElementsAre("foo"));
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId1, kDocumentId0));
}
@@ -2168,13 +2335,14 @@ TEST_P(QueryVisitorTest, PropertyFilterWithGrouping) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
EXPECT_THAT(ExtractKeys(query_results.query_terms),
UnorderedElementsAre("prop1"));
EXPECT_THAT(query_results.query_terms["prop1"],
@@ -2231,13 +2399,14 @@ TEST_P(QueryVisitorTest, ValidNestedPropertyFilter) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
EXPECT_THAT(ExtractKeys(query_results.query_terms),
UnorderedElementsAre("prop1"));
EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("foo"));
@@ -2251,13 +2420,14 @@ TEST_P(QueryVisitorTest, ValidNestedPropertyFilter) {
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_two);
ICING_ASSERT_OK_AND_ASSIGN(query_results,
std::move(query_visitor_two).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
EXPECT_THAT(ExtractKeys(query_results.query_terms),
UnorderedElementsAre("prop1"));
EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("foo"));
@@ -2313,13 +2483,14 @@ TEST_P(QueryVisitorTest, InvalidNestedPropertyFilter) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
EXPECT_THAT(ExtractKeys(query_results.query_terms), IsEmpty());
EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), IsEmpty());
@@ -2333,13 +2504,14 @@ TEST_P(QueryVisitorTest, InvalidNestedPropertyFilter) {
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_two);
ICING_ASSERT_OK_AND_ASSIGN(query_results,
std::move(query_visitor_two).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
EXPECT_THAT(ExtractKeys(query_results.query_terms), IsEmpty());
EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), IsEmpty());
@@ -2395,13 +2567,14 @@ TEST_P(QueryVisitorTest, NotWithPropertyFilter) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
EXPECT_THAT(ExtractKeys(query_results.query_terms), IsEmpty());
EXPECT_THAT(query_results.query_term_iterators, IsEmpty());
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
@@ -2415,13 +2588,14 @@ TEST_P(QueryVisitorTest, NotWithPropertyFilter) {
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_two);
ICING_ASSERT_OK_AND_ASSIGN(query_results,
std::move(query_visitor_two).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
EXPECT_THAT(ExtractKeys(query_results.query_terms), IsEmpty());
EXPECT_THAT(query_results.query_term_iterators, IsEmpty());
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
@@ -2478,14 +2652,15 @@ TEST_P(QueryVisitorTest, PropertyFilterWithNot) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
EXPECT_THAT(ExtractKeys(query_results.query_terms),
UnorderedElementsAre("prop1"));
EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("bar"));
@@ -2501,13 +2676,14 @@ TEST_P(QueryVisitorTest, PropertyFilterWithNot) {
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_two);
ICING_ASSERT_OK_AND_ASSIGN(query_results,
std::move(query_visitor_two).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
EXPECT_THAT(ExtractKeys(query_results.query_terms),
UnorderedElementsAre("prop1"));
EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("bar"));
@@ -2579,13 +2755,18 @@ TEST_P(QueryVisitorTest, SegmentationTest) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
if (IsCfStringTokenization()) {
EXPECT_THAT(query_results.query_terms[""],
@@ -2692,13 +2873,18 @@ TEST_P(QueryVisitorTest, PropertyRestrictsPopCorrectly) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
EXPECT_THAT(ExtractKeys(query_results.query_terms),
UnorderedElementsAre("", "prop1"));
EXPECT_THAT(query_results.query_terms[""],
@@ -2802,13 +2988,14 @@ TEST_P(QueryVisitorTest, UnsatisfiablePropertyRestrictsPopCorrectly) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
EXPECT_THAT(ExtractKeys(query_results.query_terms),
UnorderedElementsAre("prop0", "prop2"));
EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("val0"));
@@ -2825,7 +3012,7 @@ TEST_F(QueryVisitorTest, UnsupportedFunctionReturnsInvalidArgument) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -2839,7 +3026,7 @@ TEST_F(QueryVisitorTest, SearchFunctionTooFewArgumentsReturnsInvalidArgument) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -2853,7 +3040,7 @@ TEST_F(QueryVisitorTest, SearchFunctionTooManyArgumentsReturnsInvalidArgument) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -2869,7 +3056,7 @@ TEST_F(QueryVisitorTest,
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -2881,7 +3068,7 @@ TEST_F(QueryVisitorTest,
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_two);
@@ -2897,7 +3084,7 @@ TEST_F(QueryVisitorTest,
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -2909,7 +3096,7 @@ TEST_F(QueryVisitorTest,
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_two);
@@ -2924,7 +3111,7 @@ TEST_F(QueryVisitorTest,
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -2984,14 +3171,15 @@ TEST_F(QueryVisitorTest, SearchFunctionNestedFunctionCalls) {
ParseQueryHelper(level_two_query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), level_two_query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
EXPECT_THAT(ExtractKeys(query_results.query_terms),
UnorderedElementsAre("prop1"));
EXPECT_THAT(query_results.query_terms["prop1"],
@@ -3008,13 +3196,14 @@ TEST_F(QueryVisitorTest, SearchFunctionNestedFunctionCalls) {
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
schema_store_.get(), normalizer_.get(), tokenizer_.get(),
- DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ level_three_query, DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_two);
ICING_ASSERT_OK_AND_ASSIGN(query_results,
std::move(query_visitor_two).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
EXPECT_THAT(ExtractKeys(query_results.query_terms),
UnorderedElementsAre("prop1"));
EXPECT_THAT(query_results.query_terms["prop1"],
@@ -3031,13 +3220,14 @@ TEST_F(QueryVisitorTest, SearchFunctionNestedFunctionCalls) {
QueryVisitor query_visitor_three(
index_.get(), numeric_index_.get(), document_store_.get(),
schema_store_.get(), normalizer_.get(), tokenizer_.get(),
- DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ level_four_query, DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_three);
ICING_ASSERT_OK_AND_ASSIGN(query_results,
std::move(query_visitor_three).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
EXPECT_THAT(ExtractKeys(query_results.query_terms),
UnorderedElementsAre("prop1"));
EXPECT_THAT(query_results.query_terms["prop1"],
@@ -3148,14 +3338,15 @@ TEST_F(QueryVisitorTest, SearchFunctionNestedPropertyRestrictsNarrowing) {
ParseQueryHelper(level_one_query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), level_one_query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
EXPECT_THAT(ExtractKeys(query_results.query_terms),
UnorderedElementsAre("prop0", "prop1", "prop2", "prop3", "prop4",
"prop5", "prop6", "prop7"));
@@ -3179,14 +3370,15 @@ TEST_F(QueryVisitorTest, SearchFunctionNestedPropertyRestrictsNarrowing) {
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(level_two_query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), level_two_query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_two);
ICING_ASSERT_OK_AND_ASSIGN(query_results,
std::move(query_visitor_two).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
EXPECT_THAT(ExtractKeys(query_results.query_terms),
UnorderedElementsAre("prop0", "prop2", "prop4", "prop6"));
EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("foo"));
@@ -3205,13 +3397,14 @@ TEST_F(QueryVisitorTest, SearchFunctionNestedPropertyRestrictsNarrowing) {
QueryVisitor query_visitor_three(
index_.get(), numeric_index_.get(), document_store_.get(),
schema_store_.get(), normalizer_.get(), tokenizer_.get(),
- DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ level_three_query, DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_three);
ICING_ASSERT_OK_AND_ASSIGN(query_results,
std::move(query_visitor_three).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
EXPECT_THAT(ExtractKeys(query_results.query_terms),
UnorderedElementsAre("prop0", "prop6"));
EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("foo"));
@@ -3224,7 +3417,7 @@ TEST_F(QueryVisitorTest, SearchFunctionNestedPropertyRestrictsNarrowing) {
// This test will nest `search` calls together with the set of restricts
// expanding at each level so that the set of docs matching the query grows.
-TEST_F(QueryVisitorTest, SearchFunctionNestedPropertyRestrictsExpandinging) {
+TEST_F(QueryVisitorTest, SearchFunctionNestedPropertyRestrictsExpanding) {
PropertyConfigProto prop =
PropertyConfigBuilder()
.SetName("prop0")
@@ -3322,14 +3515,15 @@ TEST_F(QueryVisitorTest, SearchFunctionNestedPropertyRestrictsExpandinging) {
ParseQueryHelper(level_one_query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), level_one_query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
EXPECT_THAT(ExtractKeys(query_results.query_terms),
UnorderedElementsAre("prop0", "prop6"));
EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("foo"));
@@ -3345,14 +3539,15 @@ TEST_F(QueryVisitorTest, SearchFunctionNestedPropertyRestrictsExpandinging) {
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(level_two_query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), level_two_query,
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_two);
ICING_ASSERT_OK_AND_ASSIGN(query_results,
std::move(query_visitor_two).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
EXPECT_THAT(ExtractKeys(query_results.query_terms),
UnorderedElementsAre("prop0", "prop6"));
EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("foo"));
@@ -3370,13 +3565,14 @@ TEST_F(QueryVisitorTest, SearchFunctionNestedPropertyRestrictsExpandinging) {
QueryVisitor query_visitor_three(
index_.get(), numeric_index_.get(), document_store_.get(),
schema_store_.get(), normalizer_.get(), tokenizer_.get(),
- DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ level_three_query, DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_three);
ICING_ASSERT_OK_AND_ASSIGN(query_results,
std::move(query_visitor_three).ConsumeResults());
- EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
EXPECT_THAT(ExtractKeys(query_results.query_terms),
UnorderedElementsAre("prop0", "prop6"));
EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("foo"));
diff --git a/icing/query/advanced_query_parser/util/string-util.cc b/icing/query/advanced_query_parser/util/string-util.cc
new file mode 100644
index 0000000..9af2ed6
--- /dev/null
+++ b/icing/query/advanced_query_parser/util/string-util.cc
@@ -0,0 +1,106 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/query/advanced_query_parser/util/string-util.h"
+
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+
+namespace icing {
+namespace lib {
+
+namespace string_util {
+
+// Removes backslash escapes from `value`, returning the unescaped string.
+// A backslash escapes the character that follows it (e.g. `\"` -> `"`,
+// `\\` -> `\`); any character may be escaped. An unescaped double quote is
+// rejected because quoted query values must escape embedded quotes.
+libtextclassifier3::StatusOr<std::string> UnescapeStringValue(
+    std::string_view value) {
+  std::string result;
+  // True iff the previous character was a backslash that has not yet consumed
+  // the character it escapes.
+  bool in_escape = false;
+  for (char c : value) {
+    if (in_escape) {
+      // Current char was escaped; emit it verbatim (falls through to append).
+      in_escape = false;
+    } else if (c == '\\') {
+      // Start of an escape sequence; the backslash itself is dropped.
+      in_escape = true;
+      continue;
+    } else if (c == '"') {
+      return absl_ports::InvalidArgumentError(
+          "Encountered an unescaped quotation mark!");
+    }
+    result += c;
+  }
+  // NOTE(review): a trailing lone backslash is silently dropped (matches the
+  // "foo\\" -> "foo" test case) rather than reported as an error.
+  return result;
+}
+
+// Scans `escaped_string` for the span whose unescaped form equals
+// `unescaped_token`, returning a view into `escaped_string` that includes the
+// escaping backslashes. See the header for the full contract.
+libtextclassifier3::StatusOr<std::string_view> FindEscapedToken(
+    std::string_view escaped_string, std::string_view unescaped_token) {
+  if (unescaped_token.empty()) {
+    return absl_ports::InvalidArgumentError(
+        "Cannot find escaped token in empty unescaped token.");
+  }
+
+  // Find the first candidate start: the first occurrence within
+  // escaped_string of the token's first character.
+  const char* esc_string_end = escaped_string.data() + escaped_string.length();
+  size_t pos = escaped_string.find(unescaped_token[0]);
+  const char* esc_token_start = (pos == std::string_view::npos)
+                                    ? esc_string_end
+                                    : escaped_string.data() + pos;
+  const char* esc_token_cur = esc_token_start;
+  const char* possible_next_start = nullptr;
+  bool is_escaped = false;
+  // Index of the next unmatched char in unescaped_token. size_t to match
+  // unescaped_token.length() and avoid a signed/unsigned comparison.
+  size_t i = 0;
+  for (; i < unescaped_token.length() && esc_token_cur < esc_string_end;
+       ++esc_token_cur) {
+    // Remember a later occurrence of the token's first char so a failed match
+    // attempt can resume there without rescanning from scratch.
+    if (esc_token_cur != esc_token_start &&
+        *esc_token_cur == unescaped_token[0] &&
+        possible_next_start == nullptr) {
+      possible_next_start = esc_token_cur;
+    }
+
+    // Every char in unescaped_token should either be an escape or match the
+    // next char in unescaped_token.
+    if (!is_escaped && *esc_token_cur == '\\') {
+      is_escaped = true;
+    } else if (*esc_token_cur == unescaped_token[i]) {
+      is_escaped = false;
+      ++i;
+    } else {
+      // No match. If we don't have a possible_next_start, then try to find one.
+      if (possible_next_start == nullptr) {
+        pos = escaped_string.find(unescaped_token[0],
+                                  esc_token_cur - escaped_string.data());
+        if (pos == std::string_view::npos) {
+          break;
+        }
+        esc_token_start = escaped_string.data() + pos;
+      } else {
+        esc_token_start = possible_next_start;
+        possible_next_start = nullptr;
+      }
+      // esc_token_start has been reset to a char that equals unescaped_token[0]
+      // The for loop above will advance esc_token_cur so set i to 1.
+      i = 1;
+      esc_token_cur = esc_token_start;
+    }
+  }
+  if (i != unescaped_token.length()) {
+    return absl_ports::InvalidArgumentError(
+        absl_ports::StrCat("Couldn't match chars at token=", unescaped_token,
+                           " and raw_text=", escaped_string));
+  }
+  return std::string_view(esc_token_start, esc_token_cur - esc_token_start);
+}
+
+} // namespace string_util
+
+} // namespace lib
+} // namespace icing \ No newline at end of file
diff --git a/icing/query/advanced_query_parser/util/string-util.h b/icing/query/advanced_query_parser/util/string-util.h
new file mode 100644
index 0000000..09fb451
--- /dev/null
+++ b/icing/query/advanced_query_parser/util/string-util.h
@@ -0,0 +1,49 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_QUERY_ADVANCED_QUERY_PARSER_UTIL_STRING_UTIL_H_
+#define ICING_QUERY_ADVANCED_QUERY_PARSER_UTIL_STRING_UTIL_H_
+
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+
+namespace icing {
+namespace lib {
+
+namespace string_util {
+
+// Unescapes `value` by dropping each backslash and keeping the character that
+// it escapes.
+//
+// Returns:
+//   - On success, value with the escapes removed.
+//   - INVALID_ARGUMENT if a non-escaped quote is encountered.
+// Ex. "fo\\\\o" -> "fo\\o"
+libtextclassifier3::StatusOr<std::string> UnescapeStringValue(
+    std::string_view value);
+
+// Locates the escaped form of `unescaped_token` within `escaped_string`.
+//
+// Returns:
+//   - On success, string_view pointing to the segment of escaped_string that,
+//     if unescaped, would match unescaped_token.
+//   - INVALID_ARGUMENT if unescaped_token is empty or cannot be matched.
+// Ex. escaped_string="foo b\\a\\\"r baz", unescaped_token="ba\"r"
+//     returns "b\\a\\\"r"
+libtextclassifier3::StatusOr<std::string_view> FindEscapedToken(
+    std::string_view escaped_string, std::string_view unescaped_token);
+
+}  // namespace string_util
+
+}  // namespace lib
+}  // namespace icing
+
+#endif  // ICING_QUERY_ADVANCED_QUERY_PARSER_UTIL_STRING_UTIL_H_
diff --git a/icing/query/advanced_query_parser/util/string-util_test.cc b/icing/query/advanced_query_parser/util/string-util_test.cc
new file mode 100644
index 0000000..a7ccf3e
--- /dev/null
+++ b/icing/query/advanced_query_parser/util/string-util_test.cc
@@ -0,0 +1,125 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/query/advanced_query_parser/util/string-util.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::IsEmpty;
+
+// An empty input unescapes to an empty string.
+TEST(StringUtilTest, UnescapeStringEmptyString) {
+  EXPECT_THAT(string_util::UnescapeStringValue(""), IsOkAndHolds(IsEmpty()));
+}
+
+// Strings without backslashes are returned unchanged (spaces, tabs and
+// punctuation are not special).
+TEST(StringUtilTest, UnescapeStringStringWithNoEscapes) {
+  EXPECT_THAT(string_util::UnescapeStringValue("foo"), IsOkAndHolds("foo"));
+  EXPECT_THAT(string_util::UnescapeStringValue("f o o"), IsOkAndHolds("f o o"));
+  EXPECT_THAT(string_util::UnescapeStringValue("f\to\to"),
+              IsOkAndHolds("f\to\to"));
+  EXPECT_THAT(string_util::UnescapeStringValue("f.o.o"), IsOkAndHolds("f.o.o"));
+}
+
+// Each backslash is dropped and the character it escapes is kept, including
+// escaped backslashes and escaped quotes; a trailing lone backslash is
+// dropped.
+TEST(StringUtilTest, UnescapeStringStringWithEscapes) {
+  EXPECT_THAT(string_util::UnescapeStringValue("f\\oo"), IsOkAndHolds("foo"));
+  EXPECT_THAT(string_util::UnescapeStringValue("f\\\\oo"),
+              IsOkAndHolds("f\\oo"));
+  EXPECT_THAT(string_util::UnescapeStringValue("f\\\"oo"),
+              IsOkAndHolds("f\"oo"));
+  EXPECT_THAT(string_util::UnescapeStringValue("foo\\"), IsOkAndHolds("foo"));
+  EXPECT_THAT(string_util::UnescapeStringValue("foo b\\a\\\"r baz"),
+              IsOkAndHolds("foo ba\"r baz"));
+  EXPECT_THAT(string_util::UnescapeStringValue("bar b\\aar bar\\s bart"),
+              IsOkAndHolds("bar baar bars bart"));
+  EXPECT_THAT(string_util::UnescapeStringValue("\\\\\\\\a"),
+              IsOkAndHolds("\\\\a"));
+}
+
+// A double quote that is not preceded by a backslash is rejected.
+TEST(StringUtilTest, UnescapeStringQuoteWithoutEscape) {
+  EXPECT_THAT(string_util::UnescapeStringValue("f\\o\"o"),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+  EXPECT_THAT(string_util::UnescapeStringValue("f\"oo"),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+// An empty unescaped token is invalid input.
+TEST(StringUtilTest, FindEscapedTokenEmptyUnescapedToken) {
+  EXPECT_THAT(string_util::FindEscapedToken("foo b\\a\\\"r baz", ""),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+// Tokens that do not occur (or only partially occur) in the escaped string
+// cannot be located.
+TEST(StringUtilTest, FindEscapedTokenTokenNotPresent) {
+  EXPECT_THAT(string_util::FindEscapedToken("foo b\\a\\\"r baz", "elephant"),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+  EXPECT_THAT(string_util::FindEscapedToken("foo b\\a\\\"r baz", "bat"),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+  EXPECT_THAT(string_util::FindEscapedToken("foo b\\a\\\"r baz", "taz"),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+  EXPECT_THAT(string_util::FindEscapedToken("foo b\\a\\\"r baz", "bazz"),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+// A match may begin after a false-start prefix ("ba" in "babar").
+TEST(StringUtilTest, FindEscapedTokenMatchInMiddleToken) {
+  EXPECT_THAT(string_util::FindEscapedToken("babar", "bar"),
+              IsOkAndHolds("bar"));
+}
+
+// The returned view covers the escaped form of the token, including its
+// backslashes.
+TEST(StringUtilTest, FindEscapedTokenMatches) {
+  EXPECT_THAT(string_util::FindEscapedToken("foo b\\a\\\"r baz", "ba\"r"),
+              IsOkAndHolds("b\\a\\\"r"));
+  EXPECT_THAT(string_util::FindEscapedToken("\\\\\\\\a", "\\\\a"),
+              IsOkAndHolds("\\\\\\\\a"));
+}
+
+// Repeatedly searching the suffix that follows each match walks through every
+// occurrence of the token, and fails once no occurrence remains.
+TEST(StringUtilTest, FindEscapedTokenTraversesThroughEscapedText) {
+  std::string_view escaped_text = "bar b\\aar bar\\s bart";
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::string_view result,
+      string_util::FindEscapedToken(escaped_text, "bar"));
+  // escaped_text  = "bar b\\aar bar\\s bart";
+  // escaped_token    ^ ^
+  EXPECT_THAT(result, Eq("bar"));
+
+  // escaped_text  = "b\\aar bar\\s bart";
+  // escaped_token    ^   ^
+  const char* result_end = result.data() + result.length();
+  escaped_text = escaped_text.substr(result_end - escaped_text.data());
+  ICING_ASSERT_OK_AND_ASSIGN(
+      result, string_util::FindEscapedToken(escaped_text, "bar"));
+  EXPECT_THAT(result, Eq("bar"));
+
+  // escaped_text  = "\\s bart";
+  // escaped_token      ^  ^
+  result_end = result.data() + result.length();
+  escaped_text = escaped_text.substr(result_end - escaped_text.data());
+  ICING_ASSERT_OK_AND_ASSIGN(
+      result, string_util::FindEscapedToken(escaped_text, "bar"));
+  EXPECT_THAT(result, Eq("bar"));
+
+  // No further occurrence of "bar" remains in the trailing text.
+  result_end = result.data() + result.length();
+  escaped_text = escaped_text.substr(result_end - escaped_text.data());
+  EXPECT_THAT(string_util::FindEscapedToken(escaped_text, "bar"),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing \ No newline at end of file
diff --git a/icing/query/query-features.h b/icing/query/query-features.h
index 1471063..9fafba5 100644
--- a/icing/query/query-features.h
+++ b/icing/query/query-features.h
@@ -36,8 +36,14 @@ constexpr Feature kNumericSearchFeature =
constexpr Feature kVerbatimSearchFeature =
"VERBATIM_SEARCH"; // Features#VERBATIM_SEARCH
-// TODO(b/208654892): Add this as an enabled feature in the query visitor when
-// it gets invoked.
+// This feature covers all additions (other than numeric search and verbatim
+// search) to the query language to bring it into better alignment with the list
+// filters spec.
+// This includes:
+// - support for function calls
+// - expanding support for negation and property restriction expressions
+// - prefix operator '*'
+// - 'NOT' operator
constexpr Feature kListFilterQueryLanguageFeature =
"LIST_FILTER_QUERY_LANGUAGE"; // Features#LIST_FILTER_QUERY_LANGUAGE
diff --git a/icing/query/query-processor.cc b/icing/query/query-processor.cc
index 9b03a0e..6760fad 100644
--- a/icing/query/query-processor.cc
+++ b/icing/query/query-processor.cc
@@ -203,8 +203,8 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseAdvancedQuery(
ranking_strategy == ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE;
QueryVisitor query_visitor(
&index_, &numeric_index_, &document_store_, &schema_store_, &normalizer_,
- plain_tokenizer.get(), std::move(options), search_spec.term_match_type(),
- needs_term_frequency_info);
+ plain_tokenizer.get(), search_spec.query(), std::move(options),
+ search_spec.term_match_type(), needs_term_frequency_info);
tree_root->Accept(&query_visitor);
return std::move(query_visitor).ConsumeResults();
}
diff --git a/icing/query/suggestion-processor_test.cc b/icing/query/suggestion-processor_test.cc
index e161099..d4ecec0 100644
--- a/icing/query/suggestion-processor_test.cc
+++ b/icing/query/suggestion-processor_test.cc
@@ -14,6 +14,9 @@
#include "icing/query/suggestion-processor.h"
+#include <string>
+#include <vector>
+
#include "gmock/gmock.h"
#include "icing/document-builder.h"
#include "icing/index/numeric/dummy-numeric-index.h"
@@ -36,10 +39,19 @@ namespace lib {
namespace {
using ::testing::IsEmpty;
-using ::testing::SizeIs;
using ::testing::Test;
using ::testing::UnorderedElementsAre;
+// Returns the suggestion text (TermMetadata::content) of each entry in
+// `terms`, preserving order. Test helper for comparing suggestion results
+// against expected strings.
+std::vector<std::string> RetrieveSuggestionsText(
+    const std::vector<TermMetadata>& terms) {
+  std::vector<std::string> suggestions;
+  suggestions.reserve(terms.size());
+  for (const TermMetadata& term : terms) {
+    suggestions.push_back(term.content);
+  }
+  return suggestions;
+}
+
class SuggestionProcessorTest : public Test {
protected:
SuggestionProcessorTest()
@@ -181,8 +193,7 @@ TEST_F(SuggestionProcessorTest, MultipleTermsTest_And) {
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
suggestion_processor_->QuerySuggestions(suggestion_spec));
- EXPECT_THAT(terms.at(0).content, "bar foo");
- EXPECT_THAT(terms, SizeIs(1));
+ EXPECT_THAT(RetrieveSuggestionsText(terms), UnorderedElementsAre("bar foo"));
}
TEST_F(SuggestionProcessorTest, MultipleTermsTest_AndNary) {
@@ -228,8 +239,8 @@ TEST_F(SuggestionProcessorTest, MultipleTermsTest_AndNary) {
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
suggestion_processor_->QuerySuggestions(suggestion_spec));
- EXPECT_THAT(terms.at(0).content, "bar cat foo");
- EXPECT_THAT(terms, SizeIs(1));
+ EXPECT_THAT(RetrieveSuggestionsText(terms),
+ UnorderedElementsAre("bar cat foo"));
}
TEST_F(SuggestionProcessorTest, MultipleTermsTest_Or) {
@@ -277,11 +288,7 @@ TEST_F(SuggestionProcessorTest, MultipleTermsTest_Or) {
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
suggestion_processor_->QuerySuggestions(suggestion_spec));
- std::vector<std::string> suggestions;
- for (TermMetadata term : terms) {
- suggestions.push_back(term.content);
- }
- EXPECT_THAT(suggestions,
+ EXPECT_THAT(RetrieveSuggestionsText(terms),
UnorderedElementsAre("bar OR cat fo", "bar OR cat foo"));
}
@@ -340,14 +347,11 @@ TEST_F(SuggestionProcessorTest, MultipleTermsTest_OrNary) {
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
suggestion_processor_->QuerySuggestions(suggestion_spec));
- std::vector<std::string> suggestions;
- for (TermMetadata term : terms) {
- suggestions.push_back(term.content);
- }
// "fo" in document1, "foo" in document2 and "fool" in document3 could match.
- EXPECT_THAT(suggestions, UnorderedElementsAre("bar OR cat OR lot fo",
- "bar OR cat OR lot foo",
- "bar OR cat OR lot fool"));
+ EXPECT_THAT(
+ RetrieveSuggestionsText(terms),
+ UnorderedElementsAre("bar OR cat OR lot fo", "bar OR cat OR lot foo",
+ "bar OR cat OR lot fool"));
}
TEST_F(SuggestionProcessorTest, MultipleTermsTest_NormalizedTerm) {
@@ -394,22 +398,17 @@ TEST_F(SuggestionProcessorTest, MultipleTermsTest_NormalizedTerm) {
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
suggestion_processor_->QuerySuggestions(suggestion_spec));
- std::vector<std::string> suggestions;
- for (TermMetadata term : terms) {
- suggestions.push_back(term.content);
- }
// The term is normalized.
- EXPECT_THAT(suggestions, UnorderedElementsAre("bar foo", "bar fool"));
- suggestions.clear();
+ EXPECT_THAT(RetrieveSuggestionsText(terms),
+ UnorderedElementsAre("bar foo", "bar fool"));
+
// Search for "bar AND ḞÖ"
suggestion_spec.set_prefix("bar ḞÖ");
ICING_ASSERT_OK_AND_ASSIGN(
terms, suggestion_processor_->QuerySuggestions(suggestion_spec));
- for (TermMetadata term : terms) {
- suggestions.push_back(term.content);
- }
// The term is normalized.
- EXPECT_THAT(suggestions, UnorderedElementsAre("bar foo", "bar fool"));
+ EXPECT_THAT(RetrieveSuggestionsText(terms),
+ UnorderedElementsAre("bar foo", "bar fool"));
}
TEST_F(SuggestionProcessorTest, NonExistentPrefixTest) {
@@ -441,7 +440,6 @@ TEST_F(SuggestionProcessorTest, NonExistentPrefixTest) {
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
suggestion_processor_->QuerySuggestions(suggestion_spec));
-
EXPECT_THAT(terms, IsEmpty());
}
@@ -474,7 +472,6 @@ TEST_F(SuggestionProcessorTest, PrefixTrailingSpaceTest) {
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
suggestion_processor_->QuerySuggestions(suggestion_spec));
-
EXPECT_THAT(terms, IsEmpty());
}
@@ -506,23 +503,22 @@ TEST_F(SuggestionProcessorTest, NormalizePrefixTest) {
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
suggestion_processor_->QuerySuggestions(suggestion_spec));
- EXPECT_THAT(terms.at(0).content, "foo");
+ EXPECT_THAT(RetrieveSuggestionsText(terms), UnorderedElementsAre("foo"));
suggestion_spec.set_prefix("fO");
ICING_ASSERT_OK_AND_ASSIGN(
terms, suggestion_processor_->QuerySuggestions(suggestion_spec));
- EXPECT_THAT(terms.at(0).content, "foo");
+ EXPECT_THAT(RetrieveSuggestionsText(terms), UnorderedElementsAre("foo"));
suggestion_spec.set_prefix("Fo");
ICING_ASSERT_OK_AND_ASSIGN(
terms, suggestion_processor_->QuerySuggestions(suggestion_spec));
- EXPECT_THAT(terms.at(0).content, "foo");
+ EXPECT_THAT(RetrieveSuggestionsText(terms), UnorderedElementsAre("foo"));
suggestion_spec.set_prefix("FO");
ICING_ASSERT_OK_AND_ASSIGN(
terms, suggestion_processor_->QuerySuggestions(suggestion_spec));
-
- EXPECT_THAT(terms.at(0).content, "foo");
+ EXPECT_THAT(RetrieveSuggestionsText(terms), UnorderedElementsAre("foo"));
}
TEST_F(SuggestionProcessorTest, ParenthesesOperatorPrefixTest) {
@@ -593,20 +589,34 @@ TEST_F(SuggestionProcessorTest, OtherSpecialPrefixTest) {
suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
TermMatchType::PREFIX);
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<TermMetadata> terms,
- suggestion_processor_->QuerySuggestions(suggestion_spec));
- EXPECT_THAT(terms, IsEmpty());
+ auto terms_or = suggestion_processor_->QuerySuggestions(suggestion_spec);
+ if (SearchSpecProto::default_instance().search_type() ==
+ SearchSpecProto::SearchType::ICING_RAW_QUERY) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, terms_or);
+ EXPECT_THAT(terms, IsEmpty());
+ } else {
+ EXPECT_THAT(terms_or,
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ }
+ // TODO(b/208654892): Update handling for hyphens to only consider it a hyphen
+ // within a TEXT token (rather than a MINUS token) when surrounded on both
+ // sides by TEXT rather than just preceded by TEXT.
suggestion_spec.set_prefix("f-");
- ICING_ASSERT_OK_AND_ASSIGN(
- terms, suggestion_processor_->QuerySuggestions(suggestion_spec));
+ terms_or = suggestion_processor_->QuerySuggestions(suggestion_spec);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, terms_or);
EXPECT_THAT(terms, IsEmpty());
suggestion_spec.set_prefix("f OR");
- ICING_ASSERT_OK_AND_ASSIGN(
- terms, suggestion_processor_->QuerySuggestions(suggestion_spec));
- EXPECT_THAT(terms, IsEmpty());
+ terms_or = suggestion_processor_->QuerySuggestions(suggestion_spec);
+ if (SearchSpecProto::default_instance().search_type() ==
+ SearchSpecProto::SearchType::ICING_RAW_QUERY) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, terms_or);
+ EXPECT_THAT(terms, IsEmpty());
+ } else {
+ EXPECT_THAT(terms_or,
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ }
}
TEST_F(SuggestionProcessorTest, InvalidPrefixTest) {
@@ -635,10 +645,15 @@ TEST_F(SuggestionProcessorTest, InvalidPrefixTest) {
suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
TermMatchType::PREFIX);
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<TermMetadata> terms,
- suggestion_processor_->QuerySuggestions(suggestion_spec));
- EXPECT_THAT(terms, IsEmpty());
+ auto terms_or = suggestion_processor_->QuerySuggestions(suggestion_spec);
+ if (SearchSpecProto::default_instance().search_type() ==
+ SearchSpecProto::SearchType::ICING_RAW_QUERY) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, terms_or);
+ EXPECT_THAT(terms, IsEmpty());
+ } else {
+ EXPECT_THAT(terms_or,
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ }
}
} // namespace
diff --git a/icing/schema/joinable-property-manager.cc b/icing/schema/joinable-property-manager.cc
index 3977b6b..1606abb 100644
--- a/icing/schema/joinable-property-manager.cc
+++ b/icing/schema/joinable-property-manager.cc
@@ -156,8 +156,7 @@ JoinablePropertyManager::GetJoinablePropertyMetadata(
.property_path_to_id_map.find(property_path);
if (iter == joinable_property_metadata_cache_[schema_type_id]
.property_path_to_id_map.end()) {
- return absl_ports::NotFoundError(
- "Property path is not joinable or doesn't exist");
+ return nullptr;
}
JoinablePropertyId joinable_property_id = iter->second;
diff --git a/icing/schema/joinable-property-manager.h b/icing/schema/joinable-property-manager.h
index c7038ce..3ee5963 100644
--- a/icing/schema/joinable-property-manager.h
+++ b/icing/schema/joinable-property-manager.h
@@ -100,9 +100,9 @@ class JoinablePropertyManager {
//
// Returns:
// - Valid pointer to JoinablePropertyMetadata on success
+ // - nullptr if property_path doesn't exist (or is not joinable) in the
+ // joinable metadata list of the schema
// - INVALID_ARGUMENT_ERROR if schema type id is invalid
- // - NOT_FOUND_ERROR if property_path doesn't exist (or is not joinable) in
- // the joinable metadata list of the schema
libtextclassifier3::StatusOr<const JoinablePropertyMetadata*>
GetJoinablePropertyMetadata(SchemaTypeId schema_type_id,
const std::string& property_path) const;
diff --git a/icing/schema/joinable-property-manager_test.cc b/icing/schema/joinable-property-manager_test.cc
index d9a3841..ceaaa18 100644
--- a/icing/schema/joinable-property-manager_test.cc
+++ b/icing/schema/joinable-property-manager_test.cc
@@ -42,6 +42,7 @@ namespace lib {
namespace {
using ::testing::ElementsAre;
+using ::testing::IsNull;
using ::testing::Pointee;
using ::testing::SizeIs;
@@ -491,11 +492,11 @@ TEST_F(JoinablePropertyManagerTest, GetJoinablePropertyMetadataByPathNotExist) {
EXPECT_THAT(
schema_type_manager->joinable_property_manager()
.GetJoinablePropertyMetadata(/*schema_type_id=*/0, "nonExistingPath"),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ IsOkAndHolds(IsNull()));
EXPECT_THAT(schema_type_manager->joinable_property_manager()
.GetJoinablePropertyMetadata(/*schema_type_id=*/1,
"emails.nonExistingPath"),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ IsOkAndHolds(IsNull()));
}
// Note: valid GetMetadataList has been tested in
diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc
index 0e0c917..065157e 100644
--- a/icing/schema/schema-store.cc
+++ b/icing/schema/schema-store.cc
@@ -369,11 +369,6 @@ SchemaStore::SetSchema(SchemaProto&& new_schema,
bool ignore_errors_and_delete_documents) {
ICING_ASSIGN_OR_RETURN(SchemaUtil::DependentMap new_dependent_map,
SchemaUtil::Validate(new_schema));
- // TODO(b/256022027): validate and extract joinable properties.
- // - Joinable config in non-string properties should be ignored, since
- // currently we only support string joining.
- // - If set joinable, the property itself and all of its nested properties
- // should not have REPEATED cardinality.
SetSchemaResult result;
diff --git a/icing/schema/schema-store.h b/icing/schema/schema-store.h
index 601d22a..5ad714e 100644
--- a/icing/schema/schema-store.h
+++ b/icing/schema/schema-store.h
@@ -225,10 +225,10 @@ class SchemaStore {
//
// Returns:
// Valid pointer to JoinablePropertyMetadata on success
+ // nullptr if property_path doesn't exist (or is not joinable) in the
+ // joinable metadata list of the schema
// FAILED_PRECONDITION if schema hasn't been set yet
// INVALID_ARGUMENT if schema type id is invalid
- // NOT_FOUND if property_path doesn't exist (or is not joinable) in the
- // joinable metadata list of the schema
libtextclassifier3::StatusOr<const JoinablePropertyMetadata*>
GetJoinablePropertyMetadata(SchemaTypeId schema_type_id,
const std::string& property_path) const;
diff --git a/icing/schema/schema-util.cc b/icing/schema/schema-util.cc
index 0589ada..ea0d85a 100644
--- a/icing/schema/schema-util.cc
+++ b/icing/schema/schema-util.cc
@@ -15,6 +15,7 @@
#include "icing/schema/schema-util.h"
#include <cstdint>
+#include <queue>
#include <string>
#include <string_view>
#include <unordered_map>
@@ -143,7 +144,7 @@ void AddIncompatibleChangeToDelta(
auto dependent_types_itr =
new_schema_dependent_map.find(old_type_config.schema_type());
if (dependent_types_itr != new_schema_dependent_map.end()) {
- for (std::string_view dependent_type : dependent_types_itr->second) {
+ for (const auto& [dependent_type, _] : dependent_types_itr->second) {
// The types from new_schema that depend on the current
// old_type_config may not present in old_schema.
// Those types will be listed at schema_delta.schema_types_new
@@ -176,14 +177,15 @@ libtextclassifier3::Status ExpandTranstiveDependents(
return libtextclassifier3::Status::OK;
}
pending_expansions->insert(type);
- std::unordered_set<std::string_view> expanded_dependents;
+ std::unordered_map<std::string_view, std::vector<const PropertyConfigProto*>>
+ expanded_dependents;
// Add all of the direct dependents.
expanded_dependents.reserve(itr->second.size());
expanded_dependents.insert(itr->second.begin(), itr->second.end());
// Iterate through each direct dependent and add their indirect dependents.
- for (std::string_view dep : itr->second) {
+ for (const auto& [dep, _] : itr->second) {
// 1. Check if we're in the middle of expanding this type - IOW there's a
// cycle!
if (pending_expansions->count(dep) > 0) {
@@ -206,8 +208,12 @@ libtextclassifier3::Status ExpandTranstiveDependents(
auto dep_expanded_itr = expanded_dependent_map->find(dep);
expanded_dependents.reserve(expanded_dependents.size() +
dep_expanded_itr->second.size());
- expanded_dependents.insert(dep_expanded_itr->second.begin(),
- dep_expanded_itr->second.end());
+ for (const auto& [dep_dependent, _] : dep_expanded_itr->second) {
+ // Insert a transitive dependent `dep_dependent` for `type`. Also since
+ // there is no direct edge between `type` and `dep_dependent`, the direct
+ // edge (i.e. PropertyConfigProto*) vector is empty.
+ expanded_dependents.insert({dep_dependent, {}});
+ }
}
expanded_dependent_map->insert({type, std::move(expanded_dependents)});
pending_expansions->erase(type);
@@ -283,7 +289,8 @@ BuildTransitiveDependentGraph(const SchemaProto& schema) {
if (known_types.count(property_schema_type) == 0) {
unknown_types.insert(property_schema_type);
}
- dependent_map[property_schema_type].insert(schema_type);
+ dependent_map[property_schema_type][schema_type].push_back(
+ &property_config);
}
}
}
@@ -305,6 +312,9 @@ libtextclassifier3::StatusOr<SchemaUtil::DependentMap> SchemaUtil::Validate(
// already.
std::unordered_set<std::string_view> known_property_names;
+ // Tracks PropertyConfigs containing joinable properties.
+ std::unordered_set<std::string_view> schema_types_with_joinable_property;
+
// 2. Validate the properties of each type.
for (const auto& type_config : schema.types()) {
std::string_view schema_type(type_config.schema_type());
@@ -351,6 +361,55 @@ libtextclassifier3::StatusOr<SchemaUtil::DependentMap> SchemaUtil::Validate(
property_config.string_indexing_config(), data_type, schema_type,
property_name));
}
+
+ ICING_RETURN_IF_ERROR(ValidateJoinableConfig(
+ property_config.joinable_config(), data_type,
+ property_config.cardinality(), schema_type, property_name));
+ if (property_config.joinable_config().value_type() !=
+ JoinableConfig::ValueType::NONE) {
+ schema_types_with_joinable_property.insert(schema_type);
+ }
+ }
+ }
+
+ // BFS traverse the dependent graph to make sure that no nested levels
+ // (properties with DOCUMENT data type) have REPEATED cardinality while
+ // depending on schema types with joinable property.
+ std::queue<std::string_view> frontier;
+ for (const auto& schema_type : schema_types_with_joinable_property) {
+ frontier.push(schema_type);
+ }
+ std::unordered_set<std::string_view> traversed =
+ std::move(schema_types_with_joinable_property);
+ while (!frontier.empty()) {
+ std::string_view schema_type = frontier.front();
+ frontier.pop();
+
+ const auto it = dependent_map.find(schema_type);
+ if (it == dependent_map.end()) {
+ continue;
+ }
+
+ // Check every type that has a property of type schema_type.
+ for (const auto& [next_schema_type, property_configs] : it->second) {
+ // Check all properties in "next_schema_type" that are of type
+ // "schema_type".
+ for (const PropertyConfigProto* property_config : property_configs) {
+ if (property_config != nullptr &&
+ property_config->cardinality() ==
+ PropertyConfigProto::Cardinality::REPEATED) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Schema type '", next_schema_type,
+ "' cannot have REPEATED nested document property '",
+ property_config->property_name(),
+ "' while connecting to some joinable properties"));
+ }
+ }
+
+ if (traversed.count(next_schema_type) == 0) {
+ traversed.insert(next_schema_type);
+ frontier.push(next_schema_type);
+ }
}
}
@@ -440,6 +499,35 @@ libtextclassifier3::Status SchemaUtil::ValidateStringIndexingConfig(
return libtextclassifier3::Status::OK;
}
+libtextclassifier3::Status SchemaUtil::ValidateJoinableConfig(
+ const JoinableConfig& config, PropertyConfigProto::DataType::Code data_type,
+ PropertyConfigProto::Cardinality::Code cardinality,
+ std::string_view schema_type, std::string_view property_name) {
+ if (config.value_type() == JoinableConfig::ValueType::QUALIFIED_ID) {
+ if (data_type != PropertyConfigProto::DataType::STRING) {
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("Qualified id joinable property '", property_name,
+ "' is required to have STRING data type"));
+ }
+
+ if (cardinality == PropertyConfigProto::Cardinality::REPEATED) {
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("Qualified id joinable property '", property_name,
+ "' cannot have REPEATED cardinality"));
+ }
+ }
+
+ if (config.propagate_delete() &&
+ config.value_type() != JoinableConfig::ValueType::QUALIFIED_ID) {
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("Field 'property_name' '", property_name,
+ "' is required to have QUALIFIED_ID joinable "
+ "value type with delete propagation enabled"));
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
void SchemaUtil::BuildTypeConfigMap(
const SchemaProto& schema, SchemaUtil::TypeConfigMap* type_config_map) {
type_config_map->clear();
diff --git a/icing/schema/schema-util.h b/icing/schema/schema-util.h
index e5747bb..47bb76b 100644
--- a/icing/schema/schema-util.h
+++ b/icing/schema/schema-util.h
@@ -34,9 +34,19 @@ class SchemaUtil {
std::unordered_map<std::string, const SchemaTypeConfigProto>;
// If A -> B is indicated in the map, then type A must be built before
- // building type B, i.e. B depends on A.
- using DependentMap = std::unordered_map<std::string_view,
- std::unordered_set<std::string_view>>;
+ // building type B, i.e. B has a property of type A. Also include all
+ // PropertyConfigProto (with DOCUMENT data_type) pointers which directly
+ // connect type A and B. IOW, these PropertyConfigProto* are the "direct
+ // edges" connecting A and B directly. It will be an empty vector if A and B
+ // are not "directly" connected, but instead via another intermediate level of
+ // schema type. For example, the actual dependency is A -> C -> B, so there
+ // will be A -> C and C -> B with valid PropertyConfigProto* respectively in
+ // this map, but we will also expand transitive dependents: add A -> B into
+ // dependent map with empty vector of "edges".
+ using DependentMap = std::unordered_map<
+ std::string_view,
+ std::unordered_map<std::string_view,
+ std::vector<const PropertyConfigProto*>>>;
struct SchemaDelta {
// Which schema types were present in the old schema, but were deleted from
@@ -113,6 +123,11 @@ class SchemaUtil {
// itself, thus creating an infinite loop.
// 13. Two SchemaTypeConfigProtos cannot have properties that reference each
// other's schema_type, thus creating an infinite loop.
+ // 14. PropertyConfigProtos.joinable_config must be valid. See
+ // ValidateJoinableConfig for more details.
+ // 15. Any PropertyConfigProtos with nested DOCUMENT data type must not have
+ // REPEATED cardinality if they reference a schema type containing
+ // joinable property.
//
// TODO(b/171996137): Clarify 12 and 13 are only for indexed properties, once
// document properties can be opted out of indexing.
@@ -121,7 +136,7 @@ class SchemaUtil {
// On success, a dependent map from each types to their dependent types
// that depend on it directly or indirectly.
// ALREADY_EXISTS for case 1 and 2
- // INVALID_ARGUMENT for 3-13
+ // INVALID_ARGUMENT for 3-15
static libtextclassifier3::StatusOr<DependentMap> Validate(
const SchemaProto& schema);
@@ -145,6 +160,8 @@ class SchemaUtil {
// `SchemaDelta.schema_types_deleted`
// 3. A schema type's new definition would mean any existing data of the old
// definition is now incompatible.
+ // 4. The derived join index would be incompatible. This is held in
+ // `SchemaDelta.join_incompatible`.
//
// For case 1, the two schemas would result in an incompatible index if:
// 1.1. The new SchemaProto has a different set of indexed properties than
@@ -167,6 +184,11 @@ class SchemaUtil {
// scale defined as:
// LEAST <REPEATED - OPTIONAL - REQUIRED> MOST
//
+ // For case 4, the two schemas would result in an incompatible join if:
+ // 4.1. A SchemaTypeConfig exists in the new SchemaProto that has a
+ // different set of joinable properties than it did in the old
+ // SchemaProto.
+ //
// A property is defined by the combination of the
// SchemaTypeConfig.schema_type and the PropertyConfigProto.property_name.
//
@@ -227,6 +249,22 @@ class SchemaUtil {
const StringIndexingConfig& config,
PropertyConfigProto::DataType::Code data_type,
std::string_view schema_type, std::string_view property_name);
+
+ // Checks that the 'joinable_config' satisfies the following rules:
+ // 1. The data type is compatible with the joinable value type:
+ //    a. Only STRING data types can use QUALIFIED_ID joinable value type
+ // 2. Only QUALIFIED_ID joinable value type can have delete propagation
+ //    enabled
+ // 3. Any QUALIFIED_ID joinable property should have non-REPEATED cardinality
+ //
+ // Returns:
+ // INVALID_ARGUMENT if any of the rules are not followed
+ // OK on success
+ static libtextclassifier3::Status ValidateJoinableConfig(
+ const JoinableConfig& config,
+ PropertyConfigProto::DataType::Code data_type,
+ PropertyConfigProto::Cardinality::Code cardinality,
+ std::string_view schema_type, std::string_view property_name);
};
} // namespace lib
diff --git a/icing/schema/schema-util_test.cc b/icing/schema/schema-util_test.cc
index 44d8def..2d1e683 100644
--- a/icing/schema/schema-util_test.cc
+++ b/icing/schema/schema-util_test.cc
@@ -30,9 +30,13 @@ namespace icing {
namespace lib {
namespace {
+using portable_equals_proto::EqualsProto;
using ::testing::Eq;
using ::testing::HasSubstr;
using ::testing::IsEmpty;
+using ::testing::Pair;
+using ::testing::Pointee;
+using ::testing::UnorderedElementsAre;
// Properties/fields in a schema type
constexpr char kEmailType[] = "EmailMessage";
@@ -118,12 +122,32 @@ TEST(SchemaUtilTest, DependentGraphAlphabeticalOrder) {
ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
SchemaUtil::Validate(schema));
EXPECT_THAT(d_map, testing::SizeIs(5));
- EXPECT_THAT(d_map["F"],
- testing::UnorderedElementsAre("A", "B", "C", "D", "E"));
- EXPECT_THAT(d_map["E"], testing::UnorderedElementsAre("A", "B", "C", "D"));
- EXPECT_THAT(d_map["D"], testing::UnorderedElementsAre("A", "B"));
- EXPECT_THAT(d_map["C"], testing::UnorderedElementsAre("A", "B"));
- EXPECT_THAT(d_map["B"], testing::UnorderedElementsAre("A"));
+ EXPECT_THAT(
+ d_map["F"],
+ UnorderedElementsAre(Pair("A", IsEmpty()), Pair("B", IsEmpty()),
+ Pair("C", IsEmpty()), Pair("D", IsEmpty()),
+ Pair("E", UnorderedElementsAre(Pointee(
+ EqualsProto(type_e.properties(0)))))));
+ EXPECT_THAT(d_map["E"],
+ UnorderedElementsAre(
+ Pair("A", IsEmpty()), Pair("B", IsEmpty()),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(0))))),
+ Pair("D", UnorderedElementsAre(
+ Pointee(EqualsProto(type_d.properties(0)))))));
+ EXPECT_THAT(
+ d_map["D"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(1)))))));
+ EXPECT_THAT(
+ d_map["C"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(0)))))));
+ EXPECT_THAT(d_map["B"], UnorderedElementsAre(Pair(
+ "A", UnorderedElementsAre(Pointee(
+ EqualsProto(type_a.properties(0)))))));
}
TEST(SchemaUtilTest, DependentGraphReverseAlphabeticalOrder) {
@@ -206,12 +230,32 @@ TEST(SchemaUtilTest, DependentGraphReverseAlphabeticalOrder) {
ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
SchemaUtil::Validate(schema));
EXPECT_THAT(d_map, testing::SizeIs(5));
- EXPECT_THAT(d_map["F"],
- testing::UnorderedElementsAre("A", "B", "C", "D", "E"));
- EXPECT_THAT(d_map["E"], testing::UnorderedElementsAre("A", "B", "C", "D"));
- EXPECT_THAT(d_map["D"], testing::UnorderedElementsAre("A", "B"));
- EXPECT_THAT(d_map["C"], testing::UnorderedElementsAre("A", "B"));
- EXPECT_THAT(d_map["B"], testing::UnorderedElementsAre("A"));
+ EXPECT_THAT(
+ d_map["F"],
+ UnorderedElementsAre(Pair("A", IsEmpty()), Pair("B", IsEmpty()),
+ Pair("C", IsEmpty()), Pair("D", IsEmpty()),
+ Pair("E", UnorderedElementsAre(Pointee(
+ EqualsProto(type_e.properties(0)))))));
+ EXPECT_THAT(d_map["E"],
+ UnorderedElementsAre(
+ Pair("A", IsEmpty()), Pair("B", IsEmpty()),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(0))))),
+ Pair("D", UnorderedElementsAre(
+ Pointee(EqualsProto(type_d.properties(0)))))));
+ EXPECT_THAT(
+ d_map["D"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(1)))))));
+ EXPECT_THAT(
+ d_map["C"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(0)))))));
+ EXPECT_THAT(d_map["B"], UnorderedElementsAre(Pair(
+ "A", UnorderedElementsAre(Pointee(
+ EqualsProto(type_a.properties(0)))))));
}
TEST(SchemaUtilTest, DependentGraphMixedOrder) {
@@ -293,12 +337,32 @@ TEST(SchemaUtilTest, DependentGraphMixedOrder) {
ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
SchemaUtil::Validate(schema));
EXPECT_THAT(d_map, testing::SizeIs(5));
- EXPECT_THAT(d_map["F"],
- testing::UnorderedElementsAre("A", "B", "C", "D", "E"));
- EXPECT_THAT(d_map["E"], testing::UnorderedElementsAre("A", "B", "C", "D"));
- EXPECT_THAT(d_map["D"], testing::UnorderedElementsAre("A", "B"));
- EXPECT_THAT(d_map["C"], testing::UnorderedElementsAre("A", "B"));
- EXPECT_THAT(d_map["B"], testing::UnorderedElementsAre("A"));
+ EXPECT_THAT(
+ d_map["F"],
+ UnorderedElementsAre(Pair("A", IsEmpty()), Pair("B", IsEmpty()),
+ Pair("C", IsEmpty()), Pair("D", IsEmpty()),
+ Pair("E", UnorderedElementsAre(Pointee(
+ EqualsProto(type_e.properties(0)))))));
+ EXPECT_THAT(d_map["E"],
+ UnorderedElementsAre(
+ Pair("A", IsEmpty()), Pair("B", IsEmpty()),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(0))))),
+ Pair("D", UnorderedElementsAre(
+ Pointee(EqualsProto(type_d.properties(0)))))));
+ EXPECT_THAT(
+ d_map["D"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(1)))))));
+ EXPECT_THAT(
+ d_map["C"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(0)))))));
+ EXPECT_THAT(d_map["B"], UnorderedElementsAre(Pair(
+ "A", UnorderedElementsAre(Pointee(
+ EqualsProto(type_a.properties(0)))))));
}
TEST(SchemaUtilTest, TopLevelCycle) {
@@ -888,7 +952,8 @@ TEST(SchemaUtilTest, DifferentSchemaTypeIsIncompatible) {
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_incompatible.emplace(kEmailType);
// kEmailType depends on kMessageType
- SchemaUtil::DependentMap dependents_map = {{kMessageType, {kEmailType}}};
+ SchemaUtil::DependentMap dependents_map = {
+ {kMessageType, {{kEmailType, {}}}}};
SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta(
old_schema, new_schema, dependents_map);
EXPECT_THAT(actual, Eq(schema_delta));
@@ -1403,7 +1468,7 @@ TEST(SchemaUtilTest, IndexNestedDocumentsIndexIncompatible) {
// unaffected.
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_index_incompatible.emplace(kPersonType);
- SchemaUtil::DependentMap dependents_map = {{kEmailType, {kPersonType}}};
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {{kPersonType, {}}}}};
SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta(
no_nested_index_schema, nested_index_schema, dependents_map);
EXPECT_THAT(actual, Eq(schema_delta));
@@ -1466,6 +1531,547 @@ TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTokenizer) {
EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
}
+TEST(SchemaUtilTest,
+ ValidateJoinablePropertyTypeQualifiedIdShouldHaveStringDataType) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_INT64)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ // Error if data type is not STRING for qualified id joinable value type.
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Passes once we set STRING as the data type.
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+}
+
+TEST(SchemaUtilTest, ValidateJoinablePropertyShouldNotHaveRepeatedCardinality) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ // Error if using REPEATED cardinality for joinable property.
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Passes once we use OPTIONAL cardinality with joinable property.
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+
+ // Passes once we use REQUIRED cardinality with joinable property.
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+
+ // Passes once we use REPEATED cardinality with non-joinable property.
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_NONE,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+}
+
+TEST(SchemaUtilTest,
+ ValidateJoinablePropertyWithDeletePropagationShouldHaveTypeQualifiedId) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_NONE,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ // Error if enabling delete propagation with non qualified id joinable value
+ // type.
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Passes once we set qualified id joinable value type with delete propagation
+ // enabled.
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+
+ // Passes once we disable delete propagation.
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_NONE,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+}
+
+TEST(SchemaUtilTest,
+ ValidateNestedJoinablePropertyShouldNotHaveNestedRepeatedCardinality) {
+ // Dependency and nested document property cardinality:
+ // "C" --(REPEATED)--> "B" --(OPTIONAL)--> "A"
+ // where "A" contains joinable property. This should not be allowed.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("C").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument("B",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Passes once we use non-REPEATED cardinality for "C.b", i.e. the dependency
+ // and nested document property cardinality becomes:
+ // "C" --(OPTIONAL)--> "B" --(OPTIONAL)--> "A"
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("C").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument("B",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+}
+
+TEST(
+ SchemaUtilTest,
+ ValidateNestedJoinablePropertyShouldAllowRepeatedCardinalityIfNoJoinableProperty) {
+ // Dependency and nested document property cardinality:
+ // "C" --(OPTIONAL)--> "B" --(REPEATED)--> "A"
+ // where only "B" contains joinable property. This should be allowed.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_NONE,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument(
+ "A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Bar")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("C").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument("B",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Passes since nested schema type with REPEATED cardinality doesn't have
+ // joinable property.
+ EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+}
+
+TEST(SchemaUtilTest,
+ ValidateNestedJoinablePropertyMultiplePropertiesWithSameSchema) {
+ // Dependency and nested document property cardinality:
+ // --(a1: OPTIONAL)--
+ // / \
+ // B -- --> A
+ // \ /
+ // --(a2: REPEATED)--
+ // where "A" contains joinable property. This should not be allowed.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("a1")
+ .SetDataTypeDocument(
+ "A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("a2")
+ .SetDataTypeDocument(
+ "A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Passes once we use non-REPEATED cardinality for "B.a2", i.e. the dependency
+ // and nested document property cardinality becomes:
+ // --(a1: OPTIONAL)--
+ // / \
+ // B -- --> A
+ // \ /
+ // --(a2: OPTIONAL)--
+ schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("a1")
+ .SetDataTypeDocument(
+ "A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("a2")
+ .SetDataTypeDocument(
+ "A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+}
+
+TEST(SchemaUtilTest, ValidateNestedJoinablePropertyDiamondRelationship) {
+ // Dependency and nested document property cardinality:
+ // B
+ // / \
+ // (OPTIONAL) (OPTIONAL)
+ // / \
+ // D --- --> A
+ // \ /
+ // (OPTIONAL) (OPTIONAL)
+ // \ /
+ // C
+ // where "A" contains joinable property. This should be allowed.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("C").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument(
+ "B",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("c")
+ .SetDataTypeDocument(
+ "C",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+
+ // Fails once we change any of edge to REPEATED cardinality.
+ // B
+ // / \
+ // (REPEATED) (OPTIONAL)
+ // / \
+ // D --- --> A
+ // \ /
+ // (OPTIONAL) (OPTIONAL)
+ // \ /
+ // C
+ schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("C").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument(
+ "B",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("c")
+ .SetDataTypeDocument(
+ "C",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // B
+ // / \
+ // (OPTIONAL) (REPEATED)
+ // / \
+ // D --- --> A
+ // \ /
+ // (OPTIONAL) (OPTIONAL)
+ // \ /
+ // C
+ schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder().SetType("C").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument(
+ "B",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("c")
+ .SetDataTypeDocument(
+ "C",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // B
+ // / \
+ // (OPTIONAL) (OPTIONAL)
+ // / \
+ // D --- --> A
+ // \ /
+ // (REPEATED) (OPTIONAL)
+ // \ /
+ // C
+ schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("C").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument(
+ "B",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("c")
+ .SetDataTypeDocument(
+ "C",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // B
+ // / \
+ // (OPTIONAL) (OPTIONAL)
+ // / \
+ // D --- --> A
+ // \ /
+ // (OPTIONAL) (REPEATED)
+ // \ /
+ // C
+ schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("C").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument(
+ "B",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("c")
+ .SetDataTypeDocument(
+ "C",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
TEST(SchemaUtilTest, MultipleReferencesToSameNestedSchemaOk) {
SchemaProto schema =
SchemaBuilder()
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index 35ee172..710ff58 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -89,17 +89,6 @@ constexpr int32_t kUriMapperMaxSize = 36 * 1024 * 1024; // 36 MiB
constexpr int32_t kNamespaceMapperMaxSize = 3 * 128 * 1024; // 384 KiB
constexpr int32_t kCorpusMapperMaxSize = 3 * 128 * 1024; // 384 KiB
-// Whether to use namespace id or namespace name to build up fingerprint for
-// document_key_mapper_ and corpus_mapper_.
-// Note: Changing this flag will require a reconstruction of the internal
-// mappers in the document store. A easy way to trigger a rebuild is to change
-// the kMagic value.
-//
-// TODO(b/259969017) Flip this flag to true at the time when we switch to use
-// persistent hash map for document_key_mapper_ so that we just need one
-// reconstruction of the internal mappers.
-constexpr bool kNamespaceIdFingerprint = false;
-
DocumentWrapper CreateDocumentWrapper(DocumentProto&& document) {
DocumentWrapper document_wrapper;
*document_wrapper.mutable_document() = std::move(document);
@@ -157,23 +146,6 @@ std::string EncodeNamespaceId(NamespaceId namespace_id) {
return encoding;
}
-std::string MakeFingerprint(NamespaceId namespace_id,
- std::string_view namespace_,
- std::string_view uri_or_schema) {
- if (!kNamespaceIdFingerprint) {
- // Using a 64-bit fingerprint to represent the key could lead to collisions.
- // But, even with 200K unique keys, the probability of collision is about
- // one-in-a-billion (https://en.wikipedia.org/wiki/Birthday_attack).
- uint64_t fprint = tc3farmhash::Fingerprint64(
- absl_ports::StrCat(namespace_, uri_or_schema));
- return fingerprint_util::GetFingerprintString(fprint);
- } else {
- return absl_ports::StrCat(EncodeNamespaceId(namespace_id),
- encode_util::EncodeIntToCString(
- tc3farmhash::Fingerprint64(uri_or_schema)));
- }
-}
-
int64_t CalculateExpirationTimestampMs(int64_t creation_timestamp_ms,
int64_t ttl_ms) {
if (ttl_ms == 0) {
@@ -236,15 +208,34 @@ std::unordered_map<NamespaceId, std::string> GetNamespaceIdsToNamespaces(
} // namespace
+std::string DocumentStore::MakeFingerprint(
+ NamespaceId namespace_id, std::string_view namespace_,
+ std::string_view uri_or_schema) const {
+ if (!namespace_id_fingerprint_) {
+ // Using a 64-bit fingerprint to represent the key could lead to collisions.
+ // But, even with 200K unique keys, the probability of collision is about
+ // one-in-a-billion (https://en.wikipedia.org/wiki/Birthday_attack).
+ uint64_t fprint = tc3farmhash::Fingerprint64(
+ absl_ports::StrCat(namespace_, uri_or_schema));
+ return fingerprint_util::GetFingerprintString(fprint);
+ } else {
+ return absl_ports::StrCat(EncodeNamespaceId(namespace_id),
+ encode_util::EncodeIntToCString(
+ tc3farmhash::Fingerprint64(uri_or_schema)));
+ }
+}
+
DocumentStore::DocumentStore(const Filesystem* filesystem,
const std::string_view base_dir,
const Clock* clock,
- const SchemaStore* schema_store)
+ const SchemaStore* schema_store,
+ bool namespace_id_fingerprint)
: filesystem_(filesystem),
base_dir_(base_dir),
clock_(*clock),
schema_store_(schema_store),
- document_validator_(schema_store) {}
+ document_validator_(schema_store),
+ namespace_id_fingerprint_(namespace_id_fingerprint) {}
libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
const DocumentProto& document, int32_t num_tokens,
@@ -271,14 +262,14 @@ DocumentStore::~DocumentStore() {
libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create(
const Filesystem* filesystem, const std::string& base_dir,
const Clock* clock, const SchemaStore* schema_store,
- bool force_recovery_and_revalidate_documents,
+ bool force_recovery_and_revalidate_documents, bool namespace_id_fingerprint,
InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(filesystem);
ICING_RETURN_ERROR_IF_NULL(clock);
ICING_RETURN_ERROR_IF_NULL(schema_store);
- auto document_store = std::unique_ptr<DocumentStore>(
- new DocumentStore(filesystem, base_dir, clock, schema_store));
+ auto document_store = std::unique_ptr<DocumentStore>(new DocumentStore(
+ filesystem, base_dir, clock, schema_store, namespace_id_fingerprint));
ICING_ASSIGN_OR_RETURN(
DataLoss data_loss,
document_store->Initialize(force_recovery_and_revalidate_documents,
@@ -386,7 +377,8 @@ libtextclassifier3::Status DocumentStore::InitializeExistingDerivedFiles() {
absl_ports::StrCat("Couldn't read: ", MakeHeaderFilename(base_dir_)));
}
- if (header.magic != DocumentStore::Header::kMagic) {
+ if (header.magic !=
+ DocumentStore::Header::GetCurrentMagic(namespace_id_fingerprint_)) {
return absl_ports::InternalError(absl_ports::StrCat(
"Invalid header kMagic for file: ", MakeHeaderFilename(base_dir_)));
}
@@ -859,7 +851,8 @@ bool DocumentStore::HeaderExists() {
libtextclassifier3::Status DocumentStore::UpdateHeader(const Crc32& checksum) {
// Write the header
DocumentStore::Header header;
- header.magic = DocumentStore::Header::kMagic;
+ header.magic =
+ DocumentStore::Header::GetCurrentMagic(namespace_id_fingerprint_);
header.checksum = checksum.Get();
// This should overwrite the header.
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index 3e02636..7c414d7 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -59,13 +59,19 @@ namespace lib {
class DocumentStore {
public:
struct Header {
- static constexpr int32_t kMagic = 0x746f7265;
+ static int32_t GetCurrentMagic(bool namespace_id_fingerprint) {
+ return namespace_id_fingerprint ? kNewMagic : kOldMagic;
+ }
// Holds the magic as a quick sanity check against file corruption.
int32_t magic;
// Checksum of the DocumentStore's sub-component's checksums.
uint32_t checksum;
+
+ private:
+ static constexpr int32_t kOldMagic = 0x746f7265;
+ static constexpr int32_t kNewMagic = 0x1b99c8b0;
};
struct OptimizeInfo {
@@ -136,6 +142,7 @@ class DocumentStore {
const Filesystem* filesystem, const std::string& base_dir,
const Clock* clock, const SchemaStore* schema_store,
bool force_recovery_and_revalidate_documents = false,
+ bool namespace_id_fingerprint = false,
InitializeStatsProto* initialize_stats = nullptr);
// Returns the maximum DocumentId that the DocumentStore has assigned. If
@@ -472,7 +479,8 @@ class DocumentStore {
private:
// Use DocumentStore::Create() to instantiate.
DocumentStore(const Filesystem* filesystem, std::string_view base_dir,
- const Clock* clock, const SchemaStore* schema_store);
+ const Clock* clock, const SchemaStore* schema_store,
+ bool namespace_id_fingerprint);
const Filesystem* const filesystem_;
const std::string base_dir_;
@@ -485,6 +493,10 @@ class DocumentStore {
// Used to validate incoming documents
DocumentValidator document_validator_;
+ // Whether to use namespace id or namespace name to build up fingerprint for
+ // document_key_mapper_ and corpus_mapper_.
+ bool namespace_id_fingerprint_;
+
// A log used to store all documents, it serves as a ground truth of doc
// store. key_mapper_ and document_id_mapper_ can be regenerated from it.
std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log_;
@@ -733,6 +745,13 @@ class DocumentStore {
libtextclassifier3::StatusOr<
google::protobuf::RepeatedPtrField<DocumentDebugInfoProto::CorpusInfo>>
CollectCorpusInfo() const;
+
+ // Build fingerprint for the keys of document_key_mapper_ and corpus_mapper_.
+ // Note that namespace_id_fingerprint_ controls the way that a fingerprint is
+ // built.
+ std::string MakeFingerprint(NamespaceId namespace_id,
+ std::string_view namespace_,
+ std::string_view uri_or_schema) const;
};
} // namespace lib
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
index a115e11..81da191 100644
--- a/icing/store/document-store_test.cc
+++ b/icing/store/document-store_test.cc
@@ -207,7 +207,8 @@ class DocumentStoreTest : public ::testing::Test {
const std::string header_file =
absl_ports::StrCat(document_store_dir_, "/document_store_header");
DocumentStore::Header header;
- header.magic = DocumentStore::Header::kMagic;
+ header.magic = DocumentStore::Header::GetCurrentMagic(
+ /*namespace_id_fingerprint=*/false);
header.checksum = 10; // Arbitrary garbage checksum
filesystem_.DeleteFile(header_file.c_str());
filesystem_.Write(header_file.c_str(), &header, sizeof(header));
@@ -3285,10 +3286,10 @@ TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) {
InitializeStatsProto initialize_stats;
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- &initialize_stats));
+ DocumentStore::Create(
+ &filesystem_, document_store_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, &initialize_stats));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
// The document log is using the legacy v0 format so that a migration is
@@ -3489,10 +3490,10 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) {
InitializeStatsProto initialize_stats;
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get(),
- /*force_recovery_and_revalidate_documents=*/true,
- &initialize_stats));
+ DocumentStore::Create(
+ &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/true,
+ /*namespace_id_fingerprint=*/false, &initialize_stats));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -3875,10 +3876,10 @@ TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) {
InitializeStatsProto initialize_stats;
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir, &fake_clock_,
- schema_store.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- &initialize_stats));
+ DocumentStore::Create(
+ &filesystem_, document_store_dir, &fake_clock_, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, &initialize_stats));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
diff --git a/icing/tokenization/icu/icu-language-segmenter-factory.cc b/icing/tokenization/icu/icu-language-segmenter-factory.cc
index 363bc6d..7b095b4 100644
--- a/icing/tokenization/icu/icu-language-segmenter-factory.cc
+++ b/icing/tokenization/icu/icu-language-segmenter-factory.cc
@@ -47,7 +47,7 @@ libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter>> Create(
<< " not supported. Converting to locale " << ULOC_US;
options.locale = ULOC_US;
}
- return std::make_unique<IcuLanguageSegmenter>(std::move(options.locale));
+ return IcuLanguageSegmenter::Create(std::move(options.locale));
}
} // namespace language_segmenter_factory
diff --git a/icing/tokenization/icu/icu-language-segmenter.cc b/icing/tokenization/icu/icu-language-segmenter.cc
index fd790cf..59bcc18 100644
--- a/icing/tokenization/icu/icu-language-segmenter.cc
+++ b/icing/tokenization/icu/icu-language-segmenter.cc
@@ -24,6 +24,7 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/mutex.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/util/character-iterator.h"
#include "icing/util/i18n-utils.h"
@@ -48,9 +49,11 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
// INTERNAL_ERROR if unable to create
static libtextclassifier3::StatusOr<
std::unique_ptr<LanguageSegmenter::Iterator>>
- Create(std::string_view text, std::string_view locale) {
+ Create(const IcuLanguageSegmenter* creator, UBreakIterator* break_iterator,
+ std::string_view text, std::string_view locale) {
std::unique_ptr<IcuLanguageSegmenterIterator> iterator(
- new IcuLanguageSegmenterIterator(text, locale));
+ new IcuLanguageSegmenterIterator(creator, break_iterator, text,
+ locale));
if (iterator->Initialize()) {
return iterator;
}
@@ -58,8 +61,8 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
}
~IcuLanguageSegmenterIterator() {
- ubrk_close(break_iterator_);
utext_close(u_text_);
+ creator_.ReturnBreakIterator(break_iterator_);
}
// Advances to the next term. Returns false if it has reached the end.
@@ -244,9 +247,12 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
}
private:
- explicit IcuLanguageSegmenterIterator(std::string_view text,
+ explicit IcuLanguageSegmenterIterator(const IcuLanguageSegmenter* creator,
+ UBreakIterator* break_iterator,
+ std::string_view text,
std::string_view locale)
- : break_iterator_(nullptr),
+ : creator_(*creator),
+ break_iterator_(break_iterator),
text_(text),
locale_(locale),
u_text_(nullptr),
@@ -256,13 +262,14 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
// Returns true on success
bool Initialize() {
+ if (break_iterator_ == nullptr) {
+ return false;
+ }
UErrorCode status = U_ZERO_ERROR;
u_text_ = utext_openUTF8(nullptr, text_.data(), text_.length(), &status);
if (u_text_ == nullptr) {
return false;
}
- break_iterator_ = ubrk_open(UBRK_WORD, locale_.data(), /*text=*/nullptr,
- /*textLength=*/0, &status);
ubrk_setUText(break_iterator_, u_text_, &status);
return !U_FAILURE(status);
}
@@ -290,9 +297,11 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
term_start_index_ = 0;
}
+ const IcuLanguageSegmenter& creator_; // Does not own.
+
// The underlying class that does the segmentation, ubrk_close() must be
// called after using.
- UBreakIterator* break_iterator_;
+ UBreakIterator* break_iterator_; // Does not own
// Text to be segmented
std::string_view text_;
@@ -321,19 +330,62 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
int term_end_index_exclusive_;
};
-IcuLanguageSegmenter::IcuLanguageSegmenter(std::string locale)
- : locale_(std::move(locale)) {}
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IcuLanguageSegmenter>>
+IcuLanguageSegmenter::Create(std::string&& locale) {
+ UErrorCode status = U_ZERO_ERROR;
+ UBreakIterator* break_iterator = ubrk_open(
+ UBRK_WORD, locale.c_str(), /*text=*/nullptr, /*textLength=*/0, &status);
+ if (U_FAILURE(status) || break_iterator == nullptr) {
+ return absl_ports::AbortedError(
+ "Unable to create ICU break_iterator for language segmentation");
+ }
+ return std::unique_ptr<IcuLanguageSegmenter>(
+ new IcuLanguageSegmenter(std::move(locale), break_iterator));
+}
+
+UBreakIterator* IcuLanguageSegmenter::ProduceBreakIterator() const {
+ UBreakIterator* itr = nullptr;
+ {
+ absl_ports::unique_lock l(&mutex_);
+ if (cached_break_iterator_ != nullptr) {
+ itr = cached_break_iterator_;
+ cached_break_iterator_ = nullptr;
+ }
+ }
+ if (itr == nullptr) {
+ UErrorCode status = U_ZERO_ERROR;
+ itr = ubrk_open(UBRK_WORD, locale_.c_str(), /*text=*/nullptr,
+ /*textLength=*/0, &status);
+ if (U_FAILURE(status)) {
+ itr = nullptr;
+ }
+ }
+ return itr;
+}
+
+void IcuLanguageSegmenter::ReturnBreakIterator(UBreakIterator* itr) const {
+ {
+ absl_ports::unique_lock l(&mutex_);
+ if (cached_break_iterator_ == nullptr) {
+ cached_break_iterator_ = itr;
+ return;
+ }
+ }
+ ubrk_close(itr);
+}
libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter::Iterator>>
IcuLanguageSegmenter::Segment(const std::string_view text,
LanguageSegmenter::AccessType) const {
- return IcuLanguageSegmenterIterator::Create(text, locale_);
+ return IcuLanguageSegmenterIterator::Create(this, ProduceBreakIterator(),
+ text, locale_);
}
libtextclassifier3::StatusOr<std::vector<std::string_view>>
IcuLanguageSegmenter::GetAllTerms(const std::string_view text) const {
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<LanguageSegmenter::Iterator> iterator,
- IcuLanguageSegmenterIterator::Create(text, locale_));
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<LanguageSegmenter::Iterator> iterator,
+ Segment(text, LanguageSegmenter::AccessType::kForwardIterator));
std::vector<std::string_view> terms;
while (iterator->Advance()) {
terms.push_back(iterator->GetTerm());
diff --git a/icing/tokenization/icu/icu-language-segmenter.h b/icing/tokenization/icu/icu-language-segmenter.h
index f9cfbcb..e22c5d2 100644
--- a/icing/tokenization/icu/icu-language-segmenter.h
+++ b/icing/tokenization/icu/icu-language-segmenter.h
@@ -22,7 +22,9 @@
#include <vector>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/mutex.h"
#include "icing/tokenization/language-segmenter.h"
+#include "unicode/ubrk.h"
namespace icing {
namespace lib {
@@ -41,7 +43,14 @@ namespace lib {
// class. Other special tokenization logic will be in each tokenizer.
class IcuLanguageSegmenter : public LanguageSegmenter {
public:
- explicit IcuLanguageSegmenter(std::string locale);
+ static libtextclassifier3::StatusOr<std::unique_ptr<IcuLanguageSegmenter>>
+ Create(std::string&& locale);
+
+ ~IcuLanguageSegmenter() override {
+ if (cached_break_iterator_ != nullptr) {
+ ubrk_close(cached_break_iterator_);
+ }
+ }
IcuLanguageSegmenter(const IcuLanguageSegmenter&) = delete;
IcuLanguageSegmenter& operator=(const IcuLanguageSegmenter&) = delete;
@@ -69,8 +78,32 @@ class IcuLanguageSegmenter : public LanguageSegmenter {
std::string_view text) const override;
private:
+ // Declared a friend so that it can call AcceptBreakIterator.
+ friend class IcuLanguageSegmenterIterator;
+
+ explicit IcuLanguageSegmenter(std::string&& locale, UBreakIterator* iterator)
+ : locale_(std::move(locale)), cached_break_iterator_(iterator) {}
+
+ // Returns a UBreakIterator that the caller owns.
+ // If cached_break_iterator_ is non-null, transfers ownership to caller and
+ // sets cached_break_iterator_ to null.
+ // If cached_break_iterator is null, creates a new UBreakIterator and
+ // transfers ownership to caller.
+ UBreakIterator* ProduceBreakIterator() const;
+
+ // Caller transfers ownership of itr to IcuLanguageSegmenter.
+ // If cached_break_iterator_ is null, itr becomes the cached_break_iterator_
+ // If cached_break_iterator_ is non-null, then itr will be closed.
+ void ReturnBreakIterator(UBreakIterator* itr) const;
+
// Used to help segment text
const std::string locale_;
+
+ // The underlying class that does the segmentation, ubrk_close() must be
+ // called after using.
+ mutable UBreakIterator* cached_break_iterator_ ICING_LOCKS_EXCLUDED(mutex_);
+
+ mutable absl_ports::shared_mutex mutex_;
};
} // namespace lib
diff --git a/icing/tokenization/icu/icu-language-segmenter_test.cc b/icing/tokenization/icu/icu-language-segmenter_test.cc
index c88b992..d1bf5c6 100644
--- a/icing/tokenization/icu/icu-language-segmenter_test.cc
+++ b/icing/tokenization/icu/icu-language-segmenter_test.cc
@@ -1352,6 +1352,53 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, QuerySyntax) {
"subproperty2", ":", "term3"));
}
+TEST_P(IcuLanguageSegmenterAllLocalesTest, MultipleLangSegmentersTest) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<LanguageSegmenter::Iterator> iterator_one,
+ language_segmenter->Segment(
+ "foo bar baz", LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<LanguageSegmenter::Iterator> iterator_two,
+ language_segmenter->Segment(
+ "abra kadabra alakazam",
+ LanguageSegmenter::AccessType::kForwardIterator));
+
+ ASSERT_TRUE(iterator_one->Advance());
+ ASSERT_TRUE(iterator_two->Advance());
+ EXPECT_THAT(iterator_one->GetTerm(), Eq("foo"));
+ EXPECT_THAT(iterator_two->GetTerm(), Eq("abra"));
+
+ ASSERT_TRUE(iterator_one->Advance());
+ ASSERT_TRUE(iterator_two->Advance());
+ EXPECT_THAT(iterator_one->GetTerm(), Eq(" "));
+ EXPECT_THAT(iterator_two->GetTerm(), Eq(" "));
+
+ ASSERT_TRUE(iterator_one->Advance());
+ EXPECT_THAT(iterator_one->GetTerm(), Eq("bar"));
+ EXPECT_THAT(iterator_two->GetTerm(), Eq(" "));
+ ASSERT_TRUE(iterator_two->Advance());
+ EXPECT_THAT(iterator_one->GetTerm(), Eq("bar"));
+ EXPECT_THAT(iterator_two->GetTerm(), Eq("kadabra"));
+
+ ASSERT_TRUE(iterator_one->Advance());
+ ASSERT_TRUE(iterator_two->Advance());
+ EXPECT_THAT(iterator_one->GetTerm(), Eq(" "));
+ EXPECT_THAT(iterator_two->GetTerm(), Eq(" "));
+
+ ASSERT_TRUE(iterator_two->Advance());
+ ASSERT_TRUE(iterator_one->Advance());
+ EXPECT_THAT(iterator_one->GetTerm(), Eq("baz"));
+ EXPECT_THAT(iterator_two->GetTerm(), Eq("alakazam"));
+
+ ASSERT_FALSE(iterator_two->Advance());
+ ASSERT_FALSE(iterator_one->Advance());
+}
+
INSTANTIATE_TEST_SUITE_P(
LocaleName, IcuLanguageSegmenterAllLocalesTest,
testing::Values(ULOC_US, ULOC_UK, ULOC_CANADA, ULOC_CANADA_FRENCH,
diff --git a/icing/tokenization/rfc822-tokenizer_test.cc b/icing/tokenization/rfc822-tokenizer_test.cc
index 6b95a07..e1a7fc8 100644
--- a/icing/tokenization/rfc822-tokenizer_test.cc
+++ b/icing/tokenization/rfc822-tokenizer_test.cc
@@ -21,10 +21,7 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/testing/common-matchers.h"
-#include "icing/testing/jni-test-helpers.h"
-#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/language-segmenter.h"
-#include "unicode/uloc.h"
namespace icing {
namespace lib {
@@ -32,21 +29,7 @@ namespace {
using ::testing::ElementsAre;
using ::testing::IsEmpty;
-class Rfc822TokenizerTest : public testing::Test {
- protected:
- void SetUp() override {
- jni_cache_ = GetTestJniCache();
- language_segmenter_factory::SegmenterOptions options(ULOC_US,
- jni_cache_.get());
- ICING_ASSERT_OK_AND_ASSIGN(
- language_segmenter_,
- language_segmenter_factory::Create(std::move(options)));
- }
- std::unique_ptr<const JniCache> jni_cache_;
- std::unique_ptr<LanguageSegmenter> language_segmenter_;
-};
-
-TEST_F(Rfc822TokenizerTest, StartingState) {
+TEST(Rfc822TokenizerTest, StartingState) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = "a@g.c";
auto token_iterator =
@@ -59,7 +42,7 @@ TEST_F(Rfc822TokenizerTest, StartingState) {
ASSERT_THAT(token_iterator->GetTokens(), Not(IsEmpty()));
}
-TEST_F(Rfc822TokenizerTest, EmptyMiddleToken) {
+TEST(Rfc822TokenizerTest, EmptyMiddleToken) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string s("<alex>,,<tom>");
@@ -77,7 +60,7 @@ TEST_F(Rfc822TokenizerTest, EmptyMiddleToken) {
EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "tom"))));
}
-TEST_F(Rfc822TokenizerTest, Simple) {
+TEST(Rfc822TokenizerTest, Simple) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string_view s("<你alex@google.com>");
@@ -94,7 +77,7 @@ TEST_F(Rfc822TokenizerTest, Simple) {
EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
-TEST_F(Rfc822TokenizerTest, Small) {
+TEST(Rfc822TokenizerTest, Small) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string s = "\"a\"";
@@ -127,7 +110,7 @@ TEST_F(Rfc822TokenizerTest, Small) {
EqualsToken(Token::Type::RFC822_COMMENT, "a"))));
}
-TEST_F(Rfc822TokenizerTest, PB) {
+TEST(Rfc822TokenizerTest, PB) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string_view s("peanut (comment) butter, <alex@google.com>");
@@ -154,7 +137,7 @@ TEST_F(Rfc822TokenizerTest, PB) {
EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
-TEST_F(Rfc822TokenizerTest, NoBrackets) {
+TEST(Rfc822TokenizerTest, NoBrackets) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string_view s("alex@google.com");
@@ -171,7 +154,7 @@ TEST_F(Rfc822TokenizerTest, NoBrackets) {
EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"))));
}
-TEST_F(Rfc822TokenizerTest, TwoAddresses) {
+TEST(Rfc822TokenizerTest, TwoAddresses) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string_view s("<你alex@google.com>; <alexsav@gmail.com>");
@@ -195,7 +178,7 @@ TEST_F(Rfc822TokenizerTest, TwoAddresses) {
EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
-TEST_F(Rfc822TokenizerTest, Comment) {
+TEST(Rfc822TokenizerTest, Comment) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string_view s("(a comment) <alex@google.com>");
@@ -214,7 +197,7 @@ TEST_F(Rfc822TokenizerTest, Comment) {
EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
-TEST_F(Rfc822TokenizerTest, NameAndComment) {
+TEST(Rfc822TokenizerTest, NameAndComment) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string_view s("\"a name\" also a name <alex@google.com>");
@@ -237,7 +220,7 @@ TEST_F(Rfc822TokenizerTest, NameAndComment) {
}
// Test from tokenizer_test.cc.
-TEST_F(Rfc822TokenizerTest, Rfc822SanityCheck) {
+TEST(Rfc822TokenizerTest, Rfc822SanityCheck) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string addr1("A name (A comment) <address@domain.com>");
@@ -297,7 +280,7 @@ TEST_F(Rfc822TokenizerTest, Rfc822SanityCheck) {
}
// Tests from rfc822 converter.
-TEST_F(Rfc822TokenizerTest, SimpleRfcText) {
+TEST(Rfc822TokenizerTest, SimpleRfcText) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string test_string =
"foo@google.com,bar@google.com,baz@google.com,foo+hello@google.com,baz@"
@@ -349,7 +332,7 @@ TEST_F(Rfc822TokenizerTest, SimpleRfcText) {
EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "corp.google.com"))));
}
-TEST_F(Rfc822TokenizerTest, ComplicatedRfcText) {
+TEST(Rfc822TokenizerTest, ComplicatedRfcText) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string test_string =
R"raw("Weird, But&(Also)\\Valid" Name (!With, "an" \\odd\\ cmt too¡) <Foo B(a)r,Baz@g.co>
@@ -390,7 +373,7 @@ TEST_F(Rfc822TokenizerTest, ComplicatedRfcText) {
EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
-TEST_F(Rfc822TokenizerTest, FromHtmlBugs) {
+TEST(Rfc822TokenizerTest, FromHtmlBugs) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
// This input used to cause HTML parsing exception. We don't do HTML parsing
// any more (b/8388100) so we are just checking that it does not crash and
@@ -422,7 +405,7 @@ TEST_F(Rfc822TokenizerTest, FromHtmlBugs) {
EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
-TEST_F(Rfc822TokenizerTest, EmptyComponentsTest) {
+TEST(Rfc822TokenizerTest, EmptyComponentsTest) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
EXPECT_THAT(rfc822_tokenizer.TokenizeAll(""),
IsOkAndHolds(testing::IsEmpty()));
@@ -463,7 +446,7 @@ TEST_F(Rfc822TokenizerTest, EmptyComponentsTest) {
EqualsToken(Token::Type::RFC822_COMMENT, "comment"))));
}
-TEST_F(Rfc822TokenizerTest, NameTest) {
+TEST(Rfc822TokenizerTest, NameTest) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
// Name spread between address or comment.
@@ -529,7 +512,7 @@ TEST_F(Rfc822TokenizerTest, NameTest) {
EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "addr"))));
}
-TEST_F(Rfc822TokenizerTest, CommentEscapeTest) {
+TEST(Rfc822TokenizerTest, CommentEscapeTest) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
// '(', ')', '\\' chars should be escaped. All other escaped chars should be
// unescaped.
@@ -564,7 +547,7 @@ TEST_F(Rfc822TokenizerTest, CommentEscapeTest) {
EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"))));
}
-TEST_F(Rfc822TokenizerTest, QuoteEscapeTest) {
+TEST(Rfc822TokenizerTest, QuoteEscapeTest) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
// All names that include non-alphanumeric chars must be quoted and have '\\'
// and '"' chars escaped.
@@ -593,7 +576,7 @@ TEST_F(Rfc822TokenizerTest, QuoteEscapeTest) {
EqualsToken(Token::Type::RFC822_HOST_ADDRESS, R"(n\\a\m\"e)"))));
}
-TEST_F(Rfc822TokenizerTest, UnterminatedComponentTest) {
+TEST(Rfc822TokenizerTest, UnterminatedComponentTest) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
EXPECT_THAT(
@@ -661,7 +644,7 @@ TEST_F(Rfc822TokenizerTest, UnterminatedComponentTest) {
EqualsToken(Token::Type::RFC822_COMMENT, "comment"))));
}
-TEST_F(Rfc822TokenizerTest, Tokenize) {
+TEST(Rfc822TokenizerTest, Tokenize) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text =
@@ -714,7 +697,7 @@ TEST_F(Rfc822TokenizerTest, Tokenize) {
EqualsToken(Token::Type::RFC822_COMMENT, "something"))));
}
-TEST_F(Rfc822TokenizerTest, EdgeCases) {
+TEST(Rfc822TokenizerTest, EdgeCases) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
// Text to trigger the scenario where you have a non-alphabetic followed
@@ -776,7 +759,7 @@ TEST_F(Rfc822TokenizerTest, EdgeCases) {
EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
-TEST_F(Rfc822TokenizerTest, NumberInAddress) {
+TEST(Rfc822TokenizerTest, NumberInAddress) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = "<3alex@google.com>";
EXPECT_THAT(
@@ -791,7 +774,7 @@ TEST_F(Rfc822TokenizerTest, NumberInAddress) {
EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
}
-TEST_F(Rfc822TokenizerTest, DoubleQuoteDoubleSlash) {
+TEST(Rfc822TokenizerTest, DoubleQuoteDoubleSlash) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = R"("alex\"")";
EXPECT_THAT(
@@ -813,7 +796,7 @@ TEST_F(Rfc822TokenizerTest, DoubleQuoteDoubleSlash) {
EqualsToken(Token::Type::RFC822_HOST_ADDRESS, R"(alex\\\a)"))));
}
-TEST_F(Rfc822TokenizerTest, TwoEmails) {
+TEST(Rfc822TokenizerTest, TwoEmails) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = "tjbarron@google.com alexsav@google.com";
EXPECT_THAT(
@@ -835,7 +818,7 @@ TEST_F(Rfc822TokenizerTest, TwoEmails) {
EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"))));
}
-TEST_F(Rfc822TokenizerTest, BackSlashes) {
+TEST(Rfc822TokenizerTest, BackSlashes) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = R"("\name")";
EXPECT_THAT(
@@ -859,7 +842,7 @@ TEST_F(Rfc822TokenizerTest, BackSlashes) {
EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "foo\\@gmail"))));
}
-TEST_F(Rfc822TokenizerTest, BigWhitespace) {
+TEST(Rfc822TokenizerTest, BigWhitespace) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = "\"quoted\" <address>";
EXPECT_THAT(
@@ -872,7 +855,7 @@ TEST_F(Rfc822TokenizerTest, BigWhitespace) {
EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "address"))));
}
-TEST_F(Rfc822TokenizerTest, AtSignFirst) {
+TEST(Rfc822TokenizerTest, AtSignFirst) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = "\"@foo\"";
EXPECT_THAT(
@@ -884,7 +867,7 @@ TEST_F(Rfc822TokenizerTest, AtSignFirst) {
EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "foo"))));
}
-TEST_F(Rfc822TokenizerTest, SlashThenUnicode) {
+TEST(Rfc822TokenizerTest, SlashThenUnicode) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = R"("quoted\你cjk")";
EXPECT_THAT(
@@ -897,7 +880,7 @@ TEST_F(Rfc822TokenizerTest, SlashThenUnicode) {
EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "quoted\\你cjk"))));
}
-TEST_F(Rfc822TokenizerTest, AddressEmptyAddress) {
+TEST(Rfc822TokenizerTest, AddressEmptyAddress) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = "<address> <> Name";
EXPECT_THAT(
@@ -910,7 +893,7 @@ TEST_F(Rfc822TokenizerTest, AddressEmptyAddress) {
EqualsToken(Token::Type::RFC822_NAME, "Name"))));
}
-TEST_F(Rfc822TokenizerTest, ProperComment) {
+TEST(Rfc822TokenizerTest, ProperComment) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = "(comment)alex@google.com";
EXPECT_THAT(
@@ -926,7 +909,7 @@ TEST_F(Rfc822TokenizerTest, ProperComment) {
EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"))));
}
-TEST_F(Rfc822TokenizerTest, SmallNameToEmail) {
+TEST(Rfc822TokenizerTest, SmallNameToEmail) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = "a@g.c,b@g.c";
EXPECT_THAT(rfc822_tokenizer.TokenizeAll(text),
@@ -958,7 +941,7 @@ TEST_F(Rfc822TokenizerTest, SmallNameToEmail) {
EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "g.c"))));
}
-TEST_F(Rfc822TokenizerTest, AtSignLast) {
+TEST(Rfc822TokenizerTest, AtSignLast) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string_view text("<alex@>, tim@");
EXPECT_THAT(
@@ -974,13 +957,13 @@ TEST_F(Rfc822TokenizerTest, AtSignLast) {
EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "tim"))));
}
-TEST_F(Rfc822TokenizerTest, Commas) {
+TEST(Rfc822TokenizerTest, Commas) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = ",,,,,,,,,,,,,,,,,,,,,,,,,,;";
EXPECT_THAT(rfc822_tokenizer.TokenizeAll(text), IsOkAndHolds(IsEmpty()));
}
-TEST_F(Rfc822TokenizerTest, ResetToTokenStartingAfter) {
+TEST(Rfc822TokenizerTest, ResetToTokenStartingAfter) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = "a@g.c,b@g.c";
auto token_iterator =
@@ -999,7 +982,7 @@ TEST_F(Rfc822TokenizerTest, ResetToTokenStartingAfter) {
ASSERT_FALSE(token_iterator->ResetToTokenStartingAfter(6));
}
-TEST_F(Rfc822TokenizerTest, ResetToTokenEndingBefore) {
+TEST(Rfc822TokenizerTest, ResetToTokenEndingBefore) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = "a@g.c,b@g.c";
auto token_iterator =
diff --git a/icing/util/tokenized-document.cc b/icing/util/tokenized-document.cc
index 1c11c3c..004181e 100644
--- a/icing/util/tokenized-document.cc
+++ b/icing/util/tokenized-document.cc
@@ -20,6 +20,7 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/proto/document.pb.h"
+#include "icing/schema/joinable-property.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/tokenization/language-segmenter.h"
@@ -74,6 +75,9 @@ TokenizedDocument::Create(const SchemaStore* schema_store,
ICING_ASSIGN_OR_RETURN(SectionGroup section_group,
schema_store->ExtractSections(document));
+ ICING_ASSIGN_OR_RETURN(JoinablePropertyGroup joinable_property_group,
+ schema_store->ExtractJoinableProperties(document));
+
// Tokenize string sections
ICING_ASSIGN_OR_RETURN(
std::vector<TokenizedSection> tokenized_string_sections,
@@ -82,7 +86,8 @@ TokenizedDocument::Create(const SchemaStore* schema_store,
return TokenizedDocument(std::move(document),
std::move(tokenized_string_sections),
- std::move(section_group.integer_sections));
+ std::move(section_group.integer_sections),
+ std::move(joinable_property_group));
}
} // namespace lib
diff --git a/icing/util/tokenized-document.h b/icing/util/tokenized-document.h
index 5729df2..7cc34e3 100644
--- a/icing/util/tokenized-document.h
+++ b/icing/util/tokenized-document.h
@@ -21,6 +21,7 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/proto/document.pb.h"
+#include "icing/schema/joinable-property.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/tokenization/language-segmenter.h"
@@ -62,19 +63,27 @@ class TokenizedDocument {
return integer_sections_;
}
+ const std::vector<JoinableProperty<std::string_view>>&
+ qualified_id_join_properties() const {
+ return joinable_property_group_.qualified_id_properties;
+ }
+
private:
// Use TokenizedDocument::Create() to instantiate.
explicit TokenizedDocument(
DocumentProto&& document,
std::vector<TokenizedSection>&& tokenized_string_sections,
- std::vector<Section<int64_t>>&& integer_sections)
+ std::vector<Section<int64_t>>&& integer_sections,
+ JoinablePropertyGroup&& joinable_property_group)
: document_(std::move(document)),
tokenized_string_sections_(std::move(tokenized_string_sections)),
- integer_sections_(std::move(integer_sections)) {}
+ integer_sections_(std::move(integer_sections)),
+ joinable_property_group_(std::move(joinable_property_group)) {}
DocumentProto document_;
std::vector<TokenizedSection> tokenized_string_sections_;
std::vector<Section<int64_t>> integer_sections_;
+ JoinablePropertyGroup joinable_property_group_;
};
} // namespace lib
diff --git a/icing/util/tokenized-document_test.cc b/icing/util/tokenized-document_test.cc
index 3497bef..f2a9214 100644
--- a/icing/util/tokenized-document_test.cc
+++ b/icing/util/tokenized-document_test.cc
@@ -27,6 +27,7 @@
#include "icing/proto/schema.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/schema-builder.h"
+#include "icing/schema/joinable-property.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/testing/common-matchers.h"
@@ -50,19 +51,29 @@ using ::testing::IsEmpty;
using ::testing::SizeIs;
// schema types
-constexpr std::string_view kFakeType = "FakeType";
+static constexpr std::string_view kFakeType = "FakeType";
// Indexable properties and section Id. Section Id is determined by the
// lexicographical order of indexable property path.
-constexpr std::string_view kIndexableIntegerProperty1 = "indexableInteger1";
-constexpr std::string_view kIndexableIntegerProperty2 = "indexableInteger2";
-constexpr std::string_view kStringExactProperty = "stringExact";
-constexpr std::string_view kStringPrefixProperty = "stringPrefix";
-
-constexpr SectionId kIndexableInteger1SectionId = 0;
-constexpr SectionId kIndexableInteger2SectionId = 1;
-constexpr SectionId kStringExactSectionId = 2;
-constexpr SectionId kStringPrefixSectionId = 3;
+static constexpr std::string_view kIndexableIntegerProperty1 =
+ "indexableInteger1";
+static constexpr std::string_view kIndexableIntegerProperty2 =
+ "indexableInteger2";
+static constexpr std::string_view kStringExactProperty = "stringExact";
+static constexpr std::string_view kStringPrefixProperty = "stringPrefix";
+
+static constexpr SectionId kIndexableInteger1SectionId = 0;
+static constexpr SectionId kIndexableInteger2SectionId = 1;
+static constexpr SectionId kStringExactSectionId = 2;
+static constexpr SectionId kStringPrefixSectionId = 3;
+
+// Joinable properties and joinable property id. Joinable property id is
+// determined by the lexicographical order of joinable property path.
+static constexpr std::string_view kQualifiedId1 = "qualifiedId1";
+static constexpr std::string_view kQualifiedId2 = "qualifiedId2";
+
+static constexpr JoinablePropertyId kQualifiedId1JoinablePropertyId = 0;
+static constexpr JoinablePropertyId kQualifiedId2JoinablePropertyId = 1;
const SectionMetadata kIndexableInteger1SectionMetadata(
kIndexableInteger1SectionId, TYPE_INT64, TOKENIZER_NONE, TERM_MATCH_UNKNOWN,
@@ -80,7 +91,15 @@ const SectionMetadata kStringPrefixSectionMetadata(
kStringPrefixSectionId, TYPE_STRING, TOKENIZER_PLAIN, TERM_MATCH_PREFIX,
NUMERIC_MATCH_UNKNOWN, std::string(kStringPrefixProperty));
-// Other non-indexable properties.
+const JoinablePropertyMetadata kQualifiedId1JoinablePropertyMetadata(
+ kQualifiedId1JoinablePropertyId, TYPE_STRING,
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID, std::string(kQualifiedId1));
+
+const JoinablePropertyMetadata kQualifiedId2JoinablePropertyMetadata(
+ kQualifiedId2JoinablePropertyId, TYPE_STRING,
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID, std::string(kQualifiedId2));
+
+// Other non-indexable/joinable properties.
constexpr std::string_view kUnindexedStringProperty = "unindexedString";
constexpr std::string_view kUnindexedIntegerProperty = "unindexedInteger";
@@ -137,6 +156,16 @@ class TokenizedDocumentTest : public ::testing::Test {
.SetName(kStringPrefixProperty)
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kQualifiedId1)
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kQualifiedId2)
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
ICING_ASSERT_OK(schema_store_->SetSchema(schema));
@@ -177,6 +206,8 @@ TEST_F(TokenizedDocumentTest, CreateAll) {
.AddInt64Property(std::string(kUnindexedIntegerProperty), 789)
.AddInt64Property(std::string(kIndexableIntegerProperty1), 1, 2, 3)
.AddInt64Property(std::string(kIndexableIntegerProperty2), 456)
+ .AddStringProperty(std::string(kQualifiedId1), "pkg$db/ns#uri1")
+ .AddStringProperty(std::string(kQualifiedId2), "pkg$db/ns#uri2")
.Build();
ICING_ASSERT_OK_AND_ASSIGN(
@@ -210,6 +241,17 @@ TEST_F(TokenizedDocumentTest, CreateAll) {
Eq(kIndexableInteger2SectionMetadata));
EXPECT_THAT(tokenized_document.integer_sections().at(1).content,
ElementsAre(456));
+
+ // Qualified id join properties
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties(), SizeIs(2));
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties().at(0).metadata,
+ Eq(kQualifiedId1JoinablePropertyMetadata));
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties().at(0).values,
+ ElementsAre("pkg$db/ns#uri1"));
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties().at(1).metadata,
+ Eq(kQualifiedId2JoinablePropertyMetadata));
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties().at(1).values,
+ ElementsAre("pkg$db/ns#uri2"));
}
TEST_F(TokenizedDocumentTest, CreateNoIndexableIntegerProperties) {
@@ -233,6 +275,9 @@ TEST_F(TokenizedDocumentTest, CreateNoIndexableIntegerProperties) {
// integer sections
EXPECT_THAT(tokenized_document.integer_sections(), IsEmpty());
+
+ // Qualified id join properties
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties(), IsEmpty());
}
TEST_F(TokenizedDocumentTest, CreateMultipleIndexableIntegerProperties) {
@@ -266,6 +311,9 @@ TEST_F(TokenizedDocumentTest, CreateMultipleIndexableIntegerProperties) {
Eq(kIndexableInteger2SectionMetadata));
EXPECT_THAT(tokenized_document.integer_sections().at(1).content,
ElementsAre(456));
+
+ // Qualified id join properties
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties(), IsEmpty());
}
TEST_F(TokenizedDocumentTest, CreateNoIndexableStringProperties) {
@@ -290,6 +338,9 @@ TEST_F(TokenizedDocumentTest, CreateNoIndexableStringProperties) {
// integer sections
EXPECT_THAT(tokenized_document.integer_sections(), IsEmpty());
+
+ // Qualified id join properties
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties(), IsEmpty());
}
TEST_F(TokenizedDocumentTest, CreateMultipleIndexableStringProperties) {
@@ -327,6 +378,73 @@ TEST_F(TokenizedDocumentTest, CreateMultipleIndexableStringProperties) {
// integer sections
EXPECT_THAT(tokenized_document.integer_sections(), IsEmpty());
+
+ // Qualified id join properties
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties(), IsEmpty());
+}
+
+TEST_F(TokenizedDocumentTest, CreateNoJoinQualifiedIdProperties) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kUnindexedStringProperty),
+ "hello world unindexed")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ EXPECT_THAT(tokenized_document.document(), EqualsProto(document));
+ EXPECT_THAT(tokenized_document.num_string_tokens(), Eq(0));
+
+ // string sections
+ EXPECT_THAT(tokenized_document.tokenized_string_sections(), IsEmpty());
+
+ // integer sections
+ EXPECT_THAT(tokenized_document.integer_sections(), IsEmpty());
+
+ // Qualified id join properties
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties(), IsEmpty());
+}
+
+TEST_F(TokenizedDocumentTest, CreateMultipleJoinQualifiedIdProperties) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kUnindexedStringProperty),
+ "hello world unindexed")
+ .AddStringProperty(std::string(kQualifiedId1), "pkg$db/ns#uri1")
+ .AddStringProperty(std::string(kQualifiedId2), "pkg$db/ns#uri2")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ EXPECT_THAT(tokenized_document.document(), EqualsProto(document));
+ EXPECT_THAT(tokenized_document.num_string_tokens(), Eq(0));
+
+ // string sections
+ EXPECT_THAT(tokenized_document.tokenized_string_sections(), IsEmpty());
+
+ // integer sections
+ EXPECT_THAT(tokenized_document.integer_sections(), IsEmpty());
+
+ // Qualified id join properties
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties(), SizeIs(2));
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties().at(0).metadata,
+ Eq(kQualifiedId1JoinablePropertyMetadata));
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties().at(0).values,
+ ElementsAre("pkg$db/ns#uri1"));
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties().at(1).metadata,
+ Eq(kQualifiedId2JoinablePropertyMetadata));
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties().at(1).values,
+ ElementsAre("pkg$db/ns#uri2"));
}
} // namespace
diff --git a/proto/icing/index/numeric/wildcard-property-storage.proto b/proto/icing/index/numeric/wildcard-property-storage.proto
new file mode 100644
index 0000000..7f02b77
--- /dev/null
+++ b/proto/icing/index/numeric/wildcard-property-storage.proto
@@ -0,0 +1,22 @@
+// Copyright 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto2";
+
+package icing.lib;
+
+// Next tag: 2
+message WildcardPropertyStorage {
+ repeated string property_entries = 1;
+}
diff --git a/proto/icing/proto/initialize.proto b/proto/icing/proto/initialize.proto
index 7fe1e6f..40a0d0c 100644
--- a/proto/icing/proto/initialize.proto
+++ b/proto/icing/proto/initialize.proto
@@ -23,7 +23,7 @@ option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";
-// Next tag: 5
+// Next tag: 7
message IcingSearchEngineOptions {
// Directory to persist files for Icing. Required.
// If Icing was previously initialized with this directory, it will reload
@@ -58,6 +58,23 @@ message IcingSearchEngineOptions {
// Optional.
optional int32 index_merge_size = 4 [default = 1048576]; // 1 MiB
+ // Whether to use namespace id or namespace name to build up fingerprint for
+ // document_key_mapper_ and corpus_mapper_ in document store.
+ // TODO(b/259969017) Flip the default value of this flag to true at the time
+ // when we switch to use persistent hash map for document_key_mapper_ so that
+ // we just need one reconstruction of the internal mappers.
+ optional bool document_store_namespace_id_fingerprint = 5;
+
+ // The threshold of the percentage of invalid documents to rebuild index
+ // during optimize, i.e. we rebuild index if and only if
+ // |invalid_documents| / |all_documents| >= optimize_rebuild_index_threshold
+ //
+ // Rebuilding the index could be faster than optimizing the index if we have
+ // removed most of the documents.
+ // Based on benchmarks, 85%~95% seems to be a good threshold for most cases.
+ //
+ // Default to 0 for better rollout of the new index optimize.
+ optional float optimize_rebuild_index_threshold = 6 [default = 0.0];
reserved 2;
}
diff --git a/proto/icing/proto/logging.proto b/proto/icing/proto/logging.proto
index feb2643..edfcf40 100644
--- a/proto/icing/proto/logging.proto
+++ b/proto/icing/proto/logging.proto
@@ -23,7 +23,7 @@ option java_multiple_files = true;
option objc_class_prefix = "ICNG";
// Stats of the top-level function IcingSearchEngine::Initialize().
-// Next tag: 13
+// Next tag: 14
message InitializeStatsProto {
// Overall time used for the function call.
optional int32 latency_ms = 1;
@@ -105,6 +105,12 @@ message InitializeStatsProto {
// - SCHEMA_CHANGES_OUT_OF_SYNC
// - IO_ERROR
optional RecoveryCause integer_index_restoration_cause = 12;
+
+ // Possible recovery causes for qualified id join index:
+ // - INCONSISTENT_WITH_GROUND_TRUTH
+ // - SCHEMA_CHANGES_OUT_OF_SYNC
+ // - IO_ERROR
+ optional RecoveryCause qualified_id_join_index_restoration_cause = 13;
}
// Stats of the top-level function IcingSearchEngine::Put().
diff --git a/proto/icing/proto/search.proto b/proto/icing/proto/search.proto
index c9e2b1d..8bdbf0c 100644
--- a/proto/icing/proto/search.proto
+++ b/proto/icing/proto/search.proto
@@ -85,7 +85,8 @@ message SearchSpecProto {
// enable testing.
// TODO(b/208654892) Remove this field once EXPERIMENTAL_ICING_ADVANCED_QUERY
// is fully supported.
- optional SearchType.Code search_type = 6 [default = ICING_RAW_QUERY];
+ optional SearchType.Code search_type = 6
+ [default = EXPERIMENTAL_ICING_ADVANCED_QUERY];
// OPTIONAL: If this field is present, join documents based on a nested
// SearchSpec.
diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt
index 232fbe0..5ff4997 100644
--- a/synced_AOSP_CL_number.txt
+++ b/synced_AOSP_CL_number.txt
@@ -1 +1 @@
-set(synced_AOSP_CL_number=-514555603)
+set(synced_AOSP_CL_number=516534290)