diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2021-08-22 10:17:49 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2021-08-22 10:17:49 +0000 |
commit | 20437efd05ffb505b36624f092e3e2d6aa834ed7 (patch) | |
tree | 55370a66f2896116815c92e4d897336cca30ea5e | |
parent | bbbb1f6b786dd46354a81bb88710ab8120240043 (diff) | |
parent | 14ee9a8eb8f3ed47f68117208626045878c943ac (diff) | |
download | icing-androidx-wear-wear-phone-interactions-release.tar.gz |
Snap for 7663505 from 14ee9a8eb8f3ed47f68117208626045878c943ac to androidx-wear-wear-phone-interactions-releaseandroidx-wear-wear-phone-interactions-release
Change-Id: I91670a47e2493c2712a3a4f9bd7a9f9a6e3d1ddc
158 files changed, 18201 insertions, 6773 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index a740924..01ee8eb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,6 +15,9 @@ cmake_minimum_required(VERSION 3.10.2) add_definitions("-DICING_REVERSE_JNI_SEGMENTATION=1") +set(VERSION_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/icing/jni.lds") +set(CMAKE_SHARED_LINKER_FLAGS + "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gc-sections -Wl,--version-script=${VERSION_SCRIPT}") set( Protobuf_PREBUILTS_DIR @@ -45,7 +48,7 @@ add_subdirectory("${Protobuf_SOURCE_DIR}/cmake" ${Protobuf_TARGET_BINARY_DIR}) # Compile libandroidicu set(ICU_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../icu/libandroidicu") set(ICU_TARGET_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/icu-target") -add_subdirectory(${ICU_SOURCE_DIR} ${ICU_TARGET_BINARY_DIR}) +add_subdirectory("${ICU_SOURCE_DIR}/static_shim" ${ICU_TARGET_BINARY_DIR}) # Glob Icing proto sources. Results look like this: icing/proto/document.proto file( diff --git a/icing/file/destructible-file.h b/icing/file/destructible-file.h new file mode 100644 index 0000000..006dcb4 --- /dev/null +++ b/icing/file/destructible-file.h @@ -0,0 +1,72 @@ +// Copyright (C) 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef ICING_FILE_DESTRUCTIBLE_FILE_H_ +#define ICING_FILE_DESTRUCTIBLE_FILE_H_ + +#include <unistd.h> + +#include <string> + +#include "icing/file/filesystem.h" +#include "icing/util/logging.h" + +namespace icing { +namespace lib { + +// A convenient RAII class which will open the specified file path for write and +// delete the underlying file upon destruction. +class DestructibleFile { + public: + explicit DestructibleFile(const std::string& filepath, + const Filesystem* filesystem) + : filesystem_(filesystem), filepath_(filepath) { + fd_ = filesystem_->OpenForWrite(filepath_.c_str()); + } + + DestructibleFile(const DestructibleFile&) = delete; + DestructibleFile(DestructibleFile&& other) : filesystem_(nullptr), fd_(-1) { + *this = std::move(other); + } + + DestructibleFile& operator=(const DestructibleFile&) = delete; + DestructibleFile& operator=(DestructibleFile&& other) { + std::swap(fd_, other.fd_); + std::swap(filesystem_, other.filesystem_); + std::swap(filepath_, other.filepath_); + return *this; + } + + ~DestructibleFile() { + if (is_valid()) { + close(fd_); + if (!filesystem_->DeleteFile(filepath_.c_str())) { + ICING_VLOG(1) << "Failed to delete file " << filepath_; + } + } + } + + bool is_valid() const { return fd_ >= 0; } + int get_fd() const { return fd_; } + + private: + const Filesystem* filesystem_; + std::string filepath_; + int fd_; +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_FILE_DESTRUCTIBLE_FILE_H_ diff --git a/icing/file/destructible-file_test.cc b/icing/file/destructible-file_test.cc new file mode 100644 index 0000000..61316d1 --- /dev/null +++ b/icing/file/destructible-file_test.cc @@ -0,0 +1,117 @@ +// Copyright (C) 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/file/destructible-file.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/file/filesystem.h" +#include "icing/testing/tmp-directory.h" + +namespace icing { +namespace lib { + +namespace { + +TEST(DestructibleFileTest, DeletesFileProperly) { + Filesystem filesystem; + std::string filepath1 = GetTestTempDir() + "/file1"; + + { + // 1. Create the file + ScopedFd sfd(filesystem.OpenForWrite(filepath1.c_str())); + ASSERT_TRUE(sfd.is_valid()); + int i = 127; + ASSERT_TRUE(filesystem.Write(sfd.get(), &i, sizeof(i))); + } + + { + // 2. Open with a Destructible file. + DestructibleFile destructible(filepath1, &filesystem); + ASSERT_TRUE(destructible.is_valid()); + } + + // 3. Ensure that the file doesn't exist. + EXPECT_FALSE(filesystem.FileExists(filepath1.c_str())); +} + +TEST(DestructibleFileTest, MoveAssignDeletesFileProperly) { + Filesystem filesystem; + std::string filepath1 = GetTestTempDir() + "/file1"; + std::string filepath2 = GetTestTempDir() + "/file2"; + + // 1. Create file1 + DestructibleFile destructible1(filepath1, &filesystem); + ASSERT_TRUE(destructible1.is_valid()); + int i = 127; + ASSERT_TRUE(filesystem.Write(destructible1.get_fd(), &i, sizeof(i))); + + { + // 2. Create file2 + DestructibleFile destructible2(filepath2, &filesystem); + ASSERT_TRUE(destructible2.is_valid()); + i = 458; + ASSERT_TRUE(filesystem.Write(destructible2.get_fd(), &i, sizeof(i))); + + // Move assign destructible2 into destructible1 + destructible1 = std::move(destructible2); + } + + // 3. 
file1 shouldn't exist because it was destroyed when destructible1 was + // move assigned to. + EXPECT_FALSE(filesystem.FileExists(filepath1.c_str())); + + // 4. file2 should still exist because it moved into destructible1 from + // destructible2. + EXPECT_TRUE(filesystem.FileExists(filepath2.c_str())); +} + +TEST(DestructibleFileTest, MoveConstructionDeletesFileProperly) { + Filesystem filesystem; + std::string filepath1 = GetTestTempDir() + "/file1"; + + // 1. Create destructible1, it'll be reconstructed soon anyways. + std::unique_ptr<DestructibleFile> destructible1; + { + // 2. Create file1 + DestructibleFile destructible2(filepath1, &filesystem); + ASSERT_TRUE(destructible2.is_valid()); + int i = 458; + ASSERT_TRUE(filesystem.Write(destructible2.get_fd(), &i, sizeof(i))); + + // Move construct destructible1 from destructible2 + destructible1 = + std::make_unique<DestructibleFile>(std::move(destructible2)); + } + + // 3. file1 should still exist because it moved into destructible1 from + // destructible2. + ASSERT_TRUE(destructible1->is_valid()); + EXPECT_TRUE(filesystem.FileExists(filepath1.c_str())); + + { + // 4. Move construct destructible3 from destructible1 + DestructibleFile destructible3(std::move(*destructible1)); + ASSERT_TRUE(destructible3.is_valid()); + } + + // 5. file1 shouldn't exist because it was destroyed when destructible3 was + // destroyed. 
+ EXPECT_FALSE(filesystem.FileExists(filepath1.c_str())); +} + +} // namespace + +} // namespace lib +} // namespace icing diff --git a/icing/file/file-backed-proto-log.h b/icing/file/file-backed-proto-log.h index 763c93b..b2b37e8 100644 --- a/icing/file/file-backed-proto-log.h +++ b/icing/file/file-backed-proto-log.h @@ -70,6 +70,7 @@ #include "icing/file/filesystem.h" #include "icing/file/memory-mapped-file.h" #include "icing/legacy/core/icing-string-util.h" +#include "icing/portable/platform.h" #include "icing/portable/zlib.h" #include "icing/util/crc32.h" #include "icing/util/data-loss.h" @@ -79,23 +80,6 @@ namespace icing { namespace lib { -namespace { - -bool IsEmptyBuffer(const char* buffer, int size) { - return std::all_of(buffer, buffer + size, - [](const char byte) { return byte == 0; }); -} - -// Helper function to get stored proto size from the metadata. -// Metadata format: 8 bits magic + 24 bits size -int GetProtoSize(int metadata) { return metadata & 0x00FFFFFF; } - -// Helper function to get stored proto magic from the metadata. -// Metadata format: 8 bits magic + 24 bits size -uint8_t GetProtoMagic(int metadata) { return metadata >> 24; } - -} // namespace - template <typename ProtoT> class FileBackedProtoLog { public: @@ -401,6 +385,28 @@ class FileBackedProtoLog { const Filesystem* filesystem, const std::string& file_path, Crc32 initial_crc, int64_t start, int64_t end); + static bool IsEmptyBuffer(const char* buffer, int size) { + return std::all_of(buffer, buffer + size, + [](const char byte) { return byte == 0; }); + } + + // Helper function to get stored proto size from the metadata. + // Metadata format: 8 bits magic + 24 bits size + static int GetProtoSize(int metadata) { return metadata & 0x00FFFFFF; } + + // Helper function to get stored proto magic from the metadata. 
+ // Metadata format: 8 bits magic + 24 bits size + static uint8_t GetProtoMagic(int metadata) { return metadata >> 24; } + + // Reads out the metadata of a proto located at file_offset from the file. + // + // Returns: + // Proto's metadata on success + // OUT_OF_RANGE_ERROR if file_offset exceeds file_size + // INTERNAL_ERROR if the metadata is invalid or any IO errors happen + static libtextclassifier3::StatusOr<int> ReadProtoMetadata( + MemoryMappedFile* mmapped_file, int64_t file_offset, int64_t file_size); + // Magic number added in front of every proto. Used when reading out protos // as a first check for corruption in each entry in the file. Even if there is // a corruption, the best we can do is roll back to our last recovery point @@ -422,20 +428,12 @@ class FileBackedProtoLog { static constexpr int kDeflateCompressionLevel = 3; // Chunks of the file to mmap at a time, so we don't mmap the entire file. - static constexpr int kMmapChunkSize = 4 * 1024; + // Only used on 32-bit devices + static constexpr int kMmapChunkSize = 4 * 1024 * 1024; // 4MiB ScopedFd fd_; const Filesystem* const filesystem_; const std::string file_path_; - - // Reads out the metadata of a proto located at file_offset from the file. - // - // Returns: - // Proto's metadata on success - // OUT_OF_RANGE_ERROR if file_offset exceeds file_size - // INTERNAL_ERROR if the metadata is invalid or any IO errors happen - static libtextclassifier3::StatusOr<int> ReadProtoMetadata( - MemoryMappedFile* mmapped_file, int64_t file_offset, int64_t file_size); std::unique_ptr<Header> header_; }; @@ -571,6 +569,7 @@ FileBackedProtoLog<ProtoT>::InitializeExistingFile(const Filesystem* filesystem, ICING_ASSIGN_OR_RETURN(Crc32 calculated_log_checksum, ComputeChecksum(filesystem, file_path, Crc32(), sizeof(Header), file_size)); + // Double check that the log checksum is the same as the one that was // persisted last time. If not, we start recovery logic. 
if (header->log_checksum != calculated_log_checksum.Get()) { @@ -631,6 +630,14 @@ libtextclassifier3::StatusOr<Crc32> FileBackedProtoLog<ProtoT>::ComputeChecksum( file_path.c_str(), static_cast<long long>(start))); } + if (end < start) { + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "Ending checksum offset of file '%s' must be greater than start " + "'%lld', was '%lld'", + file_path.c_str(), static_cast<long long>(start), + static_cast<long long>(end))); + } + int64_t file_size = filesystem->GetFileSize(file_path.c_str()); if (end > file_size) { return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( @@ -640,17 +647,41 @@ libtextclassifier3::StatusOr<Crc32> FileBackedProtoLog<ProtoT>::ComputeChecksum( static_cast<long long>(end))); } - for (int i = start; i < end; i += kMmapChunkSize) { - // Don't read past the file size. - int next_chunk_size = kMmapChunkSize; - if ((i + kMmapChunkSize) >= end) { - next_chunk_size = end - i; + Architecture architecture = GetArchitecture(); + switch (architecture) { + case Architecture::BIT_64: { + // Don't mmap in chunks here since mmapping can be harmful on 64-bit + // devices where mmap/munmap calls need the mmap write semaphore, which + // blocks mmap/munmap/mprotect and all page faults from executing while + // they run. On 64-bit devices, this doesn't actually load into memory, it + // just makes the file faultable. So the whole file should be ok. + // b/185822878. + ICING_RETURN_IF_ERROR(mmapped_file.Remap(start, end - start)); + auto mmap_str = std::string_view(mmapped_file.region(), end - start); + new_crc.Append(mmap_str); + break; + } + case Architecture::BIT_32: + [[fallthrough]]; + case Architecture::UNKNOWN: { + // 32-bit devices only have 4GB of RAM. Mmap in chunks to not use up too + // much memory at once. If we're unknown, then also chunk it because we're + // not sure what the device can handle. 
+ for (int i = start; i < end; i += kMmapChunkSize) { + // Don't read past the file size. + int next_chunk_size = kMmapChunkSize; + if ((i + kMmapChunkSize) >= end) { + next_chunk_size = end - i; + } + + ICING_RETURN_IF_ERROR(mmapped_file.Remap(i, next_chunk_size)); + + auto mmap_str = + std::string_view(mmapped_file.region(), next_chunk_size); + new_crc.Append(mmap_str); + } + break; } - - ICING_RETURN_IF_ERROR(mmapped_file.Remap(i, next_chunk_size)); - - auto mmap_str = std::string_view(mmapped_file.region(), next_chunk_size); - new_crc.Append(mmap_str); } return new_crc; @@ -670,7 +701,8 @@ libtextclassifier3::StatusOr<int64_t> FileBackedProtoLog<ProtoT>::WriteProto( static_cast<long long>(proto_size), header_->max_proto_size)); } - // At this point, we've guaranteed that proto_size is under kMaxProtoSize (see + // At this point, we've guaranteed that proto_size is under kMaxProtoSize + // (see // ::Create), so we can safely store it in an int. int final_size = 0; @@ -735,8 +767,8 @@ libtextclassifier3::StatusOr<ProtoT> FileBackedProtoLog<ProtoT>::ReadProto( MemoryMappedFile mmapped_file(*filesystem_, file_path_, MemoryMappedFile::Strategy::READ_ONLY); if (file_offset >= file_size) { - // file_size points to the next byte to write at, so subtract one to get the - // inclusive, actual size of file. + // file_size points to the next byte to write at, so subtract one to get + // the inclusive, actual size of file. return absl_ports::OutOfRangeError( IcingStringUtil::StringPrintf("Trying to read from a location, %lld, " "out of range of the file size, %lld", @@ -778,8 +810,8 @@ libtextclassifier3::Status FileBackedProtoLog<ProtoT>::EraseProto( int64_t file_offset) { int64_t file_size = filesystem_->GetFileSize(fd_.get()); if (file_offset >= file_size) { - // file_size points to the next byte to write at, so subtract one to get the - // inclusive, actual size of file. 
+ // file_size points to the next byte to write at, so subtract one to get + // the inclusive, actual size of file. return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf( "Trying to erase data at a location, %lld, " "out of range of the file size, %lld", @@ -798,12 +830,12 @@ libtextclassifier3::Status FileBackedProtoLog<ProtoT>::EraseProto( ICING_RETURN_IF_ERROR(mmapped_file.Remap(file_offset + sizeof(metadata), GetProtoSize(metadata))); - // We need to update the crc checksum if the erased area is before the rewind - // position. + // We need to update the crc checksum if the erased area is before the + // rewind position. if (file_offset + sizeof(metadata) < header_->rewind_offset) { // We need to calculate [original string xor 0s]. - // The xored string is the same as the original string because 0 xor 0 = 0, - // 1 xor 0 = 1. + // The xored string is the same as the original string because 0 xor 0 = + // 0, 1 xor 0 = 1. const std::string_view xored_str(mmapped_file.region(), mmapped_file.region_size()); @@ -896,7 +928,8 @@ int64_t FileBackedProtoLog<ProtoT>::Iterator::GetOffset() { template <typename ProtoT> typename FileBackedProtoLog<ProtoT>::Iterator FileBackedProtoLog<ProtoT>::GetIterator() { - return Iterator(*filesystem_, file_path_, /*initial_offset=*/sizeof(Header)); + return Iterator(*filesystem_, file_path_, + /*initial_offset=*/sizeof(Header)); } template <typename ProtoT> @@ -959,7 +992,8 @@ libtextclassifier3::Status FileBackedProtoLog<ProtoT>::PersistToDisk() { header_->header_checksum = header_->CalculateHeaderChecksum(); if (!filesystem_->PWrite(fd_.get(), /*offset=*/0, header_.get(), - sizeof(Header))) { + sizeof(Header)) || + !filesystem_->DataSync(fd_.get())) { return absl_ports::InternalError( absl_ports::StrCat("Failed to update header to: ", file_path_)); } diff --git a/icing/file/file-backed-proto-log_benchmark.cc b/icing/file/file-backed-proto-log_benchmark.cc index 26e0fb0..c09fd5a 100644 --- 
a/icing/file/file-backed-proto-log_benchmark.cc +++ b/icing/file/file-backed-proto-log_benchmark.cc @@ -164,6 +164,88 @@ BENCHMARK(BM_Read) // 16MiB, and we need some extra space for the // rest of the document properties +static void BM_Erase(benchmark::State& state) { + const Filesystem filesystem; + const std::string file_path = IcingStringUtil::StringPrintf( + "%s%s", GetTestTempDir().c_str(), "/proto.log"); + int max_proto_size = (1 << 24) - 1; // 16 MiB + bool compress = true; + + // Make sure it doesn't already exist. + filesystem.DeleteFile(file_path.c_str()); + + auto proto_log = + FileBackedProtoLog<DocumentProto>::Create( + &filesystem, file_path, + FileBackedProtoLog<DocumentProto>::Options(compress, max_proto_size)) + .ValueOrDie() + .proto_log; + + DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); + + std::default_random_engine random; + const std::string rand_str = RandomString(kAlNumAlphabet, /*len=*/1, &random); + + auto document_properties = document.add_properties(); + document_properties->set_name("string property"); + document_properties->add_string_values(rand_str); + + for (auto _ : state) { + state.PauseTiming(); + ICING_ASSERT_OK_AND_ASSIGN(int64_t write_offset, + proto_log->WriteProto(document)); + state.ResumeTiming(); + + testing::DoNotOptimize(proto_log->EraseProto(write_offset)); + } + + // Cleanup after ourselves + filesystem.DeleteFile(file_path.c_str()); +} +BENCHMARK(BM_Erase); + +static void BM_ComputeChecksum(benchmark::State& state) { + const Filesystem filesystem; + const std::string file_path = GetTestTempDir() + "/proto.log"; + int max_proto_size = (1 << 24) - 1; // 16 MiB + bool compress = true; + + // Make sure it doesn't already exist. 
+ filesystem.DeleteFile(file_path.c_str()); + + auto proto_log = + FileBackedProtoLog<DocumentProto>::Create( + &filesystem, file_path, + FileBackedProtoLog<DocumentProto>::Options(compress, max_proto_size)) + .ValueOrDie() + .proto_log; + + DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); + + // Make each document 1KiB + int string_length = 1024; + std::default_random_engine random; + const std::string rand_str = + RandomString(kAlNumAlphabet, string_length, &random); + + auto document_properties = document.add_properties(); + document_properties->set_name("string property"); + document_properties->add_string_values(rand_str); + + int num_docs = state.range(0); + for (int i = 0; i < num_docs; ++i) { + ICING_ASSERT_OK(proto_log->WriteProto(document)); + } + + for (auto _ : state) { + testing::DoNotOptimize(proto_log->ComputeChecksum()); + } + + // Cleanup after ourselves + filesystem.DeleteFile(file_path.c_str()); +} +BENCHMARK(BM_ComputeChecksum)->Range(1024, 1 << 20); + } // namespace } // namespace lib } // namespace icing diff --git a/icing/file/file-backed-vector.h b/icing/file/file-backed-vector.h index 3ecef54..0989935 100644 --- a/icing/file/file-backed-vector.h +++ b/icing/file/file-backed-vector.h @@ -56,6 +56,7 @@ #ifndef ICING_FILE_FILE_BACKED_VECTOR_H_ #define ICING_FILE_FILE_BACKED_VECTOR_H_ +#include <inttypes.h> #include <stdint.h> #include <sys/mman.h> @@ -175,7 +176,27 @@ class FileBackedVector { // synced by the system and the checksum will be updated. ~FileBackedVector(); - // Accesses the element at idx. + // Gets a copy of the element at idx. + // + // This is useful if you think the FileBackedVector may grow before you need + // to access this return value. When the FileBackedVector grows, the + // underlying mmap will be unmapped and remapped, which will invalidate any + // pointers to the previously mapped region. Getting a copy will avoid + // referencing the now-invalidated region. 
+ // + // Returns: + // OUT_OF_RANGE_ERROR if idx < 0 or > num_elements() + libtextclassifier3::StatusOr<T> GetCopy(int32_t idx) const; + + // Gets a pointer to the element at idx. + // + // WARNING: Subsequent calls to Set may invalidate the pointer returned by + // Get. + // + // This is useful if you do not think the FileBackedVector will grow before + // you need to reference this value, and you want to avoid a copy. When the + // FileBackedVector grows, the underlying mmap will be unmapped and remapped, + // which will invalidate this pointer to the previously mapped region. // // Returns: // OUT_OF_RANGE_ERROR if idx < 0 or > num_elements() @@ -183,6 +204,10 @@ class FileBackedVector { // Writes the value at idx. // + // May grow the underlying file and mmapped region as needed to fit the new + // value. If it does grow, then any pointers to previous values returned + // from Get() may be invalidated. + // // Returns: // OUT_OF_RANGE_ERROR if idx < 0 or file cannot be grown idx size libtextclassifier3::Status Set(int32_t idx, const T& value); @@ -399,13 +424,6 @@ FileBackedVector<T>::InitializeExistingFile( absl_ports::StrCat("Invalid header kMagic for ", file_path)); } - // Mmap the content of the vector, excluding the header so its easier to - // access elements from the mmapped region - auto mmapped_file = - std::make_unique<MemoryMappedFile>(filesystem, file_path, mmap_strategy); - ICING_RETURN_IF_ERROR( - mmapped_file->Remap(sizeof(Header), file_size - sizeof(Header))); - // Check header if (header->header_checksum != header->CalculateHeaderChecksum()) { return absl_ports::FailedPreconditionError( @@ -418,6 +436,20 @@ FileBackedVector<T>::InitializeExistingFile( header->element_size)); } + int64_t min_file_size = header->num_elements * sizeof(T) + sizeof(Header); + if (min_file_size > file_size) { + return absl_ports::InternalError(IcingStringUtil::StringPrintf( + "Inconsistent file size, expected %" PRId64 ", actual %" PRId64, + min_file_size, 
file_size)); + } + + // Mmap the content of the vector, excluding the header so its easier to + // access elements from the mmapped region + auto mmapped_file = + std::make_unique<MemoryMappedFile>(filesystem, file_path, mmap_strategy); + ICING_RETURN_IF_ERROR( + mmapped_file->Remap(sizeof(Header), file_size - sizeof(Header))); + // Check vector contents Crc32 vector_checksum; std::string_view vector_contents( @@ -468,6 +500,13 @@ FileBackedVector<T>::~FileBackedVector() { } template <typename T> +libtextclassifier3::StatusOr<T> FileBackedVector<T>::GetCopy( + int32_t idx) const { + ICING_ASSIGN_OR_RETURN(const T* value, Get(idx)); + return *value; +} + +template <typename T> libtextclassifier3::StatusOr<const T*> FileBackedVector<T>::Get( int32_t idx) const { if (idx < 0) { @@ -492,8 +531,6 @@ libtextclassifier3::Status FileBackedVector<T>::Set(int32_t idx, IcingStringUtil::StringPrintf("Index, %d, was less than 0", idx)); } - int32_t start_byte = idx * sizeof(T); - ICING_RETURN_IF_ERROR(GrowIfNecessary(idx + 1)); if (idx + 1 > header_->num_elements) { @@ -518,6 +555,8 @@ libtextclassifier3::Status FileBackedVector<T>::Set(int32_t idx, changes_end_ = 0; header_->vector_checksum = 0; } else { + int32_t start_byte = idx * sizeof(T); + changes_.push_back(idx); saved_original_buffer_.append( reinterpret_cast<char*>(const_cast<T*>(array())) + start_byte, @@ -560,9 +599,24 @@ libtextclassifier3::Status FileBackedVector<T>::GrowIfNecessary( least_file_size_needed = math_util::RoundUpTo( least_file_size_needed, int64_t{FileBackedVector<T>::kGrowElements * sizeof(T)}); - if (!filesystem_->Grow(file_path_.c_str(), least_file_size_needed)) { - return absl_ports::InternalError( - absl_ports::StrCat("Couldn't grow file ", file_path_)); + + // We use PWrite here rather than Grow because Grow doesn't actually allocate + // an underlying disk block. 
This can lead to problems with mmap because mmap + // has no effective way to signal that it was impossible to allocate the disk + // block and ends up crashing instead. PWrite will force the allocation of + // these blocks, which will ensure that any failure to grow will surface here. + int64_t page_size = getpagesize(); + auto buf = std::make_unique<uint8_t[]>(page_size); + int64_t size_to_write = page_size - (current_file_size % page_size); + ScopedFd sfd(filesystem_->OpenForWrite(file_path_.c_str())); + while (current_file_size < least_file_size_needed) { + if (!filesystem_->PWrite(sfd.get(), current_file_size, buf.get(), + size_to_write)) { + return absl_ports::InternalError( + absl_ports::StrCat("Couldn't grow file ", file_path_)); + } + current_file_size += size_to_write; + size_to_write = page_size - (current_file_size % page_size); } ICING_RETURN_IF_ERROR(mmapped_file_->Remap( diff --git a/icing/file/file-backed-vector_test.cc b/icing/file/file-backed-vector_test.cc index bc2fef6..b05ce2d 100644 --- a/icing/file/file-backed-vector_test.cc +++ b/icing/file/file-backed-vector_test.cc @@ -32,6 +32,7 @@ #include "icing/util/logging.h" using ::testing::Eq; +using ::testing::IsTrue; using ::testing::Pointee; namespace icing { @@ -278,7 +279,6 @@ TEST_F(FileBackedVectorTest, Grow) { filesystem_, file_path_, MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0))); - EXPECT_THAT(vector->Set(kMaxNumElts + 11, 'a'), StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); EXPECT_THAT(vector->Set(-1, 'a'), @@ -318,25 +318,32 @@ TEST_F(FileBackedVectorTest, GrowsInChunks) { filesystem_, file_path_, MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); - // Our initial file size should just be the size of the header - EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), - Eq(sizeof(FileBackedVector<char>::Header))); + // Our initial file size should just be the size of the header. 
Disk usage + // will indicate that one block has been allocated, which contains the header. + int header_size = sizeof(FileBackedVector<char>::Header); + int page_size = getpagesize(); + EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(header_size)); + EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(page_size)); - // Once we add something though, we'll grow to kGrowElements big + // Once we add something though, we'll grow to be kGrowElements big. From this + // point on, file size and disk usage should be the same because Growing will + // explicitly allocate the number of blocks needed to accomodate the file. Insert(vector.get(), 0, "a"); - EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), - Eq(kGrowElements * sizeof(int))); + int file_size = kGrowElements * sizeof(int); + EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(file_size)); + EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(file_size)); // Should still be the same size, don't need to grow underlying file Insert(vector.get(), 1, "b"); - EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), - Eq(kGrowElements * sizeof(int))); + EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(file_size)); + EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(file_size)); // Now we grow by a kGrowElements chunk, so the underlying file is 2 // kGrowElements big + file_size *= 2; Insert(vector.get(), 2, std::string(kGrowElements, 'c')); - EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), - Eq(kGrowElements * 2 * sizeof(int))); + EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(file_size)); + EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(file_size)); // Destroy/persist the contents. vector.reset(); @@ -463,6 +470,174 @@ TEST_F(FileBackedVectorTest, TruncateAndReReadFile) { } } +TEST_F(FileBackedVectorTest, InitFileTooSmallForHeaderFails) { + { + // 1. Create a vector with a few elements. 
+ ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<FileBackedVector<char>> vector, + FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + Insert(vector.get(), 0, "A"); + Insert(vector.get(), 1, "Z"); + ASSERT_THAT(vector->PersistToDisk(), IsOk()); + } + + // 2. Shrink the file to be smaller than the header. + filesystem_.Truncate(fd_, sizeof(FileBackedVector<char>::Header) - 1); + + { + // 3. Attempt to create the file and confirm that it fails. + EXPECT_THAT(FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC), + StatusIs(libtextclassifier3::StatusCode::INTERNAL)); + } +} + +TEST_F(FileBackedVectorTest, InitWrongDataSizeFails) { + { + // 1. Create a vector with a few elements. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<FileBackedVector<char>> vector, + FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + Insert(vector.get(), 0, "A"); + Insert(vector.get(), 1, "Z"); + ASSERT_THAT(vector->PersistToDisk(), IsOk()); + } + + { + // 2. Attempt to create the file with a different element size and confirm + // that it fails. + EXPECT_THAT(FileBackedVector<int>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC), + StatusIs(libtextclassifier3::StatusCode::INTERNAL)); + } +} + +TEST_F(FileBackedVectorTest, InitCorruptHeaderFails) { + { + // 1. Create a vector with a few elements. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<FileBackedVector<char>> vector, + FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + Insert(vector.get(), 0, "A"); + Insert(vector.get(), 1, "Z"); + ASSERT_THAT(vector->PersistToDisk(), IsOk()); + } + + // 2. Modify the header, but don't update the checksum. This would be similar + // to corruption of the header. 
+ FileBackedVector<char>::Header header; + ASSERT_THAT(filesystem_.PRead(fd_, &header, sizeof(header), /*offset=*/0), + IsTrue()); + header.num_elements = 1; + ASSERT_THAT(filesystem_.PWrite(fd_, /*offset=*/0, &header, sizeof(header)), + IsTrue()); + + { + // 3. Attempt to create the file with a header that doesn't match its + // checksum and confirm that it fails. + EXPECT_THAT(FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); + } +} + +TEST_F(FileBackedVectorTest, InitHeaderElementSizeTooBigFails) { + { + // 1. Create a vector with a few elements. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<FileBackedVector<char>> vector, + FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + Insert(vector.get(), 0, "A"); + Insert(vector.get(), 1, "Z"); + ASSERT_THAT(vector->PersistToDisk(), IsOk()); + } + + // 2. Modify the header so that the number of elements exceeds the actual size + // of the underlying file. + FileBackedVector<char>::Header header; + ASSERT_THAT(filesystem_.PRead(fd_, &header, sizeof(header), /*offset=*/0), + IsTrue()); + int64_t file_size = filesystem_.GetFileSize(fd_); + int64_t allocated_elements_size = file_size - sizeof(header); + header.num_elements = (allocated_elements_size / sizeof(char)) + 1; + header.header_checksum = header.CalculateHeaderChecksum(); + ASSERT_THAT(filesystem_.PWrite(fd_, /*offset=*/0, &header, sizeof(header)), + IsTrue()); + + { + // 3. Attempt to create the file with num_elements that is larger than the + // underlying file and confirm that it fails. + EXPECT_THAT(FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC), + StatusIs(libtextclassifier3::StatusCode::INTERNAL)); + } +} + +TEST_F(FileBackedVectorTest, InitCorruptElementsFails) { + { + // 1. 
Create a vector with a few elements. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<FileBackedVector<char>> vector, + FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + Insert(vector.get(), 0, "A"); + Insert(vector.get(), 1, "Z"); + ASSERT_THAT(vector->PersistToDisk(), IsOk()); + } + + // 2. Overwrite the values of the first two elements. + std::string corrupted_content = "BY"; + ASSERT_THAT( + filesystem_.PWrite(fd_, /*offset=*/sizeof(FileBackedVector<char>::Header), + corrupted_content.c_str(), corrupted_content.length()), + IsTrue()); + + { + // 3. Attempt to create the file with elements that don't match their + // checksum and confirm that it fails. + EXPECT_THAT(FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); + } +} + +TEST_F(FileBackedVectorTest, InitNormalSucceeds) { + { + // 1. Create a vector with a few elements. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<FileBackedVector<char>> vector, + FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + Insert(vector.get(), 0, "A"); + Insert(vector.get(), 1, "Z"); + ASSERT_THAT(vector->PersistToDisk(), IsOk()); + } + + { + // 2. Attempt to create the file with a completely valid header and elements + // region. This should succeed. 
+ EXPECT_THAT(FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC), + IsOk()); + } +} + } // namespace } // namespace lib diff --git a/icing/file/filesystem.cc b/icing/file/filesystem.cc index 6a596f5..0655cb9 100644 --- a/icing/file/filesystem.cc +++ b/icing/file/filesystem.cc @@ -466,7 +466,13 @@ bool Filesystem::Write(const char* filename, const void* data, bool Filesystem::CopyFile(const char* src, const char* dst) const { ScopedFd src_fd(OpenForRead(src)); + + std::string dir = GetDirname(dst); + if (!CreateDirectoryRecursively(dir.c_str())) { + return false; + } ScopedFd dst_fd(OpenForWrite(dst)); + if (!src_fd.is_valid() || !dst_fd.is_valid()) { return false; } @@ -478,6 +484,49 @@ bool Filesystem::CopyFile(const char* src, const char* dst) const { return Write(*dst_fd, buf.get(), size); } +bool Filesystem::CopyDirectory(const char* src_dir, const char* dst_dir, + bool recursive) const { + DIR* dir = opendir(src_dir); + if (!dir) { + LogOpenError("Unable to open directory ", src_dir, ": ", errno); + return false; + } + + dirent* p; + // readdir's implementation seems to be thread safe. + while ((p = readdir(dir)) != nullptr) { + std::string file_name(p->d_name); + if (file_name == "." || file_name == "..") { + continue; + } + + std::string full_src_path = absl_ports::StrCat(src_dir, "/", p->d_name); + std::string full_dst_path = absl_ports::StrCat(dst_dir, "/", p->d_name); + + // Directories are copied when writing a non-directory file, so no + // explicit copying of a directory is required. + if (p->d_type != DT_DIR) { + if (!CopyFile(full_src_path.c_str(), full_dst_path.c_str())) { + return false; + } + } + + // Recurse down directories, if requested. 
+ if (recursive && (p->d_type == DT_DIR)) { + std::string src_sub_dir = absl_ports::StrCat(src_dir, "/", p->d_name); + std::string dst_sub_dir = absl_ports::StrCat(dst_dir, "/", p->d_name); + if (!CopyDirectory(src_sub_dir.c_str(), dst_sub_dir.c_str(), recursive)) { + return false; + } + } + } + if (closedir(dir) != 0) { + ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Error closing %s: %s", + src_dir, strerror(errno)); + } + return true; +} + bool Filesystem::PWrite(int fd, off_t offset, const void* data, size_t data_size) const { size_t write_len = data_size; diff --git a/icing/file/filesystem.h b/icing/file/filesystem.h index d3c7787..6bed8e6 100644 --- a/icing/file/filesystem.h +++ b/icing/file/filesystem.h @@ -86,8 +86,12 @@ class Filesystem { // Copies the src file to the dst file. virtual bool CopyFile(const char* src, const char* dst) const; + // Copies the src directory and its contents to the dst dir. + virtual bool CopyDirectory(const char* src_dir, const char* dst_dir, + bool recursive) const; + // Returns true if a file exists. False if the file doesn't exist. - // If there is an error getting stat on the file, it logs the error and // + // If there is an error getting stat on the file, it logs the error and // asserts. 
virtual bool FileExists(const char* file_name) const; diff --git a/icing/file/filesystem_test.cc b/icing/file/filesystem_test.cc index 492a50d..214180e 100644 --- a/icing/file/filesystem_test.cc +++ b/icing/file/filesystem_test.cc @@ -38,6 +38,7 @@ using ::testing::Gt; using ::testing::Le; using ::testing::Ne; using ::testing::UnorderedElementsAre; +using ::testing::UnorderedElementsAreArray; namespace icing { namespace lib { @@ -450,5 +451,47 @@ TEST_F(FilesystemTest, ReadWrite) { EXPECT_THAT(hello, Eq("hello")); } +TEST_F(FilesystemTest, CopyDirectory) { + Filesystem filesystem; + + // File structure: + // <temp_dir>/ + // src_dir/ + // file1 + // file2 + // sub_dir/ + // file3 + const std::string src_dir = temp_dir_ + "/src_dir"; + const std::string sub_dir = "sub_dir"; + const std::string sub_dir_path = src_dir + "/" + sub_dir; + vector<std::string> some_files = {"file1", "file2", sub_dir + "/file3"}; + + // Make sure there is no pre-existing test-dir structure + ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(src_dir.c_str())); + + // Setup a test-dir structure + ASSERT_TRUE(filesystem.CreateDirectoryRecursively( + sub_dir_path.c_str())); // deepest path for test + CreateTestFiles(some_files, src_dir); + + const std::string dst_dir = temp_dir_ + "/dst_dir"; + EXPECT_TRUE(filesystem.CopyDirectory(src_dir.c_str(), dst_dir.c_str(), + /*recursive=*/true)); + + vector<std::string> src_dir_files; + EXPECT_TRUE(filesystem.ListDirectory(src_dir.c_str(), /*exclude=*/{}, + /*recursive=*/true, &src_dir_files)); + + vector<std::string> dst_dir_files; + EXPECT_TRUE(filesystem.ListDirectory(dst_dir.c_str(), /*exclude=*/{}, + /*recursive=*/true, &dst_dir_files)); + + EXPECT_THAT(dst_dir_files, UnorderedElementsAreArray(src_dir_files)); + + // Clean up + ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(src_dir.c_str())); + ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(dst_dir.c_str())); +} + } // namespace lib } // namespace icing diff --git 
a/icing/file/mock-filesystem.h b/icing/file/mock-filesystem.h index 88475cd..32817d4 100644 --- a/icing/file/mock-filesystem.h +++ b/icing/file/mock-filesystem.h @@ -44,6 +44,17 @@ class MockFilesystem : public Filesystem { return real_filesystem_.DeleteDirectoryRecursively(dir_name); }); + ON_CALL(*this, CopyFile) + .WillByDefault([this](const char* src, const char* dst) { + return real_filesystem_.CopyFile(src, dst); + }); + + ON_CALL(*this, CopyDirectory) + .WillByDefault( + [this](const char* src, const char* dst, bool recursive) { + return real_filesystem_.CopyDirectory(src, dst, recursive); + }); + ON_CALL(*this, FileExists).WillByDefault([this](const char* file_name) { return real_filesystem_.FileExists(file_name); }); @@ -227,6 +238,9 @@ class MockFilesystem : public Filesystem { MOCK_METHOD(bool, CopyFile, (const char* src, const char* dst), (const)); + MOCK_METHOD(bool, CopyDirectory, + (const char* src, const char* dst, bool recursive), (const)); + MOCK_METHOD(bool, FileExists, (const char* file_name), (const)); MOCK_METHOD(bool, DirectoryExists, (const char* dir_name), (const)); diff --git a/icing/file/portable-file-backed-proto-log.h b/icing/file/portable-file-backed-proto-log.h new file mode 100644 index 0000000..99b8941 --- /dev/null +++ b/icing/file/portable-file-backed-proto-log.h @@ -0,0 +1,1241 @@ +// Copyright (C) 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// File-backed log of protos with append-only writes and position based reads. +// +// There should only be one instance of a PortableFileBackedProtoLog of the same +// file at a time; using multiple instances at the same time may lead to +// undefined behavior. +// +// The entire checksum is computed on initialization to verify the contents are +// valid. On failure, the log will be truncated to the last verified state when +// PersistToDisk() was called. If the log cannot successfully restore the last +// state due to disk corruption or some other inconsistency, then the entire log +// will be lost. +// +// Each proto written to the file will have a metadata written just before it. +// The metadata consists of +// { +// 1 bytes of kProtoMagic; +// 3 bytes of the proto size +// n bytes of the proto itself +// } +// +// All metadata is written in a portable format, encoded with htonl before +// writing to file and decoded with ntohl when reading from file. +// +// Example usage: +// ICING_ASSERT_OK_AND_ASSIGN(auto create_result, +// PortableFileBackedProtoLog<DocumentProto>::Create(filesystem, +// file_path_, +// options)); +// auto proto_log = create_result.proto_log; +// +// Document document; +// document.set_namespace("com.google.android.example"); +// document.set_uri("www.google.com"); +// +// int64_t document_offset = proto_log->WriteProto(document)); +// Document same_document = proto_log->ReadProto(document_offset)); +// proto_log->PersistToDisk(); + +#ifndef ICING_FILE_PORTABLE_FILE_BACKED_PROTO_LOG_H_ +#define ICING_FILE_PORTABLE_FILE_BACKED_PROTO_LOG_H_ + +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <memory> +#include <string> +#include <string_view> +#include <utility> +#include <vector> + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include <google/protobuf/io/gzip_stream.h> +#include <google/protobuf/io/zero_copy_stream_impl_lite.h> +#include 
"icing/absl_ports/canonical_errors.h" +#include "icing/absl_ports/str_cat.h" +#include "icing/file/filesystem.h" +#include "icing/file/memory-mapped-file.h" +#include "icing/legacy/core/icing-string-util.h" +#include "icing/portable/endian.h" +#include "icing/portable/platform.h" +#include "icing/portable/zlib.h" +#include "icing/util/bit-util.h" +#include "icing/util/crc32.h" +#include "icing/util/data-loss.h" +#include "icing/util/logging.h" +#include "icing/util/status-macros.h" + +namespace icing { +namespace lib { + +template <typename ProtoT> +class PortableFileBackedProtoLog { + public: + struct Options { + // Whether to compress each proto before writing to the proto log. + bool compress; + + // Byte-size limit for each proto written to the store. This does not + // include the bytes needed for the metadata of each proto. + // + // NOTE: Currently, we only support protos up to 16MiB. We store the proto + // size in 3 bytes within the metadata. + // + // NOTE: This limit is only enforced for future writes. If the store + // previously had a higher limit, then reading older entries could return + // larger protos. + // + // NOTE: The max_proto_size is the upper limit for input protos into the + // ProtoLog. Even if the proto is larger than max_proto_size, but compresses + // to a smaller size, ProtoLog will not accept it. Protos that result in a + // compressed size larger than max_proto_size are also not accepted. + const int32_t max_proto_size; + + // Must specify values for options. + Options() = delete; + explicit Options(bool compress_in, + const int32_t max_proto_size_in = kMaxProtoSize) + : compress(compress_in), max_proto_size(max_proto_size_in) {} + }; + + // Number of bytes we reserve for the heading at the beginning of the proto + // log. We reserve this so the header can grow without running into the + // contents of the proto log, triggering an unnecessary migration of the data. 
+ static constexpr int kHeaderReservedBytes = 256; + + // Header stored at the beginning of the file before the rest of the log + // contents. Stores metadata on the log. + class Header { + public: + static constexpr int32_t kMagic = 0xf4c6f67a; + + static constexpr int32_t kFileFormatVersion = 0; + + uint32_t CalculateHeaderChecksum() const { + Crc32 crc; + + // Get a string_view of all the fields of the Header, excluding the + // magic_nbytes_ and header_checksum_nbytes_ + std::string_view header_str( + reinterpret_cast<const char*>(this) + + offsetof(Header, header_checksum_nbytes_) + + sizeof(header_checksum_nbytes_), + sizeof(Header) - sizeof(magic_nbytes_) - + sizeof(header_checksum_nbytes_)); + crc.Append(header_str); + return crc.Get(); + } + + int32_t GetMagic() const { return GNetworkToHostL(magic_nbytes_); } + + void SetMagic(int32_t magic_in) { + magic_nbytes_ = GHostToNetworkL(magic_in); + } + + int32_t GetFileFormatVersion() const { + return GNetworkToHostL(file_format_version_nbytes_); + } + + void SetFileFormatVersion(int32_t file_format_version_in) { + file_format_version_nbytes_ = GHostToNetworkL(file_format_version_in); + } + + int32_t GetMaxProtoSize() const { + return GNetworkToHostL(max_proto_size_nbytes_); + } + + void SetMaxProtoSize(int32_t max_proto_size_in) { + max_proto_size_nbytes_ = GHostToNetworkL(max_proto_size_in); + } + + int32_t GetLogChecksum() const { + return GNetworkToHostL(log_checksum_nbytes_); + } + + void SetLogChecksum(int32_t log_checksum_in) { + log_checksum_nbytes_ = GHostToNetworkL(log_checksum_in); + } + + int64_t GetRewindOffset() const { + return GNetworkToHostLL(rewind_offset_nbytes_); + } + + void SetRewindOffset(int64_t rewind_offset_in) { + rewind_offset_nbytes_ = GHostToNetworkLL(rewind_offset_in); + } + + int32_t GetHeaderChecksum() const { + return GNetworkToHostL(header_checksum_nbytes_); + } + + void SetHeaderChecksum(int32_t header_checksum_in) { + header_checksum_nbytes_ = 
GHostToNetworkL(header_checksum_in); + } + + bool GetCompressFlag() const { return GetFlag(kCompressBit); } + + void SetCompressFlag(bool compress) { SetFlag(kCompressBit, compress); } + + bool GetDirtyFlag() const { return GetFlag(kDirtyBit); } + + void SetDirtyFlag(bool dirty) { SetFlag(kDirtyBit, dirty); } + + private: + // The least-significant bit offset at which the compress flag is stored in + // 'flags_nbytes_'. Represents whether the protos in the log are compressed + // or not. + static constexpr int32_t kCompressBit = 0; + + // The least-significant bit offset at which the dirty flag is stored in + // 'flags'. Represents whether the checksummed portion of the log has been + // modified after the last checksum was computed. + static constexpr int32_t kDirtyBit = 1; + + bool GetFlag(int offset) const { + return bit_util::BitfieldGet(flags_, offset, /*len=*/1); + } + + void SetFlag(int offset, bool value) { + bit_util::BitfieldSet(value, offset, /*len=*/1, &flags_); + } + + // Holds the magic as a quick sanity check against file corruption. + // + // Field is in network-byte order. + int32_t magic_nbytes_ = GHostToNetworkL(kMagic); + + // Must be at the beginning after kMagic. Contains the crc checksum of + // the following fields. + // + // Field is in network-byte order. + uint32_t header_checksum_nbytes_ = 0; + + // Last known good offset at which the log and its checksum were updated. + // If we crash between writing to the log and updating the checksum, we can + // try to rewind the log to this offset and verify the checksum is still + // valid instead of throwing away the entire log. + // + // Field is in network-byte order. + int64_t rewind_offset_nbytes_ = GHostToNetworkLL(kHeaderReservedBytes); + + // Version number tracking how we serialize the file to disk. If we change + // how/what we write to disk, this version should be updated and this class + // should handle a migration. + // + // Currently at kFileFormatVersion. 
+ // + // Field is in network-byte order. + int32_t file_format_version_nbytes_ = 0; + + // The maximum proto size that can be written to the log. + // + // Field is in network-byte order. + int32_t max_proto_size_nbytes_ = 0; + + // Checksum of the log elements, doesn't include the header fields. + // + // Field is in network-byte order. + uint32_t log_checksum_nbytes_ = 0; + + // Bits are used to hold various flags. + // Lowest bit is whether the protos are compressed or not. + // + // Field is only 1 byte, so is byte-order agnostic. + uint8_t flags_ = 0; + + // NOTE: New fields should *almost always* be added to the end here. Since + // this class may have already been written to disk, appending fields + // increases the chances that changes are backwards-compatible. + }; + static_assert(sizeof(Header) <= kHeaderReservedBytes, + "Header has grown past our reserved bytes!"); + + struct CreateResult { + // A successfully initialized log. + std::unique_ptr<PortableFileBackedProtoLog<ProtoT>> proto_log; + + // The data status after initializing from a previous state. Data loss can + // happen if the file is corrupted or some previously added data was + // unpersisted. This may be used to signal that any derived data off of the + // proto log may need to be regenerated. + DataLoss data_loss = DataLoss::NONE; + + // Whether the proto log had to recalculate the checksum to check its + // integrity. This can be avoided if no changes were made or the log was + // able to update its checksum before shutting down. But it may have to + // recalculate if it's unclear if we crashed after updating the log, but + // before updating our checksum. + bool recalculated_checksum = false; + + bool has_data_loss() { + return data_loss == DataLoss::PARTIAL || data_loss == DataLoss::COMPLETE; + } + }; + + // Factory method to create, initialize, and return a + // PortableFileBackedProtoLog. Will create the file if it doesn't exist. 
+ // + // If on re-initialization the log detects disk corruption or some previously + // added data was unpersisted, the log will rewind to the last-good state. The + // log saves these checkpointed "good" states when PersistToDisk() is called + // or the log is safely destructed. If the log rewinds successfully to the + // last-good state, then the returned CreateResult.data_loss indicates + // whether it has a data loss and what kind of data loss it is (partial or + // complete) so that any derived data may know that it needs to be updated. If + // the log re-initializes successfully without any data loss, + // CreateResult.data_loss will be NONE. + // + // Params: + // filesystem: Handles system level calls + // file_path: Path of the underlying file. Directory of the file should + // already exist + // options: Configuration options for the proto log + // + // Returns: + // PortableFileBackedProtoLog::CreateResult on success + // INVALID_ARGUMENT on an invalid option + // INTERNAL_ERROR on IO error + static libtextclassifier3::StatusOr<CreateResult> Create( + const Filesystem* filesystem, const std::string& file_path, + const Options& options); + + // Not copyable + PortableFileBackedProtoLog(const PortableFileBackedProtoLog&) = delete; + PortableFileBackedProtoLog& operator=(const PortableFileBackedProtoLog&) = + delete; + + // This will update the checksum of the log as well. + ~PortableFileBackedProtoLog(); + + // Writes the serialized proto to the underlying file. Writes are applied + // directly to the underlying file. Users do not need to sync the file after + // writing. + // + // Returns: + // Offset of the newly appended proto in file on success + // INVALID_ARGUMENT if proto is too large, as decided by + // Options.max_proto_size + // INTERNAL_ERROR on IO error + libtextclassifier3::StatusOr<int64_t> WriteProto(const ProtoT& proto); + + // Reads out a proto located at file_offset from the file. 
+ // + // Returns: + // A proto on success + // NOT_FOUND if the proto at the given offset has been erased + // OUT_OF_RANGE_ERROR if file_offset exceeds file size + // INTERNAL_ERROR on IO error + libtextclassifier3::StatusOr<ProtoT> ReadProto(int64_t file_offset) const; + + // Erases the data of a proto located at file_offset from the file. + // + // Returns: + // OK on success + // OUT_OF_RANGE_ERROR if file_offset exceeds file size + // INTERNAL_ERROR on IO error + libtextclassifier3::Status EraseProto(int64_t file_offset); + + // Calculates and returns the disk usage in bytes. Rounds up to the nearest + // block size. + // + // Returns: + // Disk usage on success + // INTERNAL_ERROR on IO error + libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const; + + // Returns the file size of all the elements held in the log. File size is in + // bytes. This excludes the size of any internal metadata of the log, e.g. the + // log's header. + // + // Returns: + // File size on success + // INTERNAL_ERROR on IO error + libtextclassifier3::StatusOr<int64_t> GetElementsFileSize() const; + + // An iterator helping to find offsets of all the protos in file. + // Example usage: + // + // while (iterator.Advance().ok()) { + // int64_t offset = iterator.GetOffset(); + // // Do something + // } + class Iterator { + public: + Iterator(const Filesystem& filesystem, const std::string& file_path, + int64_t initial_offset); + + // Advances to the position of next proto whether it has been erased or not. + // + // Returns: + // OK on success + // OUT_OF_RANGE_ERROR if it reaches the end + // INTERNAL_ERROR on IO error + libtextclassifier3::Status Advance(); + + // Returns the file offset of current proto. 
+ int64_t GetOffset(); + + private: + static constexpr int64_t kInvalidOffset = -1; + // Used to read proto metadata + MemoryMappedFile mmapped_file_; + // Offset of first proto + int64_t initial_offset_; + int64_t current_offset_; + int64_t file_size_; + }; + + // Returns an iterator of current proto log. The caller needs to keep the + // proto log unchanged while using the iterator, otherwise unexpected + // behaviors could happen. + Iterator GetIterator(); + + // Persists all changes since initialization or the last call to + // PersistToDisk(). Any changes that aren't persisted may be lost if the + // system fails to close safely. + // + // Example use case: + // + // Document document; + // document.set_namespace("com.google.android.example"); + // document.set_uri("www.google.com"); + // + // { + // ICING_ASSERT_OK_AND_ASSIGN(auto create_result, + // PortableFileBackedProtoLog<DocumentProto>::Create(filesystem, + // file_path, + // options)); + // auto proto_log = std::move(create_result.proto_log); + // + // int64_t document_offset = proto_log->WriteProto(document)); + // + // // We lose the document here since it wasn't persisted. + // // *SYSTEM CRASH* + // } + // + // { + // // Can still successfully create after a crash since the log can + // // rewind/truncate to recover into a previously good state + // ICING_ASSERT_OK_AND_ASSIGN(auto create_result, + // PortableFileBackedProtoLog<DocumentProto>::Create(filesystem, + // file_path, + // options)); + // auto proto_log = std::move(create_result.proto_log); + // + // // Lost the proto since we didn't PersistToDisk before the crash + // proto_log->ReadProto(document_offset)); // INVALID_ARGUMENT error + // + // int64_t document_offset = proto_log->WriteProto(document)); + // + // // Persisted this time, so we should be ok. 
+ // ICING_ASSERT_OK(proto_log->PersistToDisk()); + // } + // + // { + // ICING_ASSERT_OK_AND_ASSIGN(auto create_result, + // PortableFileBackedProtoLog<DocumentProto>::Create(filesystem, + // file_path, + // options)); + // auto proto_log = std::move(create_result.proto_log); + // + // // SUCCESS + // Document same_document = proto_log->ReadProto(document_offset)); + // } + // + // NOTE: Since all protos are already written to the file directly, this + // just updates the checksum and rewind position. Without these updates, + // future initializations will truncate the file and discard unpersisted + // changes. + // + // Returns: + // OK on success + // INTERNAL_ERROR on IO error + libtextclassifier3::Status PersistToDisk(); + + // Calculates the checksum of the log contents. Excludes the header content. + // + // Returns: + // Crc of the log content + // INTERNAL_ERROR on IO error + libtextclassifier3::StatusOr<Crc32> ComputeChecksum(); + + private: + // Object can only be instantiated via the ::Create factory. + PortableFileBackedProtoLog(const Filesystem* filesystem, + const std::string& file_path, + std::unique_ptr<Header> header); + + // Initializes a new proto log. + // + // Returns: + // std::unique_ptr<CreateResult> on success + // INTERNAL_ERROR on IO error + static libtextclassifier3::StatusOr<CreateResult> InitializeNewFile( + const Filesystem* filesystem, const std::string& file_path, + const Options& options); + + // Verifies that the existing proto log is in a good state. If not in a good + // state, then the proto log may be truncated to the last good state and + // content will be lost. 
+ // + // Returns: + // std::unique_ptr<CreateResult> on success + // INTERNAL_ERROR on IO error or internal inconsistencies in the file + // INVALID_ARGUMENT_ERROR if options aren't consistent with previous + // instances + static libtextclassifier3::StatusOr<CreateResult> InitializeExistingFile( + const Filesystem* filesystem, const std::string& file_path, + const Options& options, int64_t file_size); + + // Takes an initial checksum and updates it with the content between `start` + // and `end` offsets in the file. + // + // Returns: + // Crc of the content between `start`, inclusive, and `end`, exclusive. + // INTERNAL_ERROR on IO error + // INVALID_ARGUMENT_ERROR if start and end aren't within the file size + static libtextclassifier3::StatusOr<Crc32> ComputeChecksum( + const Filesystem* filesystem, const std::string& file_path, + Crc32 initial_crc, int64_t start, int64_t end); + + // Reads out the metadata of a proto located at file_offset from the file. + // Metadata will be returned in host byte order endianness. + // + // Returns: + // Proto's metadata on success + // OUT_OF_RANGE_ERROR if file_offset exceeds file_size + // INTERNAL_ERROR if the metadata is invalid or any IO errors happen + static libtextclassifier3::StatusOr<int32_t> ReadProtoMetadata( + MemoryMappedFile* mmapped_file, int64_t file_offset, int64_t file_size); + + // Writes metadata of a proto to the fd. Takes in a host byte order endianness + // metadata and converts it into a portable metadata before writing. + // + // Returns: + // OK on success + // INTERNAL_ERROR on any IO errors + static libtextclassifier3::Status WriteProtoMetadata( + const Filesystem* filesystem, int fd, int32_t host_order_metadata); + + static bool IsEmptyBuffer(const char* buffer, int size) { + return std::all_of(buffer, buffer + size, + [](const char byte) { return byte == 0; }); + } + + // Helper function to get stored proto size from the metadata. 
+ // Metadata format: 8 bits magic + 24 bits size + static int GetProtoSize(int metadata) { return metadata & 0x00FFFFFF; } + + // Helper function to get stored proto magic from the metadata. + // Metadata format: 8 bits magic + 24 bits size + static uint8_t GetProtoMagic(int metadata) { return metadata >> 24; } + + // Magic number added in front of every proto. Used when reading out protos + // as a first check for corruption in each entry in the file. Even if there is + // a corruption, the best we can do is roll back to our last recovery point + // and throw away un-flushed data. We can discard/reuse this byte if needed so + // that we have 4 bytes to store the size of protos, and increase the size of + // protos we support. + static constexpr uint8_t kProtoMagic = 0x5C; + + // Our internal max for protos. + // + // WARNING: Changing this to a larger number may invalidate our assumption + // that that proto size can safely be stored in the last 3 bytes of the proto + // header. + static constexpr int kMaxProtoSize = (1 << 24) - 1; // 16MiB + static_assert(kMaxProtoSize <= 0x00FFFFFF, + "kMaxProtoSize doesn't fit in 3 bytes"); + + // Level of compression, BEST_SPEED = 1, BEST_COMPRESSION = 9 + static constexpr int kDeflateCompressionLevel = 3; + + // Chunks of the file to mmap at a time, so we don't mmap the entire file. 
+ // Only used on 32-bit devices + static constexpr int kMmapChunkSize = 4 * 1024 * 1024; // 4MiB + + ScopedFd fd_; + const Filesystem* const filesystem_; + const std::string file_path_; + std::unique_ptr<Header> header_; +}; + +template <typename ProtoT> +constexpr uint8_t PortableFileBackedProtoLog<ProtoT>::kProtoMagic; + +template <typename ProtoT> +PortableFileBackedProtoLog<ProtoT>::PortableFileBackedProtoLog( + const Filesystem* filesystem, const std::string& file_path, + std::unique_ptr<Header> header) + : filesystem_(filesystem), + file_path_(file_path), + header_(std::move(header)) { + fd_.reset(filesystem_->OpenForAppend(file_path.c_str())); +} + +template <typename ProtoT> +PortableFileBackedProtoLog<ProtoT>::~PortableFileBackedProtoLog() { + if (!PersistToDisk().ok()) { + ICING_LOG(WARNING) << "Error persisting to disk during destruction of " + "PortableFileBackedProtoLog: " + << file_path_; + } +} + +template <typename ProtoT> +libtextclassifier3::StatusOr< + typename PortableFileBackedProtoLog<ProtoT>::CreateResult> +PortableFileBackedProtoLog<ProtoT>::Create(const Filesystem* filesystem, + const std::string& file_path, + const Options& options) { + if (options.max_proto_size <= 0) { + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "options.max_proto_size must be greater than 0, was %d", + options.max_proto_size)); + } + + // Since we store the proto_size in 3 bytes, we can only support protos of up + // to 16MiB. 
+ if (options.max_proto_size > kMaxProtoSize) { + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "options.max_proto_size must be under 16MiB, was %d", + options.max_proto_size)); + } + + if (!filesystem->FileExists(file_path.c_str())) { + return InitializeNewFile(filesystem, file_path, options); + } + + int64_t file_size = filesystem->GetFileSize(file_path.c_str()); + if (file_size == Filesystem::kBadFileSize) { + return absl_ports::InternalError( + absl_ports::StrCat("Bad file size '", file_path, "'")); + } + + if (file_size == 0) { + return InitializeNewFile(filesystem, file_path, options); + } + + return InitializeExistingFile(filesystem, file_path, options, file_size); +} + +template <typename ProtoT> +libtextclassifier3::StatusOr< + typename PortableFileBackedProtoLog<ProtoT>::CreateResult> +PortableFileBackedProtoLog<ProtoT>::InitializeNewFile( + const Filesystem* filesystem, const std::string& file_path, + const Options& options) { + // Grow to the minimum reserved bytes for the header. 
+ if (!filesystem->Truncate(file_path.c_str(), kHeaderReservedBytes)) { + return absl_ports::InternalError( + absl_ports::StrCat("Failed to initialize file size: ", file_path)); + } + + // Create the header + std::unique_ptr<Header> header = std::make_unique<Header>(); + header->SetCompressFlag(options.compress); + header->SetMaxProtoSize(options.max_proto_size); + header->SetHeaderChecksum(header->CalculateHeaderChecksum()); + + if (!filesystem->Write(file_path.c_str(), header.get(), sizeof(Header))) { + return absl_ports::InternalError( + absl_ports::StrCat("Failed to write header for file: ", file_path)); + } + + CreateResult create_result = { + std::unique_ptr<PortableFileBackedProtoLog<ProtoT>>( + new PortableFileBackedProtoLog<ProtoT>(filesystem, file_path, + std::move(header))), + /*data_loss=*/DataLoss::NONE, /*recalculated_checksum=*/false}; + + return create_result; +} + +template <typename ProtoT> +libtextclassifier3::StatusOr< + typename PortableFileBackedProtoLog<ProtoT>::CreateResult> +PortableFileBackedProtoLog<ProtoT>::InitializeExistingFile( + const Filesystem* filesystem, const std::string& file_path, + const Options& options, int64_t file_size) { + bool header_changed = false; + if (file_size < kHeaderReservedBytes) { + return absl_ports::InternalError( + absl_ports::StrCat("File header too short for: ", file_path)); + } + + std::unique_ptr<Header> header = std::make_unique<Header>(); + if (!filesystem->PRead(file_path.c_str(), header.get(), sizeof(Header), + /*offset=*/0)) { + return absl_ports::InternalError( + absl_ports::StrCat("Failed to read header for file: ", file_path)); + } + + // Make sure the header is still valid before we use any of its values. This + // is covered by the header_checksum check below, but this is a quick check + // that can save us from an extra crc computation. 
+ if (header->GetMagic() != Header::kMagic) { + return absl_ports::InternalError( + absl_ports::StrCat("Invalid header kMagic for file: ", file_path)); + } + + if (header->GetHeaderChecksum() != header->CalculateHeaderChecksum()) { + return absl_ports::InternalError( + absl_ports::StrCat("Invalid header checksum for: ", file_path)); + } + + if (header->GetFileFormatVersion() != Header::kFileFormatVersion) { + // If this changes, we might need to handle a migration rather than throwing + // an error. + return absl_ports::InternalError( + absl_ports::StrCat("Invalid header file format version: ", file_path)); + } + + if (header->GetCompressFlag() != options.compress) { + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "Inconsistent compress option, expected %d, actual %d", + header->GetCompressFlag(), options.compress)); + } + + int32_t existing_max_proto_size = header->GetMaxProtoSize(); + if (existing_max_proto_size > options.max_proto_size) { + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "Max proto size cannot be smaller than previous " + "instantiations, previous size %d, wanted size %d", + header->GetMaxProtoSize(), options.max_proto_size)); + } else if (existing_max_proto_size < options.max_proto_size) { + // It's fine if our new max size is greater than our previous one. Existing + // data is still valid. + header->SetMaxProtoSize(options.max_proto_size); + header_changed = true; + } + + DataLoss data_loss = DataLoss::NONE; + + // If we have any documents in our tail, get rid of them since they're not in + // our checksum. Our checksum reflects content up to the rewind offset. 
+ if (file_size > header->GetRewindOffset()) { + if (!filesystem->Truncate(file_path.c_str(), header->GetRewindOffset())) { + return absl_ports::InternalError(IcingStringUtil::StringPrintf( + "Failed to truncate '%s' to size %lld", file_path.data(), + static_cast<long long>(header->GetRewindOffset()))); + }; + data_loss = DataLoss::PARTIAL; + } + + bool recalculated_checksum = false; + + // If our dirty flag is set, that means we might have crashed in the middle of + // erasing a proto. This could have happened anywhere between: + // A. Set dirty flag to true and update header checksum + // B. Erase the proto + // C. Set dirty flag to false, update log checksum, update header checksum + // + // Scenario 1: We went down between A and B. Maybe our dirty flag is a + // false alarm and we can keep all our data. + // + // Scenario 2: We went down between B and C. Our data is compromised and + // we need to throw everything out. + if (header->GetDirtyFlag()) { + // Recompute the log's checksum to detect which scenario we're in. + ICING_ASSIGN_OR_RETURN( + Crc32 calculated_log_checksum, + ComputeChecksum(filesystem, file_path, Crc32(), + /*start=*/kHeaderReservedBytes, /*end=*/file_size)); + + if (header->GetLogChecksum() != calculated_log_checksum.Get()) { + // Still doesn't match, we're in Scenario 2. Throw out all our data now + // and initialize as a new instance. + ICING_ASSIGN_OR_RETURN(CreateResult create_result, + InitializeNewFile(filesystem, file_path, options)); + create_result.data_loss = DataLoss::COMPLETE; + create_result.recalculated_checksum = true; + return create_result; + } + // Otherwise we're good, checksum matches our contents so continue + // initializing like normal. + recalculated_checksum = true; + + // Update our header. 
+ header->SetDirtyFlag(false); + header_changed = true; + } + + if (header_changed) { + header->SetHeaderChecksum(header->CalculateHeaderChecksum()); + + if (!filesystem->PWrite(file_path.c_str(), /*offset=*/0, header.get(), + sizeof(Header))) { + return absl_ports::InternalError( + absl_ports::StrCat("Failed to update header to: ", file_path)); + } + } + + CreateResult create_result = { + std::unique_ptr<PortableFileBackedProtoLog<ProtoT>>( + new PortableFileBackedProtoLog<ProtoT>(filesystem, file_path, + std::move(header))), + data_loss, recalculated_checksum}; + + return create_result; +} + +template <typename ProtoT> +libtextclassifier3::StatusOr<Crc32> +PortableFileBackedProtoLog<ProtoT>::ComputeChecksum( + const Filesystem* filesystem, const std::string& file_path, + Crc32 initial_crc, int64_t start, int64_t end) { + auto mmapped_file = MemoryMappedFile(*filesystem, file_path, + MemoryMappedFile::Strategy::READ_ONLY); + Crc32 new_crc(initial_crc.Get()); + + if (start < 0) { + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "Starting checksum offset of file '%s' must be greater than 0, was " + "%lld", + file_path.c_str(), static_cast<long long>(start))); + } + + if (end < start) { + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "Ending checksum offset of file '%s' must be greater than start " + "'%lld', was '%lld'", + file_path.c_str(), static_cast<long long>(start), + static_cast<long long>(end))); + } + + int64_t file_size = filesystem->GetFileSize(file_path.c_str()); + if (end > file_size) { + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "Ending checksum offset of file '%s' must be within " + "file size of %lld, was %lld", + file_path.c_str(), static_cast<long long>(file_size), + static_cast<long long>(end))); + } + + Architecture architecture = GetArchitecture(); + switch (architecture) { + case Architecture::BIT_64: { + // Don't mmap in chunks here since mmapping can be 
harmful on 64-bit + // devices where mmap/munmap calls need the mmap write semaphore, which + // blocks mmap/munmap/mprotect and all page faults from executing while + // they run. On 64-bit devices, this doesn't actually load into memory, it + // just makes the file faultable. So the whole file should be ok. + // b/185822878. + ICING_RETURN_IF_ERROR(mmapped_file.Remap(start, end - start)); + auto mmap_str = std::string_view(mmapped_file.region(), end - start); + new_crc.Append(mmap_str); + break; + } + case Architecture::BIT_32: + [[fallthrough]]; + case Architecture::UNKNOWN: { + // 32-bit devices only have 4GB of RAM. Mmap in chunks to not use up too + // much memory at once. If we're unknown, then also chunk it because we're + // not sure what the device can handle. + for (int i = start; i < end; i += kMmapChunkSize) { + // Don't read past the file size. + int next_chunk_size = kMmapChunkSize; + if ((i + kMmapChunkSize) >= end) { + next_chunk_size = end - i; + } + + ICING_RETURN_IF_ERROR(mmapped_file.Remap(i, next_chunk_size)); + + auto mmap_str = + std::string_view(mmapped_file.region(), next_chunk_size); + new_crc.Append(mmap_str); + } + break; + } + } + + return new_crc; +} + +template <typename ProtoT> +libtextclassifier3::StatusOr<int64_t> +PortableFileBackedProtoLog<ProtoT>::WriteProto(const ProtoT& proto) { + int64_t proto_size = proto.ByteSizeLong(); + int32_t host_order_metadata; + int64_t current_position = filesystem_->GetCurrentPosition(fd_.get()); + + if (proto_size > header_->GetMaxProtoSize()) { + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "proto_size, %lld, was too large to write. Max is %d", + static_cast<long long>(proto_size), header_->GetMaxProtoSize())); + } + + // At this point, we've guaranteed that proto_size is under kMaxProtoSize + // (see + // ::Create), so we can safely store it in an int. 
+ int final_size = 0; + + std::string proto_str; + google::protobuf::io::StringOutputStream proto_stream(&proto_str); + + if (header_->GetCompressFlag()) { + google::protobuf::io::GzipOutputStream::Options options; + options.format = google::protobuf::io::GzipOutputStream::ZLIB; + options.compression_level = kDeflateCompressionLevel; + + google::protobuf::io::GzipOutputStream compressing_stream(&proto_stream, + options); + + bool success = proto.SerializeToZeroCopyStream(&compressing_stream) && + compressing_stream.Close(); + + if (!success) { + return absl_ports::InternalError("Error compressing proto."); + } + + final_size = proto_str.size(); + + // In case the compressed proto is larger than the original proto, we also + // can't write it. + if (final_size > header_->GetMaxProtoSize()) { + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "Compressed proto size, %d, was greater than " + "max_proto_size, %d", + final_size, header_->GetMaxProtoSize())); + } + } else { + // Serialize the proto directly into the write buffer at an offset of the + // metadata. + proto.SerializeToZeroCopyStream(&proto_stream); + final_size = proto_str.size(); + } + + // 1st byte for magic, next 3 bytes for proto size. 
+ host_order_metadata = (kProtoMagic << 24) | final_size; + + // Actually write metadata, has to be done after we know the possibly + // compressed proto size + ICING_RETURN_IF_ERROR( + WriteProtoMetadata(filesystem_, fd_.get(), host_order_metadata)); + + // Write the serialized proto + if (!filesystem_->Write(fd_.get(), proto_str.data(), proto_str.size())) { + return absl_ports::InternalError( + absl_ports::StrCat("Failed to write proto to: ", file_path_)); + } + + return current_position; +} + +template <typename ProtoT> +libtextclassifier3::StatusOr<ProtoT> +PortableFileBackedProtoLog<ProtoT>::ReadProto(int64_t file_offset) const { + int64_t file_size = filesystem_->GetFileSize(fd_.get()); + MemoryMappedFile mmapped_file(*filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_ONLY); + if (file_offset >= file_size) { + // file_size points to the next byte to write at, so subtract one to get + // the inclusive, actual size of file. + return absl_ports::OutOfRangeError( + IcingStringUtil::StringPrintf("Trying to read from a location, %lld, " + "out of range of the file size, %lld", + static_cast<long long>(file_offset), + static_cast<long long>(file_size - 1))); + } + + // Read out the metadata + ICING_ASSIGN_OR_RETURN( + int32_t metadata, + ReadProtoMetadata(&mmapped_file, file_offset, file_size)); + + // Copy out however many bytes it says the proto is + int stored_size = GetProtoSize(metadata); + + ICING_RETURN_IF_ERROR( + mmapped_file.Remap(file_offset + sizeof(metadata), stored_size)); + + if (IsEmptyBuffer(mmapped_file.region(), mmapped_file.region_size())) { + return absl_ports::NotFoundError("The proto data has been erased."); + } + + google::protobuf::io::ArrayInputStream proto_stream( + mmapped_file.mutable_region(), stored_size); + + // Deserialize proto + ProtoT proto; + if (header_->GetCompressFlag()) { + google::protobuf::io::GzipInputStream decompress_stream(&proto_stream); + proto.ParseFromZeroCopyStream(&decompress_stream); + } else { + 
proto.ParseFromZeroCopyStream(&proto_stream); + } + + return proto; +} + +template <typename ProtoT> +libtextclassifier3::Status PortableFileBackedProtoLog<ProtoT>::EraseProto( + int64_t file_offset) { + int64_t file_size = filesystem_->GetFileSize(fd_.get()); + if (file_offset >= file_size) { + // file_size points to the next byte to write at, so subtract one to get + // the inclusive, actual size of file. + return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf( + "Trying to erase data at a location, %lld, " + "out of range of the file size, %lld", + static_cast<long long>(file_offset), + static_cast<long long>(file_size - 1))); + } + + MemoryMappedFile mmapped_file( + *filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC); + + // Read out the metadata + ICING_ASSIGN_OR_RETURN( + int32_t metadata, + ReadProtoMetadata(&mmapped_file, file_offset, file_size)); + + ICING_RETURN_IF_ERROR(mmapped_file.Remap(file_offset + sizeof(metadata), + GetProtoSize(metadata))); + + // We need to update the crc checksum if the erased area is before the + // rewind position. + int32_t new_crc; + int64_t erased_proto_offset = file_offset + sizeof(metadata); + if (erased_proto_offset < header_->GetRewindOffset()) { + // Set to "dirty" before we start writing anything. + header_->SetDirtyFlag(true); + header_->SetHeaderChecksum(header_->CalculateHeaderChecksum()); + if (!filesystem_->PWrite(fd_.get(), /*offset=*/0, header_.get(), + sizeof(Header))) { + return absl_ports::InternalError(absl_ports::StrCat( + "Failed to update dirty bit of header to: ", file_path_)); + } + + // We need to calculate [original string xor 0s]. + // The xored string is the same as the original string because 0 xor 0 = + // 0, 1 xor 0 = 1. 
+ const std::string_view xored_str(mmapped_file.region(), + mmapped_file.region_size()); + + Crc32 crc(header_->GetLogChecksum()); + ICING_ASSIGN_OR_RETURN( + new_crc, crc.UpdateWithXor( + xored_str, + /*full_data_size=*/header_->GetRewindOffset() - + kHeaderReservedBytes, + /*position=*/erased_proto_offset - kHeaderReservedBytes)); + } + + // Clear the region. + memset(mmapped_file.mutable_region(), '\0', mmapped_file.region_size()); + + // If we cleared something in our checksummed area, we should update our + // checksum and reset our dirty bit. + if (erased_proto_offset < header_->GetRewindOffset()) { + header_->SetDirtyFlag(false); + header_->SetLogChecksum(new_crc); + header_->SetHeaderChecksum(header_->CalculateHeaderChecksum()); + + if (!filesystem_->PWrite(fd_.get(), /*offset=*/0, header_.get(), + sizeof(Header))) { + return absl_ports::InternalError( + absl_ports::StrCat("Failed to update header to: ", file_path_)); + } + } + + return libtextclassifier3::Status::OK; +} + +template <typename ProtoT> +libtextclassifier3::StatusOr<int64_t> +PortableFileBackedProtoLog<ProtoT>::GetDiskUsage() const { + int64_t size = filesystem_->GetDiskUsage(file_path_.c_str()); + if (size == Filesystem::kBadFileSize) { + return absl_ports::InternalError("Failed to get disk usage of proto log"); + } + return size; +} + +template <typename ProtoT> +libtextclassifier3::StatusOr<int64_t> +PortableFileBackedProtoLog<ProtoT>::GetElementsFileSize() const { + int64_t total_file_size = filesystem_->GetFileSize(file_path_.c_str()); + if (total_file_size == Filesystem::kBadFileSize) { + return absl_ports::InternalError( + "Failed to get file size of elments in the proto log"); + } + return total_file_size - kHeaderReservedBytes; +} + +template <typename ProtoT> +PortableFileBackedProtoLog<ProtoT>::Iterator::Iterator( + const Filesystem& filesystem, const std::string& file_path, + int64_t initial_offset) + : mmapped_file_(filesystem, file_path, + MemoryMappedFile::Strategy::READ_ONLY), 
+ initial_offset_(initial_offset), + current_offset_(kInvalidOffset), + file_size_(filesystem.GetFileSize(file_path.c_str())) { + if (file_size_ == Filesystem::kBadFileSize) { + // Fails all Advance() calls + file_size_ = 0; + } +} + +template <typename ProtoT> +libtextclassifier3::Status +PortableFileBackedProtoLog<ProtoT>::Iterator::Advance() { + if (current_offset_ == kInvalidOffset) { + // First Advance() call + current_offset_ = initial_offset_; + } else { + // Jumps to the next proto position + ICING_ASSIGN_OR_RETURN( + int32_t metadata, + ReadProtoMetadata(&mmapped_file_, current_offset_, file_size_)); + current_offset_ += sizeof(metadata) + GetProtoSize(metadata); + } + + if (current_offset_ < file_size_) { + return libtextclassifier3::Status::OK; + } else { + return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf( + "The next proto offset, %lld, is out of file range [0, %lld)", + static_cast<long long>(current_offset_), + static_cast<long long>(file_size_))); + } +} + +template <typename ProtoT> +int64_t PortableFileBackedProtoLog<ProtoT>::Iterator::GetOffset() { + return current_offset_; +} + +template <typename ProtoT> +typename PortableFileBackedProtoLog<ProtoT>::Iterator +PortableFileBackedProtoLog<ProtoT>::GetIterator() { + return Iterator(*filesystem_, file_path_, + /*initial_offset=*/kHeaderReservedBytes); +} + +template <typename ProtoT> +libtextclassifier3::StatusOr<int32_t> +PortableFileBackedProtoLog<ProtoT>::ReadProtoMetadata( + MemoryMappedFile* mmapped_file, int64_t file_offset, int64_t file_size) { + // Checks file_offset + if (file_offset >= file_size) { + return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf( + "offset, %lld, is out of file range [0, %lld)", + static_cast<long long>(file_offset), + static_cast<long long>(file_size))); + } + int32_t portable_metadata; + int metadata_size = sizeof(portable_metadata); + if (file_offset + metadata_size >= file_size) { + return 
absl_ports::InternalError(IcingStringUtil::StringPrintf( + "Wrong metadata offset %lld, metadata doesn't fit in " + "with file range [0, %lld)", + static_cast<long long>(file_offset), + static_cast<long long>(file_size))); + } + + // Reads metadata + ICING_RETURN_IF_ERROR(mmapped_file->Remap(file_offset, metadata_size)); + memcpy(&portable_metadata, mmapped_file->region(), metadata_size); + + // Need to switch it back to host order endianness after reading from disk. + int32_t host_order_metadata = GNetworkToHostL(portable_metadata); + + // Checks magic number + uint8_t stored_k_proto_magic = GetProtoMagic(host_order_metadata); + if (stored_k_proto_magic != kProtoMagic) { + return absl_ports::InternalError(IcingStringUtil::StringPrintf( + "Failed to read kProtoMagic, expected %d, actual %d", kProtoMagic, + stored_k_proto_magic)); + } + + return host_order_metadata; +} + +template <typename ProtoT> +libtextclassifier3::Status +PortableFileBackedProtoLog<ProtoT>::WriteProtoMetadata( + const Filesystem* filesystem, int fd, int32_t host_order_metadata) { + // Convert it into portable endian format before writing to disk + int32_t portable_metadata = GHostToNetworkL(host_order_metadata); + int portable_metadata_size = sizeof(portable_metadata); + + // Write metadata + if (!filesystem->Write(fd, &portable_metadata, portable_metadata_size)) { + return absl_ports::InternalError( + absl_ports::StrCat("Failed to write proto metadata.")); + } + + return libtextclassifier3::Status::OK; +} + +template <typename ProtoT> +libtextclassifier3::Status PortableFileBackedProtoLog<ProtoT>::PersistToDisk() { + int64_t file_size = filesystem_->GetFileSize(file_path_.c_str()); + if (file_size == header_->GetRewindOffset()) { + // No new protos appended, don't need to update the checksum. 
+ return libtextclassifier3::Status::OK; + } + + ICING_ASSIGN_OR_RETURN(Crc32 crc, ComputeChecksum()); + + header_->SetLogChecksum(crc.Get()); + header_->SetRewindOffset(file_size); + header_->SetHeaderChecksum(header_->CalculateHeaderChecksum()); + + if (!filesystem_->PWrite(fd_.get(), /*offset=*/0, header_.get(), + sizeof(Header)) || + !filesystem_->DataSync(fd_.get())) { + return absl_ports::InternalError( + absl_ports::StrCat("Failed to update header to: ", file_path_)); + } + + return libtextclassifier3::Status::OK; +} + +template <typename ProtoT> +libtextclassifier3::StatusOr<Crc32> +PortableFileBackedProtoLog<ProtoT>::ComputeChecksum() { + int64_t file_size = filesystem_->GetFileSize(file_path_.c_str()); + int64_t new_content_size = file_size - header_->GetRewindOffset(); + Crc32 crc; + if (new_content_size == 0) { + // No new protos appended, return cached checksum + return Crc32(header_->GetLogChecksum()); + } else if (new_content_size < 0) { + // File shrunk, recalculate the entire checksum. + ICING_ASSIGN_OR_RETURN( + crc, + ComputeChecksum(filesystem_, file_path_, Crc32(), + /*start=*/kHeaderReservedBytes, /*end=*/file_size)); + } else { + // Append new changes to the existing checksum. + ICING_ASSIGN_OR_RETURN( + crc, ComputeChecksum( + filesystem_, file_path_, Crc32(header_->GetLogChecksum()), + /*start=*/header_->GetRewindOffset(), /*end=*/file_size)); + } + return crc; +} + +} // namespace lib +} // namespace icing + +#endif // ICING_FILE_PORTABLE_FILE_BACKED_PROTO_LOG_H_ diff --git a/icing/file/portable-file-backed-proto-log_benchmark.cc b/icing/file/portable-file-backed-proto-log_benchmark.cc new file mode 100644 index 0000000..f83ccd6 --- /dev/null +++ b/icing/file/portable-file-backed-proto-log_benchmark.cc @@ -0,0 +1,343 @@ +// Copyright (C) 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <cstdint> +#include <random> + +#include "testing/base/public/benchmark.h" +#include "gmock/gmock.h" +#include "icing/document-builder.h" +#include "icing/file/filesystem.h" +#include "icing/file/portable-file-backed-proto-log.h" +#include "icing/legacy/core/icing-string-util.h" +#include "icing/proto/document.pb.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/random-string.h" +#include "icing/testing/tmp-directory.h" + +// go/microbenchmarks +// +// To build and run on a local machine: +// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt +// icing/file:portable-file-backed-proto-log_benchmark +// +// $ blaze-bin/icing/file/portable-file-backed-proto-log_benchmark +// --benchmarks=all +// +// +// To build and run on an Android device (must be connected and rooted): +// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1" +// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt +// icing/file:portable-file-backed-proto-log_benchmark +// +// $ adb root +// +// $ adb push +// blaze-bin/icing/file/portable-file-backed-proto-log_benchmark +// /data/local/tmp/ +// +// $ adb shell /data/local/tmp/portable-file-backed-proto-log-benchmark +// --benchmarks=all + +namespace icing { +namespace lib { + +namespace { + +static void BM_Write(benchmark::State& state) { + const Filesystem filesystem; + int string_length = state.range(0); + const std::string file_path = IcingStringUtil::StringPrintf( + "%s%s%d%s", GetTestTempDir().c_str(), "/proto_", string_length, ".log"); + int max_proto_size = (1 << 24) - 1; 
// 16 MiB + bool compress = true; + + // Make sure it doesn't already exist. + filesystem.DeleteFile(file_path.c_str()); + + auto proto_log = PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem, file_path, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress, max_proto_size)) + .ValueOrDie() + .proto_log; + + DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); + + std::default_random_engine random; + const std::string rand_str = + RandomString(kAlNumAlphabet, string_length, &random); + + auto document_properties = document.add_properties(); + document_properties->set_name("string property"); + document_properties->add_string_values(rand_str); + + for (auto _ : state) { + testing::DoNotOptimize(proto_log->WriteProto(document)); + } + state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * + string_length); + + // Cleanup after ourselves + filesystem.DeleteFile(file_path.c_str()); +} +BENCHMARK(BM_Write) + ->Arg(1) + ->Arg(32) + ->Arg(512) + ->Arg(1024) + ->Arg(4 * 1024) + ->Arg(8 * 1024) + ->Arg(16 * 1024) + ->Arg(32 * 1024) + ->Arg(256 * 1024) + ->Arg(2 * 1024 * 1024) + ->Arg(8 * 1024 * 1024) + ->Arg(15 * 1024 * 1024); // We do 15MiB here since our max proto size is + // 16MiB, and we need some extra space for the + // rest of the document properties + +static void BM_Read(benchmark::State& state) { + const Filesystem filesystem; + int string_length = state.range(0); + const std::string file_path = IcingStringUtil::StringPrintf( + "%s%s%d%s", GetTestTempDir().c_str(), "/proto_", string_length, ".log"); + int max_proto_size = (1 << 24) - 1; // 16 MiB + bool compress = true; + + // Make sure it doesn't already exist. 
+ filesystem.DeleteFile(file_path.c_str()); + + auto proto_log = PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem, file_path, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress, max_proto_size)) + .ValueOrDie() + .proto_log; + + DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); + + std::default_random_engine random; + const std::string rand_str = + RandomString(kAlNumAlphabet, string_length, &random); + + auto document_properties = document.add_properties(); + document_properties->set_name("string property"); + document_properties->add_string_values(rand_str); + + ICING_ASSERT_OK_AND_ASSIGN(int64_t write_offset, + proto_log->WriteProto(document)); + + for (auto _ : state) { + testing::DoNotOptimize(proto_log->ReadProto(write_offset)); + } + state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * + string_length); + + // Cleanup after ourselves + filesystem.DeleteFile(file_path.c_str()); +} +BENCHMARK(BM_Read) + ->Arg(1) + ->Arg(32) + ->Arg(512) + ->Arg(1024) + ->Arg(4 * 1024) + ->Arg(8 * 1024) + ->Arg(16 * 1024) + ->Arg(32 * 1024) + ->Arg(256 * 1024) + ->Arg(2 * 1024 * 1024) + ->Arg(8 * 1024 * 1024) + ->Arg(15 * 1024 * 1024); // We do 15MiB here since our max proto size is + // 16MiB, and we need some extra space for the + // rest of the document properties + // +static void BM_Erase(benchmark::State& state) { + const Filesystem filesystem; + const std::string file_path = IcingStringUtil::StringPrintf( + "%s%s", GetTestTempDir().c_str(), "/proto.log"); + int max_proto_size = (1 << 24) - 1; // 16 MiB + bool compress = true; + + // Make sure it doesn't already exist. 
+ filesystem.DeleteFile(file_path.c_str()); + + auto proto_log = PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem, file_path, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress, max_proto_size)) + .ValueOrDie() + .proto_log; + + DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); + + std::default_random_engine random; + const std::string rand_str = RandomString(kAlNumAlphabet, /*len=*/1, &random); + + auto document_properties = document.add_properties(); + document_properties->set_name("string property"); + document_properties->add_string_values(rand_str); + + for (auto _ : state) { + state.PauseTiming(); + ICING_ASSERT_OK_AND_ASSIGN(int64_t write_offset, + proto_log->WriteProto(document)); + state.ResumeTiming(); + + testing::DoNotOptimize(proto_log->EraseProto(write_offset)); + } + + // Cleanup after ourselves + filesystem.DeleteFile(file_path.c_str()); +} +BENCHMARK(BM_Erase); + +static void BM_ComputeChecksum(benchmark::State& state) { + const Filesystem filesystem; + const std::string file_path = GetTestTempDir() + "/proto.log"; + int max_proto_size = (1 << 24) - 1; // 16 MiB + bool compress = true; + + // Make sure it doesn't already exist. 
+ filesystem.DeleteFile(file_path.c_str()); + + auto proto_log = PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem, file_path, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress, max_proto_size)) + .ValueOrDie() + .proto_log; + + DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); + + // Make each document 1KiB + int string_length = 1024; + std::default_random_engine random; + const std::string rand_str = + RandomString(kAlNumAlphabet, string_length, &random); + + auto document_properties = document.add_properties(); + document_properties->set_name("string property"); + document_properties->add_string_values(rand_str); + + int num_docs = state.range(0); + for (int i = 0; i < num_docs; ++i) { + ICING_ASSERT_OK(proto_log->WriteProto(document)); + } + + for (auto _ : state) { + testing::DoNotOptimize(proto_log->ComputeChecksum()); + } + + // Cleanup after ourselves + filesystem.DeleteFile(file_path.c_str()); +} +BENCHMARK(BM_ComputeChecksum)->Range(1024, 1 << 20); + +static void BM_ComputeChecksumWithCachedChecksum(benchmark::State& state) { + const Filesystem filesystem; + const std::string file_path = GetTestTempDir() + "/proto.log"; + int max_proto_size = (1 << 24) - 1; // 16 MiB + bool compress = true; + + // Make sure it doesn't already exist. 
+ filesystem.DeleteFile(file_path.c_str()); + + auto proto_log = PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem, file_path, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress, max_proto_size)) + .ValueOrDie() + .proto_log; + + DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); + + // Make the document 1KiB + int string_length = 1024; + std::default_random_engine random; + const std::string rand_str = + RandomString(kAlNumAlphabet, string_length, &random); + + auto document_properties = document.add_properties(); + document_properties->set_name("string property"); + document_properties->add_string_values(rand_str); + + // Write some content and persist. This should update our cached checksum to + // include the document. + ICING_ASSERT_OK(proto_log->WriteProto(document)); + ICING_ASSERT_OK(proto_log->PersistToDisk()); + + // This ComputeChecksum call shouldn't need to do any computation since we can + // reuse our cached checksum. + for (auto _ : state) { + testing::DoNotOptimize(proto_log->ComputeChecksum()); + } + + // Cleanup after ourselves + filesystem.DeleteFile(file_path.c_str()); +} +BENCHMARK(BM_ComputeChecksumWithCachedChecksum); + +static void BM_ComputeChecksumOnlyForTail(benchmark::State& state) { + const Filesystem filesystem; + const std::string file_path = GetTestTempDir() + "/proto.log"; + int max_proto_size = (1 << 24) - 1; // 16 MiB + bool compress = true; + + // Make sure it doesn't already exist. 
+ filesystem.DeleteFile(file_path.c_str()); + + auto proto_log = PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem, file_path, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress, max_proto_size)) + .ValueOrDie() + .proto_log; + + DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); + + // Make the document 1KiB + int string_length = 1024; + std::default_random_engine random; + const std::string rand_str = + RandomString(kAlNumAlphabet, string_length, &random); + + auto document_properties = document.add_properties(); + document_properties->set_name("string property"); + document_properties->add_string_values(rand_str); + + // Write some content and persist. This should update our cached checksum to + // include the document. + ICING_ASSERT_OK(proto_log->WriteProto(document)); + ICING_ASSERT_OK(proto_log->PersistToDisk()); + + // Write another proto into the tail, but it's not included in our cached + // checksum since we didn't call persist. + ICING_ASSERT_OK(proto_log->WriteProto(document)); + + // ComputeChecksum should be calculating the checksum of the tail and adding + // it to the cached checksum we have. + for (auto _ : state) { + testing::DoNotOptimize(proto_log->ComputeChecksum()); + } + + // Cleanup after ourselves + filesystem.DeleteFile(file_path.c_str()); +} +BENCHMARK(BM_ComputeChecksumOnlyForTail); + +} // namespace +} // namespace lib +} // namespace icing diff --git a/icing/file/portable-file-backed-proto-log_test.cc b/icing/file/portable-file-backed-proto-log_test.cc new file mode 100644 index 0000000..b5fee4b --- /dev/null +++ b/icing/file/portable-file-backed-proto-log_test.cc @@ -0,0 +1,1071 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/file/portable-file-backed-proto-log.h" + +#include <cstdint> +#include <cstdlib> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/document-builder.h" +#include "icing/file/filesystem.h" +#include "icing/file/mock-filesystem.h" +#include "icing/portable/equals-proto.h" +#include "icing/proto/document.pb.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/tmp-directory.h" + +namespace icing { +namespace lib { + +namespace { + +using ::icing::lib::portable_equals_proto::EqualsProto; +using ::testing::A; +using ::testing::Eq; +using ::testing::Gt; +using ::testing::HasSubstr; +using ::testing::Not; +using ::testing::NotNull; +using ::testing::Pair; +using ::testing::Return; + +using Header = PortableFileBackedProtoLog<DocumentProto>::Header; + +Header ReadHeader(Filesystem filesystem, const std::string& file_path) { + Header header; + filesystem.PRead(file_path.c_str(), &header, sizeof(Header), + /*offset=*/0); + return header; +} + +void WriteHeader(Filesystem filesystem, const std::string& file_path, + Header& header) { + filesystem.Write(file_path.c_str(), &header, sizeof(Header)); +} + +class PortableFileBackedProtoLogTest : public ::testing::Test { + protected: + // Adds a user-defined default construct because a const member variable may + // make the compiler accidentally delete the default constructor. 
+ // https://stackoverflow.com/a/47368753 + PortableFileBackedProtoLogTest() {} + + void SetUp() override { + file_path_ = GetTestTempDir() + "/proto_log"; + filesystem_.DeleteFile(file_path_.c_str()); + } + + void TearDown() override { filesystem_.DeleteFile(file_path_.c_str()); } + + const Filesystem filesystem_; + std::string file_path_; + bool compress_ = true; + int64_t max_proto_size_ = 256 * 1024; // 256 KiB +}; + +TEST_F(PortableFileBackedProtoLogTest, Initialize) { + // max_proto_size must be greater than 0 + int invalid_max_proto_size = 0; + ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, invalid_max_proto_size)), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); + + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + EXPECT_THAT(create_result.proto_log, NotNull()); + EXPECT_FALSE(create_result.has_data_loss()); + EXPECT_FALSE(create_result.recalculated_checksum); + + // Can't recreate the same file with different options. + ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + !compress_, max_proto_size_)), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); +} + +TEST_F(PortableFileBackedProtoLogTest, ReservedSpaceForHeader) { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + + // With no protos written yet, the log should be minimum the size of the + // reserved header space. 
+ ASSERT_EQ(filesystem_.GetFileSize(file_path_.c_str()), + PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes); +} + +TEST_F(PortableFileBackedProtoLogTest, WriteProtoTooLarge) { + int max_proto_size = 1; + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); + + // Proto is too large for the max_proto_size_in + ASSERT_THAT(proto_log->WriteProto(document), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); +} + +TEST_F(PortableFileBackedProtoLogTest, ReadProtoWrongKProtoMagic) { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Write a proto + DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); + + ICING_ASSERT_OK_AND_ASSIGN(int64_t file_offset, + proto_log->WriteProto(document)); + + // The 4 bytes of metadata that just doesn't have the same kProtoMagic + // specified in file-backed-proto-log.h + uint32_t wrong_magic = 0x7E000000; + + // Sanity check that we opened the file correctly + int fd = filesystem_.OpenForWrite(file_path_.c_str()); + ASSERT_GT(fd, 0); + + // Write the wrong kProtoMagic in, kProtoMagics are stored at the beginning of + // a proto entry. 
+ filesystem_.PWrite(fd, file_offset, &wrong_magic, sizeof(wrong_magic)); + + ASSERT_THAT(proto_log->ReadProto(file_offset), + StatusIs(libtextclassifier3::StatusCode::INTERNAL)); +} + +TEST_F(PortableFileBackedProtoLogTest, ReadWriteUncompressedProto) { + int last_offset; + { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + /*compress_in=*/false, max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Write the first proto + DocumentProto document1 = + DocumentBuilder().SetKey("namespace1", "uri1").Build(); + + ICING_ASSERT_OK_AND_ASSIGN(int written_position, + proto_log->WriteProto(document1)); + + int document1_offset = written_position; + + // Check that what we read is what we wrote + ASSERT_THAT(proto_log->ReadProto(written_position), + IsOkAndHolds(EqualsProto(document1))); + + // Write a second proto that's close to the max size. Leave some room for + // the rest of the proto properties. + std::string long_str(max_proto_size_ - 1024, 'a'); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace2", "uri2") + .AddStringProperty("long_str", long_str) + .Build(); + + ICING_ASSERT_OK_AND_ASSIGN(written_position, + proto_log->WriteProto(document2)); + + int document2_offset = written_position; + last_offset = written_position; + ASSERT_GT(document2_offset, document1_offset); + + // Check the second proto + ASSERT_THAT(proto_log->ReadProto(written_position), + IsOkAndHolds(EqualsProto(document2))); + + ICING_ASSERT_OK(proto_log->PersistToDisk()); + } + + { + // Make a new proto_log with the same file_path, and make sure we + // can still write to the same underlying file. 
+ ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + /*compress_in=*/false, max_proto_size_))); + auto recreated_proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Write a third proto + DocumentProto document3 = + DocumentBuilder().SetKey("namespace3", "uri3").Build(); + + ASSERT_THAT(recreated_proto_log->WriteProto(document3), + IsOkAndHolds(Gt(last_offset))); + } +} + +TEST_F(PortableFileBackedProtoLogTest, ReadWriteCompressedProto) { + int last_offset; + + { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + /*compress_in=*/true, max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Write the first proto + DocumentProto document1 = + DocumentBuilder().SetKey("namespace1", "uri1").Build(); + + ICING_ASSERT_OK_AND_ASSIGN(int written_position, + proto_log->WriteProto(document1)); + + int document1_offset = written_position; + + // Check that what we read is what we wrote + ASSERT_THAT(proto_log->ReadProto(written_position), + IsOkAndHolds(EqualsProto(document1))); + + // Write a second proto that's close to the max size. Leave some room for + // the rest of the proto properties. 
+ std::string long_str(max_proto_size_ - 1024, 'a'); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace2", "uri2") + .AddStringProperty("long_str", long_str) + .Build(); + + ICING_ASSERT_OK_AND_ASSIGN(written_position, + proto_log->WriteProto(document2)); + + int document2_offset = written_position; + last_offset = written_position; + ASSERT_GT(document2_offset, document1_offset); + + // Check the second proto + ASSERT_THAT(proto_log->ReadProto(written_position), + IsOkAndHolds(EqualsProto(document2))); + + ICING_ASSERT_OK(proto_log->PersistToDisk()); + } + + { + // Make a new proto_log with the same file_path, and make sure we + // can still write to the same underlying file. + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + /*compress_in=*/true, max_proto_size_))); + auto recreated_proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Write a third proto + DocumentProto document3 = + DocumentBuilder().SetKey("namespace3", "uri3").Build(); + + ASSERT_THAT(recreated_proto_log->WriteProto(document3), + IsOkAndHolds(Gt(last_offset))); + } +} + +TEST_F(PortableFileBackedProtoLogTest, CorruptHeader) { + { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_))); + auto recreated_proto_log = std::move(create_result.proto_log); + EXPECT_FALSE(create_result.has_data_loss()); + } + + int corrupt_checksum = 24; + + // Write the corrupted header + Header header = ReadHeader(filesystem_, file_path_); + header.SetHeaderChecksum(corrupt_checksum); + WriteHeader(filesystem_, file_path_, header); + + { + // Reinitialize 
the same proto_log + ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_)), + StatusIs(libtextclassifier3::StatusCode::INTERNAL, + HasSubstr("Invalid header checksum"))); + } +} + +TEST_F(PortableFileBackedProtoLogTest, DifferentMagic) { + { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_))); + auto recreated_proto_log = std::move(create_result.proto_log); + EXPECT_FALSE(create_result.has_data_loss()); + + // Corrupt the magic that's stored at the beginning of the header. + int invalid_magic = -1; + ASSERT_THAT(invalid_magic, Not(Eq(Header::kMagic))); + + // Write the corrupted header + Header header = ReadHeader(filesystem_, file_path_); + header.SetMagic(invalid_magic); + WriteHeader(filesystem_, file_path_, header); + } + + { + // Reinitialize the same proto_log + ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_)), + StatusIs(libtextclassifier3::StatusCode::INTERNAL, + HasSubstr("Invalid header kMagic"))); + } +} + +TEST_F(PortableFileBackedProtoLogTest, + UnableToDetectCorruptContentWithoutDirtyBit) { + // This is intentional that we can't detect corruption. We're trading off + // earlier corruption detection for lower initialization latency. By not + // calculating the checksum on initialization, we can initialize much faster, + // but at the cost of detecting corruption. Note that even if we did detect + // corruption, there was nothing we could've done except throw an error to + // clients. 
We'll still do that, but at some later point when the log is + // attempting to be accessed and we can't actually deserialize a proto from + // it. See the description in cl/374278280 for more details. + + { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + EXPECT_FALSE(create_result.has_data_loss()); + + DocumentProto document = + DocumentBuilder().SetKey("namespace1", "uri1").Build(); + + // Write and persist an document. + ICING_ASSERT_OK_AND_ASSIGN(int64_t document_offset, + proto_log->WriteProto(document)); + ICING_ASSERT_OK(proto_log->PersistToDisk()); + + // "Corrupt" the content written in the log. + document.set_uri("invalid"); + std::string serialized_document = document.SerializeAsString(); + ASSERT_TRUE(filesystem_.PWrite(file_path_.c_str(), document_offset, + serialized_document.data(), + serialized_document.size())); + } + + { + // We can recover, and we don't have data loss. + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + EXPECT_FALSE(create_result.has_data_loss()); + EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE)); + EXPECT_FALSE(create_result.recalculated_checksum); + + // We still have the corrupted content in our file, we didn't throw + // everything out. 
+ EXPECT_THAT( + filesystem_.GetFileSize(file_path_.c_str()), + Gt(PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes)); + } +} + +TEST_F(PortableFileBackedProtoLogTest, + DetectAndThrowOutCorruptContentWithDirtyBit) { + { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + DocumentProto document = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .AddStringProperty("string_property", "foo", "bar") + .Build(); + + // Write and persist the protos + ICING_ASSERT_OK_AND_ASSIGN(int64_t document_offset, + proto_log->WriteProto(document)); + + // Check that what we read is what we wrote + ASSERT_THAT(proto_log->ReadProto(document_offset), + IsOkAndHolds(EqualsProto(document))); + } + + { + // "Corrupt" the content written in the log. Make the corrupt document + // smaller than our original one so we don't accidentally write past our + // file. + DocumentProto document = + DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build(); + std::string serialized_document = document.SerializeAsString(); + ASSERT_TRUE(filesystem_.PWrite( + file_path_.c_str(), + PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes, + serialized_document.data(), serialized_document.size())); + + Header header = ReadHeader(filesystem_, file_path_); + + // Set dirty bit to true to reflect that something changed in the log. 
+ header.SetDirtyFlag(true); + header.SetHeaderChecksum(header.CalculateHeaderChecksum()); + + WriteHeader(filesystem_, file_path_, header); + } + + { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + EXPECT_TRUE(create_result.has_data_loss()); + EXPECT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE)); + + // We had to recalculate the checksum to detect the corruption. + EXPECT_TRUE(create_result.recalculated_checksum); + + // We lost everything, file size is back down to the header. + EXPECT_THAT( + filesystem_.GetFileSize(file_path_.c_str()), + Eq(PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes)); + + // At least the log is no longer dirty. + Header header = ReadHeader(filesystem_, file_path_); + EXPECT_FALSE(header.GetDirtyFlag()); + } +} + +TEST_F(PortableFileBackedProtoLogTest, DirtyBitFalseAlarmKeepsData) { + DocumentProto document = + DocumentBuilder().SetKey("namespace1", "uri1").Build(); + int64_t document_offset; + { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Write and persist the first proto + ICING_ASSERT_OK_AND_ASSIGN(document_offset, + proto_log->WriteProto(document)); + + // Check that what we read is what we wrote + ASSERT_THAT(proto_log->ReadProto(document_offset), + IsOkAndHolds(EqualsProto(document))); + } + + { + Header header = ReadHeader(filesystem_, file_path_); + + // Simulate the dirty flag set as true, but no data 
has been changed yet. + // Maybe we crashed between writing the dirty flag and erasing a proto. + header.SetDirtyFlag(true); + header.SetHeaderChecksum(header.CalculateHeaderChecksum()); + + WriteHeader(filesystem_, file_path_, header); + } + + { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + EXPECT_FALSE(create_result.has_data_loss()); + + // Even though nothing changed, the false alarm dirty bit should have + // triggered us to recalculate our checksum. + EXPECT_TRUE(create_result.recalculated_checksum); + + // Check that our document still exists even though dirty bit was true. + EXPECT_THAT(proto_log->ReadProto(document_offset), + IsOkAndHolds(EqualsProto(document))); + + Header header = ReadHeader(filesystem_, file_path_); + EXPECT_FALSE(header.GetDirtyFlag()); + } +} + +TEST_F(PortableFileBackedProtoLogTest, + PersistToDiskKeepsPersistedDataAndTruncatesExtraData) { + DocumentProto document1 = + DocumentBuilder().SetKey("namespace1", "uri1").Build(); + DocumentProto document2 = + DocumentBuilder().SetKey("namespace2", "uri2").Build(); + int document1_offset, document2_offset; + int log_size; + + { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Write and persist the first proto + ICING_ASSERT_OK_AND_ASSIGN(document1_offset, + proto_log->WriteProto(document1)); + ICING_ASSERT_OK(proto_log->PersistToDisk()); + + // Write, but don't explicitly persist the second 
proto + ICING_ASSERT_OK_AND_ASSIGN(document2_offset, + proto_log->WriteProto(document2)); + + // Check that what we read is what we wrote + ASSERT_THAT(proto_log->ReadProto(document1_offset), + IsOkAndHolds(EqualsProto(document1))); + ASSERT_THAT(proto_log->ReadProto(document2_offset), + IsOkAndHolds(EqualsProto(document2))); + + log_size = filesystem_.GetFileSize(file_path_.c_str()); + ASSERT_GT(log_size, 0); + + // PersistToDisk happens implicitly during the destructor. + } + + { + // The header rewind position and checksum aren't updated in this "system + // crash" scenario. + + std::string bad_proto = + "some incomplete proto that we didn't finish writing before the " + "system crashed"; + filesystem_.PWrite(file_path_.c_str(), log_size, bad_proto.data(), + bad_proto.size()); + + // Double check that we actually wrote something to the underlying file + ASSERT_GT(filesystem_.GetFileSize(file_path_.c_str()), log_size); + } + + { + // We can recover, but we have data loss + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_TRUE(create_result.has_data_loss()); + ASSERT_THAT(create_result.data_loss, Eq(DataLoss::PARTIAL)); + ASSERT_FALSE(create_result.recalculated_checksum); + + // Check that everything was persisted across instances + ASSERT_THAT(proto_log->ReadProto(document1_offset), + IsOkAndHolds(EqualsProto(document1))); + ASSERT_THAT(proto_log->ReadProto(document2_offset), + IsOkAndHolds(EqualsProto(document2))); + + // We correctly rewound to the last good state. 
+ ASSERT_EQ(log_size, filesystem_.GetFileSize(file_path_.c_str())); + } +} + +TEST_F(PortableFileBackedProtoLogTest, + DirtyBitIsFalseAfterPutAndPersistToDisk) { + { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + DocumentProto document = + DocumentBuilder().SetKey("namespace1", "uri1").Build(); + + // Write and persist the first proto + ICING_ASSERT_OK_AND_ASSIGN(int64_t document_offset, + proto_log->WriteProto(document)); + ICING_ASSERT_OK(proto_log->PersistToDisk()); + + // Check that what we read is what we wrote + ASSERT_THAT(proto_log->ReadProto(document_offset), + IsOkAndHolds(EqualsProto(document))); + } + + { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_))); + + // We previously persisted to disk so everything should be in a perfect + // state. 
+ EXPECT_FALSE(create_result.has_data_loss()); + EXPECT_FALSE(create_result.recalculated_checksum); + + Header header = ReadHeader(filesystem_, file_path_); + EXPECT_FALSE(header.GetDirtyFlag()); + } +} + +TEST_F(PortableFileBackedProtoLogTest, + DirtyBitIsFalseAfterDeleteAndPersistToDisk) { + { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + DocumentProto document = + DocumentBuilder().SetKey("namespace1", "uri1").Build(); + + // Write, delete, and persist the first proto + ICING_ASSERT_OK_AND_ASSIGN(int64_t document_offset, + proto_log->WriteProto(document)); + ICING_ASSERT_OK(proto_log->EraseProto(document_offset)); + ICING_ASSERT_OK(proto_log->PersistToDisk()); + + // The proto has been erased. + ASSERT_THAT(proto_log->ReadProto(document_offset), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + } + + { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_))); + + // We previously persisted to disk so everything should be in a perfect + // state. 
+ EXPECT_FALSE(create_result.has_data_loss()); + EXPECT_FALSE(create_result.recalculated_checksum); + + Header header = ReadHeader(filesystem_, file_path_); + EXPECT_FALSE(header.GetDirtyFlag()); + } +} + +TEST_F(PortableFileBackedProtoLogTest, DirtyBitIsFalseAfterPutAndDestructor) { + { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + DocumentProto document = + DocumentBuilder().SetKey("namespace1", "uri1").Build(); + + // Write and persist the first proto + ICING_ASSERT_OK_AND_ASSIGN(int64_t document_offset, + proto_log->WriteProto(document)); + + // Check that what we read is what we wrote + ASSERT_THAT(proto_log->ReadProto(document_offset), + IsOkAndHolds(EqualsProto(document))); + + // PersistToDisk is implicitly called as part of the destructor and + // PersistToDisk will clear the dirty bit. + } + + { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_))); + + // We previously persisted to disk so everything should be in a perfect + // state. 
+ EXPECT_FALSE(create_result.has_data_loss()); + EXPECT_FALSE(create_result.recalculated_checksum); + + Header header = ReadHeader(filesystem_, file_path_); + EXPECT_FALSE(header.GetDirtyFlag()); + } +} + +TEST_F(PortableFileBackedProtoLogTest, + DirtyBitIsFalseAfterDeleteAndDestructor) { + { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + DocumentProto document = + DocumentBuilder().SetKey("namespace1", "uri1").Build(); + + // Write, delete, and persist the first proto + ICING_ASSERT_OK_AND_ASSIGN(int64_t document_offset, + proto_log->WriteProto(document)); + ICING_ASSERT_OK(proto_log->EraseProto(document_offset)); + + // The proto has been erased. + ASSERT_THAT(proto_log->ReadProto(document_offset), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + + // PersistToDisk is implicitly called as part of the destructor and + // PersistToDisk will clear the dirty bit. + } + + { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_))); + + // We previously persisted to disk so everything should be in a perfect + // state. 
+ EXPECT_FALSE(create_result.has_data_loss()); + EXPECT_FALSE(create_result.recalculated_checksum); + + Header header = ReadHeader(filesystem_, file_path_); + EXPECT_FALSE(header.GetDirtyFlag()); + } +} + +TEST_F(PortableFileBackedProtoLogTest, Iterator) { + DocumentProto document1 = + DocumentBuilder().SetKey("namespace", "uri1").Build(); + DocumentProto document2 = + DocumentBuilder().SetKey("namespace", "uri2").Build(); + + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + { + // Empty iterator + auto iterator = proto_log->GetIterator(); + ASSERT_THAT(iterator.Advance(), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + } + + { + // Iterates through some documents + ICING_ASSERT_OK(proto_log->WriteProto(document1)); + ICING_ASSERT_OK(proto_log->WriteProto(document2)); + auto iterator = proto_log->GetIterator(); + // 1st proto + ICING_ASSERT_OK(iterator.Advance()); + ASSERT_THAT(proto_log->ReadProto(iterator.GetOffset()), + IsOkAndHolds(EqualsProto(document1))); + // 2nd proto + ICING_ASSERT_OK(iterator.Advance()); + ASSERT_THAT(proto_log->ReadProto(iterator.GetOffset()), + IsOkAndHolds(EqualsProto(document2))); + // Tries to advance + ASSERT_THAT(iterator.Advance(), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + } + + { + // Iterator with bad filesystem + MockFilesystem mock_filesystem; + ON_CALL(mock_filesystem, GetFileSize(A<const char*>())) + .WillByDefault(Return(Filesystem::kBadFileSize)); + PortableFileBackedProtoLog<DocumentProto>::Iterator bad_iterator( + mock_filesystem, file_path_, /*initial_offset=*/0); + ASSERT_THAT(bad_iterator.Advance(), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + } +} + 
+TEST_F(PortableFileBackedProtoLogTest, ComputeChecksum) { + DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); + Crc32 checksum; + + { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + ICING_EXPECT_OK(proto_log->WriteProto(document)); + + ICING_ASSERT_OK_AND_ASSIGN(checksum, proto_log->ComputeChecksum()); + + // Calling it twice with no changes should get us the same checksum + EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Eq(checksum))); + } + + { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Checksum should be consistent across instances + EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Eq(checksum))); + + // PersistToDisk shouldn't affect the checksum value + ICING_EXPECT_OK(proto_log->PersistToDisk()); + EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Eq(checksum))); + + // Check that modifying the log leads to a different checksum + ICING_EXPECT_OK(proto_log->WriteProto(document)); + EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Not(Eq(checksum)))); + } +} + +TEST_F(PortableFileBackedProtoLogTest, EraseProtoShouldSetZero) { + DocumentProto document1 = + DocumentBuilder().SetKey("namespace", "uri1").Build(); + + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + 
PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Writes and erases proto + ICING_ASSERT_OK_AND_ASSIGN(int64_t document1_offset, + proto_log->WriteProto(document1)); + ICING_ASSERT_OK(proto_log->EraseProto(document1_offset)); + + // Checks if the erased area is set to 0. + int64_t file_size = filesystem_.GetFileSize(file_path_.c_str()); + MemoryMappedFile mmapped_file(filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_ONLY); + + // document1_offset + sizeof(int) is the start byte of the proto where + // sizeof(int) is the size of the proto metadata. + mmapped_file.Remap(document1_offset + sizeof(int), file_size - 1); + for (size_t i = 0; i < mmapped_file.region_size(); ++i) { + ASSERT_THAT(mmapped_file.region()[i], Eq(0)); + } +} + +TEST_F(PortableFileBackedProtoLogTest, EraseProtoShouldReturnNotFound) { + DocumentProto document1 = + DocumentBuilder().SetKey("namespace", "uri1").Build(); + DocumentProto document2 = + DocumentBuilder().SetKey("namespace", "uri2").Build(); + + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Writes 2 protos + ICING_ASSERT_OK_AND_ASSIGN(int64_t document1_offset, + proto_log->WriteProto(document1)); + ICING_ASSERT_OK_AND_ASSIGN(int64_t document2_offset, + proto_log->WriteProto(document2)); + + // Erases the first proto + ICING_ASSERT_OK(proto_log->EraseProto(document1_offset)); + + // The first proto has been erased. 
+ ASSERT_THAT(proto_log->ReadProto(document1_offset), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + // The second proto should be returned. + ASSERT_THAT(proto_log->ReadProto(document2_offset), + IsOkAndHolds(EqualsProto(document2))); +} + +TEST_F(PortableFileBackedProtoLogTest, ChecksumShouldBeCorrectWithErasedProto) { + DocumentProto document1 = + DocumentBuilder().SetKey("namespace", "uri1").Build(); + DocumentProto document2 = + DocumentBuilder().SetKey("namespace", "uri2").Build(); + DocumentProto document3 = + DocumentBuilder().SetKey("namespace", "uri3").Build(); + DocumentProto document4 = + DocumentBuilder().SetKey("namespace", "uri4").Build(); + + int64_t document2_offset; + int64_t document3_offset; + + { + // Erase data after the rewind position. This won't update the checksum + // immediately. + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Writes 3 protos + ICING_ASSERT_OK_AND_ASSIGN(int64_t document1_offset, + proto_log->WriteProto(document1)); + ICING_ASSERT_OK_AND_ASSIGN(document2_offset, + proto_log->WriteProto(document2)); + ICING_ASSERT_OK_AND_ASSIGN(document3_offset, + proto_log->WriteProto(document3)); + + // Erases the 1st proto, checksum won't be updated immediately because the + // rewind position is 0. + ICING_ASSERT_OK(proto_log->EraseProto(document1_offset)); + + EXPECT_THAT(proto_log->ComputeChecksum(), + IsOkAndHolds(Eq(Crc32(2175574628)))); + } // New checksum is updated in destructor. + + { + // Erase data before the rewind position. This will update the checksum + // immediately. 
+ ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Erases the 2nd proto that is now before the rewind position. Checksum + // is updated. + ICING_ASSERT_OK(proto_log->EraseProto(document2_offset)); + + EXPECT_THAT(proto_log->ComputeChecksum(), + IsOkAndHolds(Eq(Crc32(790877774)))); + } + + { + // Append data and erase data before the rewind position. This will update + // the checksum twice: in EraseProto() and destructor. + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Append a new document which is after the rewind position. + ICING_ASSERT_OK(proto_log->WriteProto(document4)); + + // Erases the 3rd proto that is now before the rewind position. Checksum + // is updated. + ICING_ASSERT_OK(proto_log->EraseProto(document3_offset)); + + EXPECT_THAT(proto_log->ComputeChecksum(), + IsOkAndHolds(Eq(Crc32(2344803210)))); + } // Checksum is updated with the newly appended document. + + { + // A successful creation means that the checksum matches. 
+ ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + EXPECT_FALSE(create_result.has_data_loss()); + } +} + +} // namespace +} // namespace lib +} // namespace icing diff --git a/icing/icing-search-engine-with-icu-file_test.cc b/icing/icing-search-engine-with-icu-file_test.cc index 5a9327e..48e81e5 100644 --- a/icing/icing-search-engine-with-icu-file_test.cc +++ b/icing/icing-search-engine-with-icu-file_test.cc @@ -27,6 +27,7 @@ #include "icing/proto/search.pb.h" #include "icing/proto/status.pb.h" #include "icing/proto/term.pb.h" +#include "icing/schema-builder.h" #include "icing/testing/common-matchers.h" #include "icing/testing/tmp-directory.h" @@ -36,6 +37,14 @@ namespace { using ::icing::lib::portable_equals_proto::EqualsProto; using ::testing::Eq; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED = + PropertyConfigProto_Cardinality_Code_REQUIRED; + +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = + StringIndexingConfig_TokenizerType_Code_PLAIN; + +constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX; + std::string GetTestBaseDir() { return GetTestTempDir() + "/icing_with_icu_files"; } @@ -55,23 +64,6 @@ DocumentProto CreateMessageDocument(std::string name_space, std::string uri) { .Build(); } -SchemaProto CreateMessageSchema() { - SchemaProto schema; - auto type = schema.add_types(); - type->set_schema_type("Message"); - - auto body = type->add_properties(); - body->set_property_name("body"); - body->set_data_type(PropertyConfigProto::DataType::STRING); - body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - body->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::PREFIX); - 
body->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - - return schema; -} - ScoringSpecProto GetDefaultScoringSpec() { ScoringSpecProto scoring_spec; scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); @@ -81,15 +73,31 @@ ScoringSpecProto GetDefaultScoringSpec() { TEST(IcingSearchEngineWithIcuFileTest, ShouldInitialize) { IcingSearchEngine icing(GetDefaultIcingOptions()); EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK)); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(), - Eq(StatusProto::OK)); + + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); + EXPECT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK)); } TEST(IcingSearchEngineWithIcuFileTest, ShouldIndexAndSearch) { IcingSearchEngine icing(GetDefaultIcingOptions()); ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK)); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(), - Eq(StatusProto::OK)); + + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); + ASSERT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK)); DocumentProto document_one = CreateMessageDocument("namespace", "uri1"); ASSERT_THAT(icing.Put(document_one).status().code(), Eq(StatusProto::OK)); @@ -115,8 +123,8 @@ TEST(IcingSearchEngineWithIcuFileTest, ShouldIndexAndSearch) { // The token is a random number so we don't verify it. 
expected_search_result_proto.set_next_page_token( search_result_proto.next_page_token()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } } // namespace diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc index 791368a..1b7bd89 100644 --- a/icing/icing-search-engine.cc +++ b/icing/icing-search-engine.cc @@ -27,14 +27,18 @@ #include "icing/absl_ports/canonical_errors.h" #include "icing/absl_ports/mutex.h" #include "icing/absl_ports/str_cat.h" +#include "icing/file/destructible-file.h" +#include "icing/file/file-backed-proto.h" #include "icing/file/filesystem.h" #include "icing/index/hit/doc-hit-info.h" #include "icing/index/index-processor.h" #include "icing/index/index.h" #include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/legacy/index/icing-filesystem.h" +#include "icing/portable/endian.h" #include "icing/proto/document.pb.h" #include "icing/proto/initialize.pb.h" +#include "icing/proto/internal/optimize.pb.h" #include "icing/proto/logging.pb.h" #include "icing/proto/optimize.pb.h" #include "icing/proto/persist.pb.h" @@ -73,8 +77,14 @@ namespace { constexpr std::string_view kDocumentSubfolderName = "document_dir"; constexpr std::string_view kIndexSubfolderName = "index_dir"; constexpr std::string_view kSchemaSubfolderName = "schema_dir"; -constexpr std::string_view kIcingSearchEngineHeaderFilename = - "icing_search_engine_header"; +constexpr std::string_view kSetSchemaMarkerFilename = "set_schema_marker"; +constexpr std::string_view kInitMarkerFilename = "init_marker"; +constexpr std::string_view kOptimizeStatusFilename = "optimize_status"; + +// The maximum number of unsuccessful initialization attempts from the current +// state that we will tolerate before deleting all data and starting from a +// fresh state. 
+constexpr int kMaxUnsuccessfulInitAttempts = 5; libtextclassifier3::Status ValidateOptions( const IcingSearchEngineOptions& options) { @@ -94,6 +104,21 @@ libtextclassifier3::Status ValidateResultSpec( return absl_ports::InvalidArgumentError( "ResultSpecProto.num_per_page cannot be negative."); } + std::unordered_set<std::string> unique_namespaces; + for (const ResultSpecProto::ResultGrouping& result_grouping : + result_spec.result_groupings()) { + if (result_grouping.max_results() <= 0) { + return absl_ports::InvalidArgumentError( + "Cannot specify a result grouping with max results <= 0."); + } + for (const std::string& name_space : result_grouping.namespaces()) { + if (unique_namespaces.count(name_space) > 0) { + return absl_ports::InvalidArgumentError( + "Namespaces must be unique across result groups."); + } + unique_namespaces.insert(name_space); + } + } return libtextclassifier3::Status::OK; } @@ -119,10 +144,6 @@ IndexProcessor::Options CreateIndexProcessorOptions( return index_processor_options; } -std::string MakeHeaderFilename(const std::string& base_dir) { - return absl_ports::StrCat(base_dir, "/", kIcingSearchEngineHeaderFilename); -} - // Document store files are in a standalone subfolder for easier file // management. We can delete and recreate the subfolder and not touch/affect // anything else. 
@@ -151,6 +172,14 @@ std::string MakeSchemaDirectoryPath(const std::string& base_dir) { return absl_ports::StrCat(base_dir, "/", kSchemaSubfolderName); } +std::string MakeSetSchemaMarkerFilePath(const std::string& base_dir) { + return absl_ports::StrCat(base_dir, "/", kSetSchemaMarkerFilename); +} + +std::string MakeInitMarkerFilePath(const std::string& base_dir) { + return absl_ports::StrCat(base_dir, "/", kInitMarkerFilename); +} + void TransformStatus(const libtextclassifier3::Status& internal_status, StatusProto* status_proto) { StatusProto::Code code; @@ -238,15 +267,13 @@ IcingSearchEngine::IcingSearchEngine( filesystem_(std::move(filesystem)), icing_filesystem_(std::move(icing_filesystem)), clock_(std::move(clock)), - result_state_manager_(performance_configuration_.max_num_hits_per_query, - performance_configuration_.max_num_cache_results), jni_cache_(std::move(jni_cache)) { ICING_VLOG(1) << "Creating IcingSearchEngine in dir: " << options_.base_dir(); } IcingSearchEngine::~IcingSearchEngine() { if (initialized_) { - if (PersistToDisk().status().code() != StatusProto::OK) { + if (PersistToDisk(PersistType::FULL).status().code() != StatusProto::OK) { ICING_LOG(ERROR) << "Error persisting to disk in IcingSearchEngine destructor"; } @@ -261,6 +288,66 @@ InitializeResultProto IcingSearchEngine::Initialize() { return InternalInitialize(); } +void IcingSearchEngine::ResetMembers() { + schema_store_.reset(); + document_store_.reset(); + language_segmenter_.reset(); + normalizer_.reset(); + index_.reset(); +} + +libtextclassifier3::Status IcingSearchEngine::CheckInitMarkerFile( + InitializeStatsProto* initialize_stats) { + // Check to see if the marker file exists and if we've already passed our max + // number of init attempts. 
+ std::string marker_filepath = MakeInitMarkerFilePath(options_.base_dir()); + bool file_exists = filesystem_->FileExists(marker_filepath.c_str()); + int network_init_attempts = 0; + int host_init_attempts = 0; + + // Read the number of previous failed init attempts from the file. If it + // fails, then just assume the value is zero (the most likely reason for + // failure would be non-existence because the last init was successful + // anyways). + ScopedFd marker_file_fd(filesystem_->OpenForWrite(marker_filepath.c_str())); + libtextclassifier3::Status status; + if (file_exists && + filesystem_->PRead(marker_file_fd.get(), &network_init_attempts, + sizeof(network_init_attempts), /*offset=*/0)) { + host_init_attempts = GNetworkToHostL(network_init_attempts); + if (host_init_attempts > kMaxUnsuccessfulInitAttempts) { + // We're tried and failed to init too many times. We need to throw + // everything out and start from scratch. + ResetMembers(); + if (!filesystem_->DeleteDirectoryRecursively( + options_.base_dir().c_str())) { + return absl_ports::InternalError("Failed to delete icing base dir!"); + } + status = absl_ports::DataLossError( + "Encountered failed initialization limit. Cleared all data."); + host_init_attempts = 0; + } + } + + // Use network_init_attempts here because we might have set host_init_attempts + // to 0 if it exceeded the max threshold. + initialize_stats->set_num_previous_init_failures( + GNetworkToHostL(network_init_attempts)); + + ++host_init_attempts; + network_init_attempts = GHostToNetworkL(host_init_attempts); + // Write the updated number of attempts before we get started. 
+ if (!filesystem_->PWrite(marker_file_fd.get(), /*offset=*/0, + &network_init_attempts, + sizeof(network_init_attempts)) || + !filesystem_->DataSync(marker_file_fd.get())) { + return absl_ports::InternalError( + "Failed to write and sync init marker file"); + } + + return status; +} + InitializeResultProto IcingSearchEngine::InternalInitialize() { ICING_VLOG(1) << "Initializing IcingSearchEngine in dir: " << options_.base_dir(); @@ -270,8 +357,8 @@ InitializeResultProto IcingSearchEngine::InternalInitialize() { InitializeResultProto result_proto; StatusProto* result_status = result_proto.mutable_status(); - NativeInitializeStats* initialize_stats = - result_proto.mutable_native_initialize_stats(); + InitializeStatsProto* initialize_stats = + result_proto.mutable_initialize_stats(); if (initialized_) { // Already initialized. result_status->set_code(StatusProto::OK); @@ -281,87 +368,42 @@ InitializeResultProto IcingSearchEngine::InternalInitialize() { return result_proto; } - // Releases result / query cache if any - result_state_manager_.InvalidateAllResultStates(); - + // Now go ahead and try to initialize. libtextclassifier3::Status status = InitializeMembers(initialize_stats); - if (!status.ok()) { - TransformStatus(status, result_status); - initialize_stats->set_latency_ms( - initialize_timer->GetElapsedMilliseconds()); - return result_proto; - } - - // Even if each subcomponent initialized fine independently, we need to - // check if they're consistent with each other. - if (!CheckConsistency().ok()) { - // The total checksum doesn't match the stored value, it could be one of the - // following cases: - // 1. Icing is initialized the first time in this directory. - // 2. Non-checksumed changes have been made to some files. - if (index_->last_added_document_id() == kInvalidDocumentId && - document_store_->last_added_document_id() == kInvalidDocumentId && - absl_ports::IsNotFound(schema_store_->GetSchema().status())) { - // First time initialize. 
Not recovering but creating all the files. - // We need to explicitly clear the recovery-related fields because some - // sub-components may not be able to tell if the storage is being - // initialized the first time or has lost some files. Sub-components may - // already have set these fields in earlier steps. - *initialize_stats = NativeInitializeStats(); - status = RegenerateDerivedFiles(); + if (status.ok() || absl_ports::IsDataLoss(status)) { + // We successfully initialized. We should delete the init marker file to + // indicate a successful init. + std::string marker_filepath = MakeInitMarkerFilePath(options_.base_dir()); + if (!filesystem_->DeleteFile(marker_filepath.c_str())) { + status = absl_ports::InternalError("Failed to delete init marker file!"); } else { - ICING_VLOG(1) - << "IcingSearchEngine in inconsistent state, regenerating all " - "derived data"; - // Total checksum mismatch may not be the root cause of document store - // recovery. Preserve the root cause that was set by the document store. - bool should_log_document_store_recovery_cause = - initialize_stats->document_store_recovery_cause() == - NativeInitializeStats::NONE; - if (should_log_document_store_recovery_cause) { - initialize_stats->set_document_store_recovery_cause( - NativeInitializeStats::TOTAL_CHECKSUM_MISMATCH); - } - initialize_stats->set_index_restoration_cause( - NativeInitializeStats::TOTAL_CHECKSUM_MISMATCH); - status = RegenerateDerivedFiles(initialize_stats, - should_log_document_store_recovery_cause); - } - } else { - DocumentId last_stored_document_id = - document_store_->last_added_document_id(); - DocumentId last_indexed_document_id = index_->last_added_document_id(); - if (last_stored_document_id != last_indexed_document_id) { - if (last_stored_document_id == kInvalidDocumentId) { - // Document store is empty but index is not. Reset the index. 
- status = index_->Reset(); - } else { - // Index is inconsistent with the document store, we need to restore the - // index. - initialize_stats->set_index_restoration_cause( - NativeInitializeStats::INCONSISTENT_WITH_GROUND_TRUTH); - std::unique_ptr<Timer> index_restore_timer = clock_->GetNewTimer(); - status = RestoreIndexIfNeeded(); - initialize_stats->set_index_restoration_latency_ms( - index_restore_timer->GetElapsedMilliseconds()); - } + initialized_ = true; } } - - if (status.ok() || absl_ports::IsDataLoss(status)) { - initialized_ = true; - } TransformStatus(status, result_status); initialize_stats->set_latency_ms(initialize_timer->GetElapsedMilliseconds()); return result_proto; } libtextclassifier3::Status IcingSearchEngine::InitializeMembers( - NativeInitializeStats* initialize_stats) { + InitializeStatsProto* initialize_stats) { ICING_RETURN_ERROR_IF_NULL(initialize_stats); - ICING_RETURN_IF_ERROR(InitializeOptions()); + ICING_RETURN_IF_ERROR(ValidateOptions(options_)); + + // Make sure the base directory exists + if (!filesystem_->CreateDirectoryRecursively(options_.base_dir().c_str())) { + return absl_ports::InternalError(absl_ports::StrCat( + "Could not create directory: ", options_.base_dir())); + } + + // Check to see if the marker file exists and if we've already passed our max + // number of init attempts. + libtextclassifier3::Status status = CheckInitMarkerFile(initialize_stats); + if (!status.ok() && !absl_ports::IsDataLoss(status)) { + return status; + } + ICING_RETURN_IF_ERROR(InitializeSchemaStore(initialize_stats)); - ICING_RETURN_IF_ERROR(InitializeDocumentStore(initialize_stats)); // TODO(b/156383798) : Resolve how to specify the locale. 
language_segmenter_factory::SegmenterOptions segmenter_options( @@ -372,25 +414,86 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers( TC3_ASSIGN_OR_RETURN(normalizer_, normalizer_factory::Create(options_.max_token_length())); - ICING_RETURN_IF_ERROR(InitializeIndex(initialize_stats)); + std::string marker_filepath = + MakeSetSchemaMarkerFilePath(options_.base_dir()); + libtextclassifier3::Status index_init_status; + if (absl_ports::IsNotFound(schema_store_->GetSchema().status())) { + // The schema was either lost or never set before. Wipe out the doc store + // and index directories and initialize them from scratch. + const std::string doc_store_dir = + MakeDocumentDirectoryPath(options_.base_dir()); + const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir()); + if (!filesystem_->DeleteDirectoryRecursively(doc_store_dir.c_str()) || + !filesystem_->DeleteDirectoryRecursively(index_dir.c_str())) { + return absl_ports::InternalError(absl_ports::StrCat( + "Could not delete directories: ", index_dir, " and ", doc_store_dir)); + } + ICING_RETURN_IF_ERROR(InitializeDocumentStore( + /*force_recovery_and_revalidate_documents=*/false, initialize_stats)); + index_init_status = InitializeIndex(initialize_stats); + if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) { + return index_init_status; + } + } else if (filesystem_->FileExists(marker_filepath.c_str())) { + // If the marker file is still around then something wonky happened when we + // last tried to set the schema. + ICING_RETURN_IF_ERROR(InitializeDocumentStore( + /*force_recovery_and_revalidate_documents=*/true, initialize_stats)); + initialize_stats->set_document_store_recovery_cause( + InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC); + + // We're going to need to build the index from scratch. So just delete its + // files now. 
+ const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir()); + Index::Options index_options(index_dir, options_.index_merge_size()); + if (!filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) || + !filesystem_->CreateDirectoryRecursively(index_dir.c_str())) { + return absl_ports::InternalError( + absl_ports::StrCat("Could not recreate directory: ", index_dir)); + } + ICING_ASSIGN_OR_RETURN(index_, + Index::Create(index_options, filesystem_.get(), + icing_filesystem_.get())); - return libtextclassifier3::Status::OK; -} + std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer(); + IndexRestorationResult restore_result = RestoreIndexIfNeeded(); + index_init_status = std::move(restore_result.status); + // DATA_LOSS means that we have successfully initialized and re-added + // content to the index. Some indexed content was lost, but otherwise the + // index is in a valid state and can be queried. + if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) { + return index_init_status; + } -libtextclassifier3::Status IcingSearchEngine::InitializeOptions() { - ICING_RETURN_IF_ERROR(ValidateOptions(options_)); + // Delete the marker file to indicate that everything is now in sync with + // whatever changes were made to the schema. 
+ filesystem_->DeleteFile(marker_filepath.c_str()); - // Make sure the base directory exists - if (!filesystem_->CreateDirectoryRecursively(options_.base_dir().c_str())) { - return absl_ports::InternalError(absl_ports::StrCat( - "Could not create directory: ", options_.base_dir())); + initialize_stats->set_index_restoration_latency_ms( + restore_timer->GetElapsedMilliseconds()); + initialize_stats->set_index_restoration_cause( + InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC); + } else { + ICING_RETURN_IF_ERROR(InitializeDocumentStore( + /*force_recovery_and_revalidate_documents=*/false, initialize_stats)); + index_init_status = InitializeIndex(initialize_stats); + if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) { + return index_init_status; + } } - return libtextclassifier3::Status::OK; + if (status.ok()) { + status = index_init_status; + } + + result_state_manager_ = std::make_unique<ResultStateManager>( + performance_configuration_.max_num_total_hits, *document_store_); + + return status; } libtextclassifier3::Status IcingSearchEngine::InitializeSchemaStore( - NativeInitializeStats* initialize_stats) { + InitializeStatsProto* initialize_stats) { ICING_RETURN_ERROR_IF_NULL(initialize_stats); const std::string schema_store_dir = @@ -408,7 +511,8 @@ libtextclassifier3::Status IcingSearchEngine::InitializeSchemaStore( } libtextclassifier3::Status IcingSearchEngine::InitializeDocumentStore( - NativeInitializeStats* initialize_stats) { + bool force_recovery_and_revalidate_documents, + InitializeStatsProto* initialize_stats) { ICING_RETURN_ERROR_IF_NULL(initialize_stats); const std::string document_dir = @@ -420,15 +524,16 @@ libtextclassifier3::Status IcingSearchEngine::InitializeDocumentStore( } ICING_ASSIGN_OR_RETURN( DocumentStore::CreateResult create_result, - DocumentStore::Create(filesystem_.get(), document_dir, clock_.get(), - schema_store_.get(), initialize_stats)); + DocumentStore::Create( + filesystem_.get(), document_dir, 
clock_.get(), schema_store_.get(), + force_recovery_and_revalidate_documents, initialize_stats)); document_store_ = std::move(create_result.document_store); return libtextclassifier3::Status::OK; } libtextclassifier3::Status IcingSearchEngine::InitializeIndex( - NativeInitializeStats* initialize_stats) { + InitializeStatsProto* initialize_stats) { ICING_RETURN_ERROR_IF_NULL(initialize_stats); const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir()); @@ -439,6 +544,7 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex( } Index::Options index_options(index_dir, options_.index_merge_size()); + InitializeStatsProto::RecoveryCause recovery_cause; auto index_or = Index::Create(index_options, filesystem_.get(), icing_filesystem_.get()); if (!index_or.ok()) { @@ -448,88 +554,28 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex( absl_ports::StrCat("Could not recreate directory: ", index_dir)); } - initialize_stats->set_index_restoration_cause( - NativeInitializeStats::IO_ERROR); + recovery_cause = InitializeStatsProto::IO_ERROR; // Try recreating it from scratch and re-indexing everything. ICING_ASSIGN_OR_RETURN(index_, Index::Create(index_options, filesystem_.get(), icing_filesystem_.get())); - - std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer(); - ICING_RETURN_IF_ERROR(RestoreIndexIfNeeded()); - initialize_stats->set_index_restoration_latency_ms( - restore_timer->GetElapsedMilliseconds()); } else { // Index was created fine. index_ = std::move(index_or).ValueOrDie(); + // If a recover does have to happen, then it must be because the index is + // out of sync with the document store. 
+ recovery_cause = InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH; } - return libtextclassifier3::Status::OK; -} - -libtextclassifier3::Status IcingSearchEngine::CheckConsistency() { - if (!HeaderExists()) { - // Without a header file, we have no checksum and can't even detect - // inconsistencies - return absl_ports::NotFoundError("No header file found."); - } - - // Header does exist, verify that the header looks fine. - IcingSearchEngine::Header header; - if (!filesystem_->Read(MakeHeaderFilename(options_.base_dir()).c_str(), - &header, sizeof(header))) { - return absl_ports::InternalError(absl_ports::StrCat( - "Couldn't read: ", MakeHeaderFilename(options_.base_dir()))); - } - - if (header.magic != IcingSearchEngine::Header::kMagic) { - return absl_ports::InternalError( - absl_ports::StrCat("Invalid header kMagic for file: ", - MakeHeaderFilename(options_.base_dir()))); - } - - ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum()); - if (checksum.Get() != header.checksum) { - return absl_ports::InternalError( - "IcingSearchEngine checksum doesn't match"); - } - - return libtextclassifier3::Status::OK; -} - -libtextclassifier3::Status IcingSearchEngine::RegenerateDerivedFiles( - NativeInitializeStats* initialize_stats, bool log_document_store_stats) { - // Measure the latency of the data recovery. The cause of the recovery should - // be logged by the caller. - std::unique_ptr<Timer> timer = clock_->GetNewTimer(); - ICING_RETURN_IF_ERROR( - document_store_->UpdateSchemaStore(schema_store_.get())); - if (initialize_stats != nullptr && log_document_store_stats) { - initialize_stats->set_document_store_recovery_latency_ms( - timer->GetElapsedMilliseconds()); - } - // Restart timer. 
- timer = clock_->GetNewTimer(); - ICING_RETURN_IF_ERROR(index_->Reset()); - ICING_RETURN_IF_ERROR(RestoreIndexIfNeeded()); - if (initialize_stats != nullptr) { + std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer(); + IndexRestorationResult restore_result = RestoreIndexIfNeeded(); + if (restore_result.needed_restoration) { initialize_stats->set_index_restoration_latency_ms( - timer->GetElapsedMilliseconds()); - } - - const std::string header_file = - MakeHeaderFilename(options_.base_dir().c_str()); - if (HeaderExists()) { - if (!filesystem_->DeleteFile(header_file.c_str())) { - return absl_ports::InternalError( - absl_ports::StrCat("Unable to delete file: ", header_file)); - } + restore_timer->GetElapsedMilliseconds()); + initialize_stats->set_index_restoration_cause(recovery_cause); } - ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum()); - ICING_RETURN_IF_ERROR(UpdateHeader(checksum)); - - return libtextclassifier3::Status::OK; + return restore_result.status; } SetSchemaResultProto IcingSearchEngine::SetSchema( @@ -545,33 +591,40 @@ SetSchemaResultProto IcingSearchEngine::SetSchema( StatusProto* result_status = result_proto.mutable_status(); absl_ports::unique_lock l(&mutex_); + std::unique_ptr<Timer> timer = clock_->GetNewTimer(); if (!initialized_) { result_status->set_code(StatusProto::FAILED_PRECONDITION); result_status->set_message("IcingSearchEngine has not been initialized!"); - return result_proto; - } - - libtextclassifier3::Status status = SchemaUtil::Validate(new_schema); - if (!status.ok()) { - TransformStatus(status, result_status); + result_proto.set_latency_ms(timer->GetElapsedMilliseconds()); return result_proto; } auto lost_previous_schema_or = LostPreviousSchema(); if (!lost_previous_schema_or.ok()) { TransformStatus(lost_previous_schema_or.status(), result_status); + result_proto.set_latency_ms(timer->GetElapsedMilliseconds()); return result_proto; } bool lost_previous_schema = lost_previous_schema_or.ValueOrDie(); + std::string 
marker_filepath = + MakeSetSchemaMarkerFilePath(options_.base_dir()); + // Create the marker file indicating that we are going to apply a schema + // change. No need to write anything to the marker file - its existence is the + // only thing that matters. The marker file is used to indicate if we + // encountered a crash or a power loss while updating the schema and other + // files. So set it up to be deleted as long as we return from this function. + DestructibleFile marker_file(marker_filepath, filesystem_.get()); + auto set_schema_result_or = schema_store_->SetSchema( std::move(new_schema), ignore_errors_and_delete_documents); if (!set_schema_result_or.ok()) { TransformStatus(set_schema_result_or.status(), result_status); + result_proto.set_latency_ms(timer->GetElapsedMilliseconds()); return result_proto; } - const SchemaStore::SetSchemaResult set_schema_result = - set_schema_result_or.ValueOrDie(); + SchemaStore::SetSchemaResult set_schema_result = + std::move(set_schema_result_or).ValueOrDie(); for (const std::string& deleted_type : set_schema_result.schema_types_deleted_by_name) { @@ -583,6 +636,26 @@ SetSchemaResultProto IcingSearchEngine::SetSchema( result_proto.add_incompatible_schema_types(incompatible_type); } + for (const std::string& new_type : + set_schema_result.schema_types_new_by_name) { + result_proto.add_new_schema_types(std::move(new_type)); + } + + for (const std::string& compatible_type : + set_schema_result.schema_types_changed_fully_compatible_by_name) { + result_proto.add_fully_compatible_changed_schema_types( + std::move(compatible_type)); + } + + bool index_incompatible = + !set_schema_result.schema_types_index_incompatible_by_name.empty(); + for (const std::string& index_incompatible_type : + set_schema_result.schema_types_index_incompatible_by_name) { + result_proto.add_index_incompatible_changed_schema_types( + std::move(index_incompatible_type)); + } + + libtextclassifier3::Status status; if (set_schema_result.success) { if 
(lost_previous_schema) { // No previous schema to calculate a diff against. We have to go through @@ -590,6 +663,7 @@ SetSchemaResultProto IcingSearchEngine::SetSchema( status = document_store_->UpdateSchemaStore(schema_store_.get()); if (!status.ok()) { TransformStatus(status, result_status); + result_proto.set_latency_ms(timer->GetElapsedMilliseconds()); return result_proto; } } else if (!set_schema_result.old_schema_type_ids_changed.empty() || @@ -599,21 +673,28 @@ SetSchemaResultProto IcingSearchEngine::SetSchema( set_schema_result); if (!status.ok()) { TransformStatus(status, result_status); + result_proto.set_latency_ms(timer->GetElapsedMilliseconds()); return result_proto; } } - if (lost_previous_schema || set_schema_result.index_incompatible) { + if (lost_previous_schema || index_incompatible) { // Clears all index files status = index_->Reset(); if (!status.ok()) { TransformStatus(status, result_status); + result_proto.set_latency_ms(timer->GetElapsedMilliseconds()); return result_proto; } - status = RestoreIndexIfNeeded(); - if (!status.ok()) { + IndexRestorationResult restore_result = RestoreIndexIfNeeded(); + // DATA_LOSS means that we have successfully re-added content to the + // index. Some indexed content was lost, but otherwise the index is in a + // valid state and can be queried. 
+ if (!restore_result.status.ok() && + !absl_ports::IsDataLoss(restore_result.status)) { TransformStatus(status, result_status); + result_proto.set_latency_ms(timer->GetElapsedMilliseconds()); return result_proto; } } @@ -623,6 +704,8 @@ SetSchemaResultProto IcingSearchEngine::SetSchema( result_status->set_code(StatusProto::FAILED_PRECONDITION); result_status->set_message("Schema is incompatible."); } + + result_proto.set_latency_ms(timer->GetElapsedMilliseconds()); return result_proto; } @@ -682,8 +765,8 @@ PutResultProto IcingSearchEngine::Put(DocumentProto&& document) { PutResultProto result_proto; StatusProto* result_status = result_proto.mutable_status(); - NativePutDocumentStats* put_document_stats = - result_proto.mutable_native_put_document_stats(); + PutDocumentStatsProto* put_document_stats = + result_proto.mutable_put_document_stats(); // Lock must be acquired before validation because the DocumentStore uses // the schema file to validate, and the schema could be changed in @@ -833,8 +916,8 @@ DeleteResultProto IcingSearchEngine::Delete(const std::string_view name_space, return result_proto; } - NativeDeleteStats* delete_stats = result_proto.mutable_delete_stats(); - delete_stats->set_delete_type(NativeDeleteStats::DeleteType::SINGLE); + DeleteStatsProto* delete_stats = result_proto.mutable_delete_stats(); + delete_stats->set_delete_type(DeleteStatsProto::DeleteType::SINGLE); std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer(); // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR @@ -867,8 +950,8 @@ DeleteByNamespaceResultProto IcingSearchEngine::DeleteByNamespace( return delete_result; } - NativeDeleteStats* delete_stats = delete_result.mutable_delete_stats(); - delete_stats->set_delete_type(NativeDeleteStats::DeleteType::NAMESPACE); + DeleteStatsProto* delete_stats = delete_result.mutable_delete_stats(); + delete_stats->set_delete_type(DeleteStatsProto::DeleteType::NAMESPACE); std::unique_ptr<Timer> delete_timer = 
clock_->GetNewTimer(); // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR @@ -901,8 +984,8 @@ DeleteBySchemaTypeResultProto IcingSearchEngine::DeleteBySchemaType( return delete_result; } - NativeDeleteStats* delete_stats = delete_result.mutable_delete_stats(); - delete_stats->set_delete_type(NativeDeleteStats::DeleteType::SCHEMA_TYPE); + DeleteStatsProto* delete_stats = delete_result.mutable_delete_stats(); + delete_stats->set_delete_type(DeleteStatsProto::DeleteType::SCHEMA_TYPE); std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer(); // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR @@ -937,8 +1020,13 @@ DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery( return result_proto; } - NativeDeleteStats* delete_stats = result_proto.mutable_delete_stats(); - delete_stats->set_delete_type(NativeDeleteStats::DeleteType::QUERY); + DeleteByQueryStatsProto* delete_stats = + result_proto.mutable_delete_by_query_stats(); + delete_stats->set_query_length(search_spec.query().length()); + delete_stats->set_num_namespaces_filtered( + search_spec.namespace_filters_size()); + delete_stats->set_num_schema_types_filtered( + search_spec.schema_type_filters_size()); std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer(); libtextclassifier3::Status status = @@ -948,10 +1036,11 @@ DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery( return result_proto; } + std::unique_ptr<Timer> component_timer = clock_->GetNewTimer(); // Gets unordered results from query processor auto query_processor_or = QueryProcessor::Create( index_.get(), language_segmenter_.get(), normalizer_.get(), - document_store_.get(), schema_store_.get(), clock_.get()); + document_store_.get(), schema_store_.get()); if (!query_processor_or.ok()) { TransformStatus(query_processor_or.status(), result_status); return result_proto; @@ -966,9 +1055,13 @@ DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery( } QueryProcessor::QueryResults query_results 
= std::move(query_results_or).ValueOrDie(); + delete_stats->set_parse_query_latency_ms( + component_timer->GetElapsedMilliseconds()); ICING_VLOG(2) << "Deleting the docs that matched the query."; int num_deleted = 0; + + component_timer = clock_->GetNewTimer(); while (query_results.root_iterator->Advance().ok()) { ICING_VLOG(3) << "Deleting doc " << query_results.root_iterator->doc_hit_info().document_id(); @@ -980,6 +1073,14 @@ DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery( return result_proto; } } + delete_stats->set_document_removal_latency_ms( + component_timer->GetElapsedMilliseconds()); + int term_count = 0; + for (const auto& section_and_terms : query_results.query_terms) { + term_count += section_and_terms.second.size(); + } + delete_stats->set_num_terms(term_count); + if (num_deleted > 0) { result_proto.mutable_status()->set_code(StatusProto::OK); } else { @@ -992,7 +1093,8 @@ DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery( return result_proto; } -PersistToDiskResultProto IcingSearchEngine::PersistToDisk() { +PersistToDiskResultProto IcingSearchEngine::PersistToDisk( + PersistType::Code persist_type) { ICING_VLOG(1) << "Persisting data to disk"; PersistToDiskResultProto result_proto; @@ -1005,7 +1107,7 @@ PersistToDiskResultProto IcingSearchEngine::PersistToDisk() { return result_proto; } - auto status = InternalPersistToDisk(); + auto status = InternalPersistToDisk(persist_type); TransformStatus(status, result_status); return result_proto; } @@ -1029,11 +1131,18 @@ OptimizeResultProto IcingSearchEngine::Optimize() { return result_proto; } - // Releases result / query cache if any - result_state_manager_.InvalidateAllResultStates(); + std::unique_ptr<Timer> optimize_timer = clock_->GetNewTimer(); + OptimizeStatsProto* optimize_stats = result_proto.mutable_optimize_stats(); + int64_t before_size = filesystem_->GetDiskUsage(options_.base_dir().c_str()); + if (before_size != Filesystem::kBadFileSize) { + 
optimize_stats->set_storage_size_before(before_size); + } else { + // Set -1 as a sentinel value when failures occur. + optimize_stats->set_storage_size_before(-1); + } // Flushes data to disk before doing optimization - auto status = InternalPersistToDisk(); + auto status = InternalPersistToDisk(PersistType::FULL); if (!status.ok()) { TransformStatus(status, result_status); return result_proto; @@ -1041,7 +1150,11 @@ OptimizeResultProto IcingSearchEngine::Optimize() { // TODO(b/143646633): figure out if we need to optimize index and doc store // at the same time. - libtextclassifier3::Status optimization_status = OptimizeDocumentStore(); + std::unique_ptr<Timer> optimize_doc_store_timer = clock_->GetNewTimer(); + libtextclassifier3::Status optimization_status = + OptimizeDocumentStore(optimize_stats); + optimize_stats->set_document_store_optimize_latency_ms( + optimize_doc_store_timer->GetElapsedMilliseconds()); if (!optimization_status.ok() && !absl_ports::IsDataLoss(optimization_status)) { @@ -1055,6 +1168,7 @@ OptimizeResultProto IcingSearchEngine::Optimize() { // The status is either OK or DATA_LOSS. The optimized document store is // guaranteed to work, so we update index according to the new document store. + std::unique_ptr<Timer> optimize_index_timer = clock_->GetNewTimer(); libtextclassifier3::Status index_reset_status = index_->Reset(); if (!index_reset_status.ok()) { status = absl_ports::Annotate( @@ -1064,17 +1178,52 @@ OptimizeResultProto IcingSearchEngine::Optimize() { return result_proto; } - libtextclassifier3::Status index_restoration_status = RestoreIndexIfNeeded(); - if (!index_restoration_status.ok()) { + IndexRestorationResult index_restoration_status = RestoreIndexIfNeeded(); + optimize_stats->set_index_restoration_latency_ms( + optimize_index_timer->GetElapsedMilliseconds()); + // DATA_LOSS means that we have successfully re-added content to the index. 
+ // Some indexed content was lost, but otherwise the index is in a valid state + // and can be queried. + if (!index_restoration_status.status.ok() && + !absl_ports::IsDataLoss(index_restoration_status.status)) { status = absl_ports::Annotate( absl_ports::InternalError( "Failed to reindex documents after optimization."), - index_restoration_status.error_message()); + index_restoration_status.status.error_message()); TransformStatus(status, result_status); return result_proto; } + // Read the optimize status to get the time that we last ran. + std::string optimize_status_filename = + absl_ports::StrCat(options_.base_dir(), "/", kOptimizeStatusFilename); + FileBackedProto<OptimizeStatusProto> optimize_status_file( + *filesystem_, optimize_status_filename); + auto optimize_status_or = optimize_status_file.Read(); + int64_t current_time = clock_->GetSystemTimeMilliseconds(); + if (optimize_status_or.ok()) { + // If we have trouble reading the status or this is the first time that + // we've ever run, don't set this field. + optimize_stats->set_time_since_last_optimize_ms( + current_time - optimize_status_or.ValueOrDie() + ->last_successful_optimize_run_time_ms()); + } + + // Update the status for this run and write it. + auto optimize_status = std::make_unique<OptimizeStatusProto>(); + optimize_status->set_last_successful_optimize_run_time_ms(current_time); + optimize_status_file.Write(std::move(optimize_status)); + + int64_t after_size = filesystem_->GetDiskUsage(options_.base_dir().c_str()); + if (after_size != Filesystem::kBadFileSize) { + optimize_stats->set_storage_size_after(after_size); + } else { + // Set -1 as a sentinel value when failures occur. 
+ optimize_stats->set_storage_size_after(-1); + } + optimize_stats->set_latency_ms(optimize_timer->GetElapsedMilliseconds()); + TransformStatus(optimization_status, result_status); return result_proto; } @@ -1092,6 +1241,22 @@ GetOptimizeInfoResultProto IcingSearchEngine::GetOptimizeInfo() { return result_proto; } + // Read the optimize status to get the time that we last ran. + std::string optimize_status_filename = + absl_ports::StrCat(options_.base_dir(), "/", kOptimizeStatusFilename); + FileBackedProto<OptimizeStatusProto> optimize_status_file( + *filesystem_, optimize_status_filename); + auto optimize_status_or = optimize_status_file.Read(); + int64_t current_time = clock_->GetSystemTimeMilliseconds(); + + if (optimize_status_or.ok()) { + // If we have trouble reading the status or this is the first time that + // we've ever run, don't set this field. + result_proto.set_time_since_last_optimize_ms( + current_time - optimize_status_or.ValueOrDie() + ->last_successful_optimize_run_time_ms()); + } + // Get stats from DocumentStore auto doc_store_optimize_info_or = document_store_->GetOptimizeInfo(); if (!doc_store_optimize_info_or.ok()) { @@ -1127,74 +1292,41 @@ GetOptimizeInfoResultProto IcingSearchEngine::GetOptimizeInfo() { return result_proto; } -libtextclassifier3::Status IcingSearchEngine::InternalPersistToDisk() { - ICING_RETURN_IF_ERROR(schema_store_->PersistToDisk()); - ICING_RETURN_IF_ERROR(document_store_->PersistToDisk()); - ICING_RETURN_IF_ERROR(index_->PersistToDisk()); - - // Update the combined checksum and write to header file. - ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum()); - ICING_RETURN_IF_ERROR(UpdateHeader(checksum)); - - return libtextclassifier3::Status::OK; -} - -libtextclassifier3::StatusOr<Crc32> IcingSearchEngine::ComputeChecksum() { - Crc32 total_checksum; - // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN - // that can support error logging. 
- auto checksum_or = schema_store_->ComputeChecksum(); - if (!checksum_or.ok()) { - ICING_LOG(ERROR) << checksum_or.status().error_message() - << "Failed to compute checksum of SchemaStore"; - return checksum_or.status(); - } - - Crc32 schema_store_checksum = std::move(checksum_or).ValueOrDie(); - - // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN - // that can support error logging. - checksum_or = document_store_->ComputeChecksum(); - if (!checksum_or.ok()) { - ICING_LOG(ERROR) << checksum_or.status().error_message() - << "Failed to compute checksum of DocumentStore"; - return checksum_or.status(); +StorageInfoResultProto IcingSearchEngine::GetStorageInfo() { + StorageInfoResultProto result; + absl_ports::shared_lock l(&mutex_); + if (!initialized_) { + result.mutable_status()->set_code(StatusProto::FAILED_PRECONDITION); + result.mutable_status()->set_message( + "IcingSearchEngine has not been initialized!"); + return result; } - Crc32 document_store_checksum = std::move(checksum_or).ValueOrDie(); - total_checksum.Append(std::to_string(document_store_checksum.Get())); - total_checksum.Append(std::to_string(schema_store_checksum.Get())); - - return total_checksum; + int64_t index_size = filesystem_->GetDiskUsage(options_.base_dir().c_str()); + if (index_size != Filesystem::kBadFileSize) { + result.mutable_storage_info()->set_total_storage_size(index_size); + } else { + result.mutable_storage_info()->set_total_storage_size(-1); + } + *result.mutable_storage_info()->mutable_document_storage_info() = + document_store_->GetStorageInfo(); + *result.mutable_storage_info()->mutable_schema_store_storage_info() = + schema_store_->GetStorageInfo(); + *result.mutable_storage_info()->mutable_index_storage_info() = + index_->GetStorageInfo(); + result.mutable_status()->set_code(StatusProto::OK); + return result; } -bool IcingSearchEngine::HeaderExists() { - if (!filesystem_->FileExists( - MakeHeaderFilename(options_.base_dir()).c_str())) { - return 
false; +libtextclassifier3::Status IcingSearchEngine::InternalPersistToDisk( + PersistType::Code persist_type) { + if (persist_type == PersistType::LITE) { + return document_store_->PersistToDisk(persist_type); } + ICING_RETURN_IF_ERROR(schema_store_->PersistToDisk()); + ICING_RETURN_IF_ERROR(document_store_->PersistToDisk(PersistType::FULL)); + ICING_RETURN_IF_ERROR(index_->PersistToDisk()); - int64_t file_size = - filesystem_->GetFileSize(MakeHeaderFilename(options_.base_dir()).c_str()); - - // If it's been truncated to size 0 before, we consider it to be a new file - return file_size != 0 && file_size != Filesystem::kBadFileSize; -} - -libtextclassifier3::Status IcingSearchEngine::UpdateHeader( - const Crc32& checksum) { - // Write the header - IcingSearchEngine::Header header; - header.magic = IcingSearchEngine::Header::kMagic; - header.checksum = checksum.Get(); - - // This should overwrite the header. - if (!filesystem_->Write(MakeHeaderFilename(options_.base_dir()).c_str(), - &header, sizeof(header))) { - return absl_ports::InternalError( - absl_ports::StrCat("Failed to write IcingSearchEngine header: ", - MakeHeaderFilename(options_.base_dir()))); - } return libtextclassifier3::Status::OK; } @@ -1211,7 +1343,8 @@ SearchResultProto IcingSearchEngine::Search( return result_proto; } - NativeQueryStats* query_stats = result_proto.mutable_query_stats(); + QueryStatsProto* query_stats = result_proto.mutable_query_stats(); + query_stats->set_query_length(search_spec.query().length()); std::unique_ptr<Timer> overall_timer = clock_->GetNewTimer(); libtextclassifier3::Status status = ValidateResultSpec(result_spec); @@ -1237,7 +1370,7 @@ SearchResultProto IcingSearchEngine::Search( // Gets unordered results from query processor auto query_processor_or = QueryProcessor::Create( index_.get(), language_segmenter_.get(), normalizer_.get(), - document_store_.get(), schema_store_.get(), clock_.get()); + document_store_.get(), schema_store_.get()); if 
(!query_processor_or.ok()) { TransformStatus(query_processor_or.status(), result_status); return result_proto; @@ -1289,9 +1422,9 @@ SearchResultProto IcingSearchEngine::Search( component_timer = clock_->GetNewTimer(); // Ranks and paginates results libtextclassifier3::StatusOr<PageResultState> page_result_state_or = - result_state_manager_.RankAndPaginate(ResultState( + result_state_manager_->RankAndPaginate(ResultState( std::move(result_document_hits), std::move(query_results.query_terms), - search_spec, scoring_spec, result_spec)); + search_spec, scoring_spec, result_spec, *document_store_)); if (!page_result_state_or.ok()) { TransformStatus(page_result_state_or.status(), result_status); return result_proto; @@ -1307,7 +1440,7 @@ SearchResultProto IcingSearchEngine::Search( ResultRetriever::Create(document_store_.get(), schema_store_.get(), language_segmenter_.get(), normalizer_.get()); if (!result_retriever_or.ok()) { - result_state_manager_.InvalidateResultState( + result_state_manager_->InvalidateResultState( page_result_state.next_page_token); TransformStatus(result_retriever_or.status(), result_status); return result_proto; @@ -1318,7 +1451,7 @@ SearchResultProto IcingSearchEngine::Search( libtextclassifier3::StatusOr<std::vector<SearchResultProto::ResultProto>> results_or = result_retriever->RetrieveResults(page_result_state); if (!results_or.ok()) { - result_state_manager_.InvalidateResultState( + result_state_manager_->InvalidateResultState( page_result_state.next_page_token); TransformStatus(results_or.status(), result_status); return result_proto; @@ -1340,7 +1473,7 @@ SearchResultProto IcingSearchEngine::Search( query_stats->set_latency_ms(overall_timer->GetElapsedMilliseconds()); query_stats->set_num_results_returned_current_page( result_proto.results_size()); - query_stats->set_num_results_snippeted( + query_stats->set_num_results_with_snippets( std::min(result_proto.results_size(), result_spec.snippet_spec().num_to_snippet())); return result_proto; 
@@ -1359,12 +1492,12 @@ SearchResultProto IcingSearchEngine::GetNextPage(uint64_t next_page_token) { return result_proto; } - NativeQueryStats* query_stats = result_proto.mutable_query_stats(); + QueryStatsProto* query_stats = result_proto.mutable_query_stats(); query_stats->set_is_first_page(false); std::unique_ptr<Timer> overall_timer = clock_->GetNewTimer(); libtextclassifier3::StatusOr<PageResultState> page_result_state_or = - result_state_manager_.GetNextPage(next_page_token); + result_state_manager_->GetNextPage(next_page_token); if (!page_result_state_or.ok()) { if (absl_ports::IsNotFound(page_result_state_or.status())) { @@ -1424,7 +1557,7 @@ SearchResultProto IcingSearchEngine::GetNextPage(uint64_t next_page_token) { std::max(page_result_state.snippet_context.snippet_spec.num_to_snippet() - page_result_state.num_previously_returned, 0); - query_stats->set_num_results_snippeted( + query_stats->set_num_results_with_snippets( std::min(result_proto.results_size(), num_left_to_snippet)); return result_proto; } @@ -1435,10 +1568,11 @@ void IcingSearchEngine::InvalidateNextPageToken(uint64_t next_page_token) { ICING_LOG(ERROR) << "IcingSearchEngine has not been initialized!"; return; } - result_state_manager_.InvalidateResultState(next_page_token); + result_state_manager_->InvalidateResultState(next_page_token); } -libtextclassifier3::Status IcingSearchEngine::OptimizeDocumentStore() { +libtextclassifier3::Status IcingSearchEngine::OptimizeDocumentStore( + OptimizeStatsProto* optimize_stats) { // Gets the current directory path and an empty tmp directory path for // document store optimization. 
const std::string current_document_dir = @@ -1455,7 +1589,7 @@ libtextclassifier3::Status IcingSearchEngine::OptimizeDocumentStore() { // Copies valid document data to tmp directory auto optimize_status = document_store_->OptimizeInto( - temporary_document_dir, language_segmenter_.get()); + temporary_document_dir, language_segmenter_.get(), optimize_stats); // Handles error if any if (!optimize_status.ok()) { @@ -1465,7 +1599,9 @@ libtextclassifier3::Status IcingSearchEngine::OptimizeDocumentStore() { optimize_status.error_message()); } - // Resets before swapping + // result_state_manager_ depends on document_store_. So we need to reset it at + // the same time that we reset the document_store_. + result_state_manager_.reset(); document_store_.reset(); // When swapping files, always put the current working directory at the @@ -1502,6 +1638,8 @@ libtextclassifier3::Status IcingSearchEngine::OptimizeDocumentStore() { create_result_or.status().error_message()); } document_store_ = std::move(create_result_or.ValueOrDie().document_store); + result_state_manager_ = std::make_unique<ResultStateManager>( + performance_configuration_.max_num_total_hits, *document_store_); // Potential data loss // TODO(b/147373249): Find a way to detect true data loss error @@ -1522,6 +1660,8 @@ libtextclassifier3::Status IcingSearchEngine::OptimizeDocumentStore() { "instance can't be created"); } document_store_ = std::move(create_result_or.ValueOrDie().document_store); + result_state_manager_ = std::make_unique<ResultStateManager>( + performance_configuration_.max_num_total_hits, *document_store_); // Deletes tmp directory if (!filesystem_->DeleteDirectoryRecursively( @@ -1529,23 +1669,23 @@ libtextclassifier3::Status IcingSearchEngine::OptimizeDocumentStore() { ICING_LOG(ERROR) << "Document store has been optimized, but it failed to " "delete temporary file directory"; } - return libtextclassifier3::Status::OK; } -libtextclassifier3::Status IcingSearchEngine::RestoreIndexIfNeeded() { 
+IcingSearchEngine::IndexRestorationResult +IcingSearchEngine::RestoreIndexIfNeeded() { DocumentId last_stored_document_id = document_store_->last_added_document_id(); DocumentId last_indexed_document_id = index_->last_added_document_id(); if (last_stored_document_id == last_indexed_document_id) { // No need to recover. - return libtextclassifier3::Status::OK; + return {libtextclassifier3::Status::OK, false}; } if (last_stored_document_id == kInvalidDocumentId) { // Document store is empty but index is not. Reset the index. - return index_->Reset(); + return {index_->Reset(), false}; } // TruncateTo ensures that the index does not hold any data that is not @@ -1554,17 +1694,29 @@ libtextclassifier3::Status IcingSearchEngine::RestoreIndexIfNeeded() { // lost documents. If the index does not contain any hits for documents with // document id greater than last_stored_document_id, then TruncateTo will have // no effect. - ICING_RETURN_IF_ERROR(index_->TruncateTo(last_stored_document_id)); + auto status = index_->TruncateTo(last_stored_document_id); + if (!status.ok()) { + return {status, false}; + } + // Last indexed document id may have changed thanks to TruncateTo. + last_indexed_document_id = index_->last_added_document_id(); DocumentId first_document_to_reindex = (last_indexed_document_id != kInvalidDocumentId) ? index_->last_added_document_id() + 1 : kMinDocumentId; + if (first_document_to_reindex > last_stored_document_id) { + // Nothing to restore. Just return. 
+ return {libtextclassifier3::Status::OK, false}; + } - ICING_ASSIGN_OR_RETURN( - std::unique_ptr<IndexProcessor> index_processor, - IndexProcessor::Create(normalizer_.get(), index_.get(), - CreateIndexProcessorOptions(options_), - clock_.get())); + auto index_processor_or = IndexProcessor::Create( + normalizer_.get(), index_.get(), CreateIndexProcessorOptions(options_), + clock_.get()); + if (!index_processor_or.ok()) { + return {index_processor_or.status(), true}; + } + std::unique_ptr<IndexProcessor> index_processor = + std::move(index_processor_or).ValueOrDie(); ICING_VLOG(1) << "Restoring index by replaying documents from document id " << first_document_to_reindex << " to document id " @@ -1582,7 +1734,7 @@ libtextclassifier3::Status IcingSearchEngine::RestoreIndexIfNeeded() { continue; } else { // Returns other errors - return document_or.status(); + return {document_or.status(), true}; } } DocumentProto document(std::move(document_or).ValueOrDie()); @@ -1592,7 +1744,7 @@ libtextclassifier3::Status IcingSearchEngine::RestoreIndexIfNeeded() { language_segmenter_.get(), std::move(document)); if (!tokenized_document_or.ok()) { - return tokenized_document_or.status(); + return {tokenized_document_or.status(), true}; } TokenizedDocument tokenized_document( std::move(tokenized_document_or).ValueOrDie()); @@ -1602,7 +1754,7 @@ libtextclassifier3::Status IcingSearchEngine::RestoreIndexIfNeeded() { if (!status.ok()) { if (!absl_ports::IsDataLoss(status)) { // Real error. Stop recovering and pass it up. - return status; + return {status, true}; } // Just a data loss. Keep trying to add the remaining docs, but report the // data loss when we're done. 
@@ -1610,7 +1762,7 @@ libtextclassifier3::Status IcingSearchEngine::RestoreIndexIfNeeded() { } } - return overall_status; + return {overall_status, true}; } libtextclassifier3::StatusOr<bool> IcingSearchEngine::LostPreviousSchema() { @@ -1642,24 +1794,14 @@ ResetResultProto IcingSearchEngine::Reset() { ResetResultProto result_proto; StatusProto* result_status = result_proto.mutable_status(); - int64_t before_size = filesystem_->GetDiskUsage(options_.base_dir().c_str()); - + absl_ports::unique_lock l(&mutex_); + initialized_ = false; + ResetMembers(); if (!filesystem_->DeleteDirectoryRecursively(options_.base_dir().c_str())) { - int64_t after_size = filesystem_->GetDiskUsage(options_.base_dir().c_str()); - if (after_size != before_size) { - // Our filesystem doesn't atomically delete. If we have a discrepancy in - // size, then that means we may have deleted some files, but not others. - // So our data is in an invalid state now. - result_status->set_code(StatusProto::INTERNAL); - return result_proto; - } - - result_status->set_code(StatusProto::ABORTED); + result_status->set_code(StatusProto::INTERNAL); return result_proto; } - absl_ports::unique_lock l(&mutex_); - initialized_ = false; if (InternalInitialize().status().code() != StatusProto::OK) { // We shouldn't hit the following Initialize errors: // NOT_FOUND: all data was cleared, we aren't expecting anything diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h index a899131..65960a3 100644 --- a/icing/icing-search-engine.h +++ b/icing/icing-search-engine.h @@ -37,6 +37,7 @@ #include "icing/proto/schema.pb.h" #include "icing/proto/scoring.pb.h" #include "icing/proto/search.pb.h" +#include "icing/proto/storage.pb.h" #include "icing/proto/usage.pb.h" #include "icing/result/result-state-manager.h" #include "icing/schema/schema-store.h" @@ -52,16 +53,6 @@ namespace lib { // TODO(cassiewang) Top-level comments and links to design-doc. 
class IcingSearchEngine { public: - struct Header { - static constexpr int32_t kMagic = 0x6e650d0a; - - // Holds the magic as a quick sanity check against file corruption. - int32_t magic; - - // Checksum of the IcingSearchEngine's sub-component's checksums. - uint32_t checksum; - }; - // Note: It is only required to provide a pointer to a valid instance of // JniCache if this instance needs to perform reverse-jni calls. Users on // Linux and iOS should always provide a nullptr. @@ -187,6 +178,7 @@ class IcingSearchEngine { // // Returns: // OK on success + // OUT_OF_SPACE if exceeds maximum number of allowed documents // FAILED_PRECONDITION if a schema has not been set yet, IcingSearchEngine // has not been initialized yet. // NOT_FOUND if there is no SchemaTypeConfig in the SchemaProto that matches @@ -328,12 +320,26 @@ class IcingSearchEngine { // Invalidates the next-page token so that no more results of the related // query can be returned. - void InvalidateNextPageToken(uint64_t next_page_token); + void InvalidateNextPageToken(uint64_t next_page_token) + ICING_LOCKS_EXCLUDED(mutex_); // Makes sure that every update/delete received till this point is flushed // to disk. If the app crashes after a call to PersistToDisk(), Icing // would be able to fully recover all data written up to this point. // + // If persist_type is PersistType::LITE, then only the ground truth will be + // synced. This should be relatively lightweight to do (order of microseconds) + // and ensures that there will be no data loss. At worst, Icing may need to + // recover internal data structures by replaying the document log upon the + // next startup. Clients should call PersistToDisk(LITE) after each batch of + // mutations. + // + // If persist_type is PersistType::FULL, then all internal data structures in + // Icing will be synced. This is a heavier operation (order of milliseconds). 
+ // It ensures that Icing will not need to recover internal data structures + // upon the next startup. Clients should call PersistToDisk(FULL) before their + // process dies. + // // NOTE: It is not necessary to call PersistToDisk() to read back data // that was recently written. All read APIs will include the most recent // updates/deletes regardless of the data being flushed to disk. @@ -342,7 +348,8 @@ class IcingSearchEngine { // OK on success // FAILED_PRECONDITION IcingSearchEngine has not been initialized yet // INTERNAL on I/O error - PersistToDiskResultProto PersistToDisk() ICING_LOCKS_EXCLUDED(mutex_); + PersistToDiskResultProto PersistToDisk(PersistType::Code persist_type) + ICING_LOCKS_EXCLUDED(mutex_); // Allows Icing to run tasks that are too expensive and/or unnecessary to be // executed in real-time, but are useful to keep it fast and be @@ -378,6 +385,12 @@ class IcingSearchEngine { // INTERNAL_ERROR on IO error GetOptimizeInfoResultProto GetOptimizeInfo() ICING_LOCKS_EXCLUDED(mutex_); + // Calculates the StorageInfo for Icing. + // + // If an IO error occurs while trying to calculate the value for a field, then + // that field will be set to -1. + StorageInfoResultProto GetStorageInfo() ICING_LOCKS_EXCLUDED(mutex_); + // Clears all data from Icing and re-initializes. Clients DO NOT need to call // Initialize again. // @@ -416,7 +429,8 @@ class IcingSearchEngine { // acquired first in order to adhere to the global lock ordering: // 1. mutex_ // 2. result_state_manager_.lock_ - ResultStateManager result_state_manager_ ICING_GUARDED_BY(mutex_); + std::unique_ptr<ResultStateManager> result_state_manager_ + ICING_GUARDED_BY(mutex_); // Used to provide reader and writer locks absl_ports::shared_mutex mutex_; @@ -438,12 +452,27 @@ class IcingSearchEngine { // Pointer to JNI class references const std::unique_ptr<const JniCache> jni_cache_; + // Resets all members that are created during Initialize. 
+ void ResetMembers() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Checks for the existence of the init marker file. If the failed init count + // exceeds kMaxUnsuccessfulInitAttempts, all data is deleted and the index is + // initialized from scratch. The updated count (original failed init count + 1 + // ) is written to the marker file. + // + // RETURNS + // OK on success + // INTERNAL if an IO error occurs while trying to update the marker file. + libtextclassifier3::Status CheckInitMarkerFile( + InitializeStatsProto* initialize_stats) + ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + // Helper method to do the actual work to persist data to disk. We need this // separate method so that other public methods don't need to call // PersistToDisk(). Public methods calling each other may cause deadlock // issues. - libtextclassifier3::Status InternalPersistToDisk() - ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + libtextclassifier3::Status InternalPersistToDisk( + PersistType::Code persist_type) ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); // Helper method to the actual work to Initialize. We need this separate // method so that other public methods don't need to call Initialize(). Public @@ -460,16 +489,7 @@ class IcingSearchEngine { // NOT_FOUND if some Document's schema type is not in the SchemaStore // INTERNAL on any I/O errors libtextclassifier3::Status InitializeMembers( - NativeInitializeStats* initialize_stats) - ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); - - // Do any validation/setup required for the given IcingSearchEngineOptions - // - // Returns: - // OK on success - // INVALID_ARGUMENT if options has invalid values - // INTERNAL on I/O error - libtextclassifier3::Status InitializeOptions() + InitializeStatsProto* initialize_stats) ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); // Do any initialization/recovery necessary to create a SchemaStore instance. 
@@ -479,18 +499,22 @@ class IcingSearchEngine { // FAILED_PRECONDITION if initialize_stats is null // INTERNAL on I/O error libtextclassifier3::Status InitializeSchemaStore( - NativeInitializeStats* initialize_stats) + InitializeStatsProto* initialize_stats) ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); // Do any initialization/recovery necessary to create a DocumentStore // instance. // + // See comments on DocumentStore::Create for explanation of + // force_recovery_and_revalidate_documents. + // // Returns: // OK on success // FAILED_PRECONDITION if initialize_stats is null // INTERNAL on I/O error libtextclassifier3::Status InitializeDocumentStore( - NativeInitializeStats* initialize_stats) + bool force_recovery_and_revalidate_documents, + InitializeStatsProto* initialize_stats) ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); // Do any initialization/recovery necessary to create a DocumentStore @@ -503,7 +527,7 @@ class IcingSearchEngine { // NOT_FOUND if some Document's schema type is not in the SchemaStore // INTERNAL on I/O error libtextclassifier3::Status InitializeIndex( - NativeInitializeStats* initialize_stats) + InitializeStatsProto* initialize_stats) ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); // Many of the internal components rely on other components' derived data. 
@@ -527,7 +551,7 @@ class IcingSearchEngine { // OK on success // INTERNAL_ERROR on any IO errors libtextclassifier3::Status RegenerateDerivedFiles( - NativeInitializeStats* initialize_stats = nullptr, + InitializeStatsProto* initialize_stats = nullptr, bool log_document_store_stats = false) ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); @@ -545,7 +569,8 @@ class IcingSearchEngine { // document store is still available // INTERNAL_ERROR on any IO errors or other errors that we can't recover // from - libtextclassifier3::Status OptimizeDocumentStore() + libtextclassifier3::Status OptimizeDocumentStore( + OptimizeStatsProto* optimize_stats) ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); // Helper method to restore missing document data in index_. All documents @@ -553,29 +578,19 @@ class IcingSearchEngine { // call Index::Reset first. // // Returns: - // OK on success + // On success, OK and a bool indicating whether or not restoration was + // needed. + // DATA_LOSS, if an error during index merging caused us to lose indexed + // data in the main index. Despite the data loss, this is still considered + // a successful run and needed_restoration will be set to true. // RESOURCE_EXHAUSTED if the index fills up before finishing indexing // NOT_FOUND if some Document's schema type is not in the SchemaStore // INTERNAL_ERROR on any IO errors - libtextclassifier3::Status RestoreIndexIfNeeded() - ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); - - // Computes the combined checksum of the IcingSearchEngine - includes all its - // subcomponents - // - // Returns: - // Combined checksum on success - // INTERNAL_ERROR on compute error - libtextclassifier3::StatusOr<Crc32> ComputeChecksum() - ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); - - // Checks if the header exists already. This does not create the header file - // if it doesn't exist. - bool HeaderExists() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); - - // Update and replace the header file. Creates the header file if it doesn't - // exist. 
- libtextclassifier3::Status UpdateHeader(const Crc32& checksum) + struct IndexRestorationResult { + libtextclassifier3::Status status; + bool needed_restoration; + }; + IndexRestorationResult RestoreIndexIfNeeded() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); // If we lost the schema during a previous failure, it may "look" the same as diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc index 9d33a82..ba9aed1 100644 --- a/icing/icing-search-engine_benchmark.cc +++ b/icing/icing-search-engine_benchmark.cc @@ -39,6 +39,7 @@ #include "icing/proto/search.pb.h" #include "icing/proto/status.pb.h" #include "icing/proto/term.pb.h" +#include "icing/schema-builder.h" #include "icing/testing/common-matchers.h" #include "icing/testing/document-generator.h" #include "icing/testing/random-string.h" @@ -69,6 +70,7 @@ namespace lib { namespace { using ::testing::Eq; +using ::testing::HasSubstr; // Icing GMSCore has, on average, 17 corpora on a device and 30 corpora at the // 95th pct. Most clients use a single type. This is a function of Icing's @@ -462,6 +464,287 @@ BENCHMARK(BM_MutlipleIndices) ->ArgPair(10, 32768) ->ArgPair(10, 131072); +void BM_SearchNoStackOverflow(benchmark::State& state) { + // Initialize the filesystem + std::string test_dir = GetTestTempDir() + "/icing/benchmark"; + Filesystem filesystem; + DestructibleDirectory ddir(filesystem, test_dir); + + // Create the schema. + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TermMatchType::PREFIX, + StringIndexingConfig::TokenizerType::PLAIN) + .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL))) + .Build(); + + // Create the index. 
+ IcingSearchEngineOptions options; + options.set_base_dir(test_dir); + options.set_index_merge_size(kIcingFullIndexSize); + std::unique_ptr<IcingSearchEngine> icing = + std::make_unique<IcingSearchEngine>(options); + + ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk()); + + // Create a document that has the term "foo" + DocumentProto base_document = DocumentBuilder() + .SetSchema("Message") + .SetNamespace("namespace") + .AddStringProperty("body", "foo") + .Build(); + + // Insert a lot of documents with the term "foo" + int64_t num_docs = state.range(0); + for (int64_t i = 0; i < num_docs; ++i) { + DocumentProto document = + DocumentBuilder(base_document).SetUri(std::to_string(i)).Build(); + ASSERT_THAT(icing->Put(document).status(), ProtoIsOk()); + } + + // Do a query and exclude documents with the term "foo". The way this is + // currently implemented is that we'll iterate over all the documents in the + // index, then apply the exclusion check. Since all our documents have "foo", + // we'll consider it a "miss". Previously with recursion, we would have + // recursed until we got a success, which would never happen causing us to + // recurse through all the documents and trigger a stack overflow. With + // the iterative implementation, we should avoid this. + SearchSpecProto search_spec; + search_spec.set_query("-foo"); + search_spec.set_term_match_type(TermMatchType::PREFIX); + + ResultSpecProto result_spec; + ScoringSpecProto scoring_spec; + for (auto s : state) { + icing->Search(search_spec, scoring_spec, result_spec); + } +} +// For other reasons, we hit a limit when inserting the ~350,000th document. So +// cap the limit to 1 << 18. +BENCHMARK(BM_SearchNoStackOverflow) + ->Range(/*start=*/1 << 10, /*limit=*/1 << 18); + +// Added for b/184373205. Ensure that we can repeatedly put documents even if +// the underlying mmapped areas grow past a few page sizes. 
+void BM_RepeatedPut(benchmark::State& state) { + // Initialize the filesystem + std::string test_dir = GetTestTempDir() + "/icing/benchmark"; + Filesystem filesystem; + DestructibleDirectory ddir(filesystem, test_dir); + + // Create the schema. + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TermMatchType::PREFIX, + StringIndexingConfig::TokenizerType::PLAIN) + .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL))) + .Build(); + + // Create the index. + IcingSearchEngineOptions options; + options.set_base_dir(test_dir); + options.set_index_merge_size(kIcingFullIndexSize); + std::unique_ptr<IcingSearchEngine> icing = + std::make_unique<IcingSearchEngine>(options); + + ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk()); + + // Create a document that has the term "foo" + DocumentProto base_document = DocumentBuilder() + .SetSchema("Message") + .SetNamespace("namespace") + .AddStringProperty("body", "foo") + .Build(); + + // Insert a lot of documents with the term "foo" + int64_t num_docs = state.range(0); + for (auto s : state) { + for (int64_t i = 0; i < num_docs; ++i) { + DocumentProto document = + DocumentBuilder(base_document).SetUri("uri").Build(); + ASSERT_THAT(icing->Put(document).status(), ProtoIsOk()); + } + } +} +// For other reasons, we hit a limit when inserting the ~350,000th document. So +// cap the limit to 1 << 18. +BENCHMARK(BM_RepeatedPut)->Range(/*start=*/100, /*limit=*/1 << 18); + +// This is different from BM_RepeatedPut since we're just trying to benchmark +// one Put call, not thousands of them at once. +void BM_Put(benchmark::State& state) { + // Initialize the filesystem + std::string test_dir = GetTestTempDir() + "/icing/benchmark"; + Filesystem filesystem; + DestructibleDirectory ddir(filesystem, test_dir); + + // Create the schema. 
+ SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message")) + .Build(); + + // Create the index. + IcingSearchEngineOptions options; + options.set_base_dir(test_dir); + options.set_index_merge_size(kIcingFullIndexSize); + std::unique_ptr<IcingSearchEngine> icing = + std::make_unique<IcingSearchEngine>(options); + + ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk()); + + // Create a document + DocumentProto document = DocumentBuilder() + .SetSchema("Message") + .SetNamespace("namespace") + .SetUri("uri") + .Build(); + + for (auto s : state) { + benchmark::DoNotOptimize(icing->Put(document)); + } +} +BENCHMARK(BM_Put); + +void BM_Get(benchmark::State& state) { + // Initialize the filesystem + std::string test_dir = GetTestTempDir() + "/icing/benchmark"; + Filesystem filesystem; + DestructibleDirectory ddir(filesystem, test_dir); + + // Create the schema. + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message")) + .Build(); + + // Create the index. 
+ IcingSearchEngineOptions options; + options.set_base_dir(test_dir); + options.set_index_merge_size(kIcingFullIndexSize); + std::unique_ptr<IcingSearchEngine> icing = + std::make_unique<IcingSearchEngine>(options); + + ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk()); + + // Create a document + DocumentProto document = DocumentBuilder() + .SetSchema("Message") + .SetNamespace("namespace") + .SetUri("uri") + .Build(); + + ASSERT_THAT(icing->Put(document).status(), ProtoIsOk()); + for (auto s : state) { + benchmark::DoNotOptimize( + icing->Get("namespace", "uri", GetResultSpecProto::default_instance())); + } +} +BENCHMARK(BM_Get); + +void BM_Delete(benchmark::State& state) { + // Initialize the filesystem + std::string test_dir = GetTestTempDir() + "/icing/benchmark"; + Filesystem filesystem; + DestructibleDirectory ddir(filesystem, test_dir); + + // Create the schema. + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message")) + .Build(); + + // Create the index. 
+ IcingSearchEngineOptions options; + options.set_base_dir(test_dir); + options.set_index_merge_size(kIcingFullIndexSize); + std::unique_ptr<IcingSearchEngine> icing = + std::make_unique<IcingSearchEngine>(options); + + ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk()); + + // Create a document + DocumentProto document = DocumentBuilder() + .SetSchema("Message") + .SetNamespace("namespace") + .SetUri("uri") + .Build(); + + ASSERT_THAT(icing->Put(document).status(), ProtoIsOk()); + for (auto s : state) { + state.PauseTiming(); + icing->Put(document); + state.ResumeTiming(); + + benchmark::DoNotOptimize(icing->Delete("namespace", "uri")); + } +} +BENCHMARK(BM_Delete); + +void BM_PutMaxAllowedDocuments(benchmark::State& state) { + // Initialize the filesystem + std::string test_dir = GetTestTempDir() + "/icing/benchmark"; + Filesystem filesystem; + DestructibleDirectory ddir(filesystem, test_dir); + + // Create the schema. + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TermMatchType::PREFIX, + StringIndexingConfig::TokenizerType::PLAIN) + .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL))) + .Build(); + + // Create the index. 
+ IcingSearchEngineOptions options; + options.set_base_dir(test_dir); + options.set_index_merge_size(kIcingFullIndexSize); + std::unique_ptr<IcingSearchEngine> icing = + std::make_unique<IcingSearchEngine>(options); + + ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk()); + + // Create a document that has the term "foo" + DocumentProto base_document = DocumentBuilder() + .SetSchema("Message") + .SetNamespace("namespace") + .AddStringProperty("body", "foo") + .Build(); + + // Insert a lot of documents with the term "foo" + for (auto s : state) { + for (int64_t i = 0; i <= kMaxDocumentId; ++i) { + DocumentProto document = + DocumentBuilder(base_document).SetUri(std::to_string(i)).Build(); + EXPECT_THAT(icing->Put(document).status(), ProtoIsOk()); + } + } + + DocumentProto document = + DocumentBuilder(base_document).SetUri("out_of_space_uri").Build(); + PutResultProto put_result_proto = icing->Put(document); + EXPECT_THAT(put_result_proto.status(), + ProtoStatusIs(StatusProto::OUT_OF_SPACE)); + EXPECT_THAT(put_result_proto.status().message(), + HasSubstr("Exceeded maximum number of documents")); +} +BENCHMARK(BM_PutMaxAllowedDocuments); + } // namespace } // namespace lib diff --git a/icing/icing-search-engine_flush_benchmark.cc b/icing/icing-search-engine_flush_benchmark.cc new file mode 100644 index 0000000..de8f550 --- /dev/null +++ b/icing/icing-search-engine_flush_benchmark.cc @@ -0,0 +1,200 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include <unistd.h> + +#include <fstream> +#include <iostream> +#include <memory> +#include <ostream> +#include <random> +#include <sstream> +#include <stdexcept> +#include <string> +#include <string_view> +#include <unordered_set> +#include <vector> + +#include "testing/base/public/benchmark.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/document-builder.h" +#include "icing/file/filesystem.h" +#include "icing/icing-search-engine.h" +#include "icing/proto/document.pb.h" +#include "icing/proto/initialize.pb.h" +#include "icing/proto/schema.pb.h" +#include "icing/proto/status.pb.h" +#include "icing/proto/term.pb.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/document-generator.h" +#include "icing/testing/random-string.h" +#include "icing/testing/schema-generator.h" +#include "icing/testing/tmp-directory.h" + +// Run on a Linux workstation: +// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt +// //icing:icing-search-engine_flush_benchmark +// +// $ blaze-bin/icing/icing-search-engine_flush_benchmark +// --benchmarks=all --benchmark_memory_usage +// +// Run on an Android device: +// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1" +// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt +// //icing:icing-search-engine_flush_benchmark +// +// $ adb push blaze-bin/icing/icing-search-engine_flush_benchmark +// /data/local/tmp/ +// +// $ adb shell /data/local/tmp/icing-search-engine_flush_benchmark +// --benchmarks=all + +namespace icing { +namespace lib { + +namespace { + +// Assume that there will be roughly 10 packages, each using 3 of its own types. +constexpr int kAvgNumNamespaces = 10; +constexpr int kAvgNumTypes = 3; + +// ASSUME: Types will have at most ten properties. Types will be created with +// [1, 10] properties. 
+constexpr int kMaxNumProperties = 10; + +// Based on logs from Icing GMSCore. +constexpr int kAvgDocumentSize = 300; + +// ASSUME: ~75% of the document's size comes from its content. +constexpr float kContentSizePct = 0.7; + +// Average length of word in English is 4.7 characters. +constexpr int kAvgTokenLen = 5; +// Made up value. This results in a fairly reasonable language - the majority of +// generated words are 3-9 characters, ~3% of words are >=20 chars, and the +// longest ones are 27 chars, (roughly consistent with the longest, +// non-contrived English words +// https://en.wikipedia.org/wiki/Longest_word_in_English) +constexpr int kTokenStdDev = 7; +constexpr int kLanguageSize = 1000; + +// The number of documents to index. +constexpr int kNumDocuments = 1024; + +std::vector<std::string> CreateNamespaces(int num_namespaces) { + std::vector<std::string> namespaces; + while (--num_namespaces >= 0) { + namespaces.push_back("comgooglepackage" + std::to_string(num_namespaces)); + } + return namespaces; +} + +// Creates a vector containing num_words randomly-generated words for use by +// documents. 
+template <typename Rand> +std::vector<std::string> CreateLanguage(int num_words, Rand* r) { + std::vector<std::string> language; + std::normal_distribution<> norm_dist(kAvgTokenLen, kTokenStdDev); + while (--num_words >= 0) { + int word_length = 0; + while (word_length < 1) { + word_length = std::round(norm_dist(*r)); + } + language.push_back(RandomString(kAlNumAlphabet, word_length, r)); + } + return language; +} + +class DestructibleDirectory { + public: + explicit DestructibleDirectory(const Filesystem& filesystem, + const std::string& dir) + : filesystem_(filesystem), dir_(dir) { + filesystem_.CreateDirectoryRecursively(dir_.c_str()); + } + ~DestructibleDirectory() { + filesystem_.DeleteDirectoryRecursively(dir_.c_str()); + } + + private: + Filesystem filesystem_; + std::string dir_; +}; + +void BM_FlushBenchmark(benchmark::State& state) { + PersistType::Code persist_type = + (state.range(0)) ? PersistType::LITE : PersistType::FULL; + int num_documents_per_persist = state.range(1); + + // Initialize the filesystem + std::string test_dir = GetTestTempDir() + "/icing/benchmark/flush"; + Filesystem filesystem; + DestructibleDirectory ddir(filesystem, test_dir); + + // Create the schema. 
+ std::default_random_engine random; + int num_types = kAvgNumNamespaces * kAvgNumTypes; + ExactStringPropertyGenerator property_generator; + RandomSchemaGenerator<std::default_random_engine, + ExactStringPropertyGenerator> + schema_generator(&random, &property_generator); + SchemaProto schema = + schema_generator.GenerateSchema(num_types, kMaxNumProperties); + EvenDistributionTypeSelector type_selector(schema); + + std::vector<std::string> namespaces = CreateNamespaces(kAvgNumNamespaces); + EvenDistributionNamespaceSelector namespace_selector(namespaces); + + std::vector<std::string> language = CreateLanguage(kLanguageSize, &random); + UniformDistributionLanguageTokenGenerator<std::default_random_engine> + token_generator(language, &random); + + DocumentGenerator< + EvenDistributionNamespaceSelector, EvenDistributionTypeSelector, + UniformDistributionLanguageTokenGenerator<std::default_random_engine>> + generator(&namespace_selector, &type_selector, &token_generator, + kAvgDocumentSize * kContentSizePct); + + IcingSearchEngineOptions options; + options.set_base_dir(test_dir); + std::unique_ptr<IcingSearchEngine> icing = + std::make_unique<IcingSearchEngine>(options); + + ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk()); + for (auto s : state) { + for (int i = 0; i < kNumDocuments; ++i) { + icing->Put(generator.generateDoc()); + + if (i % num_documents_per_persist == num_documents_per_persist - 1) { + icing->PersistToDisk(persist_type); + } + } + } +} +BENCHMARK(BM_FlushBenchmark) + // First argument: lite_flush, + // Second argument: num_document_per_lite_flush + ->ArgPair(true, 1) + ->ArgPair(false, 1) + ->ArgPair(true, 32) + ->ArgPair(false, 32) + ->ArgPair(true, 1024) + ->ArgPair(false, 1024); + +} // namespace + +} // namespace lib +} // namespace icing diff --git a/icing/icing-search-engine_fuzz_test.cc b/icing/icing-search-engine_fuzz_test.cc index 1f59c6e..2d07e37 100644 --- 
a/icing/icing-search-engine_fuzz_test.cc +++ b/icing/icing-search-engine_fuzz_test.cc @@ -23,6 +23,7 @@ #include "icing/proto/document.pb.h" #include "icing/proto/initialize.pb.h" #include "icing/proto/scoring.pb.h" +#include "icing/schema-builder.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" @@ -30,27 +31,20 @@ namespace icing { namespace lib { namespace { +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED = + PropertyConfigProto_Cardinality_Code_REQUIRED; + +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = + StringIndexingConfig_TokenizerType_Code_PLAIN; + +constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX; + IcingSearchEngineOptions Setup() { IcingSearchEngineOptions icing_options; icing_options.set_base_dir(GetTestTempDir() + "/icing"); return icing_options; } -SchemaProto SetTypes() { - SchemaProto schema; - SchemaTypeConfigProto* type = schema.add_types(); - type->set_schema_type("Message"); - PropertyConfigProto* body = type->add_properties(); - body->set_property_name("body"); - body->set_data_type(PropertyConfigProto::DataType::STRING); - body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - body->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::PREFIX); - body->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - return schema; -} - DocumentProto MakeDocument(const uint8_t* data, size_t size) { // TODO (sidchhabra): Added more optimized fuzzing techniques. DocumentProto document; @@ -83,7 +77,15 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { // TODO (b/145758378): Deleting directory should not be required. 
filesystem_.DeleteDirectoryRecursively(icing_options.base_dir().c_str()); icing.Initialize(); - SchemaProto schema_proto = SetTypes(); + + SchemaProto schema_proto = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); icing.SetSchema(schema_proto); // Index diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc index 8c64614..6ad4703 100644 --- a/icing/icing-search-engine_test.cc +++ b/icing/icing-search-engine_test.cc @@ -29,19 +29,24 @@ #include "icing/file/mock-filesystem.h" #include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/legacy/index/icing-mock-filesystem.h" +#include "icing/portable/endian.h" #include "icing/portable/equals-proto.h" +#include "icing/portable/platform.h" #include "icing/proto/document.pb.h" #include "icing/proto/initialize.pb.h" +#include "icing/proto/optimize.pb.h" +#include "icing/proto/persist.pb.h" #include "icing/proto/schema.pb.h" #include "icing/proto/scoring.pb.h" #include "icing/proto/search.pb.h" #include "icing/proto/status.pb.h" +#include "icing/schema-builder.h" #include "icing/schema/schema-store.h" #include "icing/schema/section.h" +#include "icing/store/document-log-creator.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" #include "icing/testing/jni-test-helpers.h" -#include "icing/testing/platform.h" #include "icing/testing/random-string.h" #include "icing/testing/snippet-helpers.h" #include "icing/testing/test-data.h" @@ -85,13 +90,48 @@ constexpr std::string_view kIpsumText = "vehicula posuere vitae, convallis eu lorem. 
Donec semper augue eu nibh " "placerat semper."; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = + PropertyConfigProto_Cardinality_Code_OPTIONAL; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED = + PropertyConfigProto_Cardinality_Code_REQUIRED; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED = + PropertyConfigProto_Cardinality_Code_REPEATED; + +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = + StringIndexingConfig_TokenizerType_Code_PLAIN; +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_NONE = + StringIndexingConfig_TokenizerType_Code_NONE; + +#ifndef ICING_JNI_TEST +constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; +#endif // !ICING_JNI_TEST + +constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX; +constexpr TermMatchType_Code MATCH_NONE = TermMatchType_Code_UNKNOWN; + +PortableFileBackedProtoLog<DocumentWrapper>::Header ReadDocumentLogHeader( + Filesystem filesystem, const std::string& file_path) { + PortableFileBackedProtoLog<DocumentWrapper>::Header header; + filesystem.PRead(file_path.c_str(), &header, + sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header), + /*offset=*/0); + return header; +} + +void WriteDocumentLogHeader( + Filesystem filesystem, const std::string& file_path, + PortableFileBackedProtoLog<DocumentWrapper>::Header& header) { + filesystem.Write(file_path.c_str(), &header, + sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header)); +} + // For mocking purpose, we allow tests to provide a custom Filesystem. 
class TestIcingSearchEngine : public IcingSearchEngine { public: TestIcingSearchEngine(const IcingSearchEngineOptions& options, std::unique_ptr<const Filesystem> filesystem, std::unique_ptr<const IcingFilesystem> icing_filesystem, - std::unique_ptr<FakeClock> clock, + std::unique_ptr<Clock> clock, std::unique_ptr<JniCache> jni_cache) : IcingSearchEngine(options, std::move(filesystem), std::move(icing_filesystem), std::move(clock), @@ -172,95 +212,61 @@ DocumentProto CreateEmailDocument(const std::string& name_space, } SchemaProto CreateMessageSchema() { - SchemaProto schema; - auto type = schema.add_types(); - type->set_schema_type("Message"); - - auto body = type->add_properties(); - body->set_property_name("body"); - body->set_data_type(PropertyConfigProto::DataType::STRING); - body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - body->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::PREFIX); - body->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - - return schema; + return SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); } SchemaProto CreateEmailSchema() { - SchemaProto schema; - auto* type = schema.add_types(); - type->set_schema_type("Email"); - - auto* body = type->add_properties(); - body->set_property_name("body"); - body->set_data_type(PropertyConfigProto::DataType::STRING); - body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - body->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::PREFIX); - body->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - auto* subj = type->add_properties(); - subj->set_property_name("subject"); - subj->set_data_type(PropertyConfigProto::DataType::STRING); - 
subj->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - subj->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::PREFIX); - subj->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - return schema; + return SchemaBuilder() + .AddType( + SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); } SchemaProto CreatePersonAndEmailSchema() { - SchemaProto schema; - - auto* person_type = schema.add_types(); - person_type->set_schema_type("Person"); - auto* name = person_type->add_properties(); - name->set_property_name("name"); - name->set_data_type(PropertyConfigProto::DataType::STRING); - name->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - name->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::PREFIX); - name->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - auto* address = person_type->add_properties(); - address->set_property_name("emailAddress"); - address->set_data_type(PropertyConfigProto::DataType::STRING); - address->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - address->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::PREFIX); - address->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - - auto* type = schema.add_types(); - type->set_schema_type("Email"); - - auto* body = type->add_properties(); - body->set_property_name("body"); - body->set_data_type(PropertyConfigProto::DataType::STRING); - body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - 
body->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::PREFIX); - body->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - auto* subj = type->add_properties(); - subj->set_property_name("subject"); - subj->set_data_type(PropertyConfigProto::DataType::STRING); - subj->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - subj->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::PREFIX); - subj->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - auto* sender = type->add_properties(); - sender->set_property_name("sender"); - sender->set_schema_type("Person"); - sender->set_data_type(PropertyConfigProto::DataType::DOCUMENT); - sender->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - sender->mutable_document_indexing_config()->set_index_nested_properties(true); - - return schema; + return SchemaBuilder() + .AddType( + SchemaTypeConfigBuilder() + .SetType("Person") + .AddProperty(PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("emailAddress") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType( + SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeDocument( + "Person", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); } ScoringSpecProto GetDefaultScoringSpec() { @@ -428,23 +434,23 @@ TEST_F(IcingSearchEngineTest, 
MaxTokenLenReturnsOkAndTruncatesTokens) { SearchResultProto actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); // The query token is also truncated to length of 1, so "me"->"m" matches "m" search_spec.set_query("me"); actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); // The query token is still truncated to length of 1, so "massage"->"m" // matches "m" search_spec.set_query("massage"); actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, @@ -480,8 +486,8 @@ TEST_F(IcingSearchEngineTest, SearchResultProto actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, FailToCreateDocStore) { @@ -502,6 +508,217 @@ TEST_F(IcingSearchEngineTest, FailToCreateDocStore) { HasSubstr("Could not create directory")); } +TEST_F(IcingSearchEngineTest, InitMarkerFilePreviousFailuresAtThreshold) { + Filesystem filesystem; + DocumentProto email1 = + CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1"); + 
email1.set_creation_timestamp_ms(10000); + DocumentProto email2 = + CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2"); + email2.set_creation_timestamp_ms(10000); + + { + // Create an index with a few documents. + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto init_result = icing.Initialize(); + ASSERT_THAT(init_result.status(), ProtoIsOk()); + ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), + Eq(0)); + ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk()); + } + + // Write an init marker file with 5 previously failed attempts. + std::string marker_filepath = GetTestBaseDir() + "/init_marker"; + + { + ScopedFd marker_file_fd(filesystem.OpenForWrite(marker_filepath.c_str())); + int network_init_attempts = GHostToNetworkL(5); + // Write the updated number of attempts before we get started. + ASSERT_TRUE(filesystem.PWrite(marker_file_fd.get(), 0, + &network_init_attempts, + sizeof(network_init_attempts))); + ASSERT_TRUE(filesystem.DataSync(marker_file_fd.get())); + } + + { + // Create the index again and verify that initialization succeeds and no + // data is thrown out. + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto init_result = icing.Initialize(); + ASSERT_THAT(init_result.status(), ProtoIsOk()); + ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), + Eq(5)); + EXPECT_THAT( + icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()) + .document(), + EqualsProto(email1)); + EXPECT_THAT( + icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()) + .document(), + EqualsProto(email2)); + } + + // The successful init should have thrown out the marker file. 
+ ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str())); +} + +TEST_F(IcingSearchEngineTest, InitMarkerFilePreviousFailuresBeyondThreshold) { + Filesystem filesystem; + DocumentProto email1 = + CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1"); + DocumentProto email2 = + CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2"); + + { + // Create an index with a few documents. + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto init_result = icing.Initialize(); + ASSERT_THAT(init_result.status(), ProtoIsOk()); + ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), + Eq(0)); + ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk()); + } + + // Write an init marker file with 6 previously failed attempts. + std::string marker_filepath = GetTestBaseDir() + "/init_marker"; + + { + ScopedFd marker_file_fd(filesystem.OpenForWrite(marker_filepath.c_str())); + int network_init_attempts = GHostToNetworkL(6); + // Write the updated number of attempts before we get started. + ASSERT_TRUE(filesystem.PWrite(marker_file_fd.get(), 0, + &network_init_attempts, + sizeof(network_init_attempts))); + ASSERT_TRUE(filesystem.DataSync(marker_file_fd.get())); + } + + { + // Create the index again and verify that initialization succeeds and all + // data is thrown out. 
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto init_result = icing.Initialize(); + ASSERT_THAT(init_result.status(), + ProtoStatusIs(StatusProto::WARNING_DATA_LOSS)); + ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), + Eq(6)); + EXPECT_THAT( + icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()) + .status(), + ProtoStatusIs(StatusProto::NOT_FOUND)); + EXPECT_THAT( + icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()) + .status(), + ProtoStatusIs(StatusProto::NOT_FOUND)); + } + + // The successful init should have thrown out the marker file. + ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str())); +} + +TEST_F(IcingSearchEngineTest, SuccessiveInitFailuresIncrementsInitMarker) { + Filesystem filesystem; + DocumentProto email1 = + CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1"); + DocumentProto email2 = + CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2"); + + { + // 1. Create an index with a few documents. + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto init_result = icing.Initialize(); + ASSERT_THAT(init_result.status(), ProtoIsOk()); + ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), + Eq(0)); + ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk()); + } + + { + // 2. Create an index that will encounter an IO failure when trying to + // create the document log. 
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); + + auto mock_filesystem = std::make_unique<MockFilesystem>(); + std::string document_log_filepath = + icing_options.base_dir() + "/document_dir/document_log_v1"; + auto get_filesize_lambda = [this, + &document_log_filepath](const char* filename) { + if (strncmp(document_log_filepath.c_str(), filename, + document_log_filepath.length()) == 0) { + return Filesystem::kBadFileSize; + } + return this->filesystem()->GetFileSize(filename); + }; + ON_CALL(*mock_filesystem, GetFileSize(A<const char*>())) + .WillByDefault(get_filesize_lambda); + + TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem), + std::make_unique<IcingFilesystem>(), + std::make_unique<FakeClock>(), + GetTestJniCache()); + + // Fail to initialize six times in a row. + InitializeResultProto init_result = icing.Initialize(); + ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); + ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), + Eq(0)); + + init_result = icing.Initialize(); + ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); + ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), + Eq(1)); + + init_result = icing.Initialize(); + ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); + ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), + Eq(2)); + + init_result = icing.Initialize(); + ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); + ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), + Eq(3)); + + init_result = icing.Initialize(); + ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); + ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), + Eq(4)); + + init_result = icing.Initialize(); + ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); + 
ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), + Eq(5)); + } + + { + // 3. Create the index again and verify that initialization succeeds and all + // data is thrown out. + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto init_result = icing.Initialize(); + ASSERT_THAT(init_result.status(), + ProtoStatusIs(StatusProto::WARNING_DATA_LOSS)); + ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), + Eq(6)); + + EXPECT_THAT( + icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()) + .status(), + ProtoStatusIs(StatusProto::NOT_FOUND)); + EXPECT_THAT( + icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()) + .status(), + ProtoStatusIs(StatusProto::NOT_FOUND)); + } + + // The successful init should have thrown out the marker file. + std::string marker_filepath = GetTestBaseDir() + "/init_marker"; + ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str())); +} + TEST_F(IcingSearchEngineTest, CircularReferenceCreateSectionManagerReturnsInvalidArgument) { // Create a type config with a circular reference. @@ -596,7 +813,7 @@ TEST_F(IcingSearchEngineTest, FailToWriteSchema) { HasSubstr("Unable to open file for write")); } -TEST_F(IcingSearchEngineTest, SetSchemaDelete2) { +TEST_F(IcingSearchEngineTest, SetSchemaIncompatibleFails) { { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); @@ -639,15 +856,18 @@ TEST_F(IcingSearchEngineTest, SetSchemaDelete2) { property->set_data_type(PropertyConfigProto::DataType::STRING); property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - EXPECT_THAT(icing.SetSchema(schema, false).status(), - ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); + EXPECT_THAT( + icing.SetSchema(schema, /*ignore_errors_and_delete_documents=*/false) + .status(), + ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); - // 4. Try to delete by email type. 
+ // 4. Try to delete by email type. This should succeed because email wasn't + // deleted in step 3. EXPECT_THAT(icing.DeleteBySchemaType("Email").status(), ProtoIsOk()); } } -TEST_F(IcingSearchEngineTest, SetSchemaDelete) { +TEST_F(IcingSearchEngineTest, SetSchemaIncompatibleForceOverrideSucceeds) { { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); @@ -681,7 +901,8 @@ TEST_F(IcingSearchEngineTest, SetSchemaDelete) { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - // 3. Set a schema that deletes email. This should fail. + // 3. Set a schema that deletes email with force override. This should + // succeed and delete the email type. SchemaProto schema; SchemaTypeConfigProto* type = schema.add_types(); type->set_schema_type("Message"); @@ -692,7 +913,8 @@ TEST_F(IcingSearchEngineTest, SetSchemaDelete) { EXPECT_THAT(icing.SetSchema(schema, true).status(), ProtoIsOk()); - // 4. Try to delete by email type. + // 4. Try to delete by email type. This should fail because email was + // already deleted. EXPECT_THAT(icing.DeleteBySchemaType("Email").status(), ProtoStatusIs(StatusProto::NOT_FOUND)); } @@ -731,7 +953,13 @@ TEST_F(IcingSearchEngineTest, SetSchemaCompatibleVersionUpdateSucceeds) { property->set_data_type(PropertyConfigProto::DataType::STRING); property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + SetSchemaResultProto set_schema_result = icing.SetSchema(schema); + // Ignore latency numbers. They're covered elsewhere. 
+ set_schema_result.clear_latency_ms(); + SetSchemaResultProto expected_set_schema_result; + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + expected_set_schema_result.mutable_new_schema_types()->Add("Email"); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1)); } @@ -749,12 +977,20 @@ TEST_F(IcingSearchEngineTest, SetSchemaCompatibleVersionUpdateSucceeds) { property->set_property_name("title"); property->set_data_type(PropertyConfigProto::DataType::STRING); property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + property = type->add_properties(); property->set_property_name("body"); property->set_data_type(PropertyConfigProto::DataType::STRING); property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); // 3. SetSchema should succeed and the version number should be updated. - EXPECT_THAT(icing.SetSchema(schema, true).status(), ProtoIsOk()); + SetSchemaResultProto set_schema_result = icing.SetSchema(schema, true); + // Ignore latency numbers. They're covered elsewhere. 
+ set_schema_result.clear_latency_ms(); + SetSchemaResultProto expected_set_schema_result; + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + expected_set_schema_result.mutable_fully_compatible_changed_schema_types() + ->Add("Email"); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2)); } @@ -940,7 +1176,12 @@ TEST_F(IcingSearchEngineTest, } TEST_F(IcingSearchEngineTest, SetSchema) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + auto fake_clock = std::make_unique<FakeClock>(); + fake_clock->SetTimerElapsedMilliseconds(1000); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique<Filesystem>(), + std::make_unique<IcingFilesystem>(), + std::move(fake_clock), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); auto message_document = CreateMessageDocument("namespace", "uri"); @@ -969,26 +1210,31 @@ TEST_F(IcingSearchEngineTest, SetSchema) { empty_type->set_schema_type(""); // Make sure we can't set invalid schemas - EXPECT_THAT(icing.SetSchema(invalid_schema).status(), + SetSchemaResultProto set_schema_result = icing.SetSchema(invalid_schema); + EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); + EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000)); // Can add an document of a set schema - EXPECT_THAT(icing.SetSchema(schema_with_message).status(), ProtoIsOk()); + set_schema_result = icing.SetSchema(schema_with_message); + EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::OK)); + EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000)); EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk()); // Schema with Email doesn't have Message, so would result incompatible // data - EXPECT_THAT(icing.SetSchema(schema_with_email).status(), + set_schema_result = icing.SetSchema(schema_with_email); + EXPECT_THAT(set_schema_result.status(), 
ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); + EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000)); // Can expand the set of schema types and add an document of a new // schema type - EXPECT_THAT(icing.SetSchema(SchemaProto(schema_with_email_and_message)) - .status() - .code(), - Eq(StatusProto::OK)); - EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk()); + set_schema_result = icing.SetSchema(schema_with_email_and_message); + EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::OK)); + EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000)); + EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk()); // Can't add an document whose schema isn't set auto photo_document = DocumentBuilder() .SetKey("namespace", "uri") @@ -1001,7 +1247,8 @@ TEST_F(IcingSearchEngineTest, SetSchema) { HasSubstr("'Photo' not found")); } -TEST_F(IcingSearchEngineTest, SetSchemaTriggersIndexRestorationAndReturnsOk) { +TEST_F(IcingSearchEngineTest, + SetSchemaNewIndexedPropertyTriggersIndexRestorationAndReturnsOk) { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); @@ -1010,8 +1257,15 @@ TEST_F(IcingSearchEngineTest, SetSchemaTriggersIndexRestorationAndReturnsOk) { ->mutable_properties(0) ->clear_string_indexing_config(); - EXPECT_THAT(icing.SetSchema(schema_with_no_indexed_property).status(), - ProtoIsOk()); + SetSchemaResultProto set_schema_result = + icing.SetSchema(schema_with_no_indexed_property); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); + SetSchemaResultProto expected_set_schema_result; + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + expected_set_schema_result.mutable_new_schema_types()->Add("Message"); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + // Nothing will be index and Search() won't return anything. 
EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), ProtoIsOk()); @@ -1026,13 +1280,20 @@ TEST_F(IcingSearchEngineTest, SetSchemaTriggersIndexRestorationAndReturnsOk) { SearchResultProto actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStats(empty_result)); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStatsAndScores(empty_result)); SchemaProto schema_with_indexed_property = CreateMessageSchema(); // Index restoration should be triggered here because new schema requires more // properties to be indexed. - EXPECT_THAT(icing.SetSchema(schema_with_indexed_property).status(), - ProtoIsOk()); + set_schema_result = icing.SetSchema(schema_with_indexed_property); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); + expected_set_schema_result = SetSchemaResultProto(); + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + expected_set_schema_result.mutable_index_incompatible_changed_schema_types() + ->Add("Message"); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); SearchResultProto expected_search_result_proto; expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); @@ -1040,8 +1301,441 @@ TEST_F(IcingSearchEngineTest, SetSchemaTriggersIndexRestorationAndReturnsOk) { CreateMessageDocument("namespace", "uri"); actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_F(IcingSearchEngineTest, + SetSchemaChangeNestedPropertiesTriggersIndexRestorationAndReturnsOk) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + SchemaTypeConfigProto person_proto = + SchemaTypeConfigBuilder() + 
.SetType("Person") + .AddProperty(PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaProto nested_schema = + SchemaBuilder() + .AddType(person_proto) + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeDocument( + "Person", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); + SetSchemaResultProto expected_set_schema_result; + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + expected_set_schema_result.mutable_new_schema_types()->Add("Email"); + expected_set_schema_result.mutable_new_schema_types()->Add("Person"); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + DocumentProto document = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(1000) + .AddStringProperty("subject", + "Did you get the memo about TPS reports?") + .AddDocumentProperty("sender", + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Person") + .AddStringProperty("name", "Bill Lundbergh") + .Build()) + .Build(); + + // "sender.name" should get assigned property id 0 and subject should get + // property id 1. 
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + + // document should match a query for 'Bill' in 'sender.name', but not in + // 'subject' + SearchSpecProto search_spec; + search_spec.set_query("sender.name:Bill"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + + SearchResultProto result; + result.mutable_status()->set_code(StatusProto::OK); + *result.mutable_results()->Add()->mutable_document() = document; + + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result)); + + SearchResultProto empty_result; + empty_result.mutable_status()->set_code(StatusProto::OK); + search_spec.set_query("subject:Bill"); + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStatsAndScores(empty_result)); + + // Now update the schema with index_nested_properties=false. This should + // reassign property ids, lead to an index rebuild and ensure that nothing + // match a query for "Bill". + SchemaProto no_nested_schema = + SchemaBuilder() + .AddType(person_proto) + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeDocument( + "Person", + /*index_nested_properties=*/false) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + set_schema_result = icing.SetSchema(no_nested_schema); + // Ignore latency numbers. They're covered elsewhere. 
+ set_schema_result.clear_latency_ms(); + expected_set_schema_result = SetSchemaResultProto(); + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + expected_set_schema_result.mutable_index_incompatible_changed_schema_types() + ->Add("Email"); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + // document shouldn't match a query for 'Bill' in either 'sender.name' or + // 'subject' + search_spec.set_query("sender.name:Bill"); + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStatsAndScores(empty_result)); + + search_spec.set_query("subject:Bill"); + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); EXPECT_THAT(actual_results, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EqualsSearchResultIgnoreStatsAndScores(empty_result)); +} + +TEST_F(IcingSearchEngineTest, + ForceSetSchemaPropertyDeletionTriggersIndexRestorationAndReturnsOk) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // 'body' should have a property id of 0 and 'subject' should have a property + // id of 1. + SchemaProto email_with_body_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + SetSchemaResultProto set_schema_result = + icing.SetSchema(email_with_body_schema); + // Ignore latency numbers. They're covered elsewhere. 
+ set_schema_result.clear_latency_ms(); + SetSchemaResultProto expected_set_schema_result; + expected_set_schema_result.mutable_new_schema_types()->Add("Email"); + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + // Create a document with only a subject property. + DocumentProto document = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(1000) + .AddStringProperty("subject", + "Did you get the memo about TPS reports?") + .Build(); + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + + // We should be able to retrieve the document by searching for 'tps' in + // 'subject'. + SearchSpecProto search_spec; + search_spec.set_query("subject:tps"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + + SearchResultProto result; + result.mutable_status()->set_code(StatusProto::OK); + *result.mutable_results()->Add()->mutable_document() = document; + + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result)); + + // Now update the schema to remove the 'body' field. This is backwards + // incompatible, but document should be preserved because it doesn't contain a + // 'body' field. If the index is correctly rebuilt, then 'subject' will now + // have a property id of 0. If not, then the hits in the index will still have + // have a property id of 1 and therefore it won't be found. 
+ SchemaProto email_no_body_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Email").AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + set_schema_result = icing.SetSchema( + email_no_body_schema, /*ignore_errors_and_delete_documents=*/true); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); + expected_set_schema_result = SetSchemaResultProto(); + expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email"); + expected_set_schema_result.mutable_index_incompatible_changed_schema_types() + ->Add("Email"); + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + // We should be able to retrieve the document by searching for 'tps' in + // 'subject'. + search_spec.set_query("subject:tps"); + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result)); +} + +TEST_F( + IcingSearchEngineTest, + ForceSetSchemaPropertyDeletionAndAdditionTriggersIndexRestorationAndReturnsOk) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // 'body' should have a property id of 0 and 'subject' should have a property + // id of 1. 
+ SchemaProto email_with_body_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + SetSchemaResultProto set_schema_result = + icing.SetSchema(email_with_body_schema); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); + SetSchemaResultProto expected_set_schema_result; + expected_set_schema_result.mutable_new_schema_types()->Add("Email"); + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + // Create a document with only a subject property. + DocumentProto document = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(1000) + .AddStringProperty("subject", + "Did you get the memo about TPS reports?") + .Build(); + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + + // We should be able to retrieve the document by searching for 'tps' in + // 'subject'. + SearchSpecProto search_spec; + search_spec.set_query("subject:tps"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + + SearchResultProto result; + result.mutable_status()->set_code(StatusProto::OK); + *result.mutable_results()->Add()->mutable_document() = document; + + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result)); + + // Now update the schema to remove the 'body' field. This is backwards + // incompatible, but document should be preserved because it doesn't contain a + // 'body' field. 
If the index is correctly rebuilt, then 'subject' and 'to' + // will now have property ids of 0 and 1 respectively. If not, then the hits + // in the index will still have have a property id of 1 and therefore it won't + // be found. + SchemaProto email_no_body_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("to") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + set_schema_result = icing.SetSchema( + email_no_body_schema, /*ignore_errors_and_delete_documents=*/true); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); + expected_set_schema_result = SetSchemaResultProto(); + expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email"); + expected_set_schema_result.mutable_index_incompatible_changed_schema_types() + ->Add("Email"); + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + // We should be able to retrieve the document by searching for 'tps' in + // 'subject'. 
+ search_spec.set_query("subject:tps"); + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result)); +} + +TEST_F(IcingSearchEngineTest, ForceSetSchemaIncompatibleNestedDocsAreDeleted) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + SchemaTypeConfigProto email_schema_type = + SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty( + PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeDocument("Person", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaProto nested_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Person") + .AddProperty( + PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("company") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(email_schema_type) + .Build(); + + SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema); + // Ignore latency numbers. They're covered elsewhere. 
+ set_schema_result.clear_latency_ms(); + SetSchemaResultProto expected_set_schema_result; + expected_set_schema_result.mutable_new_schema_types()->Add("Email"); + expected_set_schema_result.mutable_new_schema_types()->Add("Person"); + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + // Create two documents - a person document and an email document - both docs + // should be deleted when we remove the 'company' field from the person type. + DocumentProto person_document = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Person") + .SetCreationTimestampMs(1000) + .AddStringProperty("name", "Bill Lundbergh") + .AddStringProperty("company", "Initech Corp.") + .Build(); + EXPECT_THAT(icing.Put(person_document).status(), ProtoIsOk()); + + DocumentProto email_document = + DocumentBuilder() + .SetKey("namespace1", "uri2") + .SetSchema("Email") + .SetCreationTimestampMs(1000) + .AddStringProperty("subject", + "Did you get the memo about TPS reports?") + .AddDocumentProperty("sender", person_document) + .Build(); + EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk()); + + // We should be able to retrieve both documents. + GetResultProto get_result = + icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()); + EXPECT_THAT(get_result.status(), ProtoIsOk()); + EXPECT_THAT(get_result.document(), EqualsProto(person_document)); + + get_result = + icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance()); + EXPECT_THAT(get_result.status(), ProtoIsOk()); + EXPECT_THAT(get_result.document(), EqualsProto(email_document)); + + // Now update the schema to remove the 'company' field. This is backwards + // incompatible, *both* documents should be deleted because both fail + // validation (they each contain a 'Person' that has a non-existent property). 
+ nested_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( + PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(email_schema_type) + .Build(); + + set_schema_result = icing.SetSchema( + nested_schema, /*ignore_errors_and_delete_documents=*/true); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); + expected_set_schema_result = SetSchemaResultProto(); + expected_set_schema_result.mutable_incompatible_schema_types()->Add("Person"); + expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email"); + expected_set_schema_result.mutable_index_incompatible_changed_schema_types() + ->Add("Email"); + expected_set_schema_result.mutable_index_incompatible_changed_schema_types() + ->Add("Person"); + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + // Both documents should be deleted now. + get_result = + icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()); + EXPECT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND)); + + get_result = + icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance()); + EXPECT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND)); } TEST_F(IcingSearchEngineTest, SetSchemaRevalidatesDocumentsAndReturnsOk) { @@ -1089,6 +1783,10 @@ TEST_F(IcingSearchEngineTest, SetSchemaRevalidatesDocumentsAndReturnsOk) { property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); // Can't set the schema since it's incompatible + SetSchemaResultProto set_schema_result = + icing.SetSchema(schema_with_required_subject); + // Ignore latency numbers. They're covered elsewhere. 
+ set_schema_result.clear_latency_ms(); SetSchemaResultProto expected_set_schema_result_proto; expected_set_schema_result_proto.mutable_status()->set_code( StatusProto::FAILED_PRECONDITION); @@ -1096,15 +1794,17 @@ TEST_F(IcingSearchEngineTest, SetSchemaRevalidatesDocumentsAndReturnsOk) { "Schema is incompatible."); expected_set_schema_result_proto.add_incompatible_schema_types("email"); - EXPECT_THAT(icing.SetSchema(schema_with_required_subject), - EqualsProto(expected_set_schema_result_proto)); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result_proto)); // Force set it + set_schema_result = + icing.SetSchema(schema_with_required_subject, + /*ignore_errors_and_delete_documents=*/true); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); expected_set_schema_result_proto.mutable_status()->set_code(StatusProto::OK); expected_set_schema_result_proto.mutable_status()->clear_message(); - EXPECT_THAT(icing.SetSchema(schema_with_required_subject, - /*ignore_errors_and_delete_documents=*/true), - EqualsProto(expected_set_schema_result_proto)); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result_proto)); GetResultProto expected_get_result_proto; expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); @@ -1161,19 +1861,25 @@ TEST_F(IcingSearchEngineTest, SetSchemaDeletesDocumentsAndReturnsOk) { type->set_schema_type("email"); // Can't set the schema since it's incompatible + SetSchemaResultProto set_schema_result = icing.SetSchema(new_schema); + // Ignore latency numbers. They're covered elsewhere. 
+ set_schema_result.clear_latency_ms(); SetSchemaResultProto expected_result; expected_result.mutable_status()->set_code(StatusProto::FAILED_PRECONDITION); expected_result.mutable_status()->set_message("Schema is incompatible."); expected_result.add_deleted_schema_types("message"); - EXPECT_THAT(icing.SetSchema(new_schema), EqualsProto(expected_result)); + EXPECT_THAT(set_schema_result, EqualsProto(expected_result)); // Force set it + set_schema_result = + icing.SetSchema(new_schema, + /*ignore_errors_and_delete_documents=*/true); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); expected_result.mutable_status()->set_code(StatusProto::OK); expected_result.mutable_status()->clear_message(); - EXPECT_THAT(icing.SetSchema(new_schema, - /*ignore_errors_and_delete_documents=*/true), - EqualsProto(expected_result)); + EXPECT_THAT(set_schema_result, EqualsProto(expected_result)); // "email" document is still there GetResultProto expected_get_result_proto; @@ -1500,24 +2206,21 @@ TEST_F(IcingSearchEngineTest, SearchReturnsValidResults) { icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); EXPECT_THAT(results.status(), ProtoIsOk()); EXPECT_THAT(results.results(), SizeIs(2)); - EXPECT_THAT(results.results(0).document(), EqualsProto(document_two)); - EXPECT_THAT(GetMatch(results.results(0).document(), - results.results(0).snippet(), "body", - /*snippet_index=*/0), - Eq("message")); - EXPECT_THAT( - GetWindow(results.results(0).document(), results.results(0).snippet(), - "body", /*snippet_index=*/0), - Eq("message body")); + + const DocumentProto& document = results.results(0).document(); + EXPECT_THAT(document, EqualsProto(document_two)); + + const SnippetProto& snippet = results.results(0).snippet(); + EXPECT_THAT(snippet.entries(), SizeIs(1)); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + 
EXPECT_THAT(GetWindows(content, snippet.entries(0)), + ElementsAre("message body")); + EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("message")); + EXPECT_THAT(results.results(1).document(), EqualsProto(document_one)); - EXPECT_THAT( - GetMatch(results.results(1).document(), results.results(1).snippet(), - "body", /*snippet_index=*/0), - IsEmpty()); - EXPECT_THAT( - GetWindow(results.results(1).document(), results.results(1).snippet(), - "body", /*snippet_index=*/0), - IsEmpty()); + EXPECT_THAT(results.results(1).snippet().entries(), IsEmpty()); search_spec.set_query("foo"); @@ -1526,8 +2229,79 @@ TEST_F(IcingSearchEngineTest, SearchReturnsValidResults) { SearchResultProto actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_F(IcingSearchEngineTest, SearchReturnsScoresDocumentScore) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + DocumentProto document_one = CreateMessageDocument("namespace", "uri1"); + document_one.set_score(93); + document_one.set_creation_timestamp_ms(10000); + ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); + + DocumentProto document_two = CreateMessageDocument("namespace", "uri2"); + document_two.set_score(15); + document_two.set_creation_timestamp_ms(12000); + ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); + + // Rank by DOCUMENT_SCORE and ensure that the score field is populated with + // document score. 
+ ScoringSpecProto scoring_spec; + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + + SearchResultProto results = icing.Search(search_spec, scoring_spec, + ResultSpecProto::default_instance()); + EXPECT_THAT(results.status(), ProtoIsOk()); + EXPECT_THAT(results.results(), SizeIs(2)); + + EXPECT_THAT(results.results(0).document(), EqualsProto(document_one)); + EXPECT_THAT(results.results(0).score(), 93); + EXPECT_THAT(results.results(1).document(), EqualsProto(document_two)); + EXPECT_THAT(results.results(1).score(), 15); +} + +TEST_F(IcingSearchEngineTest, SearchReturnsScoresCreationTimestamp) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + DocumentProto document_one = CreateMessageDocument("namespace", "uri1"); + document_one.set_score(93); + document_one.set_creation_timestamp_ms(10000); + ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); + + DocumentProto document_two = CreateMessageDocument("namespace", "uri2"); + document_two.set_score(15); + document_two.set_creation_timestamp_ms(12000); + ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); + + // Rank by CREATION_TS and ensure that the score field is populated with + // creation ts. 
+ ScoringSpecProto scoring_spec; + scoring_spec.set_rank_by( + ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP); + + SearchResultProto results = icing.Search(search_spec, scoring_spec, + ResultSpecProto::default_instance()); + EXPECT_THAT(results.status(), ProtoIsOk()); + EXPECT_THAT(results.results(), SizeIs(2)); + + EXPECT_THAT(results.results(0).document(), EqualsProto(document_two)); + EXPECT_THAT(results.results(0).score(), 12000); + EXPECT_THAT(results.results(1).document(), EqualsProto(document_one)); + EXPECT_THAT(results.results(1).score(), 10000); } TEST_F(IcingSearchEngineTest, SearchReturnsOneResult) { @@ -1559,8 +2333,8 @@ TEST_F(IcingSearchEngineTest, SearchReturnsOneResult) { // The token is a random number so we don't verify it. expected_search_result_proto.set_next_page_token( search_result_proto.next_page_token()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchZeroResultLimitReturnsEmptyResults) { @@ -1578,8 +2352,8 @@ TEST_F(IcingSearchEngineTest, SearchZeroResultLimitReturnsEmptyResults) { expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); SearchResultProto actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); - EXPECT_THAT(actual_results, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchNegativeResultLimitReturnsInvalidArgument) { @@ -1600,8 +2374,8 @@ TEST_F(IcingSearchEngineTest, SearchNegativeResultLimitReturnsInvalidArgument) { "ResultSpecProto.num_per_page cannot be negative."); SearchResultProto actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); - EXPECT_THAT(actual_results, - 
EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchWithPersistenceReturnsValidResults) { @@ -1645,8 +2419,8 @@ TEST_F(IcingSearchEngineTest, SearchWithPersistenceReturnsValidResults) { SearchResultProto actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); search_spec.set_query("foo"); @@ -1654,7 +2428,8 @@ TEST_F(IcingSearchEngineTest, SearchWithPersistenceReturnsValidResults) { empty_result.mutable_status()->set_code(StatusProto::OK); actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStats(empty_result)); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStatsAndScores(empty_result)); } } @@ -1675,8 +2450,8 @@ TEST_F(IcingSearchEngineTest, SearchShouldReturnEmpty) { icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchShouldReturnMultiplePages) { @@ -1716,8 +2491,8 @@ TEST_F(IcingSearchEngineTest, SearchShouldReturnMultiplePages) { uint64_t next_page_token = search_result_proto.next_page_token(); // Since the token is a random number, we don't need to verify expected_search_result_proto.set_next_page_token(next_page_token); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, 
EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); // Second page, 2 results expected_search_result_proto.clear_results(); @@ -1726,8 +2501,8 @@ TEST_F(IcingSearchEngineTest, SearchShouldReturnMultiplePages) { *expected_search_result_proto.mutable_results()->Add()->mutable_document() = document2; search_result_proto = icing.GetNextPage(next_page_token); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); // Third page, 1 result expected_search_result_proto.clear_results(); @@ -1737,14 +2512,14 @@ TEST_F(IcingSearchEngineTest, SearchShouldReturnMultiplePages) { // token. expected_search_result_proto.clear_next_page_token(); search_result_proto = icing.GetNextPage(next_page_token); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); // No more results expected_search_result_proto.clear_results(); search_result_proto = icing.GetNextPage(next_page_token); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchWithNoScoringShouldReturnMultiplePages) { @@ -1787,8 +2562,8 @@ TEST_F(IcingSearchEngineTest, SearchWithNoScoringShouldReturnMultiplePages) { uint64_t next_page_token = search_result_proto.next_page_token(); // Since the token is a random number, we don't need to verify expected_search_result_proto.set_next_page_token(next_page_token); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); // 
Second page, 2 results expected_search_result_proto.clear_results(); @@ -1797,8 +2572,8 @@ TEST_F(IcingSearchEngineTest, SearchWithNoScoringShouldReturnMultiplePages) { *expected_search_result_proto.mutable_results()->Add()->mutable_document() = document2; search_result_proto = icing.GetNextPage(next_page_token); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); // Third page, 1 result expected_search_result_proto.clear_results(); @@ -1808,14 +2583,14 @@ TEST_F(IcingSearchEngineTest, SearchWithNoScoringShouldReturnMultiplePages) { // token. expected_search_result_proto.clear_next_page_token(); search_result_proto = icing.GetNextPage(next_page_token); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); // No more results expected_search_result_proto.clear_results(); search_result_proto = icing.GetNextPage(next_page_token); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, ShouldReturnMultiplePagesWithSnippets) { @@ -1852,24 +2627,28 @@ TEST_F(IcingSearchEngineTest, ShouldReturnMultiplePagesWithSnippets) { ASSERT_THAT(search_result.results(), SizeIs(2)); ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken)); - EXPECT_THAT(search_result.results(0).document(), EqualsProto(document5)); - EXPECT_THAT(GetMatch(search_result.results(0).document(), - search_result.results(0).snippet(), "body", - /*snippet_index=*/0), - Eq("message")); - EXPECT_THAT(GetWindow(search_result.results(0).document(), - search_result.results(0).snippet(), "body", - 
/*snippet_index=*/0), - Eq("message body")); - EXPECT_THAT(search_result.results(1).document(), EqualsProto(document4)); - EXPECT_THAT(GetMatch(search_result.results(1).document(), - search_result.results(1).snippet(), "body", - /*snippet_index=*/0), - Eq("message")); - EXPECT_THAT(GetWindow(search_result.results(1).document(), - search_result.results(1).snippet(), "body", - /*snippet_index=*/0), - Eq("message body")); + const DocumentProto& document_result_1 = search_result.results(0).document(); + EXPECT_THAT(document_result_1, EqualsProto(document5)); + const SnippetProto& snippet_result_1 = search_result.results(0).snippet(); + EXPECT_THAT(snippet_result_1.entries(), SizeIs(1)); + EXPECT_THAT(snippet_result_1.entries(0).property_name(), Eq("body")); + std::string_view content = GetString( + &document_result_1, snippet_result_1.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet_result_1.entries(0)), + ElementsAre("message body")); + EXPECT_THAT(GetMatches(content, snippet_result_1.entries(0)), + ElementsAre("message")); + + const DocumentProto& document_result_2 = search_result.results(1).document(); + EXPECT_THAT(document_result_2, EqualsProto(document4)); + const SnippetProto& snippet_result_2 = search_result.results(1).snippet(); + EXPECT_THAT(snippet_result_2.entries(0).property_name(), Eq("body")); + content = GetString(&document_result_2, + snippet_result_2.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet_result_2.entries(0)), + ElementsAre("message body")); + EXPECT_THAT(GetMatches(content, snippet_result_2.entries(0)), + ElementsAre("message")); // Second page, 2 result with 1 snippet search_result = icing.GetNextPage(search_result.next_page_token()); @@ -1877,17 +2656,19 @@ TEST_F(IcingSearchEngineTest, ShouldReturnMultiplePagesWithSnippets) { ASSERT_THAT(search_result.results(), SizeIs(2)); ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken)); - 
EXPECT_THAT(search_result.results(0).document(), EqualsProto(document3)); - EXPECT_THAT(GetMatch(search_result.results(0).document(), - search_result.results(0).snippet(), "body", - /*snippet_index=*/0), - Eq("message")); - EXPECT_THAT(GetWindow(search_result.results(0).document(), - search_result.results(0).snippet(), "body", - /*snippet_index=*/0), - Eq("message body")); + const DocumentProto& document_result_3 = search_result.results(0).document(); + EXPECT_THAT(document_result_3, EqualsProto(document3)); + const SnippetProto& snippet_result_3 = search_result.results(0).snippet(); + EXPECT_THAT(snippet_result_3.entries(0).property_name(), Eq("body")); + content = GetString(&document_result_3, + snippet_result_3.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet_result_3.entries(0)), + ElementsAre("message body")); + EXPECT_THAT(GetMatches(content, snippet_result_3.entries(0)), + ElementsAre("message")); + EXPECT_THAT(search_result.results(1).document(), EqualsProto(document2)); - EXPECT_THAT(search_result.results(1).snippet().entries_size(), Eq(0)); + EXPECT_THAT(search_result.results(1).snippet().entries(), IsEmpty()); // Third page, 1 result with 0 snippets search_result = icing.GetNextPage(search_result.next_page_token()); @@ -1896,7 +2677,7 @@ TEST_F(IcingSearchEngineTest, ShouldReturnMultiplePagesWithSnippets) { ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken)); EXPECT_THAT(search_result.results(0).document(), EqualsProto(document1)); - EXPECT_THAT(search_result.results(0).snippet().entries_size(), Eq(0)); + EXPECT_THAT(search_result.results(0).snippet().entries(), IsEmpty()); } TEST_F(IcingSearchEngineTest, ShouldInvalidateNextPageToken) { @@ -1927,8 +2708,8 @@ TEST_F(IcingSearchEngineTest, ShouldInvalidateNextPageToken) { uint64_t next_page_token = search_result_proto.next_page_token(); // Since the token is a random number, we don't need to verify expected_search_result_proto.set_next_page_token(next_page_token); 
- EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); // Now document1 is still to be fetched. // Invalidates token @@ -1938,8 +2719,8 @@ TEST_F(IcingSearchEngineTest, ShouldInvalidateNextPageToken) { expected_search_result_proto.clear_results(); expected_search_result_proto.clear_next_page_token(); search_result_proto = icing.GetNextPage(next_page_token); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, @@ -1971,22 +2752,24 @@ TEST_F(IcingSearchEngineTest, uint64_t next_page_token = search_result_proto.next_page_token(); // Since the token is a random number, we don't need to verify expected_search_result_proto.set_next_page_token(next_page_token); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); // Now document1 is still to be fetched. 
OptimizeResultProto optimize_result_proto; optimize_result_proto.mutable_status()->set_code(StatusProto::OK); optimize_result_proto.mutable_status()->set_message(""); - ASSERT_THAT(icing.Optimize(), EqualsProto(optimize_result_proto)); + OptimizeResultProto actual_result = icing.Optimize(); + actual_result.clear_optimize_stats(); + ASSERT_THAT(actual_result, EqualsProto(optimize_result_proto)); // Tries to fetch the second page, no results since all tokens have been // invalidated during Optimize() expected_search_result_proto.clear_results(); expected_search_result_proto.clear_next_page_token(); search_result_proto = icing.GetNextPage(next_page_token); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, OptimizationShouldRemoveDeletedDocs) { @@ -2007,7 +2790,8 @@ TEST_F(IcingSearchEngineTest, OptimizationShouldRemoveDeletedDocs) { // Deletes document1 ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk()); const std::string document_log_path = - icing_options.base_dir() + "/document_dir/document_log"; + icing_options.base_dir() + "/document_dir/" + + DocumentLogCreator::GetDocumentLogFilename(); int64_t document_log_size_before = filesystem()->GetFileSize(document_log_path.c_str()); ASSERT_THAT(icing.Optimize().status(), ProtoIsOk()); @@ -2063,59 +2847,78 @@ TEST_F(IcingSearchEngineTest, GetOptimizeInfoHasCorrectStats) { .SetTtlMs(500) .Build(); - auto fake_clock = std::make_unique<FakeClock>(); - fake_clock->SetSystemTimeMilliseconds(1000); + { + auto fake_clock = std::make_unique<FakeClock>(); + fake_clock->SetSystemTimeMilliseconds(1000); - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique<Filesystem>(), - std::make_unique<IcingFilesystem>(), - std::move(fake_clock), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), 
ProtoIsOk()); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique<Filesystem>(), + std::make_unique<IcingFilesystem>(), + std::move(fake_clock), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - // Just initialized, nothing is optimizable yet. - GetOptimizeInfoResultProto optimize_info = icing.GetOptimizeInfo(); - EXPECT_THAT(optimize_info.status(), ProtoIsOk()); - EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0)); - EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0)); + // Just initialized, nothing is optimizable yet. + GetOptimizeInfoResultProto optimize_info = icing.GetOptimizeInfo(); + EXPECT_THAT(optimize_info.status(), ProtoIsOk()); + EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0)); + EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0)); + EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0)); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - // Only have active documents, nothing is optimizable yet. 
- optimize_info = icing.GetOptimizeInfo(); - EXPECT_THAT(optimize_info.status(), ProtoIsOk()); - EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0)); - EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0)); - - // Deletes document1 - ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk()); - - optimize_info = icing.GetOptimizeInfo(); - EXPECT_THAT(optimize_info.status(), ProtoIsOk()); - EXPECT_THAT(optimize_info.optimizable_docs(), Eq(1)); - EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Gt(0)); - int64_t first_estimated_optimizable_bytes = - optimize_info.estimated_optimizable_bytes(); - - // Add a second document, but it'll be expired since the time (1000) is - // greater than the document's creation timestamp (100) + the document's ttl - // (500) - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + // Only have active documents, nothing is optimizable yet. + optimize_info = icing.GetOptimizeInfo(); + EXPECT_THAT(optimize_info.status(), ProtoIsOk()); + EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0)); + EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0)); + EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0)); + + // Deletes document1 + ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk()); + + optimize_info = icing.GetOptimizeInfo(); + EXPECT_THAT(optimize_info.status(), ProtoIsOk()); + EXPECT_THAT(optimize_info.optimizable_docs(), Eq(1)); + EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Gt(0)); + EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0)); + int64_t first_estimated_optimizable_bytes = + optimize_info.estimated_optimizable_bytes(); + + // Add a second document, but it'll be expired since the time (1000) is + // greater than the document's creation timestamp (100) + the document's ttl + // (500) + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + optimize_info = icing.GetOptimizeInfo(); + EXPECT_THAT(optimize_info.status(), ProtoIsOk()); + 
EXPECT_THAT(optimize_info.optimizable_docs(), Eq(2)); + EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), + Gt(first_estimated_optimizable_bytes)); + EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0)); + + // Optimize + ASSERT_THAT(icing.Optimize().status(), ProtoIsOk()); + } - optimize_info = icing.GetOptimizeInfo(); - EXPECT_THAT(optimize_info.status(), ProtoIsOk()); - EXPECT_THAT(optimize_info.optimizable_docs(), Eq(2)); - EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), - Gt(first_estimated_optimizable_bytes)); + { + // Recreate with new time + auto fake_clock = std::make_unique<FakeClock>(); + fake_clock->SetSystemTimeMilliseconds(5000); - // Optimize - ASSERT_THAT(icing.Optimize().status(), ProtoIsOk()); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique<Filesystem>(), + std::make_unique<IcingFilesystem>(), + std::move(fake_clock), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - // Nothing is optimizable now that everything has been optimized away. - optimize_info = icing.GetOptimizeInfo(); - EXPECT_THAT(optimize_info.status(), ProtoIsOk()); - EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0)); - EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0)); + // Nothing is optimizable now that everything has been optimized away. 
+ GetOptimizeInfoResultProto optimize_info = icing.GetOptimizeInfo(); + EXPECT_THAT(optimize_info.status(), ProtoIsOk()); + EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0)); + EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0)); + EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(4000)); + } } TEST_F(IcingSearchEngineTest, GetAndPutShouldWorkAfterOptimization) { @@ -2351,8 +3154,8 @@ TEST_F(IcingSearchEngineTest, DeleteBySchemaType) { DeleteBySchemaTypeResultProto result_proto = icing.DeleteBySchemaType("message"); EXPECT_THAT(result_proto.status(), ProtoIsOk()); - NativeDeleteStats exp_stats; - exp_stats.set_delete_type(NativeDeleteStats::DeleteType::SCHEMA_TYPE); + DeleteStatsProto exp_stats; + exp_stats.set_delete_type(DeleteStatsProto::DeleteType::SCHEMA_TYPE); exp_stats.set_latency_ms(7); exp_stats.set_num_documents_deleted(1); EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats)); @@ -2383,8 +3186,8 @@ TEST_F(IcingSearchEngineTest, DeleteBySchemaType) { SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, DeleteSchemaTypeByQuery) { @@ -2458,8 +3261,8 @@ TEST_F(IcingSearchEngineTest, DeleteSchemaTypeByQuery) { SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, DeleteByNamespace) { @@ -2519,8 +3322,8 @@ TEST_F(IcingSearchEngineTest, DeleteByNamespace) { DeleteByNamespaceResultProto result_proto = 
icing.DeleteByNamespace("namespace1"); EXPECT_THAT(result_proto.status(), ProtoIsOk()); - NativeDeleteStats exp_stats; - exp_stats.set_delete_type(NativeDeleteStats::DeleteType::NAMESPACE); + DeleteStatsProto exp_stats; + exp_stats.set_delete_type(DeleteStatsProto::DeleteType::NAMESPACE); exp_stats.set_latency_ms(7); exp_stats.set_num_documents_deleted(2); EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats)); @@ -2559,8 +3362,8 @@ TEST_F(IcingSearchEngineTest, DeleteByNamespace) { SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, DeleteNamespaceByQuery) { @@ -2629,8 +3432,8 @@ TEST_F(IcingSearchEngineTest, DeleteNamespaceByQuery) { SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, DeleteByQuery) { @@ -2679,11 +3482,16 @@ TEST_F(IcingSearchEngineTest, DeleteByQuery) { search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); DeleteByQueryResultProto result_proto = icing.DeleteByQuery(search_spec); EXPECT_THAT(result_proto.status(), ProtoIsOk()); - NativeDeleteStats exp_stats; - exp_stats.set_delete_type(NativeDeleteStats::DeleteType::QUERY); + DeleteByQueryStatsProto exp_stats; exp_stats.set_latency_ms(7); exp_stats.set_num_documents_deleted(1); - EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats)); + exp_stats.set_query_length(search_spec.query().length()); + exp_stats.set_num_terms(1); + 
exp_stats.set_num_namespaces_filtered(0); + exp_stats.set_num_schema_types_filtered(0); + exp_stats.set_parse_query_latency_ms(7); + exp_stats.set_document_removal_latency_ms(7); + EXPECT_THAT(result_proto.delete_by_query_stats(), EqualsProto(exp_stats)); expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); expected_get_result_proto.mutable_status()->set_message( @@ -2711,8 +3519,8 @@ TEST_F(IcingSearchEngineTest, DeleteByQuery) { SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, DeleteByQueryNotFound) { @@ -2784,8 +3592,8 @@ TEST_F(IcingSearchEngineTest, DeleteByQueryNotFound) { SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SetSchemaShouldWorkAfterOptimization) { @@ -2848,8 +3656,8 @@ TEST_F(IcingSearchEngineTest, SearchShouldWorkAfterOptimization) { SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } // Destroys IcingSearchEngine to make sure nothing is cached. 
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); @@ -2857,8 +3665,8 @@ TEST_F(IcingSearchEngineTest, SearchShouldWorkAfterOptimization) { SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, IcingShouldWorkFineIfOptimizationIsAborted) { @@ -2913,8 +3721,8 @@ TEST_F(IcingSearchEngineTest, IcingShouldWorkFineIfOptimizationIsAborted) { SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, @@ -2974,8 +3782,8 @@ TEST_F(IcingSearchEngineTest, SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); search_spec.set_query("n"); @@ -2985,8 +3793,8 @@ TEST_F(IcingSearchEngineTest, // Searching new content returns the new document search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, OptimizationShouldRecoverIfDataFilesAreMissing) { @@ -3046,8 +3854,8 @@ 
TEST_F(IcingSearchEngineTest, OptimizationShouldRecoverIfDataFilesAreMissing) { SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); search_spec.set_query("n"); @@ -3057,8 +3865,8 @@ TEST_F(IcingSearchEngineTest, OptimizationShouldRecoverIfDataFilesAreMissing) { // Searching new content returns the new document search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchIncludesDocumentsBeforeTtl) { @@ -3110,8 +3918,8 @@ TEST_F(IcingSearchEngineTest, SearchIncludesDocumentsBeforeTtl) { SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchDoesntIncludeDocumentsPastTtl) { @@ -3161,8 +3969,8 @@ TEST_F(IcingSearchEngineTest, SearchDoesntIncludeDocumentsPastTtl) { SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchWorksAfterSchemaTypesCompatiblyModified) { @@ -3200,8 
+4008,8 @@ TEST_F(IcingSearchEngineTest, SearchWorksAfterSchemaTypesCompatiblyModified) { SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); // With just the schema type filter, we can search for the message search_spec.Clear(); @@ -3212,8 +4020,8 @@ TEST_F(IcingSearchEngineTest, SearchWorksAfterSchemaTypesCompatiblyModified) { search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); // Since SchemaTypeIds are assigned based on order in the SchemaProto, this // will force a change in the DocumentStore's cached SchemaTypeIds @@ -3244,8 +4052,8 @@ TEST_F(IcingSearchEngineTest, SearchWorksAfterSchemaTypesCompatiblyModified) { // We can still search for the message document search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, RecoverFromMissingHeaderFile) { @@ -3276,8 +4084,8 @@ TEST_F(IcingSearchEngineTest, RecoverFromMissingHeaderFile) { SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + 
expected_search_result_proto)); } // This should shut down IcingSearchEngine and persist anything it needs to EXPECT_TRUE(filesystem()->DeleteFile(GetHeaderFilename().c_str())); @@ -3295,127 +4103,8 @@ TEST_F(IcingSearchEngineTest, RecoverFromMissingHeaderFile) { SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); - - // Checks that Schema is still since it'll be needed to validate the document - EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), - ProtoIsOk()); -} - -TEST_F(IcingSearchEngineTest, RecoverFromInvalidHeaderMagic) { - SearchSpecProto search_spec; - search_spec.set_query("message"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - CreateMessageDocument("namespace", "uri"); - - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = - CreateMessageDocument("namespace", "uri"); - - { - // Basic initialization/setup - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), - ProtoIsOk()); - EXPECT_THAT( - icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - 
EqualsSearchResultIgnoreStats(expected_search_result_proto)); - } // This should shut down IcingSearchEngine and persist anything it needs to - - // Change the header's magic value - int32_t invalid_magic = 1; // Anything that's not the actual kMagic value. - filesystem()->PWrite(GetHeaderFilename().c_str(), - offsetof(IcingSearchEngine::Header, magic), - &invalid_magic, sizeof(invalid_magic)); - - // We should be able to recover from this and access all our previous data - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // Checks that DocumentLog is still ok - EXPECT_THAT( - icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - // Checks that the index is still ok so we can search over it - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); - - // Checks that Schema is still since it'll be needed to validate the document - EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), - ProtoIsOk()); -} - -TEST_F(IcingSearchEngineTest, RecoverFromInvalidHeaderChecksum) { - SearchSpecProto search_spec; - search_spec.set_query("message"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - CreateMessageDocument("namespace", "uri"); - - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = - CreateMessageDocument("namespace", "uri"); - - { - // Basic initialization/setup - IcingSearchEngine 
icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), - ProtoIsOk()); - EXPECT_THAT( - icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); - } // This should shut down IcingSearchEngine and persist anything it needs to - - // Change the header's checksum value - uint32_t invalid_checksum = - 1; // Anything that's not the actual checksum value - filesystem()->PWrite(GetHeaderFilename().c_str(), - offsetof(IcingSearchEngine::Header, checksum), - &invalid_checksum, sizeof(invalid_checksum)); - - // We should be able to recover from this and access all our previous data - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // Checks that DocumentLog is still ok - EXPECT_THAT( - icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - // Checks that the index is still ok so we can search over it - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); // Checks that Schema is still since it'll be needed to validate the document EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), @@ -3471,8 +4160,8 @@ TEST_F(IcingSearchEngineTest, 
UnableToRecoverFromCorruptDocumentLog) { EqualsProto(expected_get_result_proto)); } // This should shut down IcingSearchEngine and persist anything it needs to - const std::string document_log_file = - absl_ports::StrCat(GetDocumentDir(), "/document_log"); + const std::string document_log_file = absl_ports::StrCat( + GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); const std::string corrupt_data = "1234"; EXPECT_TRUE(filesystem()->Write(document_log_file.c_str(), corrupt_data.data(), corrupt_data.size())); @@ -3493,9 +4182,10 @@ TEST_F(IcingSearchEngineTest, RecoverFromInconsistentSchemaStore) { .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); + IcingSearchEngineOptions options = GetDefaultIcingOptions(); { // Initializes folder and schema - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + IcingSearchEngine icing(options, GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); SchemaProto schema; @@ -3532,8 +4222,8 @@ TEST_F(IcingSearchEngineTest, RecoverFromInconsistentSchemaStore) { SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } // This should shut down IcingSearchEngine and persist anything it needs to { @@ -3569,6 +4259,13 @@ TEST_F(IcingSearchEngineTest, RecoverFromInconsistentSchemaStore) { property->mutable_string_indexing_config()->set_tokenizer_type( StringIndexingConfig::TokenizerType::PLAIN); + // Write the marker file + std::string marker_filepath = + absl_ports::StrCat(options.base_dir(), "/set_schema_marker"); + ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str())); + ASSERT_TRUE(sfd.is_valid()); + + // Write the new schema FakeClock fake_clock; ICING_ASSERT_OK_AND_ASSIGN( 
std::unique_ptr<SchemaStore> schema_store, @@ -3615,8 +4312,8 @@ TEST_F(IcingSearchEngineTest, RecoverFromInconsistentSchemaStore) { SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, RecoverFromInconsistentDocumentStore) { @@ -3684,8 +4381,8 @@ TEST_F(IcingSearchEngineTest, RecoverFromInconsistentDocumentStore) { SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, RecoverFromInconsistentIndex) { @@ -3708,8 +4405,8 @@ TEST_F(IcingSearchEngineTest, RecoverFromInconsistentIndex) { SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } // This should shut down IcingSearchEngine and persist anything it needs to // Pretend we lost the entire index @@ -3723,8 +4420,8 @@ TEST_F(IcingSearchEngineTest, RecoverFromInconsistentIndex) { SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } 
TEST_F(IcingSearchEngineTest, RecoverFromCorruptIndex) { @@ -3747,8 +4444,8 @@ TEST_F(IcingSearchEngineTest, RecoverFromCorruptIndex) { SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } // This should shut down IcingSearchEngine and persist anything it needs to // Pretend index is corrupted @@ -3764,8 +4461,8 @@ TEST_F(IcingSearchEngineTest, RecoverFromCorruptIndex) { SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByDocumentScore) { @@ -3825,8 +4522,8 @@ TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByDocumentScore) { scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); SearchResultProto search_result_proto = icing.Search( search_spec, scoring_spec, ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchShouldAllowNoScoring) { @@ -3884,8 +4581,8 @@ TEST_F(IcingSearchEngineTest, SearchShouldAllowNoScoring) { scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE); SearchResultProto search_result_proto = icing.Search( search_spec, scoring_spec, ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - 
EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByCreationTimestamp) { @@ -3940,8 +4637,8 @@ TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByCreationTimestamp) { ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP); SearchResultProto search_result_proto = icing.Search( search_spec, scoring_spec, ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByUsageCount) { @@ -4011,8 +4708,8 @@ TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByUsageCount) { ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT); SearchResultProto search_result_proto = icing.Search( search_spec, scoring_spec, ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, @@ -4069,8 +4766,8 @@ TEST_F(IcingSearchEngineTest, ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT); SearchResultProto search_result_proto = icing.Search( search_spec, scoring_spec, ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByUsageTimestamp) { @@ -4139,8 +4836,8 @@ TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByUsageTimestamp) { ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP); 
SearchResultProto search_result_proto = icing.Search( search_spec, scoring_spec, ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, Bm25fRelevanceScoringOneNamespace) { @@ -4303,24 +5000,21 @@ TEST_F(IcingSearchEngineTest, SearchSpecProto search_spec; search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - search_spec.set_query("body:coffee OR body:food"); + search_spec.set_query("subject:coffee OR body:food"); ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE); SearchResultProto search_result_proto = icing.Search( search_spec, scoring_spec, ResultSpecProto::default_instance()); - // Result should be in descending score order, section restrict doesn't impact - // the BM25F score. + // Result should be in descending score order EXPECT_THAT(search_result_proto.status(), ProtoIsOk()); - // Both doc5 and doc7 have "coffee" in name and text sections. - // However, doc5 has more matches. + // The term frequencies of "coffee" and "food" are calculated respectively + // from the subject section and the body section. // Documents with "food" are ranked lower as the term "food" is commonly // present in this corpus, and thus, has a lower IDF. 
EXPECT_THAT( GetUrisFromSearchResults(search_result_proto), - ElementsAre("namespace1/uri5", // 'coffee' 2 times in section subject, - // 1 time in section body - "namespace1/uri7", // 'coffee' 2 times in section body + ElementsAre("namespace1/uri5", // 'coffee' 2 times in section subject "namespace1/uri1", // 'food' 2 times in section body "namespace1/uri4", // 'food' 2 times in section body "namespace1/uri2", // 'food' 1 time in section body @@ -4583,8 +5277,8 @@ TEST_F(IcingSearchEngineTest, ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP); SearchResultProto search_result_proto = icing.Search( search_spec, scoring_spec, ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, OlderUsageTimestampShouldNotOverrideNewerOnes) { @@ -4652,8 +5346,8 @@ TEST_F(IcingSearchEngineTest, OlderUsageTimestampShouldNotOverrideNewerOnes) { ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP); SearchResultProto search_result_proto = icing.Search( search_spec, scoring_spec, ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedAscendingly) { @@ -4714,8 +5408,218 @@ TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedAscendingly) { scoring_spec.set_order_by(ScoringSpecProto::Order::ASC); SearchResultProto search_result_proto = icing.Search( search_spec, scoring_spec, ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + 
expected_search_result_proto)); +} + +TEST_F(IcingSearchEngineTest, + SearchResultGroupingDuplicateNamespaceShouldReturnError) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Creates 2 documents and ensures the relationship in terms of document + // score is: document1 < document2 + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace1", "uri/1") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetScore(1) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace2", "uri/2") + .SetSchema("Message") + .AddStringProperty("body", "message2") + .SetScore(2) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + // "m" will match all 2 documents + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("m"); + + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + + // Specify "namespace1" twice. This should result in an error. 
+ ResultSpecProto result_spec; + ResultSpecProto::ResultGrouping* result_grouping = + result_spec.add_result_groupings(); + result_grouping->set_max_results(1); + result_grouping->add_namespaces("namespace1"); + result_grouping->add_namespaces("namespace2"); + result_grouping = result_spec.add_result_groupings(); + result_grouping->set_max_results(1); + result_grouping->add_namespaces("namespace1"); + + SearchResultProto search_result_proto = + icing.Search(search_spec, scoring_spec, result_spec); + EXPECT_THAT(search_result_proto.status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); +} + +TEST_F(IcingSearchEngineTest, + SearchResultGroupingNonPositiveMaxResultsShouldReturnError) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Creates 2 documents and ensures the relationship in terms of document + // score is: document1 < document2 + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace1", "uri/1") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetScore(1) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace2", "uri/2") + .SetSchema("Message") + .AddStringProperty("body", "message2") + .SetScore(2) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + // "m" will match all 2 documents + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("m"); + + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + + // Specify zero results. This should result in an error. 
+ ResultSpecProto result_spec; + ResultSpecProto::ResultGrouping* result_grouping = + result_spec.add_result_groupings(); + result_grouping->set_max_results(0); + result_grouping->add_namespaces("namespace1"); + result_grouping->add_namespaces("namespace2"); + + SearchResultProto search_result_proto = + icing.Search(search_spec, scoring_spec, result_spec); + EXPECT_THAT(search_result_proto.status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); + + // Specify negative results. This should result in an error. + result_spec.mutable_result_groupings(0)->set_max_results(-1); + EXPECT_THAT(search_result_proto.status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); +} + +TEST_F(IcingSearchEngineTest, SearchResultGroupingMultiNamespaceGrouping) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Creates 3 documents and ensures the relationship in terms of document + // score is: document1 < document2 < document3 < document4 < document5 < + // document6 + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace1", "uri/1") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetScore(1) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace1", "uri/2") + .SetSchema("Message") + .AddStringProperty("body", "message2") + .SetScore(2) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document3 = + DocumentBuilder() + .SetKey("namespace2", "uri/3") + .SetSchema("Message") + .AddStringProperty("body", "message3") + .SetScore(3) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document4 = + DocumentBuilder() + .SetKey("namespace2", "uri/4") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetScore(4) + 
.SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document5 = + DocumentBuilder() + .SetKey("namespace3", "uri/5") + .SetSchema("Message") + .AddStringProperty("body", "message3") + .SetScore(5) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document6 = + DocumentBuilder() + .SetKey("namespace3", "uri/6") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetScore(6) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document6).status(), ProtoIsOk()); + + // "m" will match all 6 documents + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("m"); + + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + + ResultSpecProto result_spec; + ResultSpecProto::ResultGrouping* result_grouping = + result_spec.add_result_groupings(); + result_grouping->set_max_results(1); + result_grouping->add_namespaces("namespace1"); + result_grouping = result_spec.add_result_groupings(); + result_grouping->set_max_results(2); + result_grouping->add_namespaces("namespace2"); + result_grouping->add_namespaces("namespace3"); + + SearchResultProto search_result_proto = + icing.Search(search_spec, scoring_spec, result_spec); + + // The last result (document1) in namespace "namespace1" should not be + // included. "namespace2" and "namespace3" are grouped together. So only the + // two highest scored documents between the two (both of which are in + // "namespace3") should be returned. 
+ SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document6; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document5; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2; + + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, @@ -4797,8 +5701,8 @@ TEST_F(IcingSearchEngineTest, SetSchemaCanDetectPreviousSchemaWasLost) { SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStats(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } // This should shut down IcingSearchEngine and persist anything it needs to ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(GetSchemaDir().c_str())); @@ -4824,35 +5728,234 @@ TEST_F(IcingSearchEngineTest, SetSchemaCanDetectPreviousSchemaWasLost) { SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStats(empty_result)); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStatsAndScores(empty_result)); } -TEST_F(IcingSearchEngineTest, PersistToDisk) { - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = - CreateMessageDocument("namespace", "uri"); - +TEST_F(IcingSearchEngineTest, ImplicitPersistToDiskFullSavesEverything) { + DocumentProto document = CreateMessageDocument("namespace", "uri"); { IcingSearchEngine icing(GetDefaultIcingOptions(), 
GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), - ProtoIsOk()); + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + } // Destructing calls a PersistToDisk(FULL) - // Persisting shouldn't affect anything - EXPECT_THAT(icing.PersistToDisk().status(), ProtoIsOk()); + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT( - icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - } // Destructing persists as well + // There should be no recovery since everything should be saved properly. + InitializeResultProto init_result = icing.Initialize(); + EXPECT_THAT(init_result.status(), ProtoIsOk()); + EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); + + // Schema is still intact. + GetSchemaResultProto expected_get_schema_result_proto; + expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema(); + + EXPECT_THAT(icing.GetSchema(), EqualsProto(expected_get_schema_result_proto)); + + // Documents are still intact. 
+ GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = document; - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); EXPECT_THAT( icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), EqualsProto(expected_get_result_proto)); + + // Index is still intact. + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); // Content in the Message document. + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document; + + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_F(IcingSearchEngineTest, ExplicitPersistToDiskFullSavesEverything) { + DocumentProto document = CreateMessageDocument("namespace", "uri"); + + // Add schema and documents to our first icing1 instance. + IcingSearchEngine icing1(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing1.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing1.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + EXPECT_THAT(icing1.Put(document).status(), ProtoIsOk()); + EXPECT_THAT(icing1.PersistToDisk(PersistType::FULL).status(), ProtoIsOk()); + + // Initialize a second icing2 instance which should have it's own memory + // space. If data from icing1 isn't being persisted to the files, then icing2 + // won't be able to see those changes. + IcingSearchEngine icing2(GetDefaultIcingOptions(), GetTestJniCache()); + + // There should be no recovery since everything should be saved properly. 
+ InitializeResultProto init_result = icing2.Initialize(); + EXPECT_THAT(init_result.status(), ProtoIsOk()); + EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); + + // Schema is still intact. + GetSchemaResultProto expected_get_schema_result_proto; + expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema(); + + EXPECT_THAT(icing2.GetSchema(), + EqualsProto(expected_get_schema_result_proto)); + + // Documents are still intact. + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = document; + + EXPECT_THAT( + icing2.Get("namespace", "uri", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + // Index is still intact. + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); // Content in the Message document. 
+ + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document; + + SearchResultProto actual_results = + icing2.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_F(IcingSearchEngineTest, NoPersistToDiskLosesAllDocumentsAndIndex) { + IcingSearchEngine icing1(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing1.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing1.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + DocumentProto document = CreateMessageDocument("namespace", "uri"); + EXPECT_THAT(icing1.Put(document).status(), ProtoIsOk()); + EXPECT_THAT( + icing1.Get("namespace", "uri", GetResultSpecProto::default_instance()) + .document(), + EqualsProto(document)); + + // It's intentional that no PersistToDisk call is made before initializing a + // second instance of icing. + + IcingSearchEngine icing2(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto init_result = icing2.Initialize(); + EXPECT_THAT(init_result.status(), ProtoIsOk()); + EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::PARTIAL_LOSS)); + EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), + Eq(InitializeStatsProto::DATA_LOSS)); + EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); + + // The document shouldn't be found because we forgot to call + // PersistToDisk(LITE)! 
+ EXPECT_THAT( + icing2.Get("namespace", "uri", GetResultSpecProto::default_instance()) + .status(), + ProtoStatusIs(StatusProto::NOT_FOUND)); + + // Searching also shouldn't get us anything because the index wasn't + // recovered. + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); // Content in the Message document. + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + + SearchResultProto actual_results = + icing2.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_F(IcingSearchEngineTest, PersistToDiskLiteSavesGroundTruth) { + DocumentProto document = CreateMessageDocument("namespace", "uri"); + + IcingSearchEngine icing1(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing1.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing1.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + EXPECT_THAT(icing1.Put(document).status(), ProtoIsOk()); + EXPECT_THAT(icing1.PersistToDisk(PersistType::LITE).status(), ProtoIsOk()); + EXPECT_THAT( + icing1.Get("namespace", "uri", GetResultSpecProto::default_instance()) + .document(), + EqualsProto(document)); + + IcingSearchEngine icing2(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto init_result = icing2.Initialize(); + EXPECT_THAT(init_result.status(), ProtoIsOk()); + EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + + // A checksum mismatch gets reported as an IO error. 
The document store and + // index didn't have their derived files included in the checksum previously, + // so reinitializing will trigger a checksum mismatch. + EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), + Eq(InitializeStatsProto::IO_ERROR)); + EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::IO_ERROR)); + + // Schema is still intact. + GetSchemaResultProto expected_get_schema_result_proto; + expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema(); + + EXPECT_THAT(icing2.GetSchema(), + EqualsProto(expected_get_schema_result_proto)); + + // The document should be found because we called PersistToDisk(LITE)! + EXPECT_THAT( + icing2.Get("namespace", "uri", GetResultSpecProto::default_instance()) + .document(), + EqualsProto(document)); + + // Recovered index is still intact. + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); // Content in the Message document. + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document; + + SearchResultProto actual_results = + icing2.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, ResetOk) { @@ -4886,11 +5989,11 @@ TEST_F(IcingSearchEngineTest, ResetOk) { EXPECT_THAT(icing.SetSchema(empty_schema).status(), ProtoIsOk()); } -TEST_F(IcingSearchEngineTest, ResetAbortedError) { +TEST_F(IcingSearchEngineTest, ResetDeleteFailureCausesInternalError) { auto mock_filesystem = std::make_unique<MockFilesystem>(); - // This fails IcingSearchEngine::Reset(). 
But since we didn't actually delete - // anything, we'll be able to consider this just an ABORTED call. + // This fails IcingSearchEngine::Reset() with status code INTERNAL and leaves + // the IcingSearchEngine instance in an uninitialized state. ON_CALL(*mock_filesystem, DeleteDirectoryRecursively(StrEq(GetTestBaseDir().c_str()))) .WillByDefault(Return(false)); @@ -4904,46 +6007,17 @@ TEST_F(IcingSearchEngineTest, ResetAbortedError) { DocumentProto document = CreateMessageDocument("namespace", "uri"); ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - EXPECT_THAT(icing.Reset().status(), ProtoStatusIs(StatusProto::ABORTED)); + EXPECT_THAT(icing.Reset().status(), ProtoStatusIs(StatusProto::INTERNAL)); - // Everything is still intact. - // Can get old data. GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + expected_get_result_proto.mutable_status()->set_code( + StatusProto::FAILED_PRECONDITION); *expected_get_result_proto.mutable_document() = document; - EXPECT_THAT(icing.Get(document.namespace_(), document.uri(), - GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - // Can add new data. - EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), - ProtoIsOk()); -} - -TEST_F(IcingSearchEngineTest, ResetInternalError) { - auto mock_filesystem = std::make_unique<MockFilesystem>(); - - // Let all other calls succeed. 
- EXPECT_CALL(*mock_filesystem, Write(Matcher<const char*>(_), _, _)) - .WillRepeatedly(Return(true)); - - // This prevents IcingSearchEngine from creating a DocumentStore instance on - // reinitialization - const std::string document_log_path = - GetTestBaseDir() + "/document_dir/document_log"; - EXPECT_CALL( - *mock_filesystem, - Write(Matcher<const char*>(StrEq(document_log_path.c_str())), _, _)) - .WillOnce(Return(true)) - .WillOnce(Return(false)); - - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::move(mock_filesystem), - std::make_unique<IcingFilesystem>(), - std::make_unique<FakeClock>(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - EXPECT_THAT(icing.Reset().status(), ProtoStatusIs(StatusProto::INTERNAL)); + EXPECT_THAT(icing + .Get(document.namespace_(), document.uri(), + GetResultSpecProto::default_instance()) + .status(), + ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); } TEST_F(IcingSearchEngineTest, SnippetNormalization) { @@ -4985,34 +6059,28 @@ TEST_F(IcingSearchEngineTest, SnippetNormalization) { const DocumentProto& result_document_1 = results.results(0).document(); const SnippetProto& result_snippet_1 = results.results(0).snippet(); EXPECT_THAT(result_document_1, EqualsProto(document_two)); - EXPECT_THAT(GetMatch(result_document_1, result_snippet_1, "body", - /*snippet_index=*/0), - Eq("mdi")); - EXPECT_THAT(GetWindow(result_document_1, result_snippet_1, "body", - /*snippet_index=*/0), - Eq("mdi Zürich Team Meeting")); - EXPECT_THAT(GetMatch(result_document_1, result_snippet_1, "body", - /*snippet_index=*/1), - Eq("Zürich")); - EXPECT_THAT(GetWindow(result_document_1, result_snippet_1, "body", - /*snippet_index=*/1), - Eq("mdi Zürich Team Meeting")); + EXPECT_THAT(result_snippet_1.entries(), SizeIs(1)); + EXPECT_THAT(result_snippet_1.entries(0).property_name(), Eq("body")); + std::string_view content = GetString( + 
&result_document_1, result_snippet_1.entries(0).property_name()); + EXPECT_THAT( + GetWindows(content, result_snippet_1.entries(0)), + ElementsAre("mdi Zürich Team Meeting", "mdi Zürich Team Meeting")); + EXPECT_THAT(GetMatches(content, result_snippet_1.entries(0)), + ElementsAre("mdi", "Zürich")); const DocumentProto& result_document_2 = results.results(1).document(); const SnippetProto& result_snippet_2 = results.results(1).snippet(); EXPECT_THAT(result_document_2, EqualsProto(document_one)); - EXPECT_THAT(GetMatch(result_document_2, result_snippet_2, "body", - /*snippet_index=*/0), - Eq("MDI")); - EXPECT_THAT(GetWindow(result_document_2, result_snippet_2, "body", - /*snippet_index=*/0), - Eq("MDI zurich Team Meeting")); - EXPECT_THAT(GetMatch(result_document_2, result_snippet_2, "body", - /*snippet_index=*/1), - Eq("zurich")); - EXPECT_THAT(GetWindow(result_document_2, result_snippet_2, "body", - /*snippet_index=*/1), - Eq("MDI zurich Team Meeting")); + EXPECT_THAT(result_snippet_2.entries(), SizeIs(1)); + EXPECT_THAT(result_snippet_2.entries(0).property_name(), Eq("body")); + content = GetString(&result_document_2, + result_snippet_2.entries(0).property_name()); + EXPECT_THAT( + GetWindows(content, result_snippet_2.entries(0)), + ElementsAre("MDI zurich Team Meeting", "MDI zurich Team Meeting")); + EXPECT_THAT(GetMatches(content, result_snippet_2.entries(0)), + ElementsAre("MDI", "zurich")); } TEST_F(IcingSearchEngineTest, SnippetNormalizationPrefix) { @@ -5054,34 +6122,28 @@ TEST_F(IcingSearchEngineTest, SnippetNormalizationPrefix) { const DocumentProto& result_document_1 = results.results(0).document(); const SnippetProto& result_snippet_1 = results.results(0).snippet(); EXPECT_THAT(result_document_1, EqualsProto(document_two)); - EXPECT_THAT(GetMatch(result_document_1, result_snippet_1, "body", - /*snippet_index=*/0), - Eq("mdi")); - EXPECT_THAT(GetWindow(result_document_1, result_snippet_1, "body", - /*snippet_index=*/0), - Eq("mdi Zürich Team Meeting")); - 
EXPECT_THAT(GetMatch(result_document_1, result_snippet_1, "body", - /*snippet_index=*/1), - Eq("Zürich")); - EXPECT_THAT(GetWindow(result_document_1, result_snippet_1, "body", - /*snippet_index=*/1), - Eq("mdi Zürich Team Meeting")); + EXPECT_THAT(result_snippet_1.entries(), SizeIs(1)); + EXPECT_THAT(result_snippet_1.entries(0).property_name(), Eq("body")); + std::string_view content = GetString( + &result_document_1, result_snippet_1.entries(0).property_name()); + EXPECT_THAT( + GetWindows(content, result_snippet_1.entries(0)), + ElementsAre("mdi Zürich Team Meeting", "mdi Zürich Team Meeting")); + EXPECT_THAT(GetMatches(content, result_snippet_1.entries(0)), + ElementsAre("mdi", "Zürich")); const DocumentProto& result_document_2 = results.results(1).document(); const SnippetProto& result_snippet_2 = results.results(1).snippet(); EXPECT_THAT(result_document_2, EqualsProto(document_one)); - EXPECT_THAT(GetMatch(result_document_2, result_snippet_2, "body", - /*snippet_index=*/0), - Eq("MDI")); - EXPECT_THAT(GetWindow(result_document_2, result_snippet_2, "body", - /*snippet_index=*/0), - Eq("MDI zurich Team Meeting")); - EXPECT_THAT(GetMatch(result_document_2, result_snippet_2, "body", - /*snippet_index=*/1), - Eq("zurich")); - EXPECT_THAT(GetWindow(result_document_2, result_snippet_2, "body", - /*snippet_index=*/1), - Eq("MDI zurich Team Meeting")); + EXPECT_THAT(result_snippet_2.entries(), SizeIs(1)); + EXPECT_THAT(result_snippet_2.entries(0).property_name(), Eq("body")); + content = GetString(&result_document_2, + result_snippet_2.entries(0).property_name()); + EXPECT_THAT( + GetWindows(content, result_snippet_2.entries(0)), + ElementsAre("MDI zurich Team Meeting", "MDI zurich Team Meeting")); + EXPECT_THAT(GetMatches(content, result_snippet_2.entries(0)), + ElementsAre("MDI", "zurich")); } TEST_F(IcingSearchEngineTest, SnippetSectionRestrict) { @@ -5112,21 +6174,18 @@ TEST_F(IcingSearchEngineTest, SnippetSectionRestrict) { icing.Search(search_spec, 
GetDefaultScoringSpec(), result_spec); EXPECT_THAT(results.status(), ProtoIsOk()); ASSERT_THAT(results.results(), SizeIs(1)); + const DocumentProto& result_document = results.results(0).document(); const SnippetProto& result_snippet = results.results(0).snippet(); EXPECT_THAT(result_document, EqualsProto(document_one)); - EXPECT_THAT( - GetMatch(result_document, result_snippet, "body", /*snippet_index=*/0), - Eq("zurich")); - EXPECT_THAT( - GetWindow(result_document, result_snippet, "body", /*snippet_index=*/0), - Eq("MDI zurich Team Meeting")); - EXPECT_THAT( - GetMatch(result_document, result_snippet, "subject", /*snippet_index=*/0), - IsEmpty()); - EXPECT_THAT(GetWindow(result_document, result_snippet, "subject", - /*snippet_index=*/0), - IsEmpty()); + EXPECT_THAT(result_snippet.entries(), SizeIs(1)); + EXPECT_THAT(result_snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&result_document, result_snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, result_snippet.entries(0)), + ElementsAre("MDI zurich Team Meeting")); + EXPECT_THAT(GetMatches(content, result_snippet.entries(0)), + ElementsAre("zurich")); } TEST_F(IcingSearchEngineTest, UninitializedInstanceFailsSafely) { @@ -5167,7 +6226,7 @@ TEST_F(IcingSearchEngineTest, UninitializedInstanceFailsSafely) { ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); icing.InvalidateNextPageToken(kSomePageToken); // Verify this doesn't crash. - EXPECT_THAT(icing.PersistToDisk().status(), + EXPECT_THAT(icing.PersistToDisk(PersistType::FULL).status(), ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); EXPECT_THAT(icing.Optimize().status(), ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); @@ -5401,15 +6460,16 @@ TEST_F(IcingSearchEngineTest, RestoreIndexLoseLiteIndex) { // 2. 
Delete the last document from the document log { - const std::string document_log_file = - absl_ports::StrCat(GetDocumentDir(), "/document_log"); + const std::string document_log_file = absl_ports::StrCat( + GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); filesystem()->DeleteFile(document_log_file.c_str()); - ICING_ASSERT_OK_AND_ASSIGN(auto create_result, - FileBackedProtoLog<DocumentWrapper>::Create( - filesystem(), document_log_file.c_str(), - FileBackedProtoLog<DocumentWrapper>::Options( - /*compress_in=*/true))); - std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> document_log = + ICING_ASSERT_OK_AND_ASSIGN( + auto create_result, + PortableFileBackedProtoLog<DocumentWrapper>::Create( + filesystem(), document_log_file.c_str(), + PortableFileBackedProtoLog<DocumentWrapper>::Options( + /*compress_in=*/true))); + std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log = std::move(create_result.proto_log); document = DocumentBuilder(document).SetUri("fake_type/0").Build(); @@ -5474,15 +6534,16 @@ TEST_F(IcingSearchEngineTest, RestoreIndexLoseIndex) { // 2. Delete the last two documents from the document log. 
{ - const std::string document_log_file = - absl_ports::StrCat(GetDocumentDir(), "/document_log"); + const std::string document_log_file = absl_ports::StrCat( + GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); filesystem()->DeleteFile(document_log_file.c_str()); - ICING_ASSERT_OK_AND_ASSIGN(auto create_result, - FileBackedProtoLog<DocumentWrapper>::Create( - filesystem(), document_log_file.c_str(), - FileBackedProtoLog<DocumentWrapper>::Options( - /*compress_in=*/true))); - std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> document_log = + ICING_ASSERT_OK_AND_ASSIGN( + auto create_result, + PortableFileBackedProtoLog<DocumentWrapper>::Create( + filesystem(), document_log_file.c_str(), + PortableFileBackedProtoLog<DocumentWrapper>::Options( + /*compress_in=*/true))); + std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log = std::move(create_result.proto_log); document = DocumentBuilder(document).SetUri("fake_type/0").Build(); @@ -5514,6 +6575,88 @@ TEST_F(IcingSearchEngineTest, RestoreIndexLoseIndex) { } } +TEST_F(IcingSearchEngineTest, + DocumentWithNoIndexedContentDoesntCauseRestoreIndex) { + // 1. Create an index with a single document in it that has no indexed + // content. + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // Set a schema for a single type that has no indexed properties. + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( + PropertyConfigBuilder() + .SetName("unindexedField") + .SetDataTypeString(MATCH_NONE, TOKENIZER_NONE) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + // Add a document that contains no indexed content. 
+ DocumentProto document = + DocumentBuilder() + .SetKey("icing", "fake_type/0") + .SetSchema("Message") + .AddStringProperty("unindexedField", + "Don't you dare search over this!") + .Build(); + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + } + + // 2. Create the index again. This should NOT trigger a recovery of any kind. + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto init_result = icing.Initialize(); + EXPECT_THAT(init_result.status(), ProtoIsOk()); + EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); + } +} + +TEST_F(IcingSearchEngineTest, + DocumentWithNoValidIndexedContentDoesntCauseRestoreIndex) { + // 1. Create an index with a single document in it that has no valid indexed + // tokens in its content. + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // Set a schema for a single type that has no indexed properties. + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Add a document that contains no valid indexed content - just punctuation. + DocumentProto document = DocumentBuilder() + .SetKey("icing", "fake_type/0") + .SetSchema("Message") + .AddStringProperty("body", "?...!") + .Build(); + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + } + + // 2. Create the index again. This should NOT trigger a recovery of any kind. 
+ { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto init_result = icing.Initialize(); + EXPECT_THAT(init_result.status(), ProtoIsOk()); + EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); + } +} + TEST_F(IcingSearchEngineTest, IndexingDocMergeFailureResets) { DocumentProto document = DocumentBuilder() .SetKey("icing", "fake_type/0") @@ -5596,8 +6739,7 @@ TEST_F(IcingSearchEngineTest, InitializeShouldLogFunctionLatency) { std::move(fake_clock), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT(initialize_result_proto.native_initialize_stats().latency_ms(), - Eq(10)); + EXPECT_THAT(initialize_result_proto.initialize_stats().latency_ms(), Eq(10)); } TEST_F(IcingSearchEngineTest, InitializeShouldLogNumberOfDocuments) { @@ -5617,9 +6759,8 @@ TEST_F(IcingSearchEngineTest, InitializeShouldLogNumberOfDocuments) { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT( - initialize_result_proto.native_initialize_stats().num_documents(), - Eq(0)); + EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(), + Eq(0)); ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); @@ -5629,9 +6770,8 @@ TEST_F(IcingSearchEngineTest, InitializeShouldLogNumberOfDocuments) { IcingSearchEngine 
icing(GetDefaultIcingOptions(), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT( - initialize_result_proto.native_initialize_stats().num_documents(), - Eq(1)); + EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(), + Eq(1)); // Put another document. ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); @@ -5641,9 +6781,8 @@ TEST_F(IcingSearchEngineTest, InitializeShouldLogNumberOfDocuments) { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT( - initialize_result_proto.native_initialize_stats().num_documents(), - Eq(2)); + EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(), + Eq(2)); } } @@ -5659,25 +6798,25 @@ TEST_F(IcingSearchEngineTest, std::move(fake_clock), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_cause(), - Eq(NativeInitializeStats::NONE)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_latency_ms(), Eq(0)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() - .document_store_data_status(), - Eq(NativeInitializeStats::NO_DATA_LOSS)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() - .index_restoration_cause(), - Eq(NativeInitializeStats::NONE)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() - .index_restoration_latency_ms(), - Eq(0)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() - 
.schema_store_recovery_cause(), - Eq(NativeInitializeStats::NONE)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + EXPECT_THAT( + initialize_result_proto.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().index_restoration_latency_ms(), + Eq(0)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_latency_ms(), Eq(0)); } @@ -5701,8 +6840,8 @@ TEST_F(IcingSearchEngineTest, InitializeShouldLogRecoveryCausePartialDataLoss) { // Append a non-checksummed document. This will mess up the checksum of the // proto log, forcing it to rewind and later return a DATA_LOSS error. const std::string serialized_document = document.SerializeAsString(); - const std::string document_log_file = - absl_ports::StrCat(GetDocumentDir(), "/document_log"); + const std::string document_log_file = absl_ports::StrCat( + GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); int64_t file_size = filesystem()->GetFileSize(document_log_file.c_str()); filesystem()->PWrite(document_log_file.c_str(), file_size, @@ -5721,25 +6860,25 @@ TEST_F(IcingSearchEngineTest, InitializeShouldLogRecoveryCausePartialDataLoss) { std::move(fake_clock), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_cause(), - Eq(NativeInitializeStats::DATA_LOSS)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + Eq(InitializeStatsProto::DATA_LOSS)); + 
EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_latency_ms(), Eq(10)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() - .document_store_data_status(), - Eq(NativeInitializeStats::PARTIAL_LOSS)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() - .index_restoration_cause(), - Eq(NativeInitializeStats::NONE)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + EXPECT_THAT( + initialize_result_proto.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::PARTIAL_LOSS)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() .index_restoration_latency_ms(), Eq(0)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_cause(), - Eq(NativeInitializeStats::NONE)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_latency_ms(), Eq(0)); } @@ -5752,31 +6891,47 @@ TEST_F(IcingSearchEngineTest, .SetSchema("Message") .AddStringProperty("body", "message body") .Build(); + + const std::string document_log_file = absl_ports::StrCat( + GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); + int64_t corruptible_offset; + { // Initialize and put a document. IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // There's some space at the beginning of the file (e.g. header, kmagic, + // etc) that is necessary to initialize the FileBackedProtoLog. We can't + // corrupt that region, so we need to figure out the offset at which + // documents will be written to - which is the file size after + // initialization. 
+ corruptible_offset = filesystem()->GetFileSize(document_log_file.c_str()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk()); } { - // Modify the document log checksum to trigger a complete document log - // rewind. - const std::string document_log_file = - absl_ports::StrCat(GetDocumentDir(), "/document_log"); - - FileBackedProtoLog<DocumentWrapper>::Header document_log_header; - filesystem()->PRead(document_log_file.c_str(), &document_log_header, - sizeof(FileBackedProtoLog<DocumentWrapper>::Header), - /*offset=*/0); - // Set a garbage checksum. - document_log_header.log_checksum = 10; - document_log_header.header_checksum = - document_log_header.CalculateHeaderChecksum(); - filesystem()->PWrite(document_log_file.c_str(), /*offset=*/0, - &document_log_header, - sizeof(FileBackedProtoLog<DocumentWrapper>::Header)); + // "Corrupt" the content written in the log. Make the corrupt document + // smaller than our original one so we don't accidentally write past our + // file. + DocumentProto document = + DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build(); + std::string serialized_document = document.SerializeAsString(); + ASSERT_TRUE(filesystem()->PWrite( + document_log_file.c_str(), corruptible_offset, + serialized_document.data(), serialized_document.size())); + + PortableFileBackedProtoLog<DocumentWrapper>::Header header = + ReadDocumentLogHeader(*filesystem(), document_log_file); + + // Set dirty bit to true to reflect that something changed in the log. 
+ header.SetDirtyFlag(true); + header.SetHeaderChecksum(header.CalculateHeaderChecksum()); + + WriteDocumentLogHeader(*filesystem(), document_log_file, header); } { @@ -5790,27 +6945,27 @@ TEST_F(IcingSearchEngineTest, std::move(fake_clock), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_cause(), - Eq(NativeInitializeStats::DATA_LOSS)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + Eq(InitializeStatsProto::DATA_LOSS)); + EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_latency_ms(), Eq(10)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() - .document_store_data_status(), - Eq(NativeInitializeStats::COMPLETE_LOSS)); - // The complete rewind of ground truth causes the mismatch of total - // checksum, so index should be restored. - EXPECT_THAT(initialize_result_proto.native_initialize_stats() - .index_restoration_cause(), - Eq(NativeInitializeStats::TOTAL_CHECKSUM_MISMATCH)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + EXPECT_THAT( + initialize_result_proto.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::COMPLETE_LOSS)); + // The complete rewind of ground truth causes us to clear the index, but + // that's not considered a restoration. 
+ EXPECT_THAT( + initialize_result_proto.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() .index_restoration_latency_ms(), - Eq(10)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + Eq(0)); + EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_cause(), - Eq(NativeInitializeStats::NONE)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_latency_ms(), Eq(0)); } @@ -5848,51 +7003,76 @@ TEST_F(IcingSearchEngineTest, std::move(fake_clock), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() - .index_restoration_cause(), - Eq(NativeInitializeStats::INCONSISTENT_WITH_GROUND_TRUTH)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + EXPECT_THAT( + initialize_result_proto.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); + EXPECT_THAT(initialize_result_proto.initialize_stats() .index_restoration_latency_ms(), Eq(10)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_cause(), - Eq(NativeInitializeStats::NONE)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_latency_ms(), Eq(0)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() - .document_store_data_status(), - Eq(NativeInitializeStats::NO_DATA_LOSS)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + EXPECT_THAT( + 
initialize_result_proto.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_cause(), - Eq(NativeInitializeStats::NONE)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_latency_ms(), Eq(0)); } } TEST_F(IcingSearchEngineTest, - InitializeShouldLogRecoveryCauseTotalChecksumMismatch) { + InitializeShouldLogRecoveryCauseSchemaChangesOutofSync) { DocumentProto document = DocumentBuilder() .SetKey("icing", "fake_type/0") .SetSchema("Message") .AddStringProperty("body", "message body") .Build(); + IcingSearchEngineOptions options = GetDefaultIcingOptions(); { // Initialize and put one document. - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + IcingSearchEngine icing(options, GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); } { - // Change the header's checksum value to a random value. - uint32_t invalid_checksum = 1; - filesystem()->PWrite(GetHeaderFilename().c_str(), - offsetof(IcingSearchEngine::Header, checksum), - &invalid_checksum, sizeof(invalid_checksum)); + // Simulate a schema change where power is lost after the schema is written. 
+ SchemaProto new_schema = + SchemaBuilder() + .AddType( + SchemaTypeConfigBuilder() + .SetType("Message") + .AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + // Write the marker file + std::string marker_filepath = + absl_ports::StrCat(options.base_dir(), "/set_schema_marker"); + ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str())); + ASSERT_TRUE(sfd.is_valid()); + + // Write the new schema + FakeClock fake_clock; + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock)); + ICING_EXPECT_OK(schema_store->SetSchema(new_schema)); } { @@ -5905,25 +7085,58 @@ TEST_F(IcingSearchEngineTest, std::move(fake_clock), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() - .index_restoration_cause(), - Eq(NativeInitializeStats::TOTAL_CHECKSUM_MISMATCH)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + EXPECT_THAT( + initialize_result_proto.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC)); + EXPECT_THAT(initialize_result_proto.initialize_stats() .index_restoration_latency_ms(), Eq(10)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_cause(), - Eq(NativeInitializeStats::TOTAL_CHECKSUM_MISMATCH)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC)); + EXPECT_THAT(initialize_result_proto.initialize_stats() 
.document_store_recovery_latency_ms(), Eq(10)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() - .document_store_data_status(), - Eq(NativeInitializeStats::NO_DATA_LOSS)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + EXPECT_THAT( + initialize_result_proto.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .schema_store_recovery_latency_ms(), + Eq(0)); + } + + { + // No recovery should be needed. + auto fake_clock = std::make_unique<FakeClock>(); + fake_clock->SetTimerElapsedMilliseconds(10); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique<Filesystem>(), + std::make_unique<IcingFilesystem>(), + std::move(fake_clock), GetTestJniCache()); + InitializeResultProto initialize_result_proto = icing.Initialize(); + EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); + EXPECT_THAT( + initialize_result_proto.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .index_restoration_latency_ms(), + Eq(0)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .document_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .document_store_recovery_latency_ms(), + Eq(0)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_cause(), - Eq(NativeInitializeStats::NONE)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_latency_ms(), Eq(0)); } @@ -5970,25 
+7183,25 @@ TEST_F(IcingSearchEngineTest, InitializeShouldLogRecoveryCauseIndexIOError) { InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() - .index_restoration_cause(), - Eq(NativeInitializeStats::IO_ERROR)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() - .index_restoration_latency_ms(), - Eq(10)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + EXPECT_THAT( + initialize_result_proto.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::IO_ERROR)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().index_restoration_latency_ms(), + Eq(10)); + EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_cause(), - Eq(NativeInitializeStats::NONE)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_latency_ms(), Eq(0)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() - .document_store_data_status(), - Eq(NativeInitializeStats::NO_DATA_LOSS)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() - .schema_store_recovery_cause(), - Eq(NativeInitializeStats::NONE)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + EXPECT_THAT( + initialize_result_proto.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_latency_ms(), Eq(0)); } @@ -6036,25 +7249,25 @@ TEST_F(IcingSearchEngineTest, InitializeShouldLogRecoveryCauseDocStoreIOError) { InitializeResultProto initialize_result_proto = icing.Initialize(); 
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_cause(), - Eq(NativeInitializeStats::IO_ERROR)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + Eq(InitializeStatsProto::IO_ERROR)); + EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_latency_ms(), Eq(10)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() - .document_store_data_status(), - Eq(NativeInitializeStats::NO_DATA_LOSS)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() - .index_restoration_cause(), - Eq(NativeInitializeStats::NONE)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() - .index_restoration_latency_ms(), - Eq(0)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() - .schema_store_recovery_cause(), - Eq(NativeInitializeStats::NONE)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + EXPECT_THAT( + initialize_result_proto.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().index_restoration_latency_ms(), + Eq(0)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_latency_ms(), Eq(0)); } @@ -6083,25 +7296,25 @@ TEST_F(IcingSearchEngineTest, std::move(fake_clock), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + EXPECT_THAT(initialize_result_proto.initialize_stats() 
.schema_store_recovery_cause(), - Eq(NativeInitializeStats::IO_ERROR)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + Eq(InitializeStatsProto::IO_ERROR)); + EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_latency_ms(), Eq(10)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_cause(), - Eq(NativeInitializeStats::NONE)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_latency_ms(), Eq(0)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() - .document_store_data_status(), - Eq(NativeInitializeStats::NO_DATA_LOSS)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() - .index_restoration_cause(), - Eq(NativeInitializeStats::NONE)); - EXPECT_THAT(initialize_result_proto.native_initialize_stats() + EXPECT_THAT( + initialize_result_proto.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() .index_restoration_latency_ms(), Eq(0)); } @@ -6114,9 +7327,8 @@ TEST_F(IcingSearchEngineTest, InitializeShouldLogNumberOfSchemaTypes) { InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); // There should be 0 schema types. - EXPECT_THAT( - initialize_result_proto.native_initialize_stats().num_schema_types(), - Eq(0)); + EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(), + Eq(0)); // Set a schema with one type config. 
ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); @@ -6127,9 +7339,8 @@ TEST_F(IcingSearchEngineTest, InitializeShouldLogNumberOfSchemaTypes) { InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); // There should be 1 schema type. - EXPECT_THAT( - initialize_result_proto.native_initialize_stats().num_schema_types(), - Eq(1)); + EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(), + Eq(1)); // Create and set a schema with two type configs: Email and Message. SchemaProto schema = CreateEmailSchema(); @@ -6152,9 +7363,8 @@ TEST_F(IcingSearchEngineTest, InitializeShouldLogNumberOfSchemaTypes) { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT( - initialize_result_proto.native_initialize_stats().num_schema_types(), - Eq(2)); + EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(), + Eq(2)); } } @@ -6176,8 +7386,7 @@ TEST_F(IcingSearchEngineTest, PutDocumentShouldLogFunctionLatency) { PutResultProto put_result_proto = icing.Put(document); EXPECT_THAT(put_result_proto.status(), ProtoIsOk()); - EXPECT_THAT(put_result_proto.native_put_document_stats().latency_ms(), - Eq(10)); + EXPECT_THAT(put_result_proto.put_document_stats().latency_ms(), Eq(10)); } TEST_F(IcingSearchEngineTest, PutDocumentShouldLogDocumentStoreStats) { @@ -6200,11 +7409,9 @@ TEST_F(IcingSearchEngineTest, PutDocumentShouldLogDocumentStoreStats) { PutResultProto put_result_proto = icing.Put(document); EXPECT_THAT(put_result_proto.status(), ProtoIsOk()); - EXPECT_THAT( - put_result_proto.native_put_document_stats().document_store_latency_ms(), - Eq(10)); - size_t document_size = - put_result_proto.native_put_document_stats().document_size(); + 
EXPECT_THAT(put_result_proto.put_document_stats().document_store_latency_ms(), + Eq(10)); + size_t document_size = put_result_proto.put_document_stats().document_size(); EXPECT_THAT(document_size, Ge(document.ByteSizeLong())); EXPECT_THAT(document_size, Le(document.ByteSizeLong() + sizeof(DocumentProto::InternalFields))); @@ -6228,18 +7435,16 @@ TEST_F(IcingSearchEngineTest, PutDocumentShouldLogIndexingStats) { PutResultProto put_result_proto = icing.Put(document); EXPECT_THAT(put_result_proto.status(), ProtoIsOk()); - EXPECT_THAT(put_result_proto.native_put_document_stats().index_latency_ms(), - Eq(10)); + EXPECT_THAT(put_result_proto.put_document_stats().index_latency_ms(), Eq(10)); // No merge should happen. - EXPECT_THAT( - put_result_proto.native_put_document_stats().index_merge_latency_ms(), - Eq(0)); + EXPECT_THAT(put_result_proto.put_document_stats().index_merge_latency_ms(), + Eq(0)); // Number of tokens should not exceed. - EXPECT_FALSE(put_result_proto.native_put_document_stats() + EXPECT_FALSE(put_result_proto.put_document_stats() .tokenization_stats() .exceeded_max_token_num()); // The input document has 2 tokens. - EXPECT_THAT(put_result_proto.native_put_document_stats() + EXPECT_THAT(put_result_proto.put_document_stats() .tokenization_stats() .num_tokens_indexed(), Eq(2)); @@ -6263,10 +7468,10 @@ TEST_F(IcingSearchEngineTest, PutDocumentShouldLogWhetherNumTokensExceeds) { PutResultProto put_result_proto = icing.Put(document); EXPECT_THAT(put_result_proto.status(), ProtoIsOk()); // Number of tokens(2) exceeds the max allowed value(1). 
- EXPECT_TRUE(put_result_proto.native_put_document_stats() + EXPECT_TRUE(put_result_proto.put_document_stats() .tokenization_stats() .exceeded_max_token_num()); - EXPECT_THAT(put_result_proto.native_put_document_stats() + EXPECT_THAT(put_result_proto.put_document_stats() .tokenization_stats() .num_tokens_indexed(), Eq(1)); @@ -6300,9 +7505,8 @@ TEST_F(IcingSearchEngineTest, PutDocumentShouldLogIndexMergeLatency) { // Putting document2 should trigger an index merge. PutResultProto put_result_proto = icing.Put(document2); EXPECT_THAT(put_result_proto.status(), ProtoIsOk()); - EXPECT_THAT( - put_result_proto.native_put_document_stats().index_merge_latency_ms(), - Eq(10)); + EXPECT_THAT(put_result_proto.put_document_stats().index_merge_latency_ms(), + Eq(10)); } TEST_F(IcingSearchEngineTest, SearchWithProjectionEmptyFieldPath) { @@ -6491,7 +7695,7 @@ TEST_F(IcingSearchEngineTest, SearchWithProjectionMultipleFieldPaths) { EqualsProto(projected_document_one)); } -TEST_F(IcingSearchEngineTest, NativeQueryStatsTest) { +TEST_F(IcingSearchEngineTest, QueryStatsProtoTest) { auto fake_clock = std::make_unique<FakeClock>(); fake_clock->SetTimerElapsedMilliseconds(5); TestIcingSearchEngine icing(GetDefaultIcingOptions(), @@ -6537,7 +7741,8 @@ TEST_F(IcingSearchEngineTest, NativeQueryStatsTest) { ASSERT_THAT(search_result.next_page_token(), Ne(kInvalidNextPageToken)); // Check the stats - NativeQueryStats exp_stats; + QueryStatsProto exp_stats; + exp_stats.set_query_length(7); exp_stats.set_num_terms(1); exp_stats.set_num_namespaces_filtered(1); exp_stats.set_num_schema_types_filtered(1); @@ -6547,7 +7752,7 @@ TEST_F(IcingSearchEngineTest, NativeQueryStatsTest) { exp_stats.set_requested_page_size(2); exp_stats.set_num_results_returned_current_page(2); exp_stats.set_num_documents_scored(5); - exp_stats.set_num_results_snippeted(2); + exp_stats.set_num_results_with_snippets(2); exp_stats.set_latency_ms(5); exp_stats.set_parse_query_latency_ms(5); 
exp_stats.set_scoring_latency_ms(5); @@ -6561,11 +7766,11 @@ TEST_F(IcingSearchEngineTest, NativeQueryStatsTest) { ASSERT_THAT(search_result.results(), SizeIs(2)); ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken)); - exp_stats = NativeQueryStats(); + exp_stats = QueryStatsProto(); exp_stats.set_is_first_page(false); exp_stats.set_requested_page_size(2); exp_stats.set_num_results_returned_current_page(2); - exp_stats.set_num_results_snippeted(1); + exp_stats.set_num_results_with_snippets(1); exp_stats.set_latency_ms(5); exp_stats.set_document_retrieval_latency_ms(5); EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats)); @@ -6576,16 +7781,434 @@ TEST_F(IcingSearchEngineTest, NativeQueryStatsTest) { ASSERT_THAT(search_result.results(), SizeIs(1)); ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken)); - exp_stats = NativeQueryStats(); + exp_stats = QueryStatsProto(); exp_stats.set_is_first_page(false); exp_stats.set_requested_page_size(2); exp_stats.set_num_results_returned_current_page(1); - exp_stats.set_num_results_snippeted(0); + exp_stats.set_num_results_with_snippets(0); exp_stats.set_latency_ms(5); exp_stats.set_document_retrieval_latency_ms(5); EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats)); } +TEST_F(IcingSearchEngineTest, OptimizeStatsProtoTest) { + auto fake_clock = std::make_unique<FakeClock>(); + fake_clock->SetTimerElapsedMilliseconds(5); + fake_clock->SetSystemTimeMilliseconds(10000); + auto icing = std::make_unique<TestIcingSearchEngine>( + GetDefaultIcingOptions(), std::make_unique<Filesystem>(), + std::make_unique<IcingFilesystem>(), std::move(fake_clock), + GetTestJniCache()); + ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing->SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Create three documents. 
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); + DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); + document2.set_creation_timestamp_ms(9000); + document2.set_ttl_ms(500); + DocumentProto document3 = CreateMessageDocument("namespace", "uri3"); + ASSERT_THAT(icing->Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing->Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing->Put(document3).status(), ProtoIsOk()); + + // Delete the first document. + ASSERT_THAT(icing->Delete(document1.namespace_(), document1.uri()).status(), + ProtoIsOk()); + ASSERT_THAT(icing->PersistToDisk(PersistType::FULL).status(), ProtoIsOk()); + + OptimizeStatsProto expected; + expected.set_latency_ms(5); + expected.set_document_store_optimize_latency_ms(5); + expected.set_index_restoration_latency_ms(5); + expected.set_num_original_documents(3); + expected.set_num_deleted_documents(1); + expected.set_num_expired_documents(1); + + // Run Optimize + OptimizeResultProto result = icing->Optimize(); + // Depending on how many blocks the documents end up spread across, it's + // possible that Optimize can remove documents without shrinking storage. The + // first Optimize call will also write the OptimizeStatusProto for the first + // time which will take up 1 block. So make sure that before_size is no less + // than after_size - 1 block. 
+ uint32_t page_size = getpagesize(); + EXPECT_THAT(result.optimize_stats().storage_size_before(), + Ge(result.optimize_stats().storage_size_after() - page_size)); + result.mutable_optimize_stats()->clear_storage_size_before(); + result.mutable_optimize_stats()->clear_storage_size_after(); + EXPECT_THAT(result.optimize_stats(), EqualsProto(expected)); + + fake_clock = std::make_unique<FakeClock>(); + fake_clock->SetTimerElapsedMilliseconds(5); + fake_clock->SetSystemTimeMilliseconds(20000); + icing = std::make_unique<TestIcingSearchEngine>( + GetDefaultIcingOptions(), std::make_unique<Filesystem>(), + std::make_unique<IcingFilesystem>(), std::move(fake_clock), + GetTestJniCache()); + ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); + + expected = OptimizeStatsProto(); + expected.set_latency_ms(5); + expected.set_document_store_optimize_latency_ms(5); + expected.set_index_restoration_latency_ms(5); + expected.set_num_original_documents(1); + expected.set_num_deleted_documents(0); + expected.set_num_expired_documents(0); + expected.set_time_since_last_optimize_ms(10000); + + // Run Optimize + result = icing->Optimize(); + EXPECT_THAT(result.optimize_stats().storage_size_before(), + Eq(result.optimize_stats().storage_size_after())); + result.mutable_optimize_stats()->clear_storage_size_before(); + result.mutable_optimize_stats()->clear_storage_size_after(); + EXPECT_THAT(result.optimize_stats(), EqualsProto(expected)); +} + +TEST_F(IcingSearchEngineTest, StorageInfoTest) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Create three documents. 
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); + DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); + DocumentProto document3 = CreateMessageDocument("namespace", "uri3"); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + + // Ensure that total_storage_size is set. All the other stats are covered by + // the classes that generate them. + StorageInfoResultProto result = icing.GetStorageInfo(); + EXPECT_THAT(result.status(), ProtoIsOk()); + EXPECT_THAT(result.storage_info().total_storage_size(), Ge(0)); +} + +TEST_F(IcingSearchEngineTest, SnippetErrorTest) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Generic").AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetScore(10) + .SetSchema("Generic") + .AddStringProperty("subject", "I like cats", "I like dogs", + "I like birds", "I like fish") + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetScore(20) + .SetSchema("Generic") + .AddStringProperty("subject", "I like red", "I like green", + "I like blue", "I like yellow") + .Build(); + DocumentProto document3 = + DocumentBuilder() + .SetKey("namespace", "uri3") + .SetScore(5) + .SetSchema("Generic") + .AddStringProperty("subject", "I like cupcakes", "I like donuts", + "I like eclairs", "I like froyo") + .Build(); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + 
ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.add_schema_type_filters("Generic"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + search_spec.set_query("like"); + ScoringSpecProto scoring_spec; + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + ResultSpecProto result_spec; + result_spec.mutable_snippet_spec()->set_num_to_snippet(2); + result_spec.mutable_snippet_spec()->set_num_matches_per_property(3); + result_spec.mutable_snippet_spec()->set_max_window_bytes(4); + SearchResultProto search_results = + icing.Search(search_spec, scoring_spec, result_spec); + + ASSERT_THAT(search_results.results(), SizeIs(3)); + const SearchResultProto::ResultProto* result = &search_results.results(0); + EXPECT_THAT(result->document().uri(), Eq("uri2")); + ASSERT_THAT(result->snippet().entries(), SizeIs(3)); + const SnippetProto::EntryProto* entry = &result->snippet().entries(0); + EXPECT_THAT(entry->property_name(), "subject[0]"); + std::string_view content = GetString(&result->document(), "subject[0]"); + EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like")); + + entry = &result->snippet().entries(1); + EXPECT_THAT(entry->property_name(), "subject[1]"); + content = GetString(&result->document(), "subject[1]"); + EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like")); + + entry = &result->snippet().entries(2); + EXPECT_THAT(entry->property_name(), "subject[2]"); + content = GetString(&result->document(), "subject[2]"); + EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like")); + + result = &search_results.results(1); + EXPECT_THAT(result->document().uri(), Eq("uri1")); + ASSERT_THAT(result->snippet().entries(), SizeIs(3)); + entry = &result->snippet().entries(0); + EXPECT_THAT(entry->property_name(), "subject[0]"); + content = GetString(&result->document(), "subject[0]"); + EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like")); + + entry = 
&result->snippet().entries(1); + ASSERT_THAT(entry->property_name(), "subject[1]"); + content = GetString(&result->document(), "subject[1]"); + EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like")); + + entry = &result->snippet().entries(2); + ASSERT_THAT(entry->property_name(), "subject[2]"); + content = GetString(&result->document(), "subject[2]"); + EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like")); + + result = &search_results.results(2); + ASSERT_THAT(result->document().uri(), Eq("uri3")); + ASSERT_THAT(result->snippet().entries(), IsEmpty()); +} + +TEST_F(IcingSearchEngineTest, CJKSnippetTest) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // String: "我每天走路去上班。" + // ^ ^ ^ ^^ + // UTF8 idx: 0 3 9 15 18 + // UTF16 idx: 0 1 3 5 6 + // Breaks into segments: "我", "每天", "走路", "去", "上班" + constexpr std::string_view kChinese = "我每天走路去上班。"; + DocumentProto document = DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Message") + .AddStringProperty("body", kChinese) + .Build(); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + + // Search and request snippet matching but no windowing. 
+ SearchSpecProto search_spec; + search_spec.set_query("走"); + search_spec.set_term_match_type(MATCH_PREFIX); + + ResultSpecProto result_spec; + result_spec.mutable_snippet_spec()->set_num_to_snippet( + std::numeric_limits<int>::max()); + result_spec.mutable_snippet_spec()->set_num_matches_per_property( + std::numeric_limits<int>::max()); + + // Search and make sure that we got a single successful result + SearchResultProto search_results = icing.Search( + search_spec, ScoringSpecProto::default_instance(), result_spec); + ASSERT_THAT(search_results.status(), ProtoIsOk()); + ASSERT_THAT(search_results.results(), SizeIs(1)); + const SearchResultProto::ResultProto* result = &search_results.results(0); + EXPECT_THAT(result->document().uri(), Eq("uri1")); + + // Ensure that one and only one property was matched and it was "body" + ASSERT_THAT(result->snippet().entries(), SizeIs(1)); + const SnippetProto::EntryProto* entry = &result->snippet().entries(0); + EXPECT_THAT(entry->property_name(), Eq("body")); + + // Get the content for "subject" and see what the match is. 
+ std::string_view content = GetString(&result->document(), "body"); + ASSERT_THAT(content, Eq(kChinese)); + + // Ensure that there is one and only one match within "subject" + ASSERT_THAT(entry->snippet_matches(), SizeIs(1)); + const SnippetMatchProto& match_proto = entry->snippet_matches(0); + + EXPECT_THAT(match_proto.exact_match_byte_position(), Eq(9)); + EXPECT_THAT(match_proto.exact_match_byte_length(), Eq(6)); + std::string_view match = + content.substr(match_proto.exact_match_byte_position(), + match_proto.exact_match_byte_length()); + ASSERT_THAT(match, Eq("走路")); + + // Ensure that the utf-16 values are also as expected + EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(3)); + EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2)); +} + +#ifndef ICING_JNI_TEST +// We skip this test case when we're running in a jni_test since the data files +// will be stored in the android-instrumented storage location, rather than the +// normal cc_library runfiles directory. To get that storage location, it's +// recommended to use the TestStorage APIs which handles different API +// levels/absolute vs relative/etc differences. Since that's only accessible on +// the java-side, and I haven't figured out a way to pass that directory path to +// this native side yet, we're just going to disable this. The functionality is +// already well-tested across 4 different emulated OS's so we're not losing much +// test coverage here. 
+TEST_F(IcingSearchEngineTest, MigrateToPortableFileBackedProtoLog) { + // Copy the testdata files into our IcingSearchEngine directory + std::string dir_without_portable_log; + if (IsAndroidX86()) { + dir_without_portable_log = GetTestFilePath( + "icing/testdata/not_portable_log/" + "icing_search_engine_android_x86"); + } else if (IsAndroidArm()) { + dir_without_portable_log = GetTestFilePath( + "icing/testdata/not_portable_log/" + "icing_search_engine_android_arm"); + } else if (IsIosPlatform()) { + dir_without_portable_log = GetTestFilePath( + "icing/testdata/not_portable_log/" + "icing_search_engine_ios"); + } else { + dir_without_portable_log = GetTestFilePath( + "icing/testdata/not_portable_log/" + "icing_search_engine_linux"); + } + + // Create dst directory that we'll initialize the IcingSearchEngine over. + std::string base_dir = GetTestBaseDir() + "_migrate"; + ASSERT_THAT(filesystem()->DeleteDirectoryRecursively(base_dir.c_str()), true); + ASSERT_THAT(filesystem()->CreateDirectoryRecursively(base_dir.c_str()), true); + + ASSERT_TRUE(filesystem()->CopyDirectory(dir_without_portable_log.c_str(), + base_dir.c_str(), + /*recursive=*/true)); + + IcingSearchEngineOptions icing_options; + icing_options.set_base_dir(base_dir); + + IcingSearchEngine icing(icing_options, GetTestJniCache()); + InitializeResultProto init_result = icing.Initialize(); + EXPECT_THAT(init_result.status(), ProtoIsOk()); + EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); + + // Set up schema, this is the one used to validate documents in the testdata + // files. 
Do not change unless you're also updating the testdata files. + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("email") + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + // Make sure our schema is still the same as we expect. If not, there's + // definitely no way we're getting the documents back that we expect. + GetSchemaResultProto expected_get_schema_result_proto; + expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_schema_result_proto.mutable_schema() = schema; + ASSERT_THAT(icing.GetSchema(), EqualsProto(expected_get_schema_result_proto)); + + // These are the documents that are stored in the testdata files. Do not + // change unless you're also updating the testdata files. + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "foo") + .AddStringProperty("body", "bar") + .Build(); + + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace1", "uri2") + .SetSchema("email") + .SetCreationTimestampMs(20) + .SetScore(321) + .AddStringProperty("body", "baz bat") + .Build(); + + DocumentProto document3 = DocumentBuilder() + .SetKey("namespace2", "uri1") + .SetSchema("email") + .SetCreationTimestampMs(30) + .SetScore(123) + .AddStringProperty("subject", "phoo") + .Build(); + + // Document 1 and 3 were put normally, and document 2 was deleted in our + // testdata files. 
+ EXPECT_THAT(icing + .Get(document1.namespace_(), document1.uri(), + GetResultSpecProto::default_instance()) + .document(), + EqualsProto(document1)); + EXPECT_THAT(icing + .Get(document2.namespace_(), document2.uri(), + GetResultSpecProto::default_instance()) + .status(), + ProtoStatusIs(StatusProto::NOT_FOUND)); + EXPECT_THAT(icing + .Get(document3.namespace_(), document3.uri(), + GetResultSpecProto::default_instance()) + .document(), + EqualsProto(document3)); + + // Searching for "foo" should get us document1. + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("foo"); + + SearchResultProto expected_document1; + expected_document1.mutable_status()->set_code(StatusProto::OK); + *expected_document1.mutable_results()->Add()->mutable_document() = document1; + + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStatsAndScores(expected_document1)); + + // Searching for "baz" would've gotten us document2, except it got deleted. + // Make sure that it's cleared from our index too. + search_spec.set_query("baz"); + + SearchResultProto expected_no_documents; + expected_no_documents.mutable_status()->set_code(StatusProto::OK); + + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStatsAndScores(expected_no_documents)); + + // Searching for "phoo" should get us document3. 
+ search_spec.set_query("phoo"); + + SearchResultProto expected_document3; + expected_document3.mutable_status()->set_code(StatusProto::OK); + *expected_document3.mutable_results()->Add()->mutable_document() = document3; + + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStatsAndScores(expected_document3)); +} +#endif // !ICING_JNI_TEST + } // namespace } // namespace lib } // namespace icing diff --git a/icing/index/hit/hit.cc b/icing/index/hit/hit.cc index 2a5a0d9..887e6e4 100644 --- a/icing/index/hit/hit.cc +++ b/icing/index/hit/hit.cc @@ -67,9 +67,10 @@ Hit::Hit(SectionId section_id, DocumentId document_id, &temp_value); bit_util::BitfieldSet(section_id, kNumFlags, kSectionIdBits, &temp_value); bit_util::BitfieldSet(term_frequency != kDefaultTermFrequency, - kHasTermFrequency, 1, &temp_value); - bit_util::BitfieldSet(is_prefix_hit, kPrefixHit, 1, &temp_value); - bit_util::BitfieldSet(is_in_prefix_section, kInPrefixSection, 1, &temp_value); + kHasTermFrequency, /*len=*/1, &temp_value); + bit_util::BitfieldSet(is_prefix_hit, kPrefixHit, /*len=*/1, &temp_value); + bit_util::BitfieldSet(is_in_prefix_section, kInPrefixSection, + /*len=*/1, &temp_value); value_ = temp_value; } diff --git a/icing/index/index-processor.cc b/icing/index/index-processor.cc index d2f9d41..6d8632f 100644 --- a/icing/index/index-processor.cc +++ b/icing/index/index-processor.cc @@ -55,7 +55,7 @@ IndexProcessor::Create(const Normalizer* normalizer, Index* index, libtextclassifier3::Status IndexProcessor::IndexDocument( const TokenizedDocument& tokenized_document, DocumentId document_id, - NativePutDocumentStats* put_document_stats) { + PutDocumentStatsProto* put_document_stats) { std::unique_ptr<Timer> index_timer = clock_.GetNewTimer(); if (index_->last_added_document_id() != kInvalidDocumentId && @@ -64,6 +64,7 @@ libtextclassifier3::Status IndexProcessor::IndexDocument( 
"DocumentId %d must be greater than last added document_id %d", document_id, index_->last_added_document_id())); } + index_->set_last_added_document_id(document_id); uint32_t num_tokens = 0; libtextclassifier3::Status overall_status; for (const TokenizedSection& section : tokenized_document.sections()) { diff --git a/icing/index/index-processor.h b/icing/index/index-processor.h index 9fc7c46..6b07c98 100644 --- a/icing/index/index-processor.h +++ b/icing/index/index-processor.h @@ -81,7 +81,7 @@ class IndexProcessor { // INTERNAL_ERROR if any other errors occur libtextclassifier3::Status IndexDocument( const TokenizedDocument& tokenized_document, DocumentId document_id, - NativePutDocumentStats* put_document_stats = nullptr); + PutDocumentStatsProto* put_document_stats = nullptr); private: IndexProcessor(const Normalizer* normalizer, Index* index, diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc index e6bb615..8a6a9f5 100644 --- a/icing/index/index-processor_test.cc +++ b/icing/index/index-processor_test.cc @@ -36,9 +36,11 @@ #include "icing/index/term-property-id.h" #include "icing/legacy/index/icing-filesystem.h" #include "icing/legacy/index/icing-mock-filesystem.h" +#include "icing/portable/platform.h" #include "icing/proto/document.pb.h" #include "icing/proto/schema.pb.h" #include "icing/proto/term.pb.h" +#include "icing/schema-builder.h" #include "icing/schema/schema-store.h" #include "icing/schema/schema-util.h" #include "icing/schema/section-manager.h" @@ -46,7 +48,6 @@ #include "icing/store/document-id.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" -#include "icing/testing/platform.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" #include "icing/tokenization/language-segmenter-factory.h" @@ -103,6 +104,22 @@ using ::testing::Eq; using ::testing::IsEmpty; using ::testing::Test; +constexpr PropertyConfigProto_DataType_Code TYPE_STRING = + 
PropertyConfigProto_DataType_Code_STRING; +constexpr PropertyConfigProto_DataType_Code TYPE_BYTES = + PropertyConfigProto_DataType_Code_BYTES; + +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = + PropertyConfigProto_Cardinality_Code_OPTIONAL; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED = + PropertyConfigProto_Cardinality_Code_REPEATED; + +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = + StringIndexingConfig_TokenizerType_Code_PLAIN; + +constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; +constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX; + class IndexProcessorTest : public Test { protected: void SetUp() override { @@ -131,7 +148,49 @@ class IndexProcessorTest : public Test { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, SchemaStore::Create(&filesystem_, GetTestTempDir(), &fake_clock_)); - SchemaProto schema = CreateFakeSchema(); + SchemaProto schema = + SchemaBuilder() + .AddType( + SchemaTypeConfigBuilder() + .SetType(kFakeType) + .AddProperty( + PropertyConfigBuilder() + .SetName(kExactProperty) + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName(kPrefixedProperty) + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName(kUnindexedProperty1) + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName(kUnindexedProperty2) + .SetDataType(TYPE_BYTES) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName(kRepeatedProperty) + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REPEATED)) + .AddProperty( + PropertyConfigBuilder() + .SetName(kSubProperty) + .SetDataTypeDocument( + kNestedType, /*index_nested_properties=*/true) + 
.SetCardinality(CARDINALITY_OPTIONAL))) + .AddType( + SchemaTypeConfigBuilder() + .SetType(kNestedType) + .AddProperty( + PropertyConfigBuilder() + .SetName(kNestedProperty) + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); ICING_ASSERT_OK(schema_store_->SetSchema(schema)); IndexProcessor::Options processor_options; @@ -162,72 +221,6 @@ class IndexProcessorTest : public Test { std::unique_ptr<Index> index_; std::unique_ptr<SchemaStore> schema_store_; std::unique_ptr<IndexProcessor> index_processor_; - - private: - static void AddStringProperty(std::string_view name, DataType::Code type, - Cardinality::Code cardinality, - TermMatchType::Code term_match_type, - SchemaTypeConfigProto* type_config) { - auto* prop = type_config->add_properties(); - prop->set_property_name(std::string(name)); - prop->set_data_type(type); - prop->set_cardinality(cardinality); - prop->mutable_string_indexing_config()->set_term_match_type( - term_match_type); - prop->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - } - - static void AddNonIndexedProperty(std::string_view name, DataType::Code type, - Cardinality::Code cardinality, - SchemaTypeConfigProto* type_config) { - auto* prop = type_config->add_properties(); - prop->set_property_name(std::string(name)); - prop->set_data_type(type); - prop->set_cardinality(cardinality); - } - - static SchemaProto CreateFakeSchema() { - SchemaProto schema; - - // Add top-level type - auto* type_config = schema.add_types(); - type_config->set_schema_type(std::string(kFakeType)); - - AddStringProperty(std::string(kExactProperty), DataType::STRING, - Cardinality::OPTIONAL, TermMatchType::EXACT_ONLY, - type_config); - - AddStringProperty(std::string(kPrefixedProperty), DataType::STRING, - Cardinality::OPTIONAL, TermMatchType::PREFIX, - type_config); - - AddNonIndexedProperty(std::string(kUnindexedProperty1), DataType::STRING, - 
Cardinality::OPTIONAL, type_config); - - AddNonIndexedProperty(std::string(kUnindexedProperty2), DataType::BYTES, - Cardinality::OPTIONAL, type_config); - - AddStringProperty(std::string(kRepeatedProperty), DataType::STRING, - Cardinality::REPEATED, TermMatchType::PREFIX, - type_config); - - auto* prop = type_config->add_properties(); - prop->set_property_name(std::string(kSubProperty)); - prop->set_data_type(DataType::DOCUMENT); - prop->set_cardinality(Cardinality::OPTIONAL); - prop->set_schema_type(std::string(kNestedType)); - prop->mutable_document_indexing_config()->set_index_nested_properties(true); - - // Add nested type - type_config = schema.add_types(); - type_config->set_schema_type(std::string(kNestedType)); - - AddStringProperty(kNestedProperty, DataType::STRING, Cardinality::OPTIONAL, - TermMatchType::PREFIX, type_config); - - return schema; - } }; std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> iterator) { @@ -268,7 +261,23 @@ TEST_F(IndexProcessorTest, NoTermMatchTypeContent) { document)); EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0), IsOk()); - EXPECT_THAT(index_->last_added_document_id(), Eq(kInvalidDocumentId)); + EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); +} + +TEST_F(IndexProcessorTest, NoValidContent) { + DocumentProto document = + DocumentBuilder() + .SetKey("icing", "fake_type/1") + .SetSchema(std::string(kFakeType)) + .AddStringProperty(std::string(kExactProperty), "?...!") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN( + TokenizedDocument tokenized_document, + TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(), + document)); + EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0), + IsOk()); + EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); } TEST_F(IndexProcessorTest, OneDoc) { @@ -434,9 +443,8 @@ TEST_F(IndexProcessorTest, TooManyTokensReturnError) { IndexProcessor::Options::TokenLimitBehavior::kReturnError; 
ICING_ASSERT_OK_AND_ASSIGN( - index_processor_, - IndexProcessor::Create(normalizer_.get(), index_.get(), options, - &fake_clock_)); + index_processor_, IndexProcessor::Create(normalizer_.get(), index_.get(), + options, &fake_clock_)); DocumentProto document = DocumentBuilder() @@ -477,9 +485,8 @@ TEST_F(IndexProcessorTest, TooManyTokensSuppressError) { IndexProcessor::Options::TokenLimitBehavior::kSuppressError; ICING_ASSERT_OK_AND_ASSIGN( - index_processor_, - IndexProcessor::Create(normalizer_.get(), index_.get(), options, - &fake_clock_)); + index_processor_, IndexProcessor::Create(normalizer_.get(), index_.get(), + options, &fake_clock_)); DocumentProto document = DocumentBuilder() @@ -522,9 +529,8 @@ TEST_F(IndexProcessorTest, TooLongTokens) { /*max_term_byte_size=*/4)); ICING_ASSERT_OK_AND_ASSIGN( - index_processor_, - IndexProcessor::Create(normalizer.get(), index_.get(), options, - &fake_clock_)); + index_processor_, IndexProcessor::Create(normalizer.get(), index_.get(), + options, &fake_clock_)); DocumentProto document = DocumentBuilder() @@ -693,8 +699,8 @@ TEST_F(IndexProcessorTest, NonAsciiIndexing) { ICING_ASSERT_OK_AND_ASSIGN( index_processor_, - IndexProcessor::Create(normalizer_.get(), index_.get(), - processor_options, &fake_clock_)); + IndexProcessor::Create(normalizer_.get(), index_.get(), processor_options, + &fake_clock_)); DocumentProto document = DocumentBuilder() diff --git a/icing/index/index.cc b/icing/index/index.cc index bd41b51..db59ad2 100644 --- a/icing/index/index.cc +++ b/icing/index/index.cc @@ -164,7 +164,7 @@ libtextclassifier3::StatusOr<std::unique_ptr<Index>> Index::Create( icing_filesystem)); return std::unique_ptr<Index>(new Index(options, std::move(term_id_codec), std::move(lite_index), - std::move(main_index))); + std::move(main_index), filesystem)); } libtextclassifier3::Status Index::TruncateTo(DocumentId document_id) { @@ -277,6 +277,18 @@ Index::FindTermsByPrefix(const std::string& prefix, 
std::move(main_term_metadata_list), num_to_return); } +IndexStorageInfoProto Index::GetStorageInfo() const { + IndexStorageInfoProto storage_info; + int64_t directory_size = filesystem_->GetDiskUsage(options_.base_dir.c_str()); + if (directory_size != Filesystem::kBadFileSize) { + storage_info.set_index_size(directory_size); + } else { + storage_info.set_index_size(-1); + } + storage_info = lite_index_->GetStorageInfo(std::move(storage_info)); + return main_index_->GetStorageInfo(std::move(storage_info)); +} + libtextclassifier3::Status Index::Editor::BufferTerm(const char* term) { // Step 1: See if this term is already in the lexicon uint32_t tvi; diff --git a/icing/index/index.h b/icing/index/index.h index a4ea719..eab5be8 100644 --- a/icing/index/index.h +++ b/icing/index/index.h @@ -32,6 +32,7 @@ #include "icing/index/term-id-codec.h" #include "icing/index/term-metadata.h" #include "icing/legacy/index/icing-filesystem.h" +#include "icing/proto/storage.pb.h" #include "icing/proto/term.pb.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" @@ -126,6 +127,16 @@ class Index { return main_index_->last_added_document_id(); } + // Sets last_added_document_id to document_id so long as document_id > + // last_added_document_id() + void set_last_added_document_id(DocumentId document_id) { + DocumentId lite_document_id = lite_index_->last_added_document_id(); + if (lite_document_id == kInvalidDocumentId || + document_id >= lite_document_id) { + lite_index_->set_last_added_document_id(document_id); + } + } + // Returns debug information for the index in out. // verbosity <= 0, simplest debug information - just the lexicons and lite // index. @@ -151,6 +162,12 @@ class Index { return lite_index_size + main_index_size; } + // Calculates the StorageInfo for the Index. + // + // If an IO error occurs while trying to calculate the value for a field, then + // that field will be set to -1. 
+ IndexStorageInfoProto GetStorageInfo() const; + // Create an iterator to iterate through all doc hit infos in the index that // match the term. section_id_mask can be set to ignore hits from sections not // listed in the mask. Eg. section_id_mask = 1U << 3; would only return hits @@ -242,11 +259,12 @@ class Index { private: Index(const Options& options, std::unique_ptr<TermIdCodec> term_id_codec, std::unique_ptr<LiteIndex> lite_index, - std::unique_ptr<MainIndex> main_index) + std::unique_ptr<MainIndex> main_index, const Filesystem* filesystem) : lite_index_(std::move(lite_index)), main_index_(std::move(main_index)), options_(options), - term_id_codec_(std::move(term_id_codec)) {} + term_id_codec_(std::move(term_id_codec)), + filesystem_(filesystem) {} libtextclassifier3::StatusOr<std::vector<TermMetadata>> FindLiteTermsByPrefix( const std::string& prefix, const std::vector<NamespaceId>& namespace_ids, @@ -256,6 +274,7 @@ class Index { std::unique_ptr<MainIndex> main_index_; const Options options_; std::unique_ptr<TermIdCodec> term_id_codec_; + const Filesystem* filesystem_; }; } // namespace lib diff --git a/icing/index/index_test.cc b/icing/index/index_test.cc index 3479ab1..16593ef 100644 --- a/icing/index/index_test.cc +++ b/icing/index/index_test.cc @@ -31,6 +31,7 @@ #include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/legacy/index/icing-filesystem.h" #include "icing/legacy/index/icing-mock-filesystem.h" +#include "icing/proto/storage.pb.h" #include "icing/proto/term.pb.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" @@ -46,6 +47,7 @@ namespace { using ::testing::ElementsAre; using ::testing::Eq; +using ::testing::Ge; using ::testing::Gt; using ::testing::IsEmpty; using ::testing::IsTrue; @@ -151,8 +153,6 @@ TEST_F(IndexTest, EmptyIndex) { index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY)); EXPECT_THAT(itr->Advance(), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); - - 
EXPECT_THAT(index_->last_added_document_id(), Eq(kInvalidDocumentId)); } TEST_F(IndexTest, EmptyIndexAfterMerge) { @@ -170,8 +170,6 @@ TEST_F(IndexTest, EmptyIndexAfterMerge) { index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY)); EXPECT_THAT(itr->Advance(), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); - - EXPECT_THAT(index_->last_added_document_id(), Eq(kInvalidDocumentId)); } TEST_F(IndexTest, AdvancePastEnd) { @@ -236,8 +234,6 @@ TEST_F(IndexTest, SingleHitSingleTermIndex) { EXPECT_THAT(GetHits(std::move(itr)), ElementsAre(EqualsDocHitInfo( kDocumentId0, std::vector<SectionId>{kSectionId2}))); - - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); } TEST_F(IndexTest, SingleHitSingleTermIndexAfterMerge) { @@ -254,8 +250,6 @@ TEST_F(IndexTest, SingleHitSingleTermIndexAfterMerge) { EXPECT_THAT(GetHits(std::move(itr)), ElementsAre(EqualsDocHitInfo( kDocumentId0, std::vector<SectionId>{kSectionId2}))); - - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); } TEST_F(IndexTest, SingleHitMultiTermIndex) { @@ -271,8 +265,6 @@ TEST_F(IndexTest, SingleHitMultiTermIndex) { EXPECT_THAT(GetHits(std::move(itr)), ElementsAre(EqualsDocHitInfo( kDocumentId0, std::vector<SectionId>{kSectionId2}))); - - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); } TEST_F(IndexTest, SingleHitMultiTermIndexAfterMerge) { @@ -290,8 +282,6 @@ TEST_F(IndexTest, SingleHitMultiTermIndexAfterMerge) { EXPECT_THAT(GetHits(std::move(itr)), ElementsAre(EqualsDocHitInfo( kDocumentId0, std::vector<SectionId>{kSectionId2}))); - - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); } TEST_F(IndexTest, NoHitMultiTermIndex) { @@ -306,7 +296,6 @@ TEST_F(IndexTest, NoHitMultiTermIndex) { index_->GetIterator("baz", kSectionIdMaskAll, TermMatchType::EXACT_ONLY)); EXPECT_THAT(itr->Advance(), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); } 
TEST_F(IndexTest, NoHitMultiTermIndexAfterMerge) { @@ -323,7 +312,6 @@ TEST_F(IndexTest, NoHitMultiTermIndexAfterMerge) { index_->GetIterator("baz", kSectionIdMaskAll, TermMatchType::EXACT_ONLY)); EXPECT_THAT(itr->Advance(), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); } TEST_F(IndexTest, MultiHitMultiTermIndex) { @@ -350,7 +338,6 @@ TEST_F(IndexTest, MultiHitMultiTermIndex) { ElementsAre( EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}), EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2}))); - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId2)); } TEST_F(IndexTest, MultiHitMultiTermIndexAfterMerge) { @@ -379,7 +366,6 @@ TEST_F(IndexTest, MultiHitMultiTermIndexAfterMerge) { ElementsAre( EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}), EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2}))); - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId2)); } TEST_F(IndexTest, MultiHitSectionRestrict) { @@ -400,8 +386,6 @@ TEST_F(IndexTest, MultiHitSectionRestrict) { EXPECT_THAT(GetHits(std::move(itr)), ElementsAre(EqualsDocHitInfo( kDocumentId0, std::vector<SectionId>{kSectionId2}))); - - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1)); } TEST_F(IndexTest, MultiHitSectionRestrictAfterMerge) { @@ -424,8 +408,6 @@ TEST_F(IndexTest, MultiHitSectionRestrictAfterMerge) { EXPECT_THAT(GetHits(std::move(itr)), ElementsAre(EqualsDocHitInfo( kDocumentId0, std::vector<SectionId>{kSectionId2}))); - - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1)); } TEST_F(IndexTest, SingleHitDedupeIndex) { @@ -447,8 +429,6 @@ TEST_F(IndexTest, SingleHitDedupeIndex) { EXPECT_THAT(GetHits(std::move(itr)), ElementsAre(EqualsDocHitInfo( kDocumentId0, std::vector<SectionId>{kSectionId2}))); - - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); } TEST_F(IndexTest, PrefixHit) { @@ -463,8 
+443,6 @@ TEST_F(IndexTest, PrefixHit) { EXPECT_THAT(GetHits(std::move(itr)), ElementsAre(EqualsDocHitInfo( kDocumentId0, std::vector<SectionId>{kSectionId2}))); - - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); } TEST_F(IndexTest, PrefixHitAfterMerge) { @@ -481,8 +459,6 @@ TEST_F(IndexTest, PrefixHitAfterMerge) { EXPECT_THAT(GetHits(std::move(itr)), ElementsAre(EqualsDocHitInfo( kDocumentId0, std::vector<SectionId>{kSectionId2}))); - - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); } TEST_F(IndexTest, MultiPrefixHit) { @@ -504,8 +480,6 @@ TEST_F(IndexTest, MultiPrefixHit) { ElementsAre( EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}), EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2}))); - - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1)); } TEST_F(IndexTest, MultiPrefixHitAfterMerge) { @@ -529,8 +503,6 @@ TEST_F(IndexTest, MultiPrefixHitAfterMerge) { ElementsAre( EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}), EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2}))); - - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1)); } TEST_F(IndexTest, NoExactHitInPrefixQuery) { @@ -550,7 +522,6 @@ TEST_F(IndexTest, NoExactHitInPrefixQuery) { EXPECT_THAT(GetHits(std::move(itr)), ElementsAre(EqualsDocHitInfo( kDocumentId1, std::vector<SectionId>{kSectionId3}))); - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1)); } TEST_F(IndexTest, NoExactHitInPrefixQueryAfterMerge) { @@ -572,7 +543,6 @@ TEST_F(IndexTest, NoExactHitInPrefixQueryAfterMerge) { EXPECT_THAT(GetHits(std::move(itr)), ElementsAre(EqualsDocHitInfo( kDocumentId1, std::vector<SectionId>{kSectionId3}))); - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1)); } TEST_F(IndexTest, PrefixHitDedupe) { @@ -588,7 +558,6 @@ TEST_F(IndexTest, PrefixHitDedupe) { EXPECT_THAT(GetHits(std::move(itr)), ElementsAre(EqualsDocHitInfo( kDocumentId0, 
std::vector<SectionId>{kSectionId2}))); - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); } TEST_F(IndexTest, PrefixHitDedupeAfterMerge) { @@ -606,7 +575,6 @@ TEST_F(IndexTest, PrefixHitDedupeAfterMerge) { EXPECT_THAT(GetHits(std::move(itr)), ElementsAre(EqualsDocHitInfo( kDocumentId0, std::vector<SectionId>{kSectionId2}))); - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); } TEST_F(IndexTest, PrefixToString) { @@ -703,9 +671,11 @@ TEST_F(IndexTest, FullIndex) { std::default_random_engine random; std::vector<std::string> query_terms; + std::string prefix = "prefix"; for (int i = 0; i < 2600; ++i) { constexpr int kTokenSize = 5; - query_terms.push_back(RandomString(kAlNumAlphabet, kTokenSize, &random)); + query_terms.push_back(prefix + + RandomString(kAlNumAlphabet, kTokenSize, &random)); } DocumentId document_id = 0; @@ -714,7 +684,7 @@ TEST_F(IndexTest, FullIndex) { while (status.ok()) { for (int i = 0; i < 100; ++i) { Index::Editor edit = - index_->Edit(document_id, kSectionId2, TermMatchType::EXACT_ONLY, + index_->Edit(document_id, kSectionId2, TermMatchType::PREFIX, /*namespace_id=*/0); size_t idx = uniform(random); status = edit.BufferTerm(query_terms.at(idx).c_str()); @@ -731,11 +701,14 @@ TEST_F(IndexTest, FullIndex) { // Adding more hits should fail. 
Index::Editor edit = - index_->Edit(document_id + 1, kSectionId2, TermMatchType::EXACT_ONLY, + index_->Edit(document_id + 1, kSectionId2, TermMatchType::PREFIX, /*namespace_id=*/0); - EXPECT_THAT(edit.BufferTerm("foo"), IsOk()); - EXPECT_THAT(edit.BufferTerm("bar"), IsOk()); - EXPECT_THAT(edit.BufferTerm("baz"), IsOk()); + std::string term = prefix + "foo"; + EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk()); + term = prefix + "bar"; + EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk()); + term = prefix + "baz"; + EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); @@ -743,12 +716,17 @@ TEST_F(IndexTest, FullIndex) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<DocHitInfoIterator> itr, index_->GetIterator(query_terms.at(i).c_str(), kSectionIdMaskAll, - TermMatchType::EXACT_ONLY)); + TermMatchType::PREFIX)); // Each query term should contain at least one hit - there may have been // other hits for this term that were added. 
EXPECT_THAT(itr->Advance(), IsOk()); } - EXPECT_THAT(index_->last_added_document_id(), Eq(document_id - 1)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DocHitInfoIterator> last_itr, + index_->GetIterator(prefix.c_str(), kSectionIdMaskAll, + TermMatchType::PREFIX)); + EXPECT_THAT(last_itr->Advance(), IsOk()); + EXPECT_THAT(last_itr->doc_hit_info().document_id(), Eq(document_id - 1)); } TEST_F(IndexTest, FullIndexMerge) { @@ -759,9 +737,11 @@ TEST_F(IndexTest, FullIndexMerge) { std::default_random_engine random; std::vector<std::string> query_terms; + std::string prefix = "prefix"; for (int i = 0; i < 2600; ++i) { constexpr int kTokenSize = 5; - query_terms.push_back(RandomString(kAlNumAlphabet, kTokenSize, &random)); + query_terms.push_back(prefix + + RandomString(kAlNumAlphabet, kTokenSize, &random)); } DocumentId document_id = 0; @@ -770,7 +750,7 @@ TEST_F(IndexTest, FullIndexMerge) { while (status.ok()) { for (int i = 0; i < 100; ++i) { Index::Editor edit = - index_->Edit(document_id, kSectionId2, TermMatchType::EXACT_ONLY, + index_->Edit(document_id, kSectionId2, TermMatchType::PREFIX, /*namespace_id=*/0); size_t idx = uniform(random); status = edit.BufferTerm(query_terms.at(idx).c_str()); @@ -789,30 +769,45 @@ TEST_F(IndexTest, FullIndexMerge) { // Adding more hits should fail. 
Index::Editor edit = - index_->Edit(document_id + 1, kSectionId2, TermMatchType::EXACT_ONLY, + index_->Edit(document_id + 1, kSectionId2, TermMatchType::PREFIX, /*namespace_id=*/0); - EXPECT_THAT(edit.BufferTerm("foo"), IsOk()); - EXPECT_THAT(edit.BufferTerm("bar"), IsOk()); - EXPECT_THAT(edit.BufferTerm("baz"), IsOk()); + std::string term = prefix + "foo"; + EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk()); + term = prefix + "bar"; + EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk()); + term = prefix + "baz"; + EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); - EXPECT_THAT(index_->last_added_document_id(), Eq(document_id - 1)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DocHitInfoIterator> last_itr, + index_->GetIterator(prefix.c_str(), kSectionIdMaskAll, + TermMatchType::PREFIX)); + EXPECT_THAT(last_itr->Advance(), IsOk()); + EXPECT_THAT(last_itr->doc_hit_info().document_id(), Eq(document_id - 1)); // After merging with the main index. Adding more hits should succeed now. 
ICING_ASSERT_OK(index_->Merge()); - edit = - index_->Edit(document_id + 1, kSectionId2, TermMatchType::EXACT_ONLY, 0); - EXPECT_THAT(edit.BufferTerm("foo"), IsOk()); - EXPECT_THAT(edit.BufferTerm("bar"), IsOk()); - EXPECT_THAT(edit.BufferTerm("baz"), IsOk()); + edit = index_->Edit(document_id + 1, kSectionId2, TermMatchType::PREFIX, 0); + term = prefix + "foo"; + EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk()); + term = prefix + "bar"; + EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk()); + term = prefix + "baz"; + EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<DocHitInfoIterator> itr, - index_->GetIterator("bar", kSectionIdMaskAll, TermMatchType::EXACT_ONLY)); + index_->GetIterator(prefix + "bar", kSectionIdMaskAll, + TermMatchType::EXACT_ONLY)); // We know that "bar" should have at least one hit because we just added it! EXPECT_THAT(itr->Advance(), IsOk()); EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(document_id + 1)); - EXPECT_THAT(index_->last_added_document_id(), Eq(document_id + 1)); + ICING_ASSERT_OK_AND_ASSIGN( + last_itr, index_->GetIterator(prefix.c_str(), kSectionIdMaskAll, + TermMatchType::PREFIX)); + EXPECT_THAT(last_itr->Advance(), IsOk()); + EXPECT_THAT(last_itr->doc_hit_info().document_id(), Eq(document_id + 1)); } TEST_F(IndexTest, IndexCreateIOFailure) { @@ -881,8 +876,6 @@ TEST_F(IndexTest, IndexPersistence) { EXPECT_THAT(GetHits(std::move(itr)), ElementsAre(EqualsDocHitInfo( kDocumentId0, std::vector<SectionId>{kSectionId2}))); - - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); } TEST_F(IndexTest, IndexPersistenceAfterMerge) { @@ -910,8 +903,6 @@ TEST_F(IndexTest, IndexPersistenceAfterMerge) { EXPECT_THAT(GetHits(std::move(itr)), ElementsAre(EqualsDocHitInfo( kDocumentId0, std::vector<SectionId>{kSectionId2}))); - - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); } TEST_F(IndexTest, InvalidHitBufferSize) { @@ 
-1278,8 +1269,6 @@ TEST_F(IndexTest, ExactResultsFromLiteAndMain) { ElementsAre( EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}), EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2}))); - - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId2)); } TEST_F(IndexTest, PrefixResultsFromLiteAndMain) { @@ -1312,8 +1301,6 @@ TEST_F(IndexTest, PrefixResultsFromLiteAndMain) { EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}), EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}), EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2}))); - - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId2)); } TEST_F(IndexTest, GetDebugInfo) { @@ -1420,8 +1407,6 @@ TEST_F(IndexTest, BackfillingMultipleTermsSucceeds) { ElementsAre( EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}), EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId3}))); - - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId2)); } TEST_F(IndexTest, BackfillingNewTermsSucceeds) { @@ -1476,8 +1461,6 @@ TEST_F(IndexTest, BackfillingNewTermsSucceeds) { ElementsAre( EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}), EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}))); - - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId3)); } TEST_F(IndexTest, TruncateToInvalidDocumentIdHasNoEffect) { @@ -1525,8 +1508,6 @@ TEST_F(IndexTest, TruncateToInvalidDocumentIdHasNoEffect) { ElementsAre( EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}), EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2}))); - - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1)); } TEST_F(IndexTest, TruncateToLastAddedDocumentIdHasNoEffect) { @@ -1542,6 +1523,7 @@ TEST_F(IndexTest, TruncateToLastAddedDocumentIdHasNoEffect) { TermMatchType::PREFIX, /*namespace_id=*/0); ASSERT_THAT(edit.BufferTerm("foo"), IsOk()); 
EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); + index_->set_last_added_document_id(kDocumentId0); ICING_EXPECT_OK(index_->TruncateTo(index_->last_added_document_id())); // Clipping to invalid should have no effect. ICING_ASSERT_OK_AND_ASSIGN( @@ -1563,6 +1545,7 @@ TEST_F(IndexTest, TruncateToLastAddedDocumentIdHasNoEffect) { /*namespace_id=*/0); ASSERT_THAT(edit.BufferTerm("foot"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); + index_->set_last_added_document_id(kDocumentId1); // Clipping to invalid should still have no effect even if both indices have // hits. @@ -1574,8 +1557,6 @@ TEST_F(IndexTest, TruncateToLastAddedDocumentIdHasNoEffect) { ElementsAre( EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}), EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2}))); - - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1)); } TEST_F(IndexTest, TruncateToThrowsOutLiteIndex) { @@ -1584,6 +1565,7 @@ TEST_F(IndexTest, TruncateToThrowsOutLiteIndex) { TermMatchType::PREFIX, /*namespace_id=*/0); ASSERT_THAT(edit.BufferTerm("foo"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); + index_->set_last_added_document_id(kDocumentId0); ICING_ASSERT_OK(index_->Merge()); @@ -1592,6 +1574,7 @@ TEST_F(IndexTest, TruncateToThrowsOutLiteIndex) { /*namespace_id=*/0); ASSERT_THAT(edit.BufferTerm("foot"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); + index_->set_last_added_document_id(kDocumentId1); EXPECT_THAT(index_->TruncateTo(kDocumentId0), IsOk()); @@ -1602,8 +1585,6 @@ TEST_F(IndexTest, TruncateToThrowsOutLiteIndex) { EXPECT_THAT(GetHits(std::move(itr)), ElementsAre(EqualsDocHitInfo( kDocumentId0, std::vector<SectionId>{kSectionId2}))); - - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); } TEST_F(IndexTest, TruncateToThrowsOutBothIndices) { @@ -1612,10 +1593,12 @@ TEST_F(IndexTest, TruncateToThrowsOutBothIndices) { TermMatchType::PREFIX, /*namespace_id=*/0); 
ASSERT_THAT(edit.BufferTerm("foo"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); + index_->set_last_added_document_id(kDocumentId0); edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::PREFIX, /*namespace_id=*/0); ASSERT_THAT(edit.BufferTerm("foul"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); + index_->set_last_added_document_id(kDocumentId1); ICING_ASSERT_OK(index_->Merge()); @@ -1624,6 +1607,7 @@ TEST_F(IndexTest, TruncateToThrowsOutBothIndices) { /*namespace_id=*/0); ASSERT_THAT(edit.BufferTerm("foot"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); + index_->set_last_added_document_id(kDocumentId2); EXPECT_THAT(index_->TruncateTo(kDocumentId0), IsOk()); @@ -1632,8 +1616,33 @@ TEST_F(IndexTest, TruncateToThrowsOutBothIndices) { std::unique_ptr<DocHitInfoIterator> itr, index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX)); EXPECT_THAT(GetHits(std::move(itr)), IsEmpty()); +} + +TEST_F(IndexTest, IndexStorageInfoProto) { + // Add two documents to the lite index and merge them into main. 
+ { + Index::Editor edit = index_->Edit( + kDocumentId0, kSectionId2, TermMatchType::PREFIX, /*namespace_id=*/0); + ASSERT_THAT(edit.BufferTerm("foo"), IsOk()); + EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); + edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::PREFIX, + /*namespace_id=*/0); + ASSERT_THAT(edit.BufferTerm("foul"), IsOk()); + EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); + + ICING_ASSERT_OK(index_->Merge()); + } - EXPECT_THAT(index_->last_added_document_id(), Eq(kInvalidDocumentId)); + IndexStorageInfoProto storage_info = index_->GetStorageInfo(); + EXPECT_THAT(storage_info.index_size(), Ge(0)); + EXPECT_THAT(storage_info.lite_index_lexicon_size(), Ge(0)); + EXPECT_THAT(storage_info.lite_index_hit_buffer_size(), Ge(0)); + EXPECT_THAT(storage_info.main_index_lexicon_size(), Ge(0)); + EXPECT_THAT(storage_info.main_index_storage_size(), Ge(0)); + EXPECT_THAT(storage_info.main_index_block_size(), Ge(0)); + // There should be 1 block for the header and 1 block for two posting lists. + EXPECT_THAT(storage_info.num_blocks(), Eq(2)); + EXPECT_THAT(storage_info.min_free_fraction(), Ge(0)); } } // namespace diff --git a/icing/index/iterator/doc-hit-info-iterator-and.cc b/icing/index/iterator/doc-hit-info-iterator-and.cc index 66f87bd..39aa969 100644 --- a/icing/index/iterator/doc-hit-info-iterator-and.cc +++ b/icing/index/iterator/doc-hit-info-iterator-and.cc @@ -162,6 +162,7 @@ libtextclassifier3::Status DocHitInfoIteratorAndNary::Advance() { DocumentId unused; ICING_ASSIGN_OR_RETURN( unused, AdvanceTo(iterator.get(), potential_document_id)); + (void)unused; // Silence unused warning. 
} if (iterator->doc_hit_info().document_id() == potential_document_id) { diff --git a/icing/index/iterator/doc-hit-info-iterator-and.h b/icing/index/iterator/doc-hit-info-iterator-and.h index faca785..8ceff44 100644 --- a/icing/index/iterator/doc-hit-info-iterator-and.h +++ b/icing/index/iterator/doc-hit-info-iterator-and.h @@ -47,13 +47,16 @@ class DocHitInfoIteratorAnd : public DocHitInfoIterator { std::string ToString() const override; void PopulateMatchedTermsStats( - std::vector<TermMatchInfo> *matched_terms_stats) const override { + std::vector<TermMatchInfo> *matched_terms_stats, + SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override { if (doc_hit_info_.document_id() == kInvalidDocumentId) { // Current hit isn't valid, return. return; } - short_->PopulateMatchedTermsStats(matched_terms_stats); - long_->PopulateMatchedTermsStats(matched_terms_stats); + short_->PopulateMatchedTermsStats(matched_terms_stats, + filtering_section_mask); + long_->PopulateMatchedTermsStats(matched_terms_stats, + filtering_section_mask); } private: @@ -78,13 +81,15 @@ class DocHitInfoIteratorAndNary : public DocHitInfoIterator { std::string ToString() const override; void PopulateMatchedTermsStats( - std::vector<TermMatchInfo> *matched_terms_stats) const override { + std::vector<TermMatchInfo> *matched_terms_stats, + SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override { if (doc_hit_info_.document_id() == kInvalidDocumentId) { // Current hit isn't valid, return. 
return; } for (size_t i = 0; i < iterators_.size(); ++i) { - iterators_.at(i)->PopulateMatchedTermsStats(matched_terms_stats); + iterators_.at(i)->PopulateMatchedTermsStats(matched_terms_stats, + filtering_section_mask); } } diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.cc b/icing/index/iterator/doc-hit-info-iterator-filter.cc index c6cb86d..933f9b5 100644 --- a/icing/index/iterator/doc-hit-info-iterator-filter.cc +++ b/icing/index/iterator/doc-hit-info-iterator-filter.cc @@ -31,7 +31,6 @@ #include "icing/store/document-filter-data.h" #include "icing/store/document-id.h" #include "icing/store/document-store.h" -#include "icing/util/clock.h" namespace icing { namespace lib { @@ -39,12 +38,11 @@ namespace lib { DocHitInfoIteratorFilter::DocHitInfoIteratorFilter( std::unique_ptr<DocHitInfoIterator> delegate, const DocumentStore* document_store, const SchemaStore* schema_store, - const Clock* clock, const Options& options) + const Options& options) : delegate_(std::move(delegate)), document_store_(*document_store), schema_store_(*schema_store), - options_(options), - current_time_milliseconds_(clock->GetSystemTimeMilliseconds()) { + options_(options) { // Precompute all the NamespaceIds for (std::string_view name_space : options_.namespaces) { auto namespace_id_or = document_store_.GetNamespaceId(name_space); @@ -67,61 +65,50 @@ DocHitInfoIteratorFilter::DocHitInfoIteratorFilter( } libtextclassifier3::Status DocHitInfoIteratorFilter::Advance() { - if (!delegate_->Advance().ok()) { - // Didn't find anything on the delegate iterator. - doc_hit_info_ = DocHitInfo(kInvalidDocumentId); - hit_intersect_section_ids_mask_ = kSectionIdMaskNone; - return absl_ports::ResourceExhaustedError( - "No more DocHitInfos in iterator"); - } - - if (current_time_milliseconds_ < 0) { - // This shouldn't happen, but we add a sanity check here for any unknown - // errors. - return absl_ports::InternalError( - "Couldn't get current time. 
Try again in a bit"); - } - - if (!document_store_.DoesDocumentExist( - delegate_->doc_hit_info().document_id())) { - // Document doesn't exist, keep searching - return Advance(); - } + while (delegate_->Advance().ok()) { + if (!document_store_.DoesDocumentExist( + delegate_->doc_hit_info().document_id())) { + // Document doesn't exist, keep searching. This handles deletions and + // expired documents. + continue; + } - // Try to get the DocumentFilterData - auto document_filter_data_or = document_store_.GetDocumentFilterData( - delegate_->doc_hit_info().document_id()); - if (!document_filter_data_or.ok()) { - // Didn't find the DocumentFilterData in the filter cache. This could be - // because the DocumentId isn't valid or the filter cache is in some invalid - // state. This is bad, but not the query's responsibility to fix, so just - // skip this result for now. - return Advance(); - } - // We should be guaranteed that this exists now. - DocumentFilterData data = std::move(document_filter_data_or).ValueOrDie(); + // Try to get the DocumentFilterData + auto document_filter_data_or = document_store_.GetDocumentFilterData( + delegate_->doc_hit_info().document_id()); + if (!document_filter_data_or.ok()) { + // Didn't find the DocumentFilterData in the filter cache. This could be + // because the DocumentId isn't valid or the filter cache is in some + // invalid state. This is bad, but not the query's responsibility to fix, + // so just skip this result for now. + continue; + } + // We should be guaranteed that this exists now. + DocumentFilterData data = std::move(document_filter_data_or).ValueOrDie(); - if (!options_.namespaces.empty() && - target_namespace_ids_.count(data.namespace_id()) == 0) { - // Doesn't match one of the specified namespaces. Keep searching - return Advance(); - } + if (!options_.namespaces.empty() && + target_namespace_ids_.count(data.namespace_id()) == 0) { + // Doesn't match one of the specified namespaces. 
Keep searching + continue; + } - if (!options_.schema_types.empty() && - target_schema_type_ids_.count(data.schema_type_id()) == 0) { - // Doesn't match one of the specified schema types. Keep searching - return Advance(); - } + if (!options_.schema_types.empty() && + target_schema_type_ids_.count(data.schema_type_id()) == 0) { + // Doesn't match one of the specified schema types. Keep searching + continue; + } - if (current_time_milliseconds_ >= data.expiration_timestamp_ms()) { - // Current time has exceeded the document's expiration time - return Advance(); + // Satisfied all our specified filters + doc_hit_info_ = delegate_->doc_hit_info(); + hit_intersect_section_ids_mask_ = + delegate_->hit_intersect_section_ids_mask(); + return libtextclassifier3::Status::OK; } - // Satisfied all our specified filters - doc_hit_info_ = delegate_->doc_hit_info(); - hit_intersect_section_ids_mask_ = delegate_->hit_intersect_section_ids_mask(); - return libtextclassifier3::Status::OK; + // Didn't find anything on the delegate iterator. 
+ doc_hit_info_ = DocHitInfo(kInvalidDocumentId); + hit_intersect_section_ids_mask_ = kSectionIdMaskNone; + return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator"); } int32_t DocHitInfoIteratorFilter::GetNumBlocksInspected() const { diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.h b/icing/index/iterator/doc-hit-info-iterator-filter.h index fb60e38..5051607 100644 --- a/icing/index/iterator/doc-hit-info-iterator-filter.h +++ b/icing/index/iterator/doc-hit-info-iterator-filter.h @@ -27,7 +27,6 @@ #include "icing/schema/schema-store.h" #include "icing/store/document-store.h" #include "icing/store/namespace-id.h" -#include "icing/util/clock.h" namespace icing { namespace lib { @@ -57,7 +56,7 @@ class DocHitInfoIteratorFilter : public DocHitInfoIterator { explicit DocHitInfoIteratorFilter( std::unique_ptr<DocHitInfoIterator> delegate, const DocumentStore* document_store, const SchemaStore* schema_store, - const Clock* clock, const Options& options); + const Options& options); libtextclassifier3::Status Advance() override; @@ -68,8 +67,10 @@ class DocHitInfoIteratorFilter : public DocHitInfoIterator { std::string ToString() const override; void PopulateMatchedTermsStats( - std::vector<TermMatchInfo>* matched_terms_stats) const override { - delegate_->PopulateMatchedTermsStats(matched_terms_stats); + std::vector<TermMatchInfo>* matched_terms_stats, + SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override { + delegate_->PopulateMatchedTermsStats(matched_terms_stats, + filtering_section_mask); } private: @@ -79,7 +80,6 @@ class DocHitInfoIteratorFilter : public DocHitInfoIterator { const Options options_; std::unordered_set<NamespaceId> target_namespace_ids_; std::unordered_set<SchemaTypeId> target_schema_type_ids_; - const int64_t current_time_milliseconds_; }; } // namespace lib diff --git a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc index 
e0a8cd0..f80d1ea 100644 --- a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc +++ b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc @@ -28,6 +28,7 @@ #include "icing/index/iterator/doc-hit-info-iterator-test-util.h" #include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/proto/document.pb.h" +#include "icing/schema-builder.h" #include "icing/schema/schema-store.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" @@ -59,10 +60,10 @@ class DocHitInfoIteratorDeletedFilterTest : public ::testing::Test { test_document3_ = DocumentBuilder().SetKey("icing", "email/3").SetSchema("email").Build(); - SchemaProto schema; - auto type_config = schema.add_types(); - type_config->set_schema_type("email"); - + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); @@ -100,9 +101,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, EmptyOriginalIterator) { std::unique_ptr<DocHitInfoIterator> original_iterator_empty = std::make_unique<DocHitInfoIteratorDummy>(); - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator_empty), document_store_.get(), - schema_store_.get(), &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator_empty), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty()); } @@ -124,9 +125,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, DeletedDocumentsAreFiltered) { std::unique_ptr<DocHitInfoIterator> original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + 
schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1, document_id3)); @@ -150,9 +151,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, NonExistingDocumentsAreFiltered) { std::unique_ptr<DocHitInfoIterator> original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1, document_id2, document_id3)); @@ -163,9 +164,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, NegativeDocumentIdIsIgnored) { std::unique_ptr<DocHitInfoIterator> original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(filtered_iterator.Advance(), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); @@ -177,9 +178,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, InvalidDocumentIdIsIgnored) { std::unique_ptr<DocHitInfoIterator> original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(filtered_iterator.Advance(), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); @@ -194,9 +195,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, 
GreaterThanMaxDocumentIdIsIgnored) { std::unique_ptr<DocHitInfoIterator> original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(filtered_iterator.Advance(), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); @@ -226,10 +227,10 @@ class DocHitInfoIteratorNamespaceFilterTest : public ::testing::Test { .SetSchema("email") .Build(); - SchemaProto schema; - auto type_config = schema.add_types(); - type_config->set_schema_type("email"); - + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); @@ -270,9 +271,9 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest, EmptyOriginalIterator) { std::make_unique<DocHitInfoIteratorDummy>(); options_.namespaces = std::vector<std::string_view>{}; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator_empty), document_store_.get(), - schema_store_.get(), &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator_empty), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty()); } @@ -288,9 +289,9 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest, std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); options_.namespaces = std::vector<std::string_view>{"nonexistent_namespace"}; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + 
schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty()); } @@ -305,9 +306,9 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest, NoNamespacesReturnsAll) { std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); options_.namespaces = std::vector<std::string_view>{}; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1)); } @@ -329,9 +330,9 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest, std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); options_.namespaces = std::vector<std::string_view>{namespace1_}; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1, document_id2)); @@ -355,9 +356,9 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest, FilterForMultipleNamespacesOk) { std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); options_.namespaces = std::vector<std::string_view>{namespace1_, namespace3_}; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1, document_id2, document_id4)); @@ -379,14 +380,12 @@ class DocHitInfoIteratorSchemaTypeFilterTest : public ::testing::Test { document4_schema1_ = DocumentBuilder().SetKey("namespace", 
"4").SetSchema(schema1_).Build(); - SchemaProto schema; - auto type_config = schema.add_types(); - type_config->set_schema_type(schema1_); - type_config = schema.add_types(); - type_config->set_schema_type(schema2_); - type_config = schema.add_types(); - type_config->set_schema_type(schema3_); - + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType(schema1_)) + .AddType(SchemaTypeConfigBuilder().SetType(schema2_)) + .AddType(SchemaTypeConfigBuilder().SetType(schema3_)) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); @@ -427,9 +426,9 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, EmptyOriginalIterator) { std::make_unique<DocHitInfoIteratorDummy>(); options_.schema_types = std::vector<std::string_view>{}; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator_empty), document_store_.get(), - schema_store_.get(), &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator_empty), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty()); } @@ -446,9 +445,9 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, options_.schema_types = std::vector<std::string_view>{"nonexistent_schema_type"}; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty()); } @@ -463,9 +462,9 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, NoSchemaTypesReturnsAll) { std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); options_.schema_types = std::vector<std::string_view>{}; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - 
&fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1)); } @@ -484,9 +483,9 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); options_.schema_types = std::vector<std::string_view>{schema1_}; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1)); } @@ -507,9 +506,9 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, FilterForMultipleSchemaTypesOk) { std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); options_.schema_types = std::vector<std::string_view>{schema2_, schema3_}; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id2, document_id3)); @@ -523,10 +522,10 @@ class DocHitInfoIteratorExpirationFilterTest : public ::testing::Test { void SetUp() override { filesystem_.CreateDirectoryRecursively(test_dir_.c_str()); - SchemaProto schema; - auto type_config = schema.add_types(); - type_config->set_schema_type(email_schema_); - + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType(email_schema_)) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); @@ -557,6 +556,16 @@ class DocHitInfoIteratorExpirationFilterTest : public ::testing::Test { }; 
TEST_F(DocHitInfoIteratorExpirationFilterTest, TtlZeroIsntFilteredOut) { + // Arbitrary value + fake_clock_.SetSystemTimeMilliseconds(100); + + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + // Insert a document DocumentProto document = DocumentBuilder() .SetKey("namespace", "1") @@ -565,23 +574,30 @@ TEST_F(DocHitInfoIteratorExpirationFilterTest, TtlZeroIsntFilteredOut) { .SetTtlMs(0) .Build(); ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - document_store_->Put(document)); + document_store->Put(document)); std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)}; std::unique_ptr<DocHitInfoIterator> original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); - // Arbitrary value - fake_clock_.SetSystemTimeMilliseconds(100); - - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1)); } TEST_F(DocHitInfoIteratorExpirationFilterTest, BeforeTtlNotFilteredOut) { + // Arbitrary value, but must be less than document's creation_timestamp + ttl + fake_clock_.SetSystemTimeMilliseconds(50); + + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + // Insert a document DocumentProto document = DocumentBuilder() .SetKey("namespace", "1") @@ -590,92 +606,84 @@ TEST_F(DocHitInfoIteratorExpirationFilterTest, BeforeTtlNotFilteredOut) { .SetTtlMs(100) 
.Build(); ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - document_store_->Put(document)); + document_store->Put(document)); std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)}; std::unique_ptr<DocHitInfoIterator> original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); - // Arbitrary value, but must be less than document's creation_timestamp + ttl - fake_clock_.SetSystemTimeMilliseconds(50); - - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1)); } TEST_F(DocHitInfoIteratorExpirationFilterTest, EqualTtlFilteredOut) { + // Current time is exactly the document's creation_timestamp + ttl + fake_clock_.SetSystemTimeMilliseconds(150); + + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + // Insert a document DocumentProto document = DocumentBuilder() .SetKey("namespace", "1") .SetSchema(email_schema_) - .SetCreationTimestampMs(0) + .SetCreationTimestampMs(50) .SetTtlMs(100) .Build(); ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - document_store_->Put(document)); + document_store->Put(document)); std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)}; std::unique_ptr<DocHitInfoIterator> original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); - // Current time is exactly the document's creation_timestamp + ttl - fake_clock_.SetSystemTimeMilliseconds(100); - - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - 
&fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty()); } TEST_F(DocHitInfoIteratorExpirationFilterTest, PastTtlFilteredOut) { + // Arbitrary value, but must be greater than the document's + // creation_timestamp + ttl + fake_clock_.SetSystemTimeMilliseconds(151); + + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + // Insert a document DocumentProto document = DocumentBuilder() .SetKey("namespace", "1") .SetSchema(email_schema_) - .SetCreationTimestampMs(0) + .SetCreationTimestampMs(50) .SetTtlMs(100) .Build(); ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - document_store_->Put(document)); + document_store->Put(document)); std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)}; std::unique_ptr<DocHitInfoIterator> original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); - // Arbitrary value, but must be greater than the document's - // creation_timestamp + ttl - fake_clock_.SetSystemTimeMilliseconds(101); - - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty()); } -TEST_F(DocHitInfoIteratorExpirationFilterTest, - InvalidTimeFiltersReturnsInternalError) { - // Put something in the original iterator so we don't get a ResourceExhausted - // error - std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(/*document_id_in=*/0)}; - std::unique_ptr<DocHitInfoIterator> original_iterator = - 
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); - - // -1 is an invalid timestamp - fake_clock_.SetSystemTimeMilliseconds(-1); - - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); - - EXPECT_THAT(filtered_iterator.Advance(), - StatusIs(libtextclassifier3::StatusCode::INTERNAL)); -} - class DocHitInfoIteratorFilterTest : public ::testing::Test { protected: DocHitInfoIteratorFilterTest() : test_dir_(GetTestTempDir() + "/icing") {} @@ -709,16 +717,15 @@ class DocHitInfoIteratorFilterTest : public ::testing::Test { document5_namespace1_schema1_ = DocumentBuilder() .SetKey(namespace1_, "5") .SetSchema(schema1_) - .SetCreationTimestampMs(0) + .SetCreationTimestampMs(1) .SetTtlMs(100) .Build(); - SchemaProto schema; - auto type_config = schema.add_types(); - type_config->set_schema_type(schema1_); - type_config = schema.add_types(); - type_config->set_schema_type(schema2_); - + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType(schema1_)) + .AddType(SchemaTypeConfigBuilder().SetType(schema2_)) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); @@ -756,26 +763,36 @@ class DocHitInfoIteratorFilterTest : public ::testing::Test { }; TEST_F(DocHitInfoIteratorFilterTest, CombineAllFiltersOk) { + // Filters out document5 since it's expired + fake_clock_.SetSystemTimeMilliseconds(199); + + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + ICING_ASSERT_OK_AND_ASSIGN( DocumentId document_id1, - document_store_->Put(document1_namespace1_schema1_)); + document_store->Put(document1_namespace1_schema1_)); ICING_ASSERT_OK_AND_ASSIGN( DocumentId document_id2, - 
document_store_->Put(document2_namespace1_schema1_)); + document_store->Put(document2_namespace1_schema1_)); ICING_ASSERT_OK_AND_ASSIGN( DocumentId document_id3, - document_store_->Put(document3_namespace2_schema1_)); + document_store->Put(document3_namespace2_schema1_)); ICING_ASSERT_OK_AND_ASSIGN( DocumentId document_id4, - document_store_->Put(document4_namespace1_schema2_)); + document_store->Put(document4_namespace1_schema2_)); ICING_ASSERT_OK_AND_ASSIGN( DocumentId document_id5, - document_store_->Put(document5_namespace1_schema1_)); + document_store->Put(document5_namespace1_schema1_)); // Deletes document2, causing it to be filtered out ICING_ASSERT_OK( - document_store_->Delete(document2_namespace1_schema1_.namespace_(), - document2_namespace1_schema1_.uri())); + document_store->Delete(document2_namespace1_schema1_.namespace_(), + document2_namespace1_schema1_.uri())); std::vector<DocHitInfo> doc_hit_infos = { DocHitInfo(document_id1), DocHitInfo(document_id2), @@ -793,13 +810,9 @@ TEST_F(DocHitInfoIteratorFilterTest, CombineAllFiltersOk) { // Filters out document4 by schema type options.schema_types = std::vector<std::string_view>{schema1_}; - // Filters out document5 since it's expired - FakeClock fake_clock; - fake_clock.SetSystemTimeMilliseconds(199); - - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock, options); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store.get(), + schema_store_.get(), options); EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1)); } @@ -830,9 +843,9 @@ TEST_F(DocHitInfoIteratorFilterTest, SectionIdMasksArePopulatedCorrectly) { std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); DocHitInfoIteratorFilter::Options options; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options); + 
DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options); EXPECT_THAT(GetDocHitInfos(&filtered_iterator), ElementsAre(EqualsDocHitInfo(document_id1, section_ids1), @@ -845,9 +858,9 @@ TEST_F(DocHitInfoIteratorFilterTest, GetNumBlocksInspected) { original_iterator->SetNumBlocksInspected(5); DocHitInfoIteratorFilter::Options options; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options); EXPECT_THAT(filtered_iterator.GetNumBlocksInspected(), Eq(5)); } @@ -857,9 +870,9 @@ TEST_F(DocHitInfoIteratorFilterTest, GetNumLeafAdvanceCalls) { original_iterator->SetNumLeafAdvanceCalls(6); DocHitInfoIteratorFilter::Options options; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options); EXPECT_THAT(filtered_iterator.GetNumLeafAdvanceCalls(), Eq(6)); } diff --git a/icing/index/iterator/doc-hit-info-iterator-not.cc b/icing/index/iterator/doc-hit-info-iterator-not.cc index e1ece5c..8fb3659 100644 --- a/icing/index/iterator/doc-hit-info-iterator-not.cc +++ b/icing/index/iterator/doc-hit-info-iterator-not.cc @@ -35,30 +35,29 @@ DocHitInfoIteratorNot::DocHitInfoIteratorNot( DocHitInfoIteratorAllDocumentId(document_id_limit)) {} libtextclassifier3::Status DocHitInfoIteratorNot::Advance() { - if (!all_document_id_iterator_.Advance().ok()) { - doc_hit_info_ = DocHitInfo(kInvalidDocumentId); - return absl_ports::ResourceExhaustedError( - "No more DocHitInfos in iterator"); - } + while (all_document_id_iterator_.Advance().ok()) { + if 
(all_document_id_iterator_.doc_hit_info().document_id() < + to_be_excluded_->doc_hit_info().document_id()) { + // Since DocumentIds are returned from DocHitInfoIterators in decreasing + // order, we have passed the last NOT result if we're smaller than its + // DocumentId. Advance the NOT result if so. + to_be_excluded_->Advance().IgnoreError(); + } - if (all_document_id_iterator_.doc_hit_info().document_id() < - to_be_excluded_->doc_hit_info().document_id()) { - // Since DocumentIds are returned from DocHitInfoIterators in decreasing - // order, we have passed the last NOT result if we're smaller than its - // DocumentId. Advance the NOT result if so. - to_be_excluded_->Advance().IgnoreError(); - } + if (all_document_id_iterator_.doc_hit_info().document_id() == + to_be_excluded_->doc_hit_info().document_id()) { + // This is a NOT result, skip and Advance to the next result. + continue; + } - if (all_document_id_iterator_.doc_hit_info().document_id() == - to_be_excluded_->doc_hit_info().document_id()) { - // This is a NOT result, skip and Advance to the next result. 
- return Advance(); + // No errors, we've found a valid result + doc_hit_info_ = all_document_id_iterator_.doc_hit_info(); + return libtextclassifier3::Status::OK; } - // No errors, we've found a valid result - doc_hit_info_ = all_document_id_iterator_.doc_hit_info(); - - return libtextclassifier3::Status::OK; + // Didn't find a hit, return with error + doc_hit_info_ = DocHitInfo(kInvalidDocumentId); + return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator"); } int32_t DocHitInfoIteratorNot::GetNumBlocksInspected() const { diff --git a/icing/index/iterator/doc-hit-info-iterator-or.h b/icing/index/iterator/doc-hit-info-iterator-or.h index 2f49430..2dae68d 100644 --- a/icing/index/iterator/doc-hit-info-iterator-or.h +++ b/icing/index/iterator/doc-hit-info-iterator-or.h @@ -43,15 +43,18 @@ class DocHitInfoIteratorOr : public DocHitInfoIterator { std::string ToString() const override; void PopulateMatchedTermsStats( - std::vector<TermMatchInfo> *matched_terms_stats) const override { + std::vector<TermMatchInfo> *matched_terms_stats, + SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override { if (doc_hit_info_.document_id() == kInvalidDocumentId) { // Current hit isn't valid, return. return; } - current_->PopulateMatchedTermsStats(matched_terms_stats); + current_->PopulateMatchedTermsStats(matched_terms_stats, + filtering_section_mask); // If equal, then current_ == left_. Combine with results from right_. 
if (left_document_id_ == right_document_id_) { - right_->PopulateMatchedTermsStats(matched_terms_stats); + right_->PopulateMatchedTermsStats(matched_terms_stats, + filtering_section_mask); } } @@ -83,13 +86,15 @@ class DocHitInfoIteratorOrNary : public DocHitInfoIterator { std::string ToString() const override; void PopulateMatchedTermsStats( - std::vector<TermMatchInfo> *matched_terms_stats) const override { + std::vector<TermMatchInfo> *matched_terms_stats, + SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override { if (doc_hit_info_.document_id() == kInvalidDocumentId) { // Current hit isn't valid, return. return; } for (size_t i = 0; i < current_iterators_.size(); i++) { - current_iterators_.at(i)->PopulateMatchedTermsStats(matched_terms_stats); + current_iterators_.at(i)->PopulateMatchedTermsStats( + matched_terms_stats, filtering_section_mask); } } diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc index 8acb91a..034c8cb 100644 --- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc +++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc @@ -45,57 +45,54 @@ DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict( target_section_(target_section) {} libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() { - if (!delegate_->Advance().ok()) { - // Didn't find anything on the delegate iterator. 
- doc_hit_info_ = DocHitInfo(kInvalidDocumentId); - hit_intersect_section_ids_mask_ = kSectionIdMaskNone; - return absl_ports::ResourceExhaustedError( - "No more DocHitInfos in iterator"); - } - - DocumentId document_id = delegate_->doc_hit_info().document_id(); + while (delegate_->Advance().ok()) { + DocumentId document_id = delegate_->doc_hit_info().document_id(); - SectionIdMask section_id_mask = - delegate_->doc_hit_info().hit_section_ids_mask(); + SectionIdMask section_id_mask = + delegate_->doc_hit_info().hit_section_ids_mask(); - auto data_or = document_store_.GetDocumentFilterData(document_id); - if (!data_or.ok()) { - // Ran into some error retrieving information on this hit, skip - return Advance(); - } + auto data_or = document_store_.GetDocumentFilterData(document_id); + if (!data_or.ok()) { + // Ran into some error retrieving information on this hit, skip + continue; + } - // Guaranteed that the DocumentFilterData exists at this point - DocumentFilterData data = std::move(data_or).ValueOrDie(); - SchemaTypeId schema_type_id = data.schema_type_id(); - - // A hit can be in multiple sections at once, need to check that at least one - // of the confirmed section ids match the name of the target section - while (section_id_mask != 0) { - // There was a hit in this section id - SectionId section_id = __builtin_ctz(section_id_mask); - - auto section_metadata_or = - schema_store_.GetSectionMetadata(schema_type_id, section_id); - - if (section_metadata_or.ok()) { - const SectionMetadata* section_metadata = - section_metadata_or.ValueOrDie(); - - if (section_metadata->path == target_section_) { - // The hit was in the target section name, return OK/found - doc_hit_info_ = delegate_->doc_hit_info(); - hit_intersect_section_ids_mask_ = - delegate_->hit_intersect_section_ids_mask(); - return libtextclassifier3::Status::OK; + // Guaranteed that the DocumentFilterData exists at this point + DocumentFilterData data = std::move(data_or).ValueOrDie(); + SchemaTypeId 
schema_type_id = data.schema_type_id(); + + // A hit can be in multiple sections at once, need to check that at least + // one of the confirmed section ids match the name of the target section + while (section_id_mask != 0) { + // There was a hit in this section id + SectionId section_id = __builtin_ctz(section_id_mask); + + auto section_metadata_or = + schema_store_.GetSectionMetadata(schema_type_id, section_id); + + if (section_metadata_or.ok()) { + const SectionMetadata* section_metadata = + section_metadata_or.ValueOrDie(); + + if (section_metadata->path == target_section_) { + // The hit was in the target section name, return OK/found + doc_hit_info_ = delegate_->doc_hit_info(); + hit_intersect_section_ids_mask_ = 1u << section_id; + return libtextclassifier3::Status::OK; + } } + + // Mark this section as checked + section_id_mask &= ~(1U << section_id); } - // Mark this section as checked - section_id_mask &= ~(1U << section_id); + // Didn't find a matching section name for this hit. Continue. } - // Didn't find a matching section name for this hit, go to the next hit - return Advance(); + // Didn't find anything on the delegate iterator. + doc_hit_info_ = DocHitInfo(kInvalidDocumentId); + hit_intersect_section_ids_mask_ = kSectionIdMaskNone; + return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator"); } int32_t DocHitInfoIteratorSectionRestrict::GetNumBlocksInspected() const { diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h index ba74384..52b243a 100644 --- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h +++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h @@ -52,13 +52,21 @@ class DocHitInfoIteratorSectionRestrict : public DocHitInfoIterator { std::string ToString() const override; - // NOTE: currently, section restricts does decide which documents to - // return, but doesn't impact the relevance score of a document. 
- // TODO(b/173156803): decide whether we want to filter the matched_terms_stats - // for the restricted sections. + // Note that the DocHitInfoIteratorSectionRestrict is the only iterator that + // should set filtering_section_mask, hence the received + // filtering_section_mask is ignored and the filtering_section_mask passed to + // the delegate will be set to hit_intersect_section_ids_mask_. This will + // allow to filter the matching sections in the delegate. void PopulateMatchedTermsStats( - std::vector<TermMatchInfo>* matched_terms_stats) const override { - delegate_->PopulateMatchedTermsStats(matched_terms_stats); + std::vector<TermMatchInfo>* matched_terms_stats, + SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override { + if (doc_hit_info_.document_id() == kInvalidDocumentId) { + // Current hit isn't valid, return. + return; + } + delegate_->PopulateMatchedTermsStats( + matched_terms_stats, + /*filtering_section_mask=*/hit_intersect_section_ids_mask_); } private: diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc index 91e0cbe..43a846b 100644 --- a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc +++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc @@ -29,6 +29,7 @@ #include "icing/proto/document.pb.h" #include "icing/proto/schema.pb.h" #include "icing/proto/term.pb.h" +#include "icing/schema-builder.h" #include "icing/schema/schema-store.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" @@ -43,9 +44,18 @@ namespace lib { namespace { using ::testing::ElementsAre; +using ::testing::ElementsAreArray; using ::testing::Eq; using ::testing::IsEmpty; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = + PropertyConfigProto_Cardinality_Code_OPTIONAL; + +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = + 
StringIndexingConfig_TokenizerType_Code_PLAIN; + +constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; + class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test { protected: DocHitInfoIteratorSectionRestrictTest() @@ -56,18 +66,18 @@ class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test { document_ = DocumentBuilder().SetKey("namespace", "uri").SetSchema("email").Build(); - auto type_config = schema_.add_types(); - type_config->set_schema_type("email"); - - // Add an indexed property so we generate section metadata on it - auto property = type_config->add_properties(); - property->set_property_name(indexed_property_); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - property->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::EXACT_ONLY); - property->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); + schema_ = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("email") + // Add an indexed property so we generate section + // metadata on it + .AddProperty( + PropertyConfigBuilder() + .SetName(indexed_property_) + .SetDataTypeString(MATCH_EXACT, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); // First and only indexed property, so it gets the first id of 0 indexed_section_id_ = 0; @@ -101,6 +111,57 @@ class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test { FakeClock fake_clock_; }; +TEST_F(DocHitInfoIteratorSectionRestrictTest, + PopulateMatchedTermsStats_IncludesHitWithMatchingSection) { + // Populate the DocumentStore's FilterCache with this document's data + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, + document_store_->Put(document_)); + + // Arbitrary section ids for the documents in the DocHitInfoIterators. + // Created to test correct section_id_mask behavior. 
+ SectionIdMask original_section_id_mask = 0b00000101; // hits in sections 0, 2 + + DocHitInfo doc_hit_info1 = DocHitInfo(document_id); + doc_hit_info1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1); + doc_hit_info1.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/2); + + // Create a hit that was found in the indexed section + std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1}; + + auto original_iterator = + std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "hi"); + original_iterator->set_hit_intersect_section_ids_mask( + original_section_id_mask); + + // Filtering for the indexed section name (which has a section id of 0) should + // get a result. + DocHitInfoIteratorSectionRestrict section_restrict_iterator( + std::move(original_iterator), document_store_.get(), schema_store_.get(), + /*target_section=*/indexed_property_); + + std::vector<TermMatchInfo> matched_terms_stats; + section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats); + EXPECT_THAT(matched_terms_stats, IsEmpty()); + + ICING_EXPECT_OK(section_restrict_iterator.Advance()); + EXPECT_THAT(section_restrict_iterator.doc_hit_info().document_id(), + Eq(document_id)); + SectionIdMask expected_section_id_mask = 0b00000001; // hits in sections 0 + EXPECT_EQ(section_restrict_iterator.hit_intersect_section_ids_mask(), + expected_section_id_mask); + + section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats); + EXPECT_EQ(matched_terms_stats.at(0).term, "hi"); + std::array<Hit::TermFrequency, kMaxSectionId> expected_term_frequencies{ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + EXPECT_THAT(matched_terms_stats.at(0).term_frequencies, + ElementsAreArray(expected_term_frequencies)); + EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, + expected_section_id_mask); + + EXPECT_FALSE(section_restrict_iterator.Advance().ok()); +} + TEST_F(DocHitInfoIteratorSectionRestrictTest, EmptyOriginalIterator) { std::unique_ptr<DocHitInfoIterator> 
original_iterator_empty = std::make_unique<DocHitInfoIteratorDummy>(); @@ -110,6 +171,9 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, EmptyOriginalIterator) { schema_store_.get(), /*target_section=*/""); EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty()); + std::vector<TermMatchInfo> matched_terms_stats; + filtered_iterator.PopulateMatchedTermsStats(&matched_terms_stats); + EXPECT_THAT(matched_terms_stats, IsEmpty()); } TEST_F(DocHitInfoIteratorSectionRestrictTest, IncludesHitWithMatchingSection) { @@ -148,6 +212,9 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, NoMatchingDocumentFilterData) { /*target_section=*/""); EXPECT_THAT(GetDocumentIds(§ion_restrict_iterator), IsEmpty()); + std::vector<TermMatchInfo> matched_terms_stats; + section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats); + EXPECT_THAT(matched_terms_stats, IsEmpty()); } TEST_F(DocHitInfoIteratorSectionRestrictTest, @@ -171,6 +238,9 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, "some_section_name"); EXPECT_THAT(GetDocumentIds(§ion_restrict_iterator), IsEmpty()); + std::vector<TermMatchInfo> matched_terms_stats; + section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats); + EXPECT_THAT(matched_terms_stats, IsEmpty()); } TEST_F(DocHitInfoIteratorSectionRestrictTest, @@ -192,6 +262,9 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, indexed_property_); EXPECT_THAT(GetDocumentIds(§ion_restrict_iterator), IsEmpty()); + std::vector<TermMatchInfo> matched_terms_stats; + section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats); + EXPECT_THAT(matched_terms_stats, IsEmpty()); } TEST_F(DocHitInfoIteratorSectionRestrictTest, @@ -216,6 +289,9 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, indexed_property_); EXPECT_THAT(GetDocumentIds(§ion_restrict_iterator), IsEmpty()); + std::vector<TermMatchInfo> matched_terms_stats; + section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats); + EXPECT_THAT(matched_terms_stats, IsEmpty()); } 
TEST_F(DocHitInfoIteratorSectionRestrictTest, GetNumBlocksInspected) { diff --git a/icing/index/iterator/doc-hit-info-iterator-test-util.h b/icing/index/iterator/doc-hit-info-iterator-test-util.h index 913696a..45acc8f 100644 --- a/icing/index/iterator/doc-hit-info-iterator-test-util.h +++ b/icing/index/iterator/doc-hit-info-iterator-test-util.h @@ -56,23 +56,25 @@ class DocHitInfoIteratorDummy : public DocHitInfoIterator { // Imitates behavior of DocHitInfoIteratorTermMain/DocHitInfoIteratorTermLite void PopulateMatchedTermsStats( - std::vector<TermMatchInfo>* matched_terms_stats) const override { + std::vector<TermMatchInfo>* matched_terms_stats, + SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override { if (doc_hit_info_.document_id() == kInvalidDocumentId) { // Current hit isn't valid, return. return; } - SectionIdMask section_mask = doc_hit_info_.hit_section_ids_mask(); + SectionIdMask section_mask = + doc_hit_info_.hit_section_ids_mask() & filtering_section_mask; + SectionIdMask section_mask_copy = section_mask; std::array<Hit::TermFrequency, kMaxSectionId> section_term_frequencies = { Hit::kNoTermFrequency}; - - while (section_mask) { - SectionId section_id = __builtin_ctz(section_mask); + while (section_mask_copy) { + SectionId section_id = __builtin_ctz(section_mask_copy); section_term_frequencies.at(section_id) = doc_hit_info_.hit_term_frequency(section_id); - section_mask &= ~(1u << section_id); + section_mask_copy &= ~(1u << section_id); } - TermMatchInfo term_stats(term_, doc_hit_info_.hit_section_ids_mask(), - section_term_frequencies); + TermMatchInfo term_stats(term_, section_mask, + std::move(section_term_frequencies)); for (auto& cur_term_stats : *matched_terms_stats) { if (cur_term_stats.term == term_stats.term) { diff --git a/icing/index/iterator/doc-hit-info-iterator.h b/icing/index/iterator/doc-hit-info-iterator.h index 67bd74f..bf90202 100644 --- a/icing/index/iterator/doc-hit-info-iterator.h +++ 
b/icing/index/iterator/doc-hit-info-iterator.h @@ -66,6 +66,8 @@ class DocHitInfoIterator { // Returns: // OK if was able to advance to a new document_id. + // INVALID_ARGUMENT if there are less than 2 iterators for an AND/OR + // iterator // RESOUCE_EXHAUSTED if we've run out of document_ids to iterate over virtual libtextclassifier3::Status Advance() = 0; @@ -94,11 +96,14 @@ class DocHitInfoIterator { // For the last hit docid, retrieves all the matched query terms and other // stats, see TermMatchInfo. + // filtering_section_mask filters the matching sections and should be set only + // by DocHitInfoIteratorSectionRestrict. // If Advance() wasn't called after construction, Advance() returned false or // the concrete HitIterator didn't override this method, the vectors aren't // populated. virtual void PopulateMatchedTermsStats( - std::vector<TermMatchInfo>* matched_terms_stats) const {} + std::vector<TermMatchInfo>* matched_terms_stats, + SectionIdMask filtering_section_mask = kSectionIdMaskAll) const {} protected: DocHitInfo doc_hit_info_; diff --git a/icing/index/iterator/doc-hit-info-iterator_benchmark.cc b/icing/index/iterator/doc-hit-info-iterator_benchmark.cc index 90e4888..f975989 100644 --- a/icing/index/iterator/doc-hit-info-iterator_benchmark.cc +++ b/icing/index/iterator/doc-hit-info-iterator_benchmark.cc @@ -14,15 +14,15 @@ #include <vector> +#include "testing/base/public/benchmark.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" #include "icing/index/hit/doc-hit-info.h" #include "icing/index/iterator/doc-hit-info-iterator-and.h" #include "icing/index/iterator/doc-hit-info-iterator-test-util.h" #include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" -#include "testing/base/public/benchmark.h" -#include "gmock/gmock.h" -#include "gtest/gtest.h" namespace icing { namespace lib { diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.cc 
b/icing/index/lite/doc-hit-info-iterator-term-lite.cc index d535d7f..08df4fc 100644 --- a/icing/index/lite/doc-hit-info-iterator-term-lite.cc +++ b/icing/index/lite/doc-hit-info-iterator-term-lite.cc @@ -45,8 +45,13 @@ libtextclassifier3::Status DocHitInfoIteratorTermLite::Advance() { if (cached_hits_idx_ == -1) { libtextclassifier3::Status status = RetrieveMoreHits(); if (!status.ok()) { - ICING_LOG(ERROR) << "Failed to retrieve more hits " - << status.error_message(); + if (!absl_ports::IsNotFound(status)) { + // NOT_FOUND is expected to happen (not every term will be in the main + // index!). Other errors are worth logging. + ICING_LOG(ERROR) + << "Encountered unexpected failure while retrieving hits " + << status.error_message(); + } return absl_ports::ResourceExhaustedError( "No more DocHitInfos in iterator"); } diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.h b/icing/index/lite/doc-hit-info-iterator-term-lite.h index ac5e97f..179fc93 100644 --- a/icing/index/lite/doc-hit-info-iterator-term-lite.h +++ b/icing/index/lite/doc-hit-info-iterator-term-lite.h @@ -50,21 +50,24 @@ class DocHitInfoIteratorTermLite : public DocHitInfoIterator { int32_t GetNumLeafAdvanceCalls() const override { return num_advance_calls_; } void PopulateMatchedTermsStats( - std::vector<TermMatchInfo>* matched_terms_stats) const override { + std::vector<TermMatchInfo>* matched_terms_stats, + SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override { if (doc_hit_info_.document_id() == kInvalidDocumentId) { // Current hit isn't valid, return. 
return; } - SectionIdMask section_mask = doc_hit_info_.hit_section_ids_mask(); + SectionIdMask section_mask = + doc_hit_info_.hit_section_ids_mask() & filtering_section_mask; + SectionIdMask section_mask_copy = section_mask; std::array<Hit::TermFrequency, kMaxSectionId> section_term_frequencies = { Hit::kNoTermFrequency}; - while (section_mask) { - SectionId section_id = __builtin_ctz(section_mask); + while (section_mask_copy) { + SectionId section_id = __builtin_ctz(section_mask_copy); section_term_frequencies.at(section_id) = doc_hit_info_.hit_term_frequency(section_id); - section_mask &= ~(1u << section_id); + section_mask_copy &= ~(1u << section_id); } - TermMatchInfo term_stats(term_, doc_hit_info_.hit_section_ids_mask(), + TermMatchInfo term_stats(term_, section_mask, std::move(section_term_frequencies)); for (const TermMatchInfo& cur_term_stats : *matched_terms_stats) { @@ -79,6 +82,11 @@ class DocHitInfoIteratorTermLite : public DocHitInfoIterator { protected: // Add DocHitInfos corresponding to term_ to cached_hits_. + // + // Returns: + // - OK, on success + // - NOT_FOUND if no term matching term_ was found in the lexicon. 
+ // - INVALID_ARGUMENT if unable to properly encode the termid virtual libtextclassifier3::Status RetrieveMoreHits() = 0; const std::string term_; diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc index e0379b8..fb23934 100644 --- a/icing/index/lite/lite-index.cc +++ b/icing/index/lite/lite-index.cc @@ -310,8 +310,6 @@ libtextclassifier3::Status LiteIndex::AddHit(uint32_t term_id, const Hit& hit) { return absl_ports::ResourceExhaustedError("Hit buffer is full!"); } - header_->set_last_added_docid(hit.document_id()); - TermIdHitPair term_id_hit_pair(term_id, hit); uint32_t cur_size = header_->cur_size(); TermIdHitPair::Value* valp = @@ -394,26 +392,36 @@ void LiteIndex::GetDebugInfo(int verbosity, std::string* out) const { } libtextclassifier3::StatusOr<int64_t> LiteIndex::GetElementsSize() const { - int64_t header_and_hit_buffer_file_size = - filesystem_->GetFileSize(hit_buffer_fd_.get()); - - if (header_and_hit_buffer_file_size == Filesystem::kBadFileSize) { - return absl_ports::InternalError( - "Failed to get element size of the LiteIndex's header and hit buffer"); - } - - int64_t lexicon_disk_usage = lexicon_.GetElementsSize(); - if (lexicon_disk_usage == IcingFilesystem::kBadFileSize) { - return absl_ports::InternalError( - "Failed to get element size of LiteIndex's lexicon"); + IndexStorageInfoProto storage_info = GetStorageInfo(IndexStorageInfoProto()); + if (storage_info.lite_index_hit_buffer_size() == -1 || + storage_info.lite_index_lexicon_size() == -1) { + return absl_ports::AbortedError( + "Failed to get size of LiteIndex's members."); } - // On initialization, we grow the file to a padded size first. 
So this size // won't count towards the size taken up by elements size_t header_padded_size = IcingMMapper::page_aligned_size(header_size()); + return storage_info.lite_index_hit_buffer_size() - header_padded_size + + storage_info.lite_index_lexicon_size(); +} - return header_and_hit_buffer_file_size - header_padded_size + - lexicon_disk_usage; +IndexStorageInfoProto LiteIndex::GetStorageInfo( + IndexStorageInfoProto storage_info) const { + int64_t header_and_hit_buffer_file_size = + filesystem_->GetFileSize(hit_buffer_fd_.get()); + if (header_and_hit_buffer_file_size != Filesystem::kBadFileSize) { + storage_info.set_lite_index_hit_buffer_size( + header_and_hit_buffer_file_size); + } else { + storage_info.set_lite_index_hit_buffer_size(-1); + } + int64_t lexicon_disk_usage = lexicon_.GetElementsSize(); + if (lexicon_disk_usage != Filesystem::kBadFileSize) { + storage_info.set_lite_index_lexicon_size(lexicon_disk_usage); + } else { + storage_info.set_lite_index_lexicon_size(-1); + } + return storage_info; } uint32_t LiteIndex::Seek(uint32_t term_id) { diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h index 7b51aa4..b134aba 100644 --- a/icing/index/lite/lite-index.h +++ b/icing/index/lite/lite-index.h @@ -37,6 +37,7 @@ #include "icing/legacy/index/icing-lite-index-header.h" #include "icing/legacy/index/icing-lite-index-options.h" #include "icing/legacy/index/icing-mmapper.h" +#include "icing/proto/storage.pb.h" #include "icing/proto/term.pb.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" @@ -224,6 +225,9 @@ class LiteIndex { DocumentId last_added_document_id() const { return header_->last_added_docid(); } + void set_last_added_document_id(DocumentId document_id) const { + header_->set_last_added_docid(document_id); + } const IcingDynamicTrie& lexicon() const { return lexicon_; } @@ -240,6 +244,14 @@ class LiteIndex { // INTERNAL_ERROR on IO error libtextclassifier3::StatusOr<int64_t> GetElementsSize() const; + // 
Takes the provided storage_info, populates the fields related to the lite + // index and returns that storage_info. + // + // If an IO error occurs while trying to calculate the value for a field, then + // that field will be set to -1. + IndexStorageInfoProto GetStorageInfo( + IndexStorageInfoProto storage_info) const; + private: static IcingDynamicTrie::RuntimeOptions MakeTrieRuntimeOptions(); diff --git a/icing/index/main/doc-hit-info-iterator-term-main.cc b/icing/index/main/doc-hit-info-iterator-term-main.cc index 5553c1e..98bc18e 100644 --- a/icing/index/main/doc-hit-info-iterator-term-main.cc +++ b/icing/index/main/doc-hit-info-iterator-term-main.cc @@ -57,8 +57,9 @@ libtextclassifier3::Status DocHitInfoIteratorTermMain::Advance() { if (!absl_ports::IsNotFound(status)) { // NOT_FOUND is expected to happen (not every term will be in the main // index!). Other errors are worth logging. - ICING_LOG(ERROR) << "Failed to retrieve more hits " - << status.error_message(); + ICING_LOG(ERROR) + << "Encountered unexpected failure while retrieving hits " + << status.error_message(); } return absl_ports::ResourceExhaustedError( "No more DocHitInfos in iterator"); diff --git a/icing/index/main/doc-hit-info-iterator-term-main.h b/icing/index/main/doc-hit-info-iterator-term-main.h index d626d7a..f3cf701 100644 --- a/icing/index/main/doc-hit-info-iterator-term-main.h +++ b/icing/index/main/doc-hit-info-iterator-term-main.h @@ -50,21 +50,24 @@ class DocHitInfoIteratorTermMain : public DocHitInfoIterator { int32_t GetNumLeafAdvanceCalls() const override { return num_advance_calls_; } void PopulateMatchedTermsStats( - std::vector<TermMatchInfo>* matched_terms_stats) const override { + std::vector<TermMatchInfo>* matched_terms_stats, + SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override { if (doc_hit_info_.document_id() == kInvalidDocumentId) { // Current hit isn't valid, return. 
return; } - SectionIdMask section_mask = doc_hit_info_.hit_section_ids_mask(); + SectionIdMask section_mask = + doc_hit_info_.hit_section_ids_mask() & filtering_section_mask; + SectionIdMask section_mask_copy = section_mask; std::array<Hit::TermFrequency, kMaxSectionId> section_term_frequencies = { Hit::kNoTermFrequency}; - while (section_mask) { - SectionId section_id = __builtin_ctz(section_mask); + while (section_mask_copy) { + SectionId section_id = __builtin_ctz(section_mask_copy); section_term_frequencies.at(section_id) = doc_hit_info_.hit_term_frequency(section_id); - section_mask &= ~(1u << section_id); + section_mask_copy &= ~(1u << section_id); } - TermMatchInfo term_stats(term_, doc_hit_info_.hit_section_ids_mask(), + TermMatchInfo term_stats(term_, section_mask, std::move(section_term_frequencies)); for (const TermMatchInfo& cur_term_stats : *matched_terms_stats) { diff --git a/icing/index/main/main-index.cc b/icing/index/main/main-index.cc index 636f631..8ae6b27 100644 --- a/icing/index/main/main-index.cc +++ b/icing/index/main/main-index.cc @@ -121,14 +121,34 @@ libtextclassifier3::Status MainIndex::Init( } libtextclassifier3::StatusOr<int64_t> MainIndex::GetElementsSize() const { + IndexStorageInfoProto storage_info = GetStorageInfo(IndexStorageInfoProto()); + if (storage_info.main_index_storage_size() == -1 || + storage_info.main_index_lexicon_size() == -1) { + return absl_ports::AbortedError( + "Failed to get size of MainIndex's members."); + } + return storage_info.main_index_storage_size() + + storage_info.main_index_lexicon_size(); +} + +IndexStorageInfoProto MainIndex::GetStorageInfo( + IndexStorageInfoProto storage_info) const { int64_t lexicon_elt_size = main_lexicon_->GetElementsSize(); + if (lexicon_elt_size != IcingFilesystem::kBadFileSize) { + storage_info.set_main_index_lexicon_size(lexicon_elt_size); + } else { + storage_info.set_main_index_lexicon_size(-1); + } int64_t index_elt_size = flash_index_storage_->GetElementsSize(); - if 
(lexicon_elt_size == IcingFilesystem::kBadFileSize || - index_elt_size == IcingFilesystem::kBadFileSize) { - return absl_ports::InternalError( - "Failed to get element size of LiteIndex's lexicon"); + if (lexicon_elt_size != IcingFilesystem::kBadFileSize) { + storage_info.set_main_index_storage_size(index_elt_size); + } else { + storage_info.set_main_index_storage_size(-1); } - return lexicon_elt_size + index_elt_size; + storage_info.set_main_index_block_size(flash_index_storage_->block_size()); + storage_info.set_num_blocks(flash_index_storage_->num_blocks()); + storage_info.set_min_free_fraction(flash_index_storage_->min_free_fraction()); + return storage_info; } libtextclassifier3::StatusOr<std::unique_ptr<PostingListAccessor>> diff --git a/icing/index/main/main-index.h b/icing/index/main/main-index.h index 7403b8c..43635ca 100644 --- a/icing/index/main/main-index.h +++ b/icing/index/main/main-index.h @@ -27,6 +27,7 @@ #include "icing/index/term-metadata.h" #include "icing/legacy/index/icing-dynamic-trie.h" #include "icing/legacy/index/icing-filesystem.h" +#include "icing/proto/storage.pb.h" #include "icing/store/namespace-id.h" #include "icing/util/status-macros.h" @@ -172,6 +173,14 @@ class MainIndex { // - INTERNAL on IO error libtextclassifier3::StatusOr<int64_t> GetElementsSize() const; + // Takes the provided storage_info, populates the fields related to the main + // index and returns that storage_info. + // + // If an IO error occurs while trying to calculate the value for a field, then + // that field will be set to -1. + IndexStorageInfoProto GetStorageInfo( + IndexStorageInfoProto storage_info) const; + // Returns debug information for the main index in out. 
// verbosity <= 0, simplest debug information - just the lexicon // verbosity > 0, more detailed debug information including raw postings diff --git a/icing/index/main/posting-list-free.h b/icing/index/main/posting-list-free.h index 4b27401..4f06057 100644 --- a/icing/index/main/posting-list-free.h +++ b/icing/index/main/posting-list-free.h @@ -115,7 +115,7 @@ class PostingListFree { // bytes which will store the next posting list index, the rest are unused and // can be anything. uint8_t *posting_list_buffer_; - uint32_t size_in_bytes_; + [[maybe_unused]] uint32_t size_in_bytes_; static_assert(sizeof(PostingListIndex) <= posting_list_utils::min_posting_list_size(), diff --git a/icing/jni.lds b/icing/jni.lds new file mode 100644 index 0000000..401682a --- /dev/null +++ b/icing/jni.lds @@ -0,0 +1,10 @@ +VERS_1.0 { + # Export JNI symbols. + global: + Java_*; + JNI_OnLoad; + + # Hide everything else + local: + *; +}; diff --git a/icing/jni/icing-search-engine-jni.cc b/icing/jni/icing-search-engine-jni.cc index bf709cd..ea2bcf7 100644 --- a/icing/jni/icing-search-engine-jni.cc +++ b/icing/jni/icing-search-engine-jni.cc @@ -27,6 +27,7 @@ #include "icing/proto/schema.pb.h" #include "icing/proto/scoring.pb.h" #include "icing/proto/search.pb.h" +#include "icing/proto/storage.pb.h" #include "icing/proto/usage.pb.h" #include "icing/util/status-macros.h" @@ -356,12 +357,19 @@ Java_com_google_android_icing_IcingSearchEngine_nativeDeleteByQuery( JNIEXPORT jbyteArray JNICALL Java_com_google_android_icing_IcingSearchEngine_nativePersistToDisk( - JNIEnv* env, jclass clazz, jobject object) { + JNIEnv* env, jclass clazz, jobject object, jint persist_type_code) { icing::lib::IcingSearchEngine* icing = GetIcingSearchEnginePointer(env, object); + if (!icing::lib::PersistType::Code_IsValid(persist_type_code)) { + ICING_LOG(ERROR) << persist_type_code + << " is an invalid value for PersistType::Code"; + return nullptr; + } + icing::lib::PersistType::Code persist_type_code_enum = + 
static_cast<icing::lib::PersistType::Code>(persist_type_code); icing::lib::PersistToDiskResultProto persist_to_disk_result_proto = - icing->PersistToDisk(); + icing->PersistToDisk(persist_type_code_enum); return SerializeProtoToJniByteArray(env, persist_to_disk_result_proto); } @@ -390,6 +398,18 @@ Java_com_google_android_icing_IcingSearchEngine_nativeGetOptimizeInfo( } JNIEXPORT jbyteArray JNICALL +Java_com_google_android_icing_IcingSearchEngine_nativeGetStorageInfo( + JNIEnv* env, jclass clazz, jobject object) { + icing::lib::IcingSearchEngine* icing = + GetIcingSearchEnginePointer(env, object); + + icing::lib::StorageInfoResultProto storage_info_result_proto = + icing->GetStorageInfo(); + + return SerializeProtoToJniByteArray(env, storage_info_result_proto); +} + +JNIEXPORT jbyteArray JNICALL Java_com_google_android_icing_IcingSearchEngine_nativeReset( JNIEnv* env, jclass clazz, jobject object) { icing::lib::IcingSearchEngine* icing = diff --git a/icing/jni/jni-cache.cc b/icing/jni/jni-cache.cc index 58eb8bf..9b75db6 100644 --- a/icing/jni/jni-cache.cc +++ b/icing/jni/jni-cache.cc @@ -14,6 +14,8 @@ #include "icing/jni/jni-cache.h" +#ifdef ICING_REVERSE_JNI_SEGMENTATION + #include "icing/text_classifier/lib3/utils/java/jni-base.h" #include "icing/text_classifier/lib3/utils/java/jni-helper.h" #include "icing/absl_ports/canonical_errors.h" @@ -214,3 +216,5 @@ JniCache::ConvertToJavaString(const char* utf8_text, } // namespace lib } // namespace icing + +#endif // ICING_REVERSE_JNI_SEGMENTATION diff --git a/icing/jni/jni-cache.h b/icing/jni/jni-cache.h index a5f16c7..3faaed6 100644 --- a/icing/jni/jni-cache.h +++ b/icing/jni/jni-cache.h @@ -15,6 +15,16 @@ #ifndef ICING_JNI_JNI_CACHE_H_ #define ICING_JNI_JNI_CACHE_H_ +#ifndef ICING_REVERSE_JNI_SEGMENTATION +namespace icing { +namespace lib { + +class JniCache {}; // Declare an empty class definition for non-Android builds. 
+ +} // namespace lib +} // namespace icing +#else // ICING_REVERSE_JNI_SEGMENTATION + #include <jni.h> #include "icing/text_classifier/lib3/utils/base/statusor.h" @@ -75,4 +85,6 @@ struct JniCache { } // namespace lib } // namespace icing +#endif // !ICING_REVERSE_JNI_SEGMENTATION + #endif // ICING_JNI_JNI_CACHE_H_ diff --git a/icing/performance-configuration.cc b/icing/performance-configuration.cc index aeaa449..4020dd0 100644 --- a/icing/performance-configuration.cc +++ b/icing/performance-configuration.cc @@ -15,6 +15,7 @@ #include "icing/performance-configuration.h" #include "icing/result/result-state.h" +#include "icing/scoring/scored-document-hit.h" namespace icing { namespace lib { @@ -54,38 +55,21 @@ constexpr int kMaxQueryLength = 23000; constexpr int kDefaultNumToScore = 30000; // New Android devices nowadays all allow more than 16 MB memory per app. Using -// that as a guideline, we set 16 MB as the safe memory threshold. +// that as a guideline and being more conservative, we set 4 MB as the safe +// memory threshold. // TODO(b/150029642): Android apps / framework have better understanding of how // much memory is allowed, so it would be better to let clients pass in this // value. -constexpr int kSafeMemoryUsage = 16 * 1024 * 1024; // 16MB +constexpr int kSafeMemoryUsage = 4 * 1024 * 1024; // 4MB -// This number is not determined by benchmarks. We just assume that returning -// the best 1000 scored document hits of a query is enough. To find the best -// 1000 scored document hits from a heap, we need roughly 0.7 ms on a Pixel 3 XL -// according to //icing/scoring:ranker_benchmark. -constexpr int kMaxNumHitsPerQuery = 1000; +// The maximum number of hits that can fit below the kSafeMemoryUsage threshold. +constexpr int kMaxNumTotalHits = kSafeMemoryUsage / sizeof(ScoredDocumentHit); -// A rough estimation of the size of ResultState if it stores the maximum number -// of scored document hits. 
-constexpr int kMaxMemoryPerResult = - sizeof(ResultState) + kMaxNumHitsPerQuery * sizeof(ScoredDocumentHit); - -// To be safer, we assume that all the Results contain the maximum number of -// hits and only use half of the memory allowed. -constexpr int kDefaultNumResultsToCache = - kSafeMemoryUsage / 2 / kMaxMemoryPerResult; - -static_assert( - kDefaultNumResultsToCache > 500, - "Default number of results to cache has changed, please update and make " - "sure it still meets our requirements."); } // namespace PerformanceConfiguration::PerformanceConfiguration() : PerformanceConfiguration(kMaxQueryLength, kDefaultNumToScore, - kMaxNumHitsPerQuery, kDefaultNumResultsToCache) { -} + kMaxNumTotalHits) {} } // namespace lib } // namespace icing diff --git a/icing/performance-configuration.h b/icing/performance-configuration.h index fa4050b..b9282ca 100644 --- a/icing/performance-configuration.h +++ b/icing/performance-configuration.h @@ -24,12 +24,10 @@ struct PerformanceConfiguration { PerformanceConfiguration(); PerformanceConfiguration(int max_query_length_in, int num_to_score_in, - int max_num_hits_per_query_in, - int max_num_cache_results_in) + int max_num_total_hits) : max_query_length(max_query_length_in), num_to_score(num_to_score_in), - max_num_hits_per_query(max_num_hits_per_query_in), - max_num_cache_results(max_num_cache_results_in) {} + max_num_total_hits(max_num_total_hits) {} // Search performance @@ -41,11 +39,9 @@ struct PerformanceConfiguration { // Memory - // Maximum number of ScoredDocumentHits to return per query. - int max_num_hits_per_query; - - // Maximum number of ResultStates to store in ResultStateManager. - int max_num_cache_results; + // Maximum number of ScoredDocumentHits to cache in the ResultStateManager at + // one time. 
+ int max_num_total_hits; }; // TODO(b/149040810): Consider creating a class to manage performance diff --git a/icing/portable/endian.h b/icing/portable/endian.h new file mode 100644 index 0000000..595b956 --- /dev/null +++ b/icing/portable/endian.h @@ -0,0 +1,208 @@ +// Copyright (C) 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Utility functions that depend on bytesex. We define versions of htonll and +// ntohll (HostToNetworkLL and NetworkToHostLL in our naming), as well as +// "Google" versions of all the standards: ghtonl, ghtons, and so on +// (GHostToNetworkL, GHostToNetworkS, etc in our naming). These functions do +// exactly the same as their standard variants, but don't require including the +// dangerous netinet/in.h. 
+ +#ifndef ICING_PORTABLE_ENDIAN_H_ +#define ICING_PORTABLE_ENDIAN_H_ + +#include <cstdint> + +// IS_LITTLE_ENDIAN, IS_BIG_ENDIAN +#if defined OS_LINUX || defined OS_ANDROID || defined(__ANDROID__) +// _BIG_ENDIAN +#include <endian.h> + +#elif defined(__APPLE__) + +// BIG_ENDIAN +#include <machine/endian.h> // NOLINT(build/include) + +/* Let's try and follow the Linux convention */ +#define __BYTE_ORDER BYTE_ORDER +#define __LITTLE_ENDIAN LITTLE_ENDIAN +#define __BIG_ENDIAN BIG_ENDIAN + +#endif // operating system + +// defines __BYTE_ORDER for MSVC +#ifdef COMPILER_MSVC +#define __BYTE_ORDER __LITTLE_ENDIAN +#define IS_LITTLE_ENDIAN +#else // COMPILER_MSVC + +// define the macros IS_LITTLE_ENDIAN or IS_BIG_ENDIAN +// using the above endian definitions from endian.h if +// endian.h was included +#ifdef __BYTE_ORDER +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define IS_LITTLE_ENDIAN +#endif // __BYTE_ORDER == __LITTLE_ENDIAN + +#if __BYTE_ORDER == __BIG_ENDIAN +#define IS_BIG_ENDIAN +#endif // __BYTE_ORDER == __BIG_ENDIAN + +#else // __BYTE_ORDER + +#if defined(__LITTLE_ENDIAN__) +#define IS_LITTLE_ENDIAN +#elif defined(__BIG_ENDIAN__) +#define IS_BIG_ENDIAN +#endif // __LITTLE_ENDIAN__ or __BIG_ENDIAN__ + +#endif // __BYTE_ORDER +#endif // COMPILER_MSVC + +// byte swap functions (bswap_16, bswap_32, bswap_64). +// byte swap functions reverse the order of bytes, e.g. 
+// byteswap of 102030405060708 = 807060504030201 +// byteswap of 1020304 = 4030201 + +// The following guarantees declaration of the byte swap functions +#ifdef COMPILER_MSVC +#include <stdlib.h> // NOLINT(build/include) + +#define bswap_16(x) _byteswap_ushort(x) +#define bswap_32(x) _byteswap_ulong(x) +#define bswap_64(x) _byteswap_uint64(x) + +#elif defined(__APPLE__) +// Mac OS X / Darwin features +#include <libkern/OSByteOrder.h> + +#define bswap_16(x) OSSwapInt16(x) +#define bswap_32(x) OSSwapInt32(x) +#define bswap_64(x) OSSwapInt64(x) + +#elif defined(__GLIBC__) || defined(__BIONIC__) || defined(__ASYLO__) +#include <byteswap.h> // IWYU pragma: export + +#else // built-in byteswap functions + +static inline uint16 bswap_16(uint16 x) { +#ifdef __cplusplus + return static_cast<uint16>(((x & 0xFF) << 8) | ((x & 0xFF00) >> 8)); +#else // __cplusplus + return (uint16)(((x & 0xFF) << 8) | ((x & 0xFF00) >> 8)); // NOLINT +#endif // __cplusplus +} +#define bswap_16(x) bswap_16(x) +static inline uint32 bswap_32(uint32 x) { + return (((x & 0xFF) << 24) | ((x & 0xFF00) << 8) | ((x & 0xFF0000) >> 8) | + ((x & 0xFF000000) >> 24)); +} +#define bswap_32(x) bswap_32(x) +static inline uint64 bswap_64(uint64 x) { + return (((x & (uint64_t)0xFF) << 56) | ((x & (uint64_t)0xFF00) << 40) | + ((x & (uint64_t)0xFF0000) << 24) | ((x & (uint64_t)0xFF000000) << 8) | + ((x & (uint64_t)0xFF00000000) >> 8) | + ((x & (uint64_t)0xFF0000000000) >> 24) | + ((x & (uint64_t)0xFF000000000000) >> 40) | + ((x & (uint64_t)0xFF00000000000000) >> 56)); +} +#define bswap_64(x) bswap_64(x) + +#endif // end byteswap functions + +// Use compiler byte-swapping intrinsics if they are available. 32-bit +// and 64-bit versions are available in Clang and GCC as of GCC 4.3.0. +// The 16-bit version is available in Clang and GCC only as of GCC 4.8.0. +// For simplicity, we enable them all only for GCC 4.8.0 or later. 
+#if defined(__clang__) || \ + (defined(__GNUC__) && \ + ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ >= 5)) + +inline uint64_t gbswap_64(uint64_t host_int) { + return __builtin_bswap64(host_int); +} +inline uint32_t gbswap_32(uint32_t host_int) { + return __builtin_bswap32(host_int); +} +inline uint16_t gbswap_16(uint16_t host_int) { + return __builtin_bswap16(host_int); +} + +#else // intrinsics available + +inline uint64 gbswap_64(uint64 host_int) { +#if defined(__GNUC__) && defined(__x86_64__) && \ + !(defined(__APPLE__) && defined(__MACH__)) + // Adapted from /usr/include/byteswap.h. Not available on Mac. + if (__builtin_constant_p(host_int)) { + return __bswap_constant_64(host_int); + } else { + uint64 result; + __asm__("bswap %0" : "=r"(result) : "0"(host_int)); + return result; + } +#elif defined(bswap_64) + return bswap_64(host_int); +#else // bswap_64 + return static_cast<uint64>(bswap_32(static_cast<uint32>(host_int >> 32))) | + (static_cast<uint64>(bswap_32(static_cast<uint32>(host_int))) << 32); +#endif // bswap_64 +} +inline uint32 gbswap_32(uint32 host_int) { return bswap_32(host_int); } +inline uint16 gbswap_16(uint16 host_int) { return bswap_16(host_int); } + +#endif // intrinsics available + +#ifdef IS_LITTLE_ENDIAN + +// Definitions for ntohl etc. that don't require us to include +// netinet/in.h. We wrap gbswap_32 and gbswap_16 in functions rather +// than just #defining them because in debug mode, gcc doesn't +// correctly handle the (rather involved) definitions of bswap_32. +// gcc guarantees that inline functions are as fast as macros, so +// this isn't a performance hit. 
+inline uint16_t GHostToNetworkS(uint16_t x) { return gbswap_16(x); } +inline uint32_t GHostToNetworkL(uint32_t x) { return gbswap_32(x); } +inline uint64_t GHostToNetworkLL(uint64_t x) { return gbswap_64(x); } + +#elif defined IS_BIG_ENDIAN + +// These definitions are simpler on big-endian machines +// These are functions instead of macros to avoid self-assignment warnings +// on calls such as "i = ghtnol(i);". This also provides type checking. +inline uint16 GHostToNetworkS(uint16 x) { return x; } +inline uint32 GHostToNetworkL(uint32 x) { return x; } +inline uint64 GHostToNetworkLL(uint64 x) { return x; } + +#else // bytesex +#error \ + "Unsupported bytesex: Either IS_BIG_ENDIAN or IS_LITTLE_ENDIAN must be defined" // NOLINT +#endif // bytesex + +#ifndef HostToNetworkLL +// With the rise of 64-bit, some systems are beginning to define this. +#define HostToNetworkLL(x) GHostToNetworkLL(x) +#endif // HostToNetworkLL + +// ntoh* and hton* are the same thing for any size and bytesex, +// since the function is an involution, i.e., its own inverse. +inline uint16_t GNetworkToHostS(uint16_t x) { return GHostToNetworkS(x); } +inline uint32_t GNetworkToHostL(uint32_t x) { return GHostToNetworkL(x); } +inline uint64_t GNetworkToHostLL(uint64_t x) { return GHostToNetworkLL(x); } + +#ifndef NetworkToHostLL +#define NetworkToHostLL(x) GHostToNetworkLL(x) +#endif // NetworkToHostLL + +#endif // ICING_PORTABLE_ENDIAN_H_ diff --git a/icing/testing/platform.h b/icing/portable/platform.h index ad612d5..150eede 100644 --- a/icing/testing/platform.h +++ b/icing/portable/platform.h @@ -12,11 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef ICING_TESTING_PLATFORM_H_ -#define ICING_TESTING_PLATFORM_H_ +#ifndef ICING_PORTABLE_PLATFORM_H_ +#define ICING_PORTABLE_PLATFORM_H_ -// This file is meant to hold util functions for tests that help the test -// determine which platform-specific configuration it may be running in. namespace icing { namespace lib { @@ -36,11 +34,19 @@ inline bool IsReverseJniTokenization() { return false; } -// Whether the running test is an Android test. -inline bool IsAndroidPlatform() { -#if defined(__ANDROID__) +// Whether we're running on android_x86 +inline bool IsAndroidX86() { +#if defined(__ANDROID__) && defined(__i386__) return true; -#endif // defined(__ANDROID__) +#endif // defined(__ANDROID__) && defined(__i386__) + return false; +} + +// Whether we're running on android_armeabi-v7a +inline bool IsAndroidArm() { +#if defined(__ANDROID__) && defined(__arm__) + return true; +#endif // defined(__ANDROID__) && defined(__arm__) return false; } @@ -52,7 +58,27 @@ inline bool IsIosPlatform() { return false; } +enum Architecture { + UNKNOWN, + BIT_32, + BIT_64, +}; + +// Returns which architecture we're running on. 
+// +// Architecture macros pulled from +// https://developer.android.com/ndk/guides/cpu-features +inline Architecture GetArchitecture() { +#if defined(__arm__) || defined(__i386__) + return BIT_32; +#elif defined(__aarch64__) || defined(__x86_64__) + return BIT_64; +#else + return UNKNOWN; +#endif +} + } // namespace lib } // namespace icing -#endif // ICING_TESTING_PLATFORM_H_ +#endif // ICING_PORTABLE_PLATFORM_H_ diff --git a/icing/query/query-processor.cc b/icing/query/query-processor.cc index 0732ed0..1f937fd 100644 --- a/icing/query/query-processor.cc +++ b/icing/query/query-processor.cc @@ -46,7 +46,6 @@ #include "icing/tokenization/tokenizer-factory.h" #include "icing/tokenization/tokenizer.h" #include "icing/transform/normalizer.h" -#include "icing/util/clock.h" #include "icing/util/status-macros.h" namespace icing { @@ -105,31 +104,27 @@ QueryProcessor::Create(Index* index, const LanguageSegmenter* language_segmenter, const Normalizer* normalizer, const DocumentStore* document_store, - const SchemaStore* schema_store, const Clock* clock) { + const SchemaStore* schema_store) { ICING_RETURN_ERROR_IF_NULL(index); ICING_RETURN_ERROR_IF_NULL(language_segmenter); ICING_RETURN_ERROR_IF_NULL(normalizer); ICING_RETURN_ERROR_IF_NULL(document_store); ICING_RETURN_ERROR_IF_NULL(schema_store); - ICING_RETURN_ERROR_IF_NULL(clock); - return std::unique_ptr<QueryProcessor>( - new QueryProcessor(index, language_segmenter, normalizer, document_store, - schema_store, clock)); + return std::unique_ptr<QueryProcessor>(new QueryProcessor( + index, language_segmenter, normalizer, document_store, schema_store)); } QueryProcessor::QueryProcessor(Index* index, const LanguageSegmenter* language_segmenter, const Normalizer* normalizer, const DocumentStore* document_store, - const SchemaStore* schema_store, - const Clock* clock) + const SchemaStore* schema_store) : index_(*index), language_segmenter_(*language_segmenter), normalizer_(*normalizer), document_store_(*document_store), - 
schema_store_(*schema_store), - clock_(*clock) {} + schema_store_(*schema_store) {} DocHitInfoIteratorFilter::Options QueryProcessor::getFilterOptions( const SearchSpecProto& search_spec) { @@ -156,7 +151,7 @@ QueryProcessor::ParseSearch(const SearchSpecProto& search_spec) { DocHitInfoIteratorFilter::Options options = getFilterOptions(search_spec); results.root_iterator = std::make_unique<DocHitInfoIteratorFilter>( std::move(results.root_iterator), &document_store_, &schema_store_, - &clock_, options); + options); return results; } @@ -279,7 +274,7 @@ QueryProcessor::ParseRawQuery(const SearchSpecProto& search_spec) { results.query_term_iterators[normalized_text] = std::make_unique<DocHitInfoIteratorFilter>( std::move(term_iterator), &document_store_, &schema_store_, - &clock_, options); + options); results.query_terms[frames.top().section_restrict].insert( std::move(normalized_text)); diff --git a/icing/query/query-processor.h b/icing/query/query-processor.h index 0932ec5..bdf9ef2 100644 --- a/icing/query/query-processor.h +++ b/icing/query/query-processor.h @@ -27,7 +27,6 @@ #include "icing/store/document-store.h" #include "icing/tokenization/language-segmenter.h" #include "icing/transform/normalizer.h" -#include "icing/util/clock.h" namespace icing { namespace lib { @@ -47,7 +46,7 @@ class QueryProcessor { static libtextclassifier3::StatusOr<std::unique_ptr<QueryProcessor>> Create( Index* index, const LanguageSegmenter* language_segmenter, const Normalizer* normalizer, const DocumentStore* document_store, - const SchemaStore* schema_store, const Clock* clock); + const SchemaStore* schema_store); struct QueryResults { std::unique_ptr<DocHitInfoIterator> root_iterator; @@ -77,7 +76,7 @@ class QueryProcessor { const LanguageSegmenter* language_segmenter, const Normalizer* normalizer, const DocumentStore* document_store, - const SchemaStore* schema_store, const Clock* clock); + const SchemaStore* schema_store); // Parse the query into a one DocHitInfoIterator that 
represents the root of a // query tree. @@ -103,7 +102,6 @@ class QueryProcessor { const Normalizer& normalizer_; const DocumentStore& document_store_; const SchemaStore& schema_store_; - const Clock& clock_; }; } // namespace lib diff --git a/icing/query/query-processor_benchmark.cc b/icing/query/query-processor_benchmark.cc index eb8b7a4..bdd40aa 100644 --- a/icing/query/query-processor_benchmark.cc +++ b/icing/query/query-processor_benchmark.cc @@ -147,7 +147,7 @@ void BM_QueryOneTerm(benchmark::State& state) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index.get(), language_segmenter.get(), normalizer.get(), document_store.get(), - schema_store.get(), &clock)); + schema_store.get())); SearchSpecProto search_spec; search_spec.set_query(input_string); @@ -278,7 +278,7 @@ void BM_QueryFiveTerms(benchmark::State& state) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index.get(), language_segmenter.get(), normalizer.get(), document_store.get(), - schema_store.get(), &clock)); + schema_store.get())); const std::string query_string = absl_ports::StrCat( input_string_a, " ", input_string_b, " ", input_string_c, " ", @@ -402,7 +402,7 @@ void BM_QueryDiacriticTerm(benchmark::State& state) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index.get(), language_segmenter.get(), normalizer.get(), document_store.get(), - schema_store.get(), &clock)); + schema_store.get())); SearchSpecProto search_spec; search_spec.set_query(input_string); @@ -522,7 +522,7 @@ void BM_QueryHiragana(benchmark::State& state) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index.get(), language_segmenter.get(), normalizer.get(), document_store.get(), - schema_store.get(), &clock)); + schema_store.get())); SearchSpecProto search_spec; search_spec.set_query(input_string); diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc index 6ec0a2a..daeb479 100644 --- 
a/icing/query/query-processor_test.cc +++ b/icing/query/query-processor_test.cc @@ -29,9 +29,11 @@ #include "icing/index/iterator/doc-hit-info-iterator-test-util.h" #include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/legacy/index/icing-filesystem.h" +#include "icing/portable/platform.h" #include "icing/proto/schema.pb.h" #include "icing/proto/search.pb.h" #include "icing/proto/term.pb.h" +#include "icing/schema-builder.h" #include "icing/schema/schema-store.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" @@ -39,7 +41,6 @@ #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" #include "icing/testing/jni-test-helpers.h" -#include "icing/testing/platform.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" #include "icing/tokenization/language-segmenter-factory.h" @@ -60,30 +61,16 @@ using ::testing::SizeIs; using ::testing::Test; using ::testing::UnorderedElementsAre; -SchemaTypeConfigProto* AddSchemaType(SchemaProto* schema, - std::string schema_type) { - SchemaTypeConfigProto* type_config = schema->add_types(); - type_config->set_schema_type(schema_type); - return type_config; -} +constexpr PropertyConfigProto_DataType_Code TYPE_STRING = + PropertyConfigProto_DataType_Code_STRING; -void AddIndexedProperty(SchemaTypeConfigProto* type_config, std::string name) { - PropertyConfigProto* property_config = type_config->add_properties(); - property_config->set_property_name(name); - property_config->set_data_type(PropertyConfigProto::DataType::STRING); - property_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - property_config->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::EXACT_ONLY); - property_config->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); -} +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = + PropertyConfigProto_Cardinality_Code_OPTIONAL; 
-void AddUnindexedProperty(SchemaTypeConfigProto* type_config, - std::string name) { - PropertyConfigProto* property_config = type_config->add_properties(); - property_config->set_property_name(name); - property_config->set_data_type(PropertyConfigProto::DataType::STRING); -} +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = + StringIndexingConfig_TokenizerType_Code_PLAIN; + +constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; class QueryProcessorTest : public Test { protected: @@ -159,37 +146,33 @@ TEST_F(QueryProcessorTest, CreationWithNullPointerShouldFail) { EXPECT_THAT( QueryProcessor::Create(/*index=*/nullptr, language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_), + schema_store_.get()), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); EXPECT_THAT( QueryProcessor::Create(index_.get(), /*language_segmenter=*/nullptr, normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_), + schema_store_.get()), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); EXPECT_THAT( QueryProcessor::Create(index_.get(), language_segmenter_.get(), /*normalizer=*/nullptr, document_store_.get(), - schema_store_.get(), &fake_clock_), + schema_store_.get()), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); - EXPECT_THAT( - QueryProcessor::Create(index_.get(), language_segmenter_.get(), - normalizer_.get(), /*document_store=*/nullptr, - schema_store_.get(), &fake_clock_), - StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); - EXPECT_THAT(QueryProcessor::Create(index_.get(), language_segmenter_.get(), - normalizer_.get(), document_store_.get(), - /*schema_store=*/nullptr, &fake_clock_), + EXPECT_THAT(QueryProcessor::Create( + index_.get(), language_segmenter_.get(), normalizer_.get(), + /*document_store=*/nullptr, schema_store_.get()), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); 
EXPECT_THAT(QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), /*clock=*/nullptr), + /*schema_store=*/nullptr), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); } TEST_F(QueryProcessorTest, EmptyGroupMatchAllDocuments) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -221,7 +204,7 @@ TEST_F(QueryProcessorTest, EmptyGroupMatchAllDocuments) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; search_spec.set_query("()"); @@ -238,8 +221,9 @@ TEST_F(QueryProcessorTest, EmptyGroupMatchAllDocuments) { TEST_F(QueryProcessorTest, EmptyQueryMatchAllDocuments) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -271,7 +255,7 @@ TEST_F(QueryProcessorTest, EmptyQueryMatchAllDocuments) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; search_spec.set_query(""); @@ -288,8 +272,9 @@ TEST_F(QueryProcessorTest, EmptyQueryMatchAllDocuments) { TEST_F(QueryProcessorTest, QueryTermNormalized) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); 
ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -330,7 +315,7 @@ TEST_F(QueryProcessorTest, QueryTermNormalized) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; search_spec.set_query("hElLo WORLD"); @@ -363,8 +348,9 @@ TEST_F(QueryProcessorTest, QueryTermNormalized) { TEST_F(QueryProcessorTest, OneTermPrefixMatch) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -402,7 +388,7 @@ TEST_F(QueryProcessorTest, OneTermPrefixMatch) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; search_spec.set_query("he"); @@ -430,8 +416,9 @@ TEST_F(QueryProcessorTest, OneTermPrefixMatch) { TEST_F(QueryProcessorTest, OneTermExactMatch) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -469,7 +456,7 @@ TEST_F(QueryProcessorTest, OneTermExactMatch) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; search_spec.set_query("hello"); @@ -497,8 +484,9 @@ TEST_F(QueryProcessorTest, OneTermExactMatch) { TEST_F(QueryProcessorTest, AndSameTermExactMatch) { // Create the schema and document store - SchemaProto 
schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -536,7 +524,7 @@ TEST_F(QueryProcessorTest, AndSameTermExactMatch) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; search_spec.set_query("hello hello"); @@ -566,8 +554,9 @@ TEST_F(QueryProcessorTest, AndSameTermExactMatch) { TEST_F(QueryProcessorTest, AndTwoTermExactMatch) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -608,7 +597,7 @@ TEST_F(QueryProcessorTest, AndTwoTermExactMatch) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; search_spec.set_query("hello world"); @@ -640,8 +629,9 @@ TEST_F(QueryProcessorTest, AndTwoTermExactMatch) { TEST_F(QueryProcessorTest, AndSameTermPrefixMatch) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -679,7 +669,7 @@ TEST_F(QueryProcessorTest, AndSameTermPrefixMatch) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; search_spec.set_query("he he"); 
@@ -709,8 +699,9 @@ TEST_F(QueryProcessorTest, AndSameTermPrefixMatch) { TEST_F(QueryProcessorTest, AndTwoTermPrefixMatch) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -751,7 +742,7 @@ TEST_F(QueryProcessorTest, AndTwoTermPrefixMatch) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; search_spec.set_query("he wo"); @@ -784,8 +775,9 @@ TEST_F(QueryProcessorTest, AndTwoTermPrefixMatch) { TEST_F(QueryProcessorTest, AndTwoTermPrefixAndExactMatch) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -826,7 +818,7 @@ TEST_F(QueryProcessorTest, AndTwoTermPrefixAndExactMatch) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; search_spec.set_query("hello wo"); @@ -859,8 +851,9 @@ TEST_F(QueryProcessorTest, AndTwoTermPrefixAndExactMatch) { TEST_F(QueryProcessorTest, OrTwoTermExactMatch) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -906,7 +899,7 @@ TEST_F(QueryProcessorTest, OrTwoTermExactMatch) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), 
language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; search_spec.set_query("hello OR world"); @@ -947,8 +940,9 @@ TEST_F(QueryProcessorTest, OrTwoTermExactMatch) { TEST_F(QueryProcessorTest, OrTwoTermPrefixMatch) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -994,7 +988,7 @@ TEST_F(QueryProcessorTest, OrTwoTermPrefixMatch) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; search_spec.set_query("he OR wo"); @@ -1034,8 +1028,9 @@ TEST_F(QueryProcessorTest, OrTwoTermPrefixMatch) { TEST_F(QueryProcessorTest, OrTwoTermPrefixAndExactMatch) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -1080,7 +1075,7 @@ TEST_F(QueryProcessorTest, OrTwoTermPrefixAndExactMatch) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; search_spec.set_query("hello OR wo"); @@ -1120,8 +1115,9 @@ TEST_F(QueryProcessorTest, OrTwoTermPrefixAndExactMatch) { TEST_F(QueryProcessorTest, CombinedAndOrTerms) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + 
.Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -1179,7 +1175,7 @@ TEST_F(QueryProcessorTest, CombinedAndOrTerms) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); { // OR gets precedence over AND, this is parsed as ((puppy OR kitten) AND @@ -1305,8 +1301,9 @@ TEST_F(QueryProcessorTest, CombinedAndOrTerms) { TEST_F(QueryProcessorTest, OneGroup) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -1356,7 +1353,7 @@ TEST_F(QueryProcessorTest, OneGroup) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); // Without grouping, this would be parsed as ((puppy OR kitten) AND foo) and // no documents would match. But with grouping, Document 1 matches puppy @@ -1380,8 +1377,9 @@ TEST_F(QueryProcessorTest, OneGroup) { TEST_F(QueryProcessorTest, TwoGroups) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -1430,7 +1428,7 @@ TEST_F(QueryProcessorTest, TwoGroups) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); // Without grouping, this would be parsed as (puppy AND (dog OR kitten) AND // cat) and wouldn't match any documents. 
But with grouping, Document 1 @@ -1457,8 +1455,9 @@ TEST_F(QueryProcessorTest, TwoGroups) { TEST_F(QueryProcessorTest, ManyLevelNestedGrouping) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -1508,7 +1507,7 @@ TEST_F(QueryProcessorTest, ManyLevelNestedGrouping) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); // Without grouping, this would be parsed as ((puppy OR kitten) AND foo) and // no documents would match. But with grouping, Document 1 matches puppy @@ -1532,8 +1531,9 @@ TEST_F(QueryProcessorTest, ManyLevelNestedGrouping) { TEST_F(QueryProcessorTest, OneLevelNestedGrouping) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -1583,7 +1583,7 @@ TEST_F(QueryProcessorTest, OneLevelNestedGrouping) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); // Document 1 will match puppy and Document 2 matches (kitten AND (cat)) SearchSpecProto search_spec; @@ -1608,8 +1608,9 @@ TEST_F(QueryProcessorTest, OneLevelNestedGrouping) { TEST_F(QueryProcessorTest, ExcludeTerm) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -1652,7 +1653,7 @@ 
TEST_F(QueryProcessorTest, ExcludeTerm) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; search_spec.set_query("-hello"); @@ -1672,8 +1673,9 @@ TEST_F(QueryProcessorTest, ExcludeTerm) { TEST_F(QueryProcessorTest, ExcludeNonexistentTerm) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -1715,7 +1717,7 @@ TEST_F(QueryProcessorTest, ExcludeNonexistentTerm) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; search_spec.set_query("-foo"); @@ -1734,8 +1736,9 @@ TEST_F(QueryProcessorTest, ExcludeNonexistentTerm) { TEST_F(QueryProcessorTest, ExcludeAnd) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -1785,7 +1788,7 @@ TEST_F(QueryProcessorTest, ExcludeAnd) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); { SearchSpecProto search_spec; @@ -1823,8 +1826,9 @@ TEST_F(QueryProcessorTest, ExcludeAnd) { TEST_F(QueryProcessorTest, ExcludeOr) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + 
.AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -1874,7 +1878,7 @@ TEST_F(QueryProcessorTest, ExcludeOr) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); { SearchSpecProto search_spec; @@ -1918,8 +1922,9 @@ TEST_F(QueryProcessorTest, ExcludeOr) { TEST_F(QueryProcessorTest, DeletedFilter) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -1970,7 +1975,7 @@ TEST_F(QueryProcessorTest, DeletedFilter) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; search_spec.set_query("animal"); @@ -1991,8 +1996,9 @@ TEST_F(QueryProcessorTest, DeletedFilter) { TEST_F(QueryProcessorTest, NamespaceFilter) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -2042,7 +2048,7 @@ TEST_F(QueryProcessorTest, NamespaceFilter) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; search_spec.set_query("animal"); @@ -2064,9 +2070,11 @@ TEST_F(QueryProcessorTest, NamespaceFilter) { TEST_F(QueryProcessorTest, SchemaTypeFilter) { // Create the schema and document store - SchemaProto 
schema; - AddSchemaType(&schema, "email"); - AddSchemaType(&schema, "message"); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .AddType(SchemaTypeConfigBuilder().SetType("message")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -2112,7 +2120,7 @@ TEST_F(QueryProcessorTest, SchemaTypeFilter) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; search_spec.set_query("animal"); @@ -2134,11 +2142,15 @@ TEST_F(QueryProcessorTest, SchemaTypeFilter) { TEST_F(QueryProcessorTest, SectionFilterForOneDocument) { // Create the schema and document store - SchemaProto schema; - SchemaTypeConfigProto* email_type = AddSchemaType(&schema, "email"); - + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); // First and only indexed property, so it gets a section_id of 0 - AddIndexedProperty(email_type, "subject"); int subject_section_id = 0; ICING_ASSERT_OK_AND_ASSIGN( @@ -2174,7 +2186,7 @@ TEST_F(QueryProcessorTest, SectionFilterForOneDocument) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; // Create a section filter '<section name>:<query term>' @@ -2196,18 +2208,31 @@ TEST_F(QueryProcessorTest, SectionFilterForOneDocument) { TEST_F(QueryProcessorTest, SectionFilterAcrossSchemaTypes) { // Create the schema and document store - SchemaProto schema; - SchemaTypeConfigProto* email_type = AddSchemaType(&schema, "email"); - // 
SectionIds are assigned in ascending order per schema type, - // alphabetically. - AddIndexedProperty(email_type, "a"); // Section "a" would get sectionId 0 - AddIndexedProperty(email_type, "foo"); - int email_foo_section_id = 1; + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("email") + // Section "a" would get sectionId 0 + .AddProperty( + PropertyConfigBuilder() + .SetName("a") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("foo") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder().SetType("message").AddProperty( + PropertyConfigBuilder() + .SetName("foo") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); - SchemaTypeConfigProto* message_type = AddSchemaType(&schema, "message"); // SectionIds are assigned in ascending order per schema type, // alphabetically. - AddIndexedProperty(message_type, "foo"); + int email_foo_section_id = 1; int message_foo_section_id = 0; ICING_ASSERT_OK_AND_ASSIGN( @@ -2253,7 +2278,7 @@ TEST_F(QueryProcessorTest, SectionFilterAcrossSchemaTypes) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; // Create a section filter '<section name>:<query term>' @@ -2277,18 +2302,20 @@ TEST_F(QueryProcessorTest, SectionFilterAcrossSchemaTypes) { } TEST_F(QueryProcessorTest, SectionFilterWithinSchemaType) { - // Create the schema and document store - SchemaProto schema; - SchemaTypeConfigProto* email_type = AddSchemaType(&schema, "email"); - // SectionIds are assigned in ascending order per schema type, - // alphabetically. 
- AddIndexedProperty(email_type, "foo"); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty( + PropertyConfigBuilder() + .SetName("foo") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder().SetType("message").AddProperty( + PropertyConfigBuilder() + .SetName("foo") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); int email_foo_section_id = 0; - - SchemaTypeConfigProto* message_type = AddSchemaType(&schema, "message"); - // SectionIds are assigned in ascending order per schema type, - // alphabetically. - AddIndexedProperty(message_type, "foo"); int message_foo_section_id = 0; ICING_ASSERT_OK_AND_ASSIGN( @@ -2334,7 +2361,7 @@ TEST_F(QueryProcessorTest, SectionFilterWithinSchemaType) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; // Create a section filter '<section name>:<query term>', but only look @@ -2359,17 +2386,20 @@ TEST_F(QueryProcessorTest, SectionFilterWithinSchemaType) { TEST_F(QueryProcessorTest, SectionFilterRespectsDifferentSectionIds) { // Create the schema and document store - SchemaProto schema; - SchemaTypeConfigProto* email_type = AddSchemaType(&schema, "email"); - // SectionIds are assigned in ascending order per schema type, - // alphabetically. 
- AddIndexedProperty(email_type, "foo"); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty( + PropertyConfigBuilder() + .SetName("foo") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder().SetType("message").AddProperty( + PropertyConfigBuilder() + .SetName("bar") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); int email_foo_section_id = 0; - - SchemaTypeConfigProto* message_type = AddSchemaType(&schema, "message"); - // SectionIds are assigned in ascending order per schema type, - // alphabetically. - AddIndexedProperty(message_type, "bar"); int message_foo_section_id = 0; ICING_ASSERT_OK_AND_ASSIGN( @@ -2417,7 +2447,7 @@ TEST_F(QueryProcessorTest, SectionFilterRespectsDifferentSectionIds) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; // Create a section filter '<section name>:<query term>', but only look @@ -2441,8 +2471,9 @@ TEST_F(QueryProcessorTest, SectionFilterRespectsDifferentSectionIds) { TEST_F(QueryProcessorTest, NonexistentSectionFilterReturnsEmptyResults) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -2477,7 +2508,7 @@ TEST_F(QueryProcessorTest, NonexistentSectionFilterReturnsEmptyResults) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; // Create a section filter 
'<section name>:<query term>', but only look @@ -2499,9 +2530,17 @@ TEST_F(QueryProcessorTest, NonexistentSectionFilterReturnsEmptyResults) { TEST_F(QueryProcessorTest, UnindexedSectionFilterReturnsEmptyResults) { // Create the schema and document store - SchemaProto schema; - SchemaTypeConfigProto* email_type = AddSchemaType(&schema, "email"); - AddUnindexedProperty(email_type, "foo"); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("email") + // Add an unindexed property so we generate section + // metadata on it + .AddProperty(PropertyConfigBuilder() + .SetName("foo") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -2536,7 +2575,7 @@ TEST_F(QueryProcessorTest, UnindexedSectionFilterReturnsEmptyResults) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; // Create a section filter '<section name>:<query term>', but only look @@ -2557,17 +2596,20 @@ TEST_F(QueryProcessorTest, UnindexedSectionFilterReturnsEmptyResults) { TEST_F(QueryProcessorTest, SectionFilterTermAndUnrestrictedTerm) { // Create the schema and document store - SchemaProto schema; - SchemaTypeConfigProto* email_type = AddSchemaType(&schema, "email"); - // SectionIds are assigned in ascending order per schema type, - // alphabetically. 
- AddIndexedProperty(email_type, "foo"); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty( + PropertyConfigBuilder() + .SetName("foo") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder().SetType("message").AddProperty( + PropertyConfigBuilder() + .SetName("foo") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); int email_foo_section_id = 0; - - SchemaTypeConfigProto* message_type = AddSchemaType(&schema, "message"); - // SectionIds are assigned in ascending order per schema type, - // alphabetically. - AddIndexedProperty(message_type, "foo"); int message_foo_section_id = 0; ICING_ASSERT_OK_AND_ASSIGN( @@ -2615,7 +2657,7 @@ TEST_F(QueryProcessorTest, SectionFilterTermAndUnrestrictedTerm) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; // Create a section filter '<section name>:<query term>' @@ -2641,27 +2683,34 @@ TEST_F(QueryProcessorTest, SectionFilterTermAndUnrestrictedTerm) { TEST_F(QueryProcessorTest, DocumentBeforeTtlNotFilteredOut) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + // Arbitrary value, just has to be less than the document's creation + // timestamp + ttl + FakeClock fake_clock; + fake_clock.SetSystemTimeMilliseconds(50); + ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, store_dir_, 
&fake_clock_, + DocumentStore::Create(&filesystem_, store_dir_, &fake_clock, schema_store_.get())); document_store_ = std::move(create_result.document_store); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, - document_store_->Put(DocumentBuilder() - .SetKey("namespace", "1") - .SetSchema("email") - .SetCreationTimestampMs(0) - .SetTtlMs(100) - .Build())); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id, + document_store_->Put(DocumentBuilder() + .SetKey("namespace", "1") + .SetSchema("email") + .SetCreationTimestampMs(10) + .SetTtlMs(100) + .Build())); // Populate the index int section_id = 0; @@ -2671,17 +2720,12 @@ TEST_F(QueryProcessorTest, DocumentBeforeTtlNotFilteredOut) { AddTokenToIndex(document_id, section_id, term_match_type, "hello"), IsOk()); - // Arbitrary value, just has to be less than the document's creation - // timestamp + ttl - FakeClock fake_clock; - fake_clock.SetSystemTimeMilliseconds(50); - // Perform query ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock_)); + schema_store_.get())); SearchSpecProto search_spec; search_spec.set_query("hello"); @@ -2698,27 +2742,34 @@ TEST_F(QueryProcessorTest, DocumentBeforeTtlNotFilteredOut) { TEST_F(QueryProcessorTest, DocumentPastTtlFilteredOut) { // Create the schema and document store - SchemaProto schema; - AddSchemaType(&schema, "email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + // Arbitrary value, just has to be greater than the document's creation + // timestamp + ttl + FakeClock fake_clock; + fake_clock.SetSystemTimeMilliseconds(200); + ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult 
create_result, - DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_, + DocumentStore::Create(&filesystem_, store_dir_, &fake_clock, schema_store_.get())); document_store_ = std::move(create_result.document_store); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, - document_store_->Put(DocumentBuilder() - .SetKey("namespace", "1") - .SetSchema("email") - .SetCreationTimestampMs(0) - .SetTtlMs(100) - .Build())); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id, + document_store_->Put(DocumentBuilder() + .SetKey("namespace", "1") + .SetSchema("email") + .SetCreationTimestampMs(50) + .SetTtlMs(100) + .Build())); // Populate the index int section_id = 0; @@ -2728,17 +2779,12 @@ TEST_F(QueryProcessorTest, DocumentPastTtlFilteredOut) { AddTokenToIndex(document_id, section_id, term_match_type, "hello"), IsOk()); - // Arbitrary value, just has to be greater than the document's creation - // timestamp + ttl - FakeClock fake_clock; - fake_clock.SetSystemTimeMilliseconds(200); - // Perform query ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - schema_store_.get(), &fake_clock)); + schema_store_.get())); SearchSpecProto search_spec; search_spec.set_query("hello"); diff --git a/icing/result/result-retriever.cc b/icing/result/result-retriever.cc index 85e78a8..943350c 100644 --- a/icing/result/result-retriever.cc +++ b/icing/result/result-retriever.cc @@ -107,6 +107,7 @@ ResultRetriever::RetrieveResults( // Add the document, itself. 
*result.mutable_document() = std::move(document); + result.set_score(scored_document_hit.score()); search_results.push_back(std::move(result)); } return search_results; diff --git a/icing/result/result-retriever_test.cc b/icing/result/result-retriever_test.cc index 7cb2d62..1c9684d 100644 --- a/icing/result/result-retriever_test.cc +++ b/icing/result/result-retriever_test.cc @@ -24,17 +24,18 @@ #include "icing/file/mock-filesystem.h" #include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/portable/equals-proto.h" +#include "icing/portable/platform.h" #include "icing/proto/document.pb.h" #include "icing/proto/schema.pb.h" #include "icing/proto/search.pb.h" #include "icing/proto/term.pb.h" #include "icing/result/projection-tree.h" +#include "icing/schema-builder.h" #include "icing/schema/schema-store.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" -#include "icing/testing/platform.h" #include "icing/testing/snippet-helpers.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" @@ -54,6 +55,15 @@ using ::testing::IsEmpty; using ::testing::Return; using ::testing::SizeIs; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = + PropertyConfigProto_Cardinality_Code_OPTIONAL; + +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = + StringIndexingConfig_TokenizerType_Code_PLAIN; + +constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; +constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX; + class ResultRetrieverTest : public testing::Test { protected: ResultRetrieverTest() : test_dir_(GetTestTempDir() + "/icing") { @@ -78,65 +88,47 @@ class ResultRetrieverTest : public testing::Test { ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create( /*max_term_byte_size=*/10000)); - ASSERT_THAT(schema_store_->SetSchema(CreatePersonAndEmailSchema()), IsOk()); + 
SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_EXACT, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeDocument( + "Person", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType( + SchemaTypeConfigBuilder() + .SetType("Person") + .AddProperty( + PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("emailAddress") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); } void TearDown() override { filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); } - SchemaProto CreatePersonAndEmailSchema() { - SchemaProto schema; - - auto* type = schema.add_types(); - type->set_schema_type("Email"); - - auto* subj = type->add_properties(); - subj->set_property_name("name"); - subj->set_data_type(PropertyConfigProto::DataType::STRING); - subj->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - subj->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::PREFIX); - subj->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - auto* body = type->add_properties(); - body->set_property_name("body"); - body->set_data_type(PropertyConfigProto::DataType::STRING); - body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - body->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::EXACT_ONLY); - 
body->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - auto* sender = type->add_properties(); - sender->set_property_name("sender"); - sender->set_schema_type("Person"); - sender->set_data_type(PropertyConfigProto::DataType::DOCUMENT); - sender->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - sender->mutable_document_indexing_config()->set_index_nested_properties( - true); - - auto* person_type = schema.add_types(); - person_type->set_schema_type("Person"); - auto* name = person_type->add_properties(); - name->set_property_name("name"); - name->set_data_type(PropertyConfigProto::DataType::STRING); - name->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - name->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::PREFIX); - name->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - auto* address = person_type->add_properties(); - address->set_property_name("emailAddress"); - address->set_data_type(PropertyConfigProto::DataType::STRING); - address->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - address->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::PREFIX); - address->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - - return schema; - } - SectionId GetSectionId(const std::string& type, const std::string& property) { auto type_id_or = schema_store_->GetSchemaTypeId(type); if (!type_id_or.ok()) { @@ -236,9 +228,9 @@ TEST_F(ResultRetrieverTest, ShouldRetrieveSimpleResults) { GetSectionId("Email", "body")}; SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, hit_section_id_mask, /*score=*/0}, - {document_id2, hit_section_id_mask, /*score=*/0}, - {document_id3, hit_section_id_mask, /*score=*/0}}; + {document_id1, hit_section_id_mask, 
/*score=*/19}, + {document_id2, hit_section_id_mask, /*score=*/5}, + {document_id3, hit_section_id_mask, /*score=*/1}}; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetriever> result_retriever, ResultRetriever::Create(doc_store.get(), schema_store_.get(), @@ -246,10 +238,13 @@ TEST_F(ResultRetrieverTest, ShouldRetrieveSimpleResults) { SearchResultProto::ResultProto result1; *result1.mutable_document() = CreateDocument(/*id=*/1); + result1.set_score(19); SearchResultProto::ResultProto result2; *result2.mutable_document() = CreateDocument(/*id=*/2); + result2.set_score(5); SearchResultProto::ResultProto result3; *result3.mutable_document() = CreateDocument(/*id=*/3); + result3.set_score(1); SnippetContext snippet_context( /*query_terms_in=*/{}, @@ -285,8 +280,8 @@ TEST_F(ResultRetrieverTest, IgnoreErrors) { GetSectionId("Email", "body")}; SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, hit_section_id_mask, /*score=*/0}, - {document_id2, hit_section_id_mask, /*score=*/0}, + {document_id1, hit_section_id_mask, /*score=*/12}, + {document_id2, hit_section_id_mask, /*score=*/4}, {invalid_document_id, hit_section_id_mask, /*score=*/0}}; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetriever> result_retriever, @@ -296,8 +291,10 @@ TEST_F(ResultRetrieverTest, IgnoreErrors) { SearchResultProto::ResultProto result1; *result1.mutable_document() = CreateDocument(/*id=*/1); + result1.set_score(12); SearchResultProto::ResultProto result2; *result2.mutable_document() = CreateDocument(/*id=*/2); + result2.set_score(4); SnippetContext snippet_context( /*query_terms_in=*/{}, @@ -495,35 +492,63 @@ TEST_F(ResultRetrieverTest, SimpleSnippeted) { std::vector<SearchResultProto::ResultProto> result, result_retriever->RetrieveResults(page_result_state)); EXPECT_THAT(result, SizeIs(3)); - EXPECT_THAT(result[0].document(), EqualsProto(CreateDocument(/*id=*/1))); - 
EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "name", 0), - Eq("subject foo 1")); - EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "name", 0), - Eq("foo")); - EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "body", 0), - Eq("body bar 1")); - EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "body", 0), - Eq("bar")); - EXPECT_THAT(result[1].document(), EqualsProto(CreateDocument(/*id=*/2))); - EXPECT_THAT(GetWindow(result[1].document(), result[1].snippet(), "name", 0), - Eq("subject foo 2")); - EXPECT_THAT(GetMatch(result[1].document(), result[1].snippet(), "name", 0), - Eq("foo")); - EXPECT_THAT(GetWindow(result[1].document(), result[1].snippet(), "body", 0), - Eq("body bar 2")); - EXPECT_THAT(GetMatch(result[1].document(), result[1].snippet(), "body", 0), - Eq("bar")); - - EXPECT_THAT(result[2].document(), EqualsProto(CreateDocument(/*id=*/3))); - EXPECT_THAT(GetWindow(result[2].document(), result[2].snippet(), "name", 0), - Eq("subject foo 3")); - EXPECT_THAT(GetMatch(result[2].document(), result[2].snippet(), "name", 0), - Eq("foo")); - EXPECT_THAT(GetWindow(result[2].document(), result[2].snippet(), "body", 0), - Eq("body bar 3")); - EXPECT_THAT(GetMatch(result[2].document(), result[2].snippet(), "body", 0), - Eq("bar")); + const DocumentProto& result_document_one = result.at(0).document(); + const SnippetProto& result_snippet_one = result.at(0).snippet(); + EXPECT_THAT(result_document_one, EqualsProto(CreateDocument(/*id=*/1))); + EXPECT_THAT(result_snippet_one.entries(), SizeIs(2)); + EXPECT_THAT(result_snippet_one.entries(0).property_name(), Eq("body")); + std::string_view content = GetString( + &result_document_one, result_snippet_one.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, result_snippet_one.entries(0)), + ElementsAre("body bar 1")); + EXPECT_THAT(GetMatches(content, result_snippet_one.entries(0)), + ElementsAre("bar")); + 
EXPECT_THAT(result_snippet_one.entries(1).property_name(), Eq("name")); + content = GetString(&result_document_one, + result_snippet_one.entries(1).property_name()); + EXPECT_THAT(GetWindows(content, result_snippet_one.entries(1)), + ElementsAre("subject foo 1")); + EXPECT_THAT(GetMatches(content, result_snippet_one.entries(1)), + ElementsAre("foo")); + + const DocumentProto& result_document_two = result.at(1).document(); + const SnippetProto& result_snippet_two = result.at(1).snippet(); + EXPECT_THAT(result_document_two, EqualsProto(CreateDocument(/*id=*/2))); + EXPECT_THAT(result_snippet_two.entries(), SizeIs(2)); + EXPECT_THAT(result_snippet_two.entries(0).property_name(), Eq("body")); + content = GetString(&result_document_two, + result_snippet_two.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, result_snippet_two.entries(0)), + ElementsAre("body bar 2")); + EXPECT_THAT(GetMatches(content, result_snippet_two.entries(0)), + ElementsAre("bar")); + EXPECT_THAT(result_snippet_two.entries(1).property_name(), Eq("name")); + content = GetString(&result_document_two, + result_snippet_two.entries(1).property_name()); + EXPECT_THAT(GetWindows(content, result_snippet_two.entries(1)), + ElementsAre("subject foo 2")); + EXPECT_THAT(GetMatches(content, result_snippet_two.entries(1)), + ElementsAre("foo")); + + const DocumentProto& result_document_three = result.at(2).document(); + const SnippetProto& result_snippet_three = result.at(2).snippet(); + EXPECT_THAT(result_document_three, EqualsProto(CreateDocument(/*id=*/3))); + EXPECT_THAT(result_snippet_three.entries(), SizeIs(2)); + EXPECT_THAT(result_snippet_three.entries(0).property_name(), Eq("body")); + content = GetString(&result_document_three, + result_snippet_three.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, result_snippet_three.entries(0)), + ElementsAre("body bar 3")); + EXPECT_THAT(GetMatches(content, result_snippet_three.entries(0)), + ElementsAre("bar")); + 
EXPECT_THAT(result_snippet_three.entries(1).property_name(), Eq("name")); + content = GetString(&result_document_three, + result_snippet_three.entries(1).property_name()); + EXPECT_THAT(GetWindows(content, result_snippet_three.entries(1)), + ElementsAre("subject foo 3")); + EXPECT_THAT(GetMatches(content, result_snippet_three.entries(1)), + ElementsAre("foo")); } TEST_F(ResultRetrieverTest, OnlyOneDocumentSnippeted) { @@ -568,15 +593,25 @@ TEST_F(ResultRetrieverTest, OnlyOneDocumentSnippeted) { std::vector<SearchResultProto::ResultProto> result, result_retriever->RetrieveResults(page_result_state)); EXPECT_THAT(result, SizeIs(3)); - EXPECT_THAT(result[0].document(), EqualsProto(CreateDocument(/*id=*/1))); - EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "name", 0), - Eq("subject foo 1")); - EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "name", 0), - Eq("foo")); - EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "body", 0), - Eq("body bar 1")); - EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "body", 0), - Eq("bar")); + + const DocumentProto& result_document = result.at(0).document(); + const SnippetProto& result_snippet = result.at(0).snippet(); + EXPECT_THAT(result_document, EqualsProto(CreateDocument(/*id=*/1))); + EXPECT_THAT(result_snippet.entries(), SizeIs(2)); + EXPECT_THAT(result_snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&result_document, result_snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, result_snippet.entries(0)), + ElementsAre("body bar 1")); + EXPECT_THAT(GetMatches(content, result_snippet.entries(0)), + ElementsAre("bar")); + EXPECT_THAT(result_snippet.entries(1).property_name(), Eq("name")); + content = + GetString(&result_document, result_snippet.entries(1).property_name()); + EXPECT_THAT(GetWindows(content, result_snippet.entries(1)), + ElementsAre("subject foo 1")); + EXPECT_THAT(GetMatches(content, 
result_snippet.entries(1)), + ElementsAre("foo")); EXPECT_THAT(result[1].document(), EqualsProto(CreateDocument(/*id=*/2))); EXPECT_THAT(result[1].snippet(), diff --git a/icing/result/result-state-manager.cc b/icing/result/result-state-manager.cc index 0f27d9e..d606e79 100644 --- a/icing/result/result-state-manager.cc +++ b/icing/result/result-state-manager.cc @@ -16,15 +16,17 @@ #include "icing/proto/search.pb.h" #include "icing/util/clock.h" +#include "icing/util/logging.h" #include "icing/util/status-macros.h" namespace icing { namespace lib { -ResultStateManager::ResultStateManager(int max_hits_per_query, - int max_result_states) - : max_hits_per_query_(max_hits_per_query), - max_result_states_(max_result_states), +ResultStateManager::ResultStateManager(int max_total_hits, + const DocumentStore& document_store) + : document_store_(document_store), + max_total_hits_(max_total_hits), + num_total_hits_(0), random_generator_(GetSteadyTimeNanoseconds()) {} libtextclassifier3::StatusOr<PageResultState> @@ -33,16 +35,13 @@ ResultStateManager::RankAndPaginate(ResultState result_state) { return absl_ports::InvalidArgumentError("ResultState has no results"); } - // Truncates scored document hits so that they don't take up too much space. - result_state.TruncateHitsTo(max_hits_per_query_); - // Gets the number before calling GetNextPage() because num_returned() may // change after returning more results. 
int num_previously_returned = result_state.num_returned(); int num_per_page = result_state.num_per_page(); std::vector<ScoredDocumentHit> page_result_document_hits = - result_state.GetNextPage(); + result_state.GetNextPage(document_store_); SnippetContext snippet_context_copy = result_state.snippet_context(); @@ -68,10 +67,12 @@ ResultStateManager::RankAndPaginate(ResultState result_state) { } uint64_t ResultStateManager::Add(ResultState result_state) { - RemoveStatesIfNeeded(); + RemoveStatesIfNeeded(result_state); + result_state.TruncateHitsTo(max_total_hits_); uint64_t new_token = GetUniqueToken(); + num_total_hits_ += result_state.num_remaining(); result_state_map_.emplace(new_token, std::move(result_state)); // Tracks the insertion order token_queue_.push(new_token); @@ -91,7 +92,7 @@ libtextclassifier3::StatusOr<PageResultState> ResultStateManager::GetNextPage( int num_returned = state_iterator->second.num_returned(); int num_per_page = state_iterator->second.num_per_page(); std::vector<ScoredDocumentHit> result_of_page = - state_iterator->second.GetNextPage(); + state_iterator->second.GetNextPage(document_store_); if (result_of_page.empty()) { // This shouldn't happen, all our active states should contain results, but // a sanity check here in case of any data inconsistency. 
@@ -112,6 +113,7 @@ libtextclassifier3::StatusOr<PageResultState> ResultStateManager::GetNextPage( next_page_token = kInvalidNextPageToken; } + num_total_hits_ -= result_of_page.size(); return PageResultState( result_of_page, next_page_token, std::move(snippet_context_copy), std::move(projection_tree_map_copy), num_returned, num_per_page); @@ -129,10 +131,14 @@ void ResultStateManager::InvalidateResultState(uint64_t next_page_token) { void ResultStateManager::InvalidateAllResultStates() { absl_ports::unique_lock l(&mutex_); + InternalInvalidateAllResultStates(); +} +void ResultStateManager::InternalInvalidateAllResultStates() { result_state_map_.clear(); invalidated_token_set_.clear(); - token_queue_ = {}; + token_queue_ = std::queue<uint64_t>(); + num_total_hits_ = 0; } uint64_t ResultStateManager::GetUniqueToken() { @@ -148,12 +154,21 @@ uint64_t ResultStateManager::GetUniqueToken() { return new_token; } -void ResultStateManager::RemoveStatesIfNeeded() { +void ResultStateManager::RemoveStatesIfNeeded(const ResultState& result_state) { if (result_state_map_.empty() || token_queue_.empty()) { return; } - // Removes any tokens that were previously invalidated. + // 1. Check if this new result_state would take up the entire result state + // manager budget. + if (result_state.num_remaining() > max_total_hits_) { + // This single result state will exceed our budget. Drop everything else to + // accomodate it. + InternalInvalidateAllResultStates(); + return; + } + + // 2. Remove any tokens that were previously invalidated. while (!token_queue_.empty() && invalidated_token_set_.find(token_queue_.front()) != invalidated_token_set_.end()) { @@ -161,11 +176,13 @@ void ResultStateManager::RemoveStatesIfNeeded() { token_queue_.pop(); } - // Removes the oldest state - if (result_state_map_.size() >= max_result_states_ && !token_queue_.empty()) { - result_state_map_.erase(token_queue_.front()); + // 3. 
If we're over budget, remove states from oldest to newest until we fit + // into our budget. + while (result_state.num_remaining() + num_total_hits_ > max_total_hits_) { + InternalInvalidateResultState(token_queue_.front()); token_queue_.pop(); } + invalidated_token_set_.clear(); } void ResultStateManager::InternalInvalidateResultState(uint64_t token) { @@ -173,7 +190,10 @@ void ResultStateManager::InternalInvalidateResultState(uint64_t token) { // invalidated_token_set_. The entry in token_queue_ can't be easily removed // right now (may need O(n) time), so we leave it there and later completely // remove the token in RemoveStatesIfNeeded(). - if (result_state_map_.erase(token) > 0) { + auto itr = result_state_map_.find(token); + if (itr != result_state_map_.end()) { + num_total_hits_ -= itr->second.num_remaining(); + result_state_map_.erase(itr); invalidated_token_set_.insert(token); } } diff --git a/icing/result/result-state-manager.h b/icing/result/result-state-manager.h index eaf9eb5..c04217f 100644 --- a/icing/result/result-state-manager.h +++ b/icing/result/result-state-manager.h @@ -37,7 +37,8 @@ inline constexpr uint64_t kInvalidNextPageToken = 0; // Used to store and manage ResultState. class ResultStateManager { public: - explicit ResultStateManager(int max_hits_per_query, int max_result_states); + explicit ResultStateManager(int max_total_hits, + const DocumentStore& document_store); ResultStateManager(const ResultStateManager&) = delete; ResultStateManager& operator=(const ResultStateManager&) = delete; @@ -77,13 +78,17 @@ class ResultStateManager { private: absl_ports::shared_mutex mutex_; - // The maximum number of scored document hits to return for a query. When we - // have more than the maximum number, extra hits will be truncated. - const int max_hits_per_query_; + const DocumentStore& document_store_; - // The maximum number of result states. When we have more than the maximum - // number, the oldest / firstly added result state will be removed. 
- const int max_result_states_; + // The maximum number of scored document hits that all result states may + // have. When a new result state is added such that num_total_hits_ would + // exceed max_total_hits_, the oldest result states are evicted until + // num_total_hits_ is below max_total_hits. + const int max_total_hits_; + + // The number of scored document hits that all result states currently held by + // the result state manager have. + int num_total_hits_; // A hash map of (next-page token -> result state) std::unordered_map<uint64_t, ResultState> result_state_map_ @@ -112,13 +117,21 @@ class ResultStateManager { uint64_t GetUniqueToken() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); // Helper method to remove old states to make room for incoming states. - void RemoveStatesIfNeeded() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + void RemoveStatesIfNeeded(const ResultState& result_state) + ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); // Helper method to remove a result state from result_state_map_, the token // will then be temporarily kept in invalidated_token_set_ until it's finally // removed from token_queue_. void InternalInvalidateResultState(uint64_t token) ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Internal method to invalidates all result states / tokens currently in + // ResultStateManager. We need this separate method so that other public + // methods don't need to call InvalidateAllResultStates(). Public methods + // calling each other may cause deadlock issues. 
+ void InternalInvalidateAllResultStates() + ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); }; } // namespace lib diff --git a/icing/result/result-state-manager_test.cc b/icing/result/result-state-manager_test.cc index 6defa6f..32e45aa 100644 --- a/icing/result/result-state-manager_test.cc +++ b/icing/result/result-state-manager_test.cc @@ -14,9 +14,15 @@ #include "icing/result/result-state-manager.h" +#include "gmock/gmock.h" #include "gtest/gtest.h" +#include "icing/file/filesystem.h" #include "icing/portable/equals-proto.h" +#include "icing/schema/schema-store.h" +#include "icing/store/document-store.h" #include "icing/testing/common-matchers.h" +#include "icing/testing/tmp-directory.h" +#include "icing/util/clock.h" namespace icing { namespace lib { @@ -27,10 +33,6 @@ using ::testing::Eq; using ::testing::Gt; using ::testing::IsEmpty; -ScoredDocumentHit CreateScoredDocumentHit(DocumentId document_id) { - return ScoredDocumentHit(document_id, kSectionIdMaskNone, /*score=*/1); -} - ScoringSpecProto CreateScoringSpec() { ScoringSpecProto scoring_spec; scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); @@ -43,24 +45,73 @@ ResultSpecProto CreateResultSpec(int num_per_page) { return result_spec; } -ResultState CreateResultState( - const std::vector<ScoredDocumentHit>& scored_document_hits, - int num_per_page) { - return ResultState(scored_document_hits, /*query_terms=*/{}, - SearchSpecProto::default_instance(), CreateScoringSpec(), - CreateResultSpec(num_per_page)); +ScoredDocumentHit CreateScoredHit(DocumentId document_id) { + return ScoredDocumentHit(document_id, kSectionIdMaskNone, /*score=*/1); } -TEST(ResultStateManagerTest, ShouldRankAndPaginateOnePage) { +class ResultStateManagerTest : public testing::Test { + protected: + void SetUp() override { + schema_store_base_dir_ = GetTestTempDir() + "/schema_store"; + filesystem_.CreateDirectoryRecursively(schema_store_base_dir_.c_str()); + ICING_ASSERT_OK_AND_ASSIGN( + schema_store_, + 
SchemaStore::Create(&filesystem_, schema_store_base_dir_, &clock_)); + SchemaProto schema; + schema.add_types()->set_schema_type("Document"); + ICING_ASSERT_OK(schema_store_->SetSchema(std::move(schema))); + + doc_store_base_dir_ = GetTestTempDir() + "/document_store"; + filesystem_.CreateDirectoryRecursively(doc_store_base_dir_.c_str()); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult result, + DocumentStore::Create(&filesystem_, doc_store_base_dir_, &clock_, + schema_store_.get())); + document_store_ = std::move(result.document_store); + } + + void TearDown() override { + filesystem_.DeleteDirectoryRecursively(doc_store_base_dir_.c_str()); + filesystem_.DeleteDirectoryRecursively(schema_store_base_dir_.c_str()); + } + + ResultState CreateResultState( + const std::vector<ScoredDocumentHit>& scored_document_hits, + int num_per_page) { + return ResultState(scored_document_hits, /*query_terms=*/{}, + SearchSpecProto::default_instance(), CreateScoringSpec(), + CreateResultSpec(num_per_page), *document_store_); + } + + ScoredDocumentHit AddScoredDocument(DocumentId document_id) { + DocumentProto document; + document.set_namespace_("namespace"); + document.set_uri(std::to_string(document_id)); + document.set_schema("Document"); + document_store_->Put(std::move(document)); + return ScoredDocumentHit(document_id, kSectionIdMaskNone, /*score=*/1); + } + + const DocumentStore& document_store() const { return *document_store_; } + + private: + Filesystem filesystem_; + std::string doc_store_base_dir_; + std::string schema_store_base_dir_; + Clock clock_; + std::unique_ptr<DocumentStore> document_store_; + std::unique_ptr<SchemaStore> schema_store_; +}; + +TEST_F(ResultStateManagerTest, ShouldRankAndPaginateOnePage) { ResultState original_result_state = - CreateResultState({CreateScoredDocumentHit(/*document_id=*/1), - CreateScoredDocumentHit(/*document_id=*/2), - CreateScoredDocumentHit(/*document_id=*/3)}, + 
CreateResultState({AddScoredDocument(/*document_id=*/0), + AddScoredDocument(/*document_id=*/1), + AddScoredDocument(/*document_id=*/2)}, /*num_per_page=*/10); ResultStateManager result_state_manager( - /*max_hits_per_query=*/std::numeric_limits<int>::max(), - /*max_result_states=*/std::numeric_limits<int>::max()); + /*max_total_hits=*/std::numeric_limits<int>::max(), document_store()); ICING_ASSERT_OK_AND_ASSIGN( PageResultState page_result_state, result_state_manager.RankAndPaginate(std::move(original_result_state))); @@ -70,24 +121,22 @@ TEST(ResultStateManagerTest, ShouldRankAndPaginateOnePage) { // Should get the original scored document hits EXPECT_THAT( page_result_state.scored_document_hits, - ElementsAre( - EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/3)), - EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/2)), - EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/1)))); + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(/*document_id=*/2)), + EqualsScoredDocumentHit(CreateScoredHit(/*document_id=*/1)), + EqualsScoredDocumentHit(CreateScoredHit(/*document_id=*/0)))); } -TEST(ResultStateManagerTest, ShouldRankAndPaginateMultiplePages) { +TEST_F(ResultStateManagerTest, ShouldRankAndPaginateMultiplePages) { ResultState original_result_state = - CreateResultState({CreateScoredDocumentHit(/*document_id=*/1), - CreateScoredDocumentHit(/*document_id=*/2), - CreateScoredDocumentHit(/*document_id=*/3), - CreateScoredDocumentHit(/*document_id=*/4), - CreateScoredDocumentHit(/*document_id=*/5)}, + CreateResultState({AddScoredDocument(/*document_id=*/0), + AddScoredDocument(/*document_id=*/1), + AddScoredDocument(/*document_id=*/2), + AddScoredDocument(/*document_id=*/3), + AddScoredDocument(/*document_id=*/4)}, /*num_per_page=*/2); ResultStateManager result_state_manager( - /*max_hits_per_query=*/std::numeric_limits<int>::max(), - /*max_result_states=*/std::numeric_limits<int>::max()); + 
/*max_total_hits=*/std::numeric_limits<int>::max(), document_store()); // First page, 2 results ICING_ASSERT_OK_AND_ASSIGN( @@ -95,9 +144,8 @@ TEST(ResultStateManagerTest, ShouldRankAndPaginateMultiplePages) { result_state_manager.RankAndPaginate(std::move(original_result_state))); EXPECT_THAT( page_result_state1.scored_document_hits, - ElementsAre( - EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/5)), - EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/4)))); + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(/*document_id=*/4)), + EqualsScoredDocumentHit(CreateScoredHit(/*document_id=*/3)))); uint64_t next_page_token = page_result_state1.next_page_token; @@ -106,48 +154,45 @@ TEST(ResultStateManagerTest, ShouldRankAndPaginateMultiplePages) { result_state_manager.GetNextPage(next_page_token)); EXPECT_THAT( page_result_state2.scored_document_hits, - ElementsAre( - EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/3)), - EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/2)))); + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(/*document_id=*/2)), + EqualsScoredDocumentHit(CreateScoredHit(/*document_id=*/1)))); // Third page, 1 result ICING_ASSERT_OK_AND_ASSIGN(PageResultState page_result_state3, result_state_manager.GetNextPage(next_page_token)); - EXPECT_THAT(page_result_state3.scored_document_hits, - ElementsAre(EqualsScoredDocumentHit( - CreateScoredDocumentHit(/*document_id=*/1)))); + EXPECT_THAT( + page_result_state3.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(/*document_id=*/0)))); // No results EXPECT_THAT(result_state_manager.GetNextPage(next_page_token), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } -TEST(ResultStateManagerTest, EmptyStateShouldReturnError) { +TEST_F(ResultStateManagerTest, EmptyStateShouldReturnError) { ResultState empty_result_state = CreateResultState({}, /*num_per_page=*/1); ResultStateManager result_state_manager( - 
/*max_hits_per_query=*/std::numeric_limits<int>::max(), - /*max_result_states=*/std::numeric_limits<int>::max()); + /*max_total_hits=*/std::numeric_limits<int>::max(), document_store()); EXPECT_THAT( result_state_manager.RankAndPaginate(std::move(empty_result_state)), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST(ResultStateManagerTest, ShouldInvalidateOneToken) { +TEST_F(ResultStateManagerTest, ShouldInvalidateOneToken) { ResultState result_state1 = - CreateResultState({CreateScoredDocumentHit(/*document_id=*/1), - CreateScoredDocumentHit(/*document_id=*/2), - CreateScoredDocumentHit(/*document_id=*/3)}, + CreateResultState({AddScoredDocument(/*document_id=*/0), + AddScoredDocument(/*document_id=*/1), + AddScoredDocument(/*document_id=*/2)}, /*num_per_page=*/1); ResultState result_state2 = - CreateResultState({CreateScoredDocumentHit(/*document_id=*/4), - CreateScoredDocumentHit(/*document_id=*/5), - CreateScoredDocumentHit(/*document_id=*/6)}, + CreateResultState({AddScoredDocument(/*document_id=*/3), + AddScoredDocument(/*document_id=*/4), + AddScoredDocument(/*document_id=*/5)}, /*num_per_page=*/1); ResultStateManager result_state_manager( - /*max_hits_per_query=*/std::numeric_limits<int>::max(), - /*max_result_states=*/std::numeric_limits<int>::max()); + /*max_total_hits=*/std::numeric_limits<int>::max(), document_store()); ICING_ASSERT_OK_AND_ASSIGN( PageResultState page_result_state1, result_state_manager.RankAndPaginate(std::move(result_state1))); @@ -167,26 +212,25 @@ TEST(ResultStateManagerTest, ShouldInvalidateOneToken) { ICING_ASSERT_OK_AND_ASSIGN( page_result_state2, result_state_manager.GetNextPage(page_result_state2.next_page_token)); - EXPECT_THAT(page_result_state2.scored_document_hits, - ElementsAre(EqualsScoredDocumentHit( - CreateScoredDocumentHit(/*document_id=*/5)))); + EXPECT_THAT( + page_result_state2.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(/*document_id=*/4)))); } 
-TEST(ResultStateManagerTest, ShouldInvalidateAllTokens) { +TEST_F(ResultStateManagerTest, ShouldInvalidateAllTokens) { ResultState result_state1 = - CreateResultState({CreateScoredDocumentHit(/*document_id=*/1), - CreateScoredDocumentHit(/*document_id=*/2), - CreateScoredDocumentHit(/*document_id=*/3)}, + CreateResultState({AddScoredDocument(/*document_id=*/0), + AddScoredDocument(/*document_id=*/1), + AddScoredDocument(/*document_id=*/2)}, /*num_per_page=*/1); ResultState result_state2 = - CreateResultState({CreateScoredDocumentHit(/*document_id=*/4), - CreateScoredDocumentHit(/*document_id=*/5), - CreateScoredDocumentHit(/*document_id=*/6)}, + CreateResultState({AddScoredDocument(/*document_id=*/3), + AddScoredDocument(/*document_id=*/4), + AddScoredDocument(/*document_id=*/5)}, /*num_per_page=*/1); ResultStateManager result_state_manager( - /*max_hits_per_query=*/std::numeric_limits<int>::max(), - /*max_result_states=*/std::numeric_limits<int>::max()); + /*max_total_hits=*/std::numeric_limits<int>::max(), document_store()); ICING_ASSERT_OK_AND_ASSIGN( PageResultState page_result_state1, result_state_manager.RankAndPaginate(std::move(result_state1))); @@ -207,23 +251,22 @@ TEST(ResultStateManagerTest, ShouldInvalidateAllTokens) { StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } -TEST(ResultStateManagerTest, ShouldRemoveOldestResultState) { +TEST_F(ResultStateManagerTest, ShouldRemoveOldestResultState) { ResultState result_state1 = - CreateResultState({CreateScoredDocumentHit(/*document_id=*/1), - CreateScoredDocumentHit(/*document_id=*/2)}, + CreateResultState({AddScoredDocument(/*document_id=*/0), + AddScoredDocument(/*document_id=*/1)}, /*num_per_page=*/1); ResultState result_state2 = - CreateResultState({CreateScoredDocumentHit(/*document_id=*/3), - CreateScoredDocumentHit(/*document_id=*/4)}, + CreateResultState({AddScoredDocument(/*document_id=*/2), + AddScoredDocument(/*document_id=*/3)}, /*num_per_page=*/1); ResultState result_state3 = - 
CreateResultState({CreateScoredDocumentHit(/*document_id=*/5), - CreateScoredDocumentHit(/*document_id=*/6)}, + CreateResultState({AddScoredDocument(/*document_id=*/4), + AddScoredDocument(/*document_id=*/5)}, /*num_per_page=*/1); - ResultStateManager result_state_manager( - /*max_hits_per_query=*/std::numeric_limits<int>::max(), - /*max_result_states=*/2); + ResultStateManager result_state_manager(/*max_total_hits=*/2, + document_store()); ICING_ASSERT_OK_AND_ASSIGN( PageResultState page_result_state1, result_state_manager.RankAndPaginate(std::move(result_state1))); @@ -243,39 +286,111 @@ TEST(ResultStateManagerTest, ShouldRemoveOldestResultState) { page_result_state2, result_state_manager.GetNextPage(page_result_state2.next_page_token)); EXPECT_THAT(page_result_state2.scored_document_hits, - ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit( - /*document_id=*/3)))); + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/2)))); ICING_ASSERT_OK_AND_ASSIGN( page_result_state3, result_state_manager.GetNextPage(page_result_state3.next_page_token)); EXPECT_THAT(page_result_state3.scored_document_hits, - ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit( - /*document_id=*/5)))); + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/4)))); } -TEST(ResultStateManagerTest, - PreviouslyInvalidatedResultStateShouldNotBeCounted) { +TEST_F(ResultStateManagerTest, + InvalidatedResultStateShouldDecreaseCurrentHitsCount) { ResultState result_state1 = - CreateResultState({CreateScoredDocumentHit(/*document_id=*/1), - CreateScoredDocumentHit(/*document_id=*/2)}, + CreateResultState({AddScoredDocument(/*document_id=*/0), + AddScoredDocument(/*document_id=*/1)}, /*num_per_page=*/1); ResultState result_state2 = - CreateResultState({CreateScoredDocumentHit(/*document_id=*/3), - CreateScoredDocumentHit(/*document_id=*/4)}, + CreateResultState({AddScoredDocument(/*document_id=*/2), + AddScoredDocument(/*document_id=*/3)}, 
/*num_per_page=*/1); ResultState result_state3 = - CreateResultState({CreateScoredDocumentHit(/*document_id=*/5), - CreateScoredDocumentHit(/*document_id=*/6)}, + CreateResultState({AddScoredDocument(/*document_id=*/4), + AddScoredDocument(/*document_id=*/5)}, /*num_per_page=*/1); + + // Add the first three states. Remember, the first page for each result state + // won't be cached (since it is returned immediately from RankAndPaginate). + // Each result state has a page size of 1 and a result set of 2 hits. So each + // result will take up one hit of our three hit budget. + ResultStateManager result_state_manager(/*max_total_hits=*/3, + document_store()); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state1, + result_state_manager.RankAndPaginate(std::move(result_state1))); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state2, + result_state_manager.RankAndPaginate(std::move(result_state2))); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state3, + result_state_manager.RankAndPaginate(std::move(result_state3))); + + // Invalidates state 2, so that the number of hits current cached should be + // decremented to 2. + result_state_manager.InvalidateResultState( + page_result_state2.next_page_token); + + // If invalidating state 2 correctly decremented the current hit count to 2, + // then adding state 4 should still be within our budget and no other result + // states should be evicted. 
ResultState result_state4 = - CreateResultState({CreateScoredDocumentHit(/*document_id=*/7), - CreateScoredDocumentHit(/*document_id=*/8)}, + CreateResultState({AddScoredDocument(/*document_id=*/6), + AddScoredDocument(/*document_id=*/7)}, /*num_per_page=*/1); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state4, + result_state_manager.RankAndPaginate(std::move(result_state4))); + + ICING_ASSERT_OK_AND_ASSIGN( + page_result_state1, + result_state_manager.GetNextPage(page_result_state1.next_page_token)); + EXPECT_THAT(page_result_state1.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/0)))); + + EXPECT_THAT( + result_state_manager.GetNextPage(page_result_state2.next_page_token), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); - ResultStateManager result_state_manager( - /*max_hits_per_query=*/std::numeric_limits<int>::max(), - /*max_result_states=*/3); + ICING_ASSERT_OK_AND_ASSIGN( + page_result_state3, + result_state_manager.GetNextPage(page_result_state3.next_page_token)); + EXPECT_THAT(page_result_state3.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/4)))); + + ICING_ASSERT_OK_AND_ASSIGN( + page_result_state4, + result_state_manager.GetNextPage(page_result_state4.next_page_token)); + EXPECT_THAT(page_result_state4.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/6)))); +} + +TEST_F(ResultStateManagerTest, + InvalidatedAllResultStatesShouldResetCurrentHitCount) { + ResultState result_state1 = + CreateResultState({AddScoredDocument(/*document_id=*/0), + AddScoredDocument(/*document_id=*/1)}, + /*num_per_page=*/1); + ResultState result_state2 = + CreateResultState({AddScoredDocument(/*document_id=*/2), + AddScoredDocument(/*document_id=*/3)}, + /*num_per_page=*/1); + ResultState result_state3 = + CreateResultState({AddScoredDocument(/*document_id=*/4), + AddScoredDocument(/*document_id=*/5)}, + 
/*num_per_page=*/1); + + // Add the first three states. Remember, the first page for each result state + // won't be cached (since it is returned immediately from RankAndPaginate). + // Each result state has a page size of 1 and a result set of 2 hits. So each + // result will take up one hit of our three hit budget. + ResultStateManager result_state_manager(/*max_total_hits=*/3, + document_store()); ICING_ASSERT_OK_AND_ASSIGN( PageResultState page_result_state1, result_state_manager.RankAndPaginate(std::move(result_state1))); @@ -286,21 +401,298 @@ TEST(ResultStateManagerTest, PageResultState page_result_state3, result_state_manager.RankAndPaginate(std::move(result_state3))); - // Invalidates state 2, so that the number of valid tokens becomes 2. + // Invalidates all states so that the current hit count will be 0. + result_state_manager.InvalidateAllResultStates(); + + // If invalidating all states correctly reset the current hit count to 0, + // then the entirety of state 4 should still be within our budget and no other + // result states should be evicted. 
+ ResultState result_state4 = + CreateResultState({AddScoredDocument(/*document_id=*/6), + AddScoredDocument(/*document_id=*/7)}, + /*num_per_page=*/1); + ResultState result_state5 = + CreateResultState({AddScoredDocument(/*document_id=*/8), + AddScoredDocument(/*document_id=*/9)}, + /*num_per_page=*/1); + ResultState result_state6 = + CreateResultState({AddScoredDocument(/*document_id=*/10), + AddScoredDocument(/*document_id=*/11)}, + /*num_per_page=*/1); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state4, + result_state_manager.RankAndPaginate(std::move(result_state4))); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state5, + result_state_manager.RankAndPaginate(std::move(result_state5))); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state6, + result_state_manager.RankAndPaginate(std::move(result_state6))); + + EXPECT_THAT( + result_state_manager.GetNextPage(page_result_state1.next_page_token), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + + EXPECT_THAT( + result_state_manager.GetNextPage(page_result_state2.next_page_token), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + + EXPECT_THAT( + result_state_manager.GetNextPage(page_result_state3.next_page_token), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + + ICING_ASSERT_OK_AND_ASSIGN( + page_result_state4, + result_state_manager.GetNextPage(page_result_state4.next_page_token)); + EXPECT_THAT(page_result_state4.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/6)))); + + ICING_ASSERT_OK_AND_ASSIGN( + page_result_state5, + result_state_manager.GetNextPage(page_result_state5.next_page_token)); + EXPECT_THAT(page_result_state5.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/8)))); + + ICING_ASSERT_OK_AND_ASSIGN( + page_result_state6, + result_state_manager.GetNextPage(page_result_state6.next_page_token)); + 
EXPECT_THAT(page_result_state6.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/10)))); +} + +TEST_F( + ResultStateManagerTest, + InvalidatedResultStateShouldDecreaseCurrentHitsCountByExactStateHitCount) { + ResultState result_state1 = + CreateResultState({AddScoredDocument(/*document_id=*/0), + AddScoredDocument(/*document_id=*/1)}, + /*num_per_page=*/1); + ResultState result_state2 = + CreateResultState({AddScoredDocument(/*document_id=*/2), + AddScoredDocument(/*document_id=*/3)}, + /*num_per_page=*/1); + ResultState result_state3 = + CreateResultState({AddScoredDocument(/*document_id=*/4), + AddScoredDocument(/*document_id=*/5)}, + /*num_per_page=*/1); + + // Add the first three states. Remember, the first page for each result state + // won't be cached (since it is returned immediately from RankAndPaginate). + // Each result state has a page size of 1 and a result set of 2 hits. So each + // result will take up one hit of our three hit budget. + ResultStateManager result_state_manager(/*max_total_hits=*/3, + document_store()); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state1, + result_state_manager.RankAndPaginate(std::move(result_state1))); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state2, + result_state_manager.RankAndPaginate(std::move(result_state2))); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state3, + result_state_manager.RankAndPaginate(std::move(result_state3))); + + // Invalidates state 2, so that the number of hits current cached should be + // decremented to 2. result_state_manager.InvalidateResultState( page_result_state2.next_page_token); - // Adding state 4 shouldn't affect rest of the states + // If invalidating state 2 correctly decremented the current hit count to 2, + // then adding state 4 should still be within our budget and no other result + // states should be evicted. 
+ ResultState result_state4 = + CreateResultState({AddScoredDocument(/*document_id=*/6), + AddScoredDocument(/*document_id=*/7)}, + /*num_per_page=*/1); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state4, + result_state_manager.RankAndPaginate(std::move(result_state4))); + + // If invalidating result state 2 correctly decremented the current hit count + // to 2 and adding state 4 correctly incremented it to 3, then adding this + // result state should trigger the eviction of state 1. + ResultState result_state5 = + CreateResultState({AddScoredDocument(/*document_id=*/8), + AddScoredDocument(/*document_id=*/9)}, + /*num_per_page=*/1); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state5, + result_state_manager.RankAndPaginate(std::move(result_state5))); + + EXPECT_THAT( + result_state_manager.GetNextPage(page_result_state1.next_page_token), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + + EXPECT_THAT( + result_state_manager.GetNextPage(page_result_state2.next_page_token), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + + ICING_ASSERT_OK_AND_ASSIGN( + page_result_state3, + result_state_manager.GetNextPage(page_result_state3.next_page_token)); + EXPECT_THAT(page_result_state3.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/4)))); + + ICING_ASSERT_OK_AND_ASSIGN( + page_result_state4, + result_state_manager.GetNextPage(page_result_state4.next_page_token)); + EXPECT_THAT(page_result_state4.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/6)))); + + ICING_ASSERT_OK_AND_ASSIGN( + page_result_state5, + result_state_manager.GetNextPage(page_result_state5.next_page_token)); + EXPECT_THAT(page_result_state5.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/8)))); +} + +TEST_F(ResultStateManagerTest, GetNextPageShouldDecreaseCurrentHitsCount) { + ResultState result_state1 = + 
CreateResultState({AddScoredDocument(/*document_id=*/0), + AddScoredDocument(/*document_id=*/1)}, + /*num_per_page=*/1); + ResultState result_state2 = + CreateResultState({AddScoredDocument(/*document_id=*/2), + AddScoredDocument(/*document_id=*/3)}, + /*num_per_page=*/1); + ResultState result_state3 = + CreateResultState({AddScoredDocument(/*document_id=*/4), + AddScoredDocument(/*document_id=*/5)}, + /*num_per_page=*/1); + + // Add the first three states. Remember, the first page for each result state + // won't be cached (since it is returned immediately from RankAndPaginate). + // Each result state has a page size of 1 and a result set of 2 hits. So each + // result will take up one hit of our three hit budget. + ResultStateManager result_state_manager(/*max_total_hits=*/3, + document_store()); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state1, + result_state_manager.RankAndPaginate(std::move(result_state1))); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state2, + result_state_manager.RankAndPaginate(std::move(result_state2))); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state3, + result_state_manager.RankAndPaginate(std::move(result_state3))); + + // GetNextPage for result state 1 should return its result and decrement the + // number of cached hits to 2. + ICING_ASSERT_OK_AND_ASSIGN( + page_result_state1, + result_state_manager.GetNextPage(page_result_state1.next_page_token)); + EXPECT_THAT(page_result_state1.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/0)))); + + // If retrieving the next page for result state 1 correctly decremented the + // current hit count to 2, then adding state 4 should still be within our + // budget and no other result states should be evicted. 
+ ResultState result_state4 = + CreateResultState({AddScoredDocument(/*document_id=*/6), + AddScoredDocument(/*document_id=*/7)}, + /*num_per_page=*/1); ICING_ASSERT_OK_AND_ASSIGN( PageResultState page_result_state4, result_state_manager.RankAndPaginate(std::move(result_state4))); + EXPECT_THAT( + result_state_manager.GetNextPage(page_result_state1.next_page_token), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + + ICING_ASSERT_OK_AND_ASSIGN( + page_result_state2, + result_state_manager.GetNextPage(page_result_state2.next_page_token)); + EXPECT_THAT(page_result_state2.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/2)))); + + ICING_ASSERT_OK_AND_ASSIGN( + page_result_state3, + result_state_manager.GetNextPage(page_result_state3.next_page_token)); + EXPECT_THAT(page_result_state3.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/4)))); + + ICING_ASSERT_OK_AND_ASSIGN( + page_result_state4, + result_state_manager.GetNextPage(page_result_state4.next_page_token)); + EXPECT_THAT(page_result_state4.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/6)))); +} + +TEST_F(ResultStateManagerTest, + GetNextPageShouldDecreaseCurrentHitsCountByExactlyOnePage) { + ResultState result_state1 = + CreateResultState({AddScoredDocument(/*document_id=*/0), + AddScoredDocument(/*document_id=*/1)}, + /*num_per_page=*/1); + ResultState result_state2 = + CreateResultState({AddScoredDocument(/*document_id=*/2), + AddScoredDocument(/*document_id=*/3)}, + /*num_per_page=*/1); + ResultState result_state3 = + CreateResultState({AddScoredDocument(/*document_id=*/4), + AddScoredDocument(/*document_id=*/5)}, + /*num_per_page=*/1); + + // Add the first three states. Remember, the first page for each result state + // won't be cached (since it is returned immediately from RankAndPaginate). 
+ // Each result state has a page size of 1 and a result set of 2 hits. So each + // result will take up one hit of our three hit budget. + ResultStateManager result_state_manager(/*max_total_hits=*/3, + document_store()); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state1, + result_state_manager.RankAndPaginate(std::move(result_state1))); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state2, + result_state_manager.RankAndPaginate(std::move(result_state2))); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state3, + result_state_manager.RankAndPaginate(std::move(result_state3))); + + // GetNextPage for result state 1 should return its result and decrement the + // number of cached hits to 2. ICING_ASSERT_OK_AND_ASSIGN( page_result_state1, result_state_manager.GetNextPage(page_result_state1.next_page_token)); EXPECT_THAT(page_result_state1.scored_document_hits, - ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit( - /*document_id=*/1)))); + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/0)))); + + // If retrieving the next page for result state 1 correctly decremented the + // current hit count to 2, then adding state 4 should still be within our + // budget and no other result states should be evicted. + ResultState result_state4 = + CreateResultState({AddScoredDocument(/*document_id=*/6), + AddScoredDocument(/*document_id=*/7)}, + /*num_per_page=*/1); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state4, + result_state_manager.RankAndPaginate(std::move(result_state4))); + + // If retrieving the next page for result state 1 correctly decremented the + // current hit count to 2 and adding state 4 correctly incremented it to 3, + // then adding this result state should trigger the eviction of state 2. 
+ ResultState result_state5 = + CreateResultState({AddScoredDocument(/*document_id=*/8), + AddScoredDocument(/*document_id=*/9)}, + /*num_per_page=*/1); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state5, + result_state_manager.RankAndPaginate(std::move(result_state5))); + + EXPECT_THAT( + result_state_manager.GetNextPage(page_result_state1.next_page_token), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT( result_state_manager.GetNextPage(page_result_state2.next_page_token), @@ -310,18 +702,150 @@ TEST(ResultStateManagerTest, page_result_state3, result_state_manager.GetNextPage(page_result_state3.next_page_token)); EXPECT_THAT(page_result_state3.scored_document_hits, - ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit( - /*document_id=*/5)))); + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/4)))); ICING_ASSERT_OK_AND_ASSIGN( page_result_state4, result_state_manager.GetNextPage(page_result_state4.next_page_token)); EXPECT_THAT(page_result_state4.scored_document_hits, - ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit( + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/6)))); + + ICING_ASSERT_OK_AND_ASSIGN( + page_result_state5, + result_state_manager.GetNextPage(page_result_state5.next_page_token)); + EXPECT_THAT(page_result_state5.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/8)))); +} + +TEST_F(ResultStateManagerTest, + AddingOverBudgetResultStateShouldEvictAllStates) { + ResultState result_state1 = + CreateResultState({AddScoredDocument(/*document_id=*/0), + AddScoredDocument(/*document_id=*/1), + AddScoredDocument(/*document_id=*/2)}, + /*num_per_page=*/1); + ResultState result_state2 = + CreateResultState({AddScoredDocument(/*document_id=*/3), + AddScoredDocument(/*document_id=*/4)}, + /*num_per_page=*/1); + + // Add the first two states. 
Remember, the first page for each result state + // won't be cached (since it is returned immediately from RankAndPaginate). + // Each result state has a page size of 1. So 3 hits will remain cached. + ResultStateManager result_state_manager(/*max_total_hits=*/4, + document_store()); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state1, + result_state_manager.RankAndPaginate(std::move(result_state1))); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state2, + result_state_manager.RankAndPaginate(std::move(result_state2))); + + // Add a result state that is larger than the entire budget. This should + // result in all previous result states being evicted, the first hit from + // result state 3 being returned and the next four hits being cached (the last + // hit should be dropped because it exceeds the max). + ResultState result_state3 = + CreateResultState({AddScoredDocument(/*document_id=*/5), + AddScoredDocument(/*document_id=*/6), + AddScoredDocument(/*document_id=*/7), + AddScoredDocument(/*document_id=*/8), + AddScoredDocument(/*document_id=*/9), + AddScoredDocument(/*document_id=*/10)}, + /*num_per_page=*/1); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state3, + result_state_manager.RankAndPaginate(std::move(result_state3))); + + // GetNextPage for result state 1 and 2 should return NOT_FOUND. + EXPECT_THAT( + result_state_manager.GetNextPage(page_result_state1.next_page_token), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + + EXPECT_THAT( + result_state_manager.GetNextPage(page_result_state2.next_page_token), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + + // Only the next four results in state 3 should be retrievable. 
+ ICING_ASSERT_OK_AND_ASSIGN( + page_result_state3, + result_state_manager.GetNextPage(page_result_state3.next_page_token)); + EXPECT_THAT(page_result_state3.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/9)))); + + ICING_ASSERT_OK_AND_ASSIGN( + page_result_state3, + result_state_manager.GetNextPage(page_result_state3.next_page_token)); + EXPECT_THAT(page_result_state3.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/8)))); + + ICING_ASSERT_OK_AND_ASSIGN( + page_result_state3, + result_state_manager.GetNextPage(page_result_state3.next_page_token)); + EXPECT_THAT(page_result_state3.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( /*document_id=*/7)))); + + ICING_ASSERT_OK_AND_ASSIGN( + page_result_state3, + result_state_manager.GetNextPage(page_result_state3.next_page_token)); + EXPECT_THAT(page_result_state3.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/6)))); + + // The final result should have been dropped because it exceeded the budget. + EXPECT_THAT( + result_state_manager.GetNextPage(page_result_state3.next_page_token), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); +} + +TEST_F(ResultStateManagerTest, + AddingResultStateShouldEvictOverBudgetResultState) { + ResultStateManager result_state_manager(/*max_total_hits=*/4, + document_store()); + // Add a result state that is larger than the entire budget. 
The entire result + // state will still be cached + ResultState result_state1 = + CreateResultState({AddScoredDocument(/*document_id=*/0), + AddScoredDocument(/*document_id=*/1), + AddScoredDocument(/*document_id=*/2), + AddScoredDocument(/*document_id=*/3), + AddScoredDocument(/*document_id=*/4), + AddScoredDocument(/*document_id=*/5)}, + /*num_per_page=*/1); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state1, + result_state_manager.RankAndPaginate(std::move(result_state1))); + + // Add a result state. Because state2 + state1 is larger than the budget, + // state1 should be evicted. + ResultState result_state2 = + CreateResultState({AddScoredDocument(/*document_id=*/6), + AddScoredDocument(/*document_id=*/7)}, + /*num_per_page=*/1); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state2, + result_state_manager.RankAndPaginate(std::move(result_state2))); + + // state1 should have been evicted and state2 should still be retrievable. + EXPECT_THAT( + result_state_manager.GetNextPage(page_result_state1.next_page_token), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + + ICING_ASSERT_OK_AND_ASSIGN( + page_result_state2, + result_state_manager.GetNextPage(page_result_state2.next_page_token)); + EXPECT_THAT(page_result_state2.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/6)))); } -TEST(ResultStateManagerTest, ShouldGetSnippetContext) { +TEST_F(ResultStateManagerTest, ShouldGetSnippetContext) { ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/1); result_spec.mutable_snippet_spec()->set_num_to_snippet(5); result_spec.mutable_snippet_spec()->set_num_matches_per_property(5); @@ -334,13 +858,13 @@ TEST(ResultStateManagerTest, ShouldGetSnippetContext) { query_terms_map.emplace("term1", std::unordered_set<std::string>()); ResultState original_result_state = ResultState( - /*scored_document_hits=*/{CreateScoredDocumentHit(/*document_id=*/1), - 
CreateScoredDocumentHit(/*document_id=*/2)}, - query_terms_map, search_spec, CreateScoringSpec(), result_spec); + /*scored_document_hits=*/{AddScoredDocument(/*document_id=*/0), + AddScoredDocument(/*document_id=*/1)}, + query_terms_map, search_spec, CreateScoringSpec(), result_spec, + document_store()); ResultStateManager result_state_manager( - /*max_hits_per_query=*/std::numeric_limits<int>::max(), - /*max_result_states=*/std::numeric_limits<int>::max()); + /*max_total_hits=*/std::numeric_limits<int>::max(), document_store()); ICING_ASSERT_OK_AND_ASSIGN( PageResultState page_result_state, result_state_manager.RankAndPaginate(std::move(original_result_state))); @@ -355,7 +879,7 @@ TEST(ResultStateManagerTest, ShouldGetSnippetContext) { EqualsProto(result_spec.snippet_spec())); } -TEST(ResultStateManagerTest, ShouldGetDefaultSnippetContext) { +TEST_F(ResultStateManagerTest, ShouldGetDefaultSnippetContext) { ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/1); // 0 indicates no snippeting result_spec.mutable_snippet_spec()->set_num_to_snippet(0); @@ -369,13 +893,13 @@ TEST(ResultStateManagerTest, ShouldGetDefaultSnippetContext) { query_terms_map.emplace("term1", std::unordered_set<std::string>()); ResultState original_result_state = ResultState( - /*scored_document_hits=*/{CreateScoredDocumentHit(/*document_id=*/1), - CreateScoredDocumentHit(/*document_id=*/2)}, - query_terms_map, search_spec, CreateScoringSpec(), result_spec); + /*scored_document_hits=*/{AddScoredDocument(/*document_id=*/0), + AddScoredDocument(/*document_id=*/1)}, + query_terms_map, search_spec, CreateScoringSpec(), result_spec, + document_store()); ResultStateManager result_state_manager( - /*max_hits_per_query=*/std::numeric_limits<int>::max(), - /*max_result_states=*/std::numeric_limits<int>::max()); + /*max_total_hits=*/std::numeric_limits<int>::max(), document_store()); ICING_ASSERT_OK_AND_ASSIGN( PageResultState page_result_state, 
result_state_manager.RankAndPaginate(std::move(original_result_state))); @@ -390,18 +914,17 @@ TEST(ResultStateManagerTest, ShouldGetDefaultSnippetContext) { Eq(TermMatchType::UNKNOWN)); } -TEST(ResultStateManagerTest, ShouldGetCorrectNumPreviouslyReturned) { +TEST_F(ResultStateManagerTest, ShouldGetCorrectNumPreviouslyReturned) { ResultState original_result_state = - CreateResultState({CreateScoredDocumentHit(/*document_id=*/1), - CreateScoredDocumentHit(/*document_id=*/2), - CreateScoredDocumentHit(/*document_id=*/3), - CreateScoredDocumentHit(/*document_id=*/4), - CreateScoredDocumentHit(/*document_id=*/5)}, + CreateResultState({AddScoredDocument(/*document_id=*/0), + AddScoredDocument(/*document_id=*/1), + AddScoredDocument(/*document_id=*/2), + AddScoredDocument(/*document_id=*/3), + AddScoredDocument(/*document_id=*/4)}, /*num_per_page=*/2); ResultStateManager result_state_manager( - /*max_hits_per_query=*/std::numeric_limits<int>::max(), - /*max_result_states=*/std::numeric_limits<int>::max()); + /*max_total_hits=*/std::numeric_limits<int>::max(), document_store()); // First page, 2 results ICING_ASSERT_OK_AND_ASSIGN( @@ -435,41 +958,48 @@ TEST(ResultStateManagerTest, ShouldGetCorrectNumPreviouslyReturned) { StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } -TEST(ResultStateManagerTest, ShouldStoreMaxNumberOfScoredDocumentHits) { - ResultState original_result_state = - CreateResultState({CreateScoredDocumentHit(/*document_id=*/1), - CreateScoredDocumentHit(/*document_id=*/2), - CreateScoredDocumentHit(/*document_id=*/3), - CreateScoredDocumentHit(/*document_id=*/4), - CreateScoredDocumentHit(/*document_id=*/5)}, - /*num_per_page=*/2); +TEST_F(ResultStateManagerTest, ShouldStoreAllHits) { + ScoredDocumentHit scored_hit_1 = AddScoredDocument(/*document_id=*/0); + ScoredDocumentHit scored_hit_2 = AddScoredDocument(/*document_id=*/1); + ScoredDocumentHit scored_hit_3 = AddScoredDocument(/*document_id=*/2); + ScoredDocumentHit scored_hit_4 = 
AddScoredDocument(/*document_id=*/3); + ScoredDocumentHit scored_hit_5 = AddScoredDocument(/*document_id=*/4); - ResultStateManager result_state_manager( - /*max_hits_per_query=*/3, - /*max_result_states=*/std::numeric_limits<int>::max()); + ResultState original_result_state = CreateResultState( + {scored_hit_1, scored_hit_2, scored_hit_3, scored_hit_4, scored_hit_5}, + /*num_per_page=*/2); + + ResultStateManager result_state_manager(/*max_total_hits=*/4, + document_store()); - // The 5 input scored document hits will be truncated to 3. + // The 5 input scored document hits will not be truncated. The first page of + // two hits will be returned immediately and the other three hits will fit + // within our caching budget. // First page, 2 results ICING_ASSERT_OK_AND_ASSIGN( PageResultState page_result_state1, result_state_manager.RankAndPaginate(std::move(original_result_state))); - EXPECT_THAT( - page_result_state1.scored_document_hits, - ElementsAre( - EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/5)), - EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/4)))); + EXPECT_THAT(page_result_state1.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(scored_hit_5), + EqualsScoredDocumentHit(scored_hit_4))); uint64_t next_page_token = page_result_state1.next_page_token; - // Second page, 1 results. + // Second page, 2 results. ICING_ASSERT_OK_AND_ASSIGN(PageResultState page_result_state2, result_state_manager.GetNextPage(next_page_token)); EXPECT_THAT(page_result_state2.scored_document_hits, - ElementsAre(EqualsScoredDocumentHit( - CreateScoredDocumentHit(/*document_id=*/3)))); + ElementsAre(EqualsScoredDocumentHit(scored_hit_3), + EqualsScoredDocumentHit(scored_hit_2))); + + // Third page, 1 result. 
+ ICING_ASSERT_OK_AND_ASSIGN(PageResultState page_result_state3, + result_state_manager.GetNextPage(next_page_token)); + EXPECT_THAT(page_result_state3.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(scored_hit_1))); - // No third page. + // Fourth page, 0 results. EXPECT_THAT(result_state_manager.GetNextPage(next_page_token), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } diff --git a/icing/result/result-state.cc b/icing/result/result-state.cc index 82738a9..fc89185 100644 --- a/icing/result/result-state.cc +++ b/icing/result/result-state.cc @@ -16,6 +16,7 @@ #include "icing/result/projection-tree.h" #include "icing/scoring/ranker.h" +#include "icing/store/namespace-id.h" #include "icing/util/logging.h" namespace icing { @@ -39,7 +40,8 @@ ResultState::ResultState(std::vector<ScoredDocumentHit> scored_document_hits, SectionRestrictQueryTermsMap query_terms, const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec, - const ResultSpecProto& result_spec) + const ResultSpecProto& result_spec, + const DocumentStore& document_store) : scored_document_hits_(std::move(scored_document_hits)), snippet_context_(CreateSnippetContext(std::move(query_terms), search_spec, result_spec)), @@ -52,14 +54,82 @@ ResultState::ResultState(std::vector<ScoredDocumentHit> scored_document_hits, projection_tree_map_.insert( {type_field_mask.schema_type(), ProjectionTree(type_field_mask)}); } + + for (const ResultSpecProto::ResultGrouping& result_grouping : + result_spec.result_groupings()) { + int group_id = group_result_limits_.size(); + group_result_limits_.push_back(result_grouping.max_results()); + for (const std::string& name_space : result_grouping.namespaces()) { + auto namespace_id_or = document_store.GetNamespaceId(name_space); + if (!namespace_id_or.ok()) { + continue; + } + namespace_group_id_map_.insert({namespace_id_or.ValueOrDie(), group_id}); + } + } BuildHeapInPlace(&scored_document_hits_, scored_document_hit_comparator_); } 
-std::vector<ScoredDocumentHit> ResultState::GetNextPage() { - std::vector<ScoredDocumentHit> scored_document_hits = PopTopResultsFromHeap( - &scored_document_hits_, num_per_page_, scored_document_hit_comparator_); - num_returned_ += scored_document_hits.size(); - return scored_document_hits; +class GroupResultLimiter { + public: + GroupResultLimiter( + const std::unordered_map<NamespaceId, int>& namespace_group_id_map, + std::vector<int>& group_result_limits, + const DocumentStore& document_store) + : namespace_group_id_map_(namespace_group_id_map), + group_result_limits_(&group_result_limits), + document_store_(document_store) {} + + // Returns true if the scored_document_hit should be removed. + bool operator()(const ScoredDocumentHit& scored_document_hit) { + auto document_filter_data_or = document_store_.GetDocumentFilterData( + scored_document_hit.document_id()); + if (!document_filter_data_or.ok()) { + return true; + } + NamespaceId namespace_id = + document_filter_data_or.ValueOrDie().namespace_id(); + auto iter = namespace_group_id_map_.find(namespace_id); + if (iter == namespace_group_id_map_.end()) { + return false; + } + int& count = group_result_limits_->at(iter->second); + if (count <= 0) { + return true; + } + --count; + return false; + } + + private: + const std::unordered_map<NamespaceId, int>& namespace_group_id_map_; + std::vector<int>* group_result_limits_; + const DocumentStore& document_store_; +}; + +std::vector<ScoredDocumentHit> ResultState::GetNextPage( + const DocumentStore& document_store) { + int num_requested = num_per_page_; + bool more_results_available = true; + std::vector<ScoredDocumentHit> final_scored_document_hits; + while (more_results_available && num_requested > 0) { + std::vector<ScoredDocumentHit> scored_document_hits = PopTopResultsFromHeap( + &scored_document_hits_, num_requested, scored_document_hit_comparator_); + more_results_available = scored_document_hits.size() == num_requested; + auto itr = std::remove_if( + 
scored_document_hits.begin(), scored_document_hits.end(), + GroupResultLimiter(namespace_group_id_map_, group_result_limits_, + document_store)); + scored_document_hits.erase(itr, scored_document_hits.end()); + final_scored_document_hits.reserve(final_scored_document_hits.size() + + scored_document_hits.size()); + std::move(scored_document_hits.begin(), scored_document_hits.end(), + std::back_inserter(final_scored_document_hits)); + num_requested = num_per_page_ - final_scored_document_hits.size(); + } + + num_returned_ += final_scored_document_hits.size(); + return final_scored_document_hits; } void ResultState::TruncateHitsTo(int new_size) { diff --git a/icing/result/result-state.h b/icing/result/result-state.h index be92b85..303d610 100644 --- a/icing/result/result-state.h +++ b/icing/result/result-state.h @@ -23,6 +23,8 @@ #include "icing/result/projection-tree.h" #include "icing/result/snippet-context.h" #include "icing/scoring/scored-document-hit.h" +#include "icing/store/document-store.h" +#include "icing/store/namespace-id.h" namespace icing { namespace lib { @@ -31,17 +33,19 @@ namespace lib { // same query. Stored in ResultStateManager. class ResultState { public: - explicit ResultState(std::vector<ScoredDocumentHit> scored_document_hits, - SectionRestrictQueryTermsMap query_terms, - const SearchSpecProto& search_spec, - const ScoringSpecProto& scoring_spec, - const ResultSpecProto& result_spec); + ResultState(std::vector<ScoredDocumentHit> scored_document_hits, + SectionRestrictQueryTermsMap query_terms, + const SearchSpecProto& search_spec, + const ScoringSpecProto& scoring_spec, + const ResultSpecProto& result_spec, + const DocumentStore& document_store); // Returns the next page of results. The size of page is passed in from // ResultSpecProto in constructor. Calling this method could increase the // value of num_returned(), so be careful of the order of calling these // methods. 
- std::vector<ScoredDocumentHit> GetNextPage(); + std::vector<ScoredDocumentHit> GetNextPage( + const DocumentStore& document_store); // Truncates the vector of ScoredDocumentHits to the given size. The best // ScoredDocumentHits are kept. @@ -67,6 +71,10 @@ class ResultState { // increased when GetNextPage() is called. int num_returned() const { return num_returned_; } + // The number of results yet to be returned. This number is decreased when + // GetNextPage is called. + int num_remaining() const { return scored_document_hits_.size(); } + private: // The scored document hits. It represents a heap data structure when ranking // is required so that we can get top K hits in O(KlgN) time. If no ranking is @@ -79,6 +87,13 @@ class ResultState { // Information needed for projection. std::unordered_map<std::string, ProjectionTree> projection_tree_map_; + // A map between namespace id and the id of the group that it appears in. + std::unordered_map<NamespaceId, int> namespace_group_id_map_; + + // The count of remaining results to return for a group where group id is the + // index. + std::vector<int> group_result_limits_; + // Number of results to return in each page. 
int num_per_page_; diff --git a/icing/result/result-state_test.cc b/icing/result/result-state_test.cc index 85cb242..f2121a5 100644 --- a/icing/result/result-state_test.cc +++ b/icing/result/result-state_test.cc @@ -15,9 +15,15 @@ #include "icing/result/result-state.h" #include "gtest/gtest.h" +#include "icing/document-builder.h" +#include "icing/file/filesystem.h" #include "icing/portable/equals-proto.h" +#include "icing/schema/schema-store.h" #include "icing/scoring/scored-document-hit.h" +#include "icing/store/document-store.h" #include "icing/testing/common-matchers.h" +#include "icing/testing/tmp-directory.h" +#include "icing/util/clock.h" namespace icing { namespace lib { @@ -50,42 +56,90 @@ ResultSpecProto CreateResultSpec(int num_per_page) { return result_spec; } +class ResultStateTest : public testing::Test { + protected: + void SetUp() override { + schema_store_base_dir_ = GetTestTempDir() + "/schema_store"; + filesystem_.CreateDirectoryRecursively(schema_store_base_dir_.c_str()); + ICING_ASSERT_OK_AND_ASSIGN( + schema_store_, + SchemaStore::Create(&filesystem_, schema_store_base_dir_, &clock_)); + SchemaProto schema; + schema.add_types()->set_schema_type("Document"); + ICING_ASSERT_OK(schema_store_->SetSchema(std::move(schema))); + + doc_store_base_dir_ = GetTestTempDir() + "/document_store"; + filesystem_.CreateDirectoryRecursively(doc_store_base_dir_.c_str()); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult result, + DocumentStore::Create(&filesystem_, doc_store_base_dir_, &clock_, + schema_store_.get())); + document_store_ = std::move(result.document_store); + } + + void TearDown() override { + filesystem_.DeleteDirectoryRecursively(doc_store_base_dir_.c_str()); + filesystem_.DeleteDirectoryRecursively(schema_store_base_dir_.c_str()); + } + + ScoredDocumentHit AddScoredDocument(DocumentId document_id) { + DocumentProto document; + document.set_namespace_("namespace"); + document.set_uri(std::to_string(document_id)); + 
document.set_schema("Document"); + document_store_->Put(std::move(document)); + return ScoredDocumentHit(document_id, kSectionIdMaskNone, /*score=*/1); + } + + DocumentStore& document_store() { return *document_store_; } + + private: + Filesystem filesystem_; + std::string doc_store_base_dir_; + std::string schema_store_base_dir_; + Clock clock_; + std::unique_ptr<DocumentStore> document_store_; + std::unique_ptr<SchemaStore> schema_store_; +}; + // ResultState::ResultState() and ResultState::GetNextPage() are calling // Ranker::BuildHeapInPlace() and Ranker::PopTopResultsFromHeap() directly, so // we don't need to test much on what order is returned as that is tested in // Ranker's tests. Here we just need one sanity test to make sure that the // correct functions are called. -TEST(ResultStateTest, ShouldReturnNextPage) { +TEST_F(ResultStateTest, ShouldReturnNextPage) { + ScoredDocumentHit scored_hit_0 = AddScoredDocument(/*document_id=*/0); + ScoredDocumentHit scored_hit_1 = AddScoredDocument(/*document_id=*/1); + ScoredDocumentHit scored_hit_2 = AddScoredDocument(/*document_id=*/2); + ScoredDocumentHit scored_hit_3 = AddScoredDocument(/*document_id=*/3); + ScoredDocumentHit scored_hit_4 = AddScoredDocument(/*document_id=*/4); std::vector<ScoredDocumentHit> scored_document_hits = { - CreateScoredDocumentHit(/*document_id=*/2), - CreateScoredDocumentHit(/*document_id=*/1), - CreateScoredDocumentHit(/*document_id=*/3), - CreateScoredDocumentHit(/*document_id=*/5), - CreateScoredDocumentHit(/*document_id=*/4)}; + scored_hit_1, scored_hit_0, scored_hit_2, scored_hit_4, scored_hit_3}; ResultState result_state(scored_document_hits, /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/true), - CreateResultSpec(/*num_per_page=*/2)); + CreateResultSpec(/*num_per_page=*/2), + document_store()); EXPECT_THAT( - result_state.GetNextPage(), + result_state.GetNextPage(document_store()), ElementsAre( - 
EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/5)), - EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/4)))); + EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/4)), + EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/3)))); EXPECT_THAT( - result_state.GetNextPage(), + result_state.GetNextPage(document_store()), ElementsAre( - EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/3)), - EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/2)))); + EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/2)), + EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/1)))); - EXPECT_THAT(result_state.GetNextPage(), + EXPECT_THAT(result_state.GetNextPage(document_store()), ElementsAre(EqualsScoredDocumentHit( - CreateScoredDocumentHit(/*document_id=*/1)))); + CreateScoredDocumentHit(/*document_id=*/0)))); } -TEST(ResultStateTest, ShouldReturnSnippetContextAccordingToSpecs) { +TEST_F(ResultStateTest, ShouldReturnSnippetContextAccordingToSpecs) { ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2); result_spec.mutable_snippet_spec()->set_num_to_snippet(5); result_spec.mutable_snippet_spec()->set_num_matches_per_property(5); @@ -97,7 +151,8 @@ TEST(ResultStateTest, ShouldReturnSnippetContextAccordingToSpecs) { ResultState result_state( /*scored_document_hits=*/{}, query_terms_map, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), result_spec); + CreateScoringSpec(/*is_descending_order=*/true), result_spec, + document_store()); const SnippetContext& snippet_context = result_state.snippet_context(); @@ -117,7 +172,7 @@ TEST(ResultStateTest, ShouldReturnSnippetContextAccordingToSpecs) { EXPECT_THAT(snippet_context2.match_type, Eq(TermMatchType::EXACT_ONLY)); } -TEST(ResultStateTest, NoSnippetingShouldReturnNull) { +TEST_F(ResultStateTest, NoSnippetingShouldReturnNull) { ResultSpecProto result_spec = 
CreateResultSpec(/*num_per_page=*/2); // Setting num_to_snippet to 0 so that snippeting info won't be // stored. @@ -131,7 +186,7 @@ TEST(ResultStateTest, NoSnippetingShouldReturnNull) { ResultState result_state(/*scored_document_hits=*/{}, query_terms_map, CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/true), - result_spec); + result_spec, document_store()); const SnippetContext& snippet_context = result_state.snippet_context(); EXPECT_THAT(snippet_context.query_terms, IsEmpty()); @@ -141,72 +196,375 @@ TEST(ResultStateTest, NoSnippetingShouldReturnNull) { EXPECT_THAT(snippet_context.match_type, TermMatchType::UNKNOWN); } -TEST(ResultStateTest, ShouldTruncateToNewSize) { +TEST_F(ResultStateTest, ShouldTruncateToNewSize) { + ScoredDocumentHit scored_hit_0 = AddScoredDocument(/*document_id=*/0); + ScoredDocumentHit scored_hit_1 = AddScoredDocument(/*document_id=*/1); + ScoredDocumentHit scored_hit_2 = AddScoredDocument(/*document_id=*/2); + ScoredDocumentHit scored_hit_3 = AddScoredDocument(/*document_id=*/3); + ScoredDocumentHit scored_hit_4 = AddScoredDocument(/*document_id=*/4); std::vector<ScoredDocumentHit> scored_document_hits = { - CreateScoredDocumentHit(/*document_id=*/2), - CreateScoredDocumentHit(/*document_id=*/1), - CreateScoredDocumentHit(/*document_id=*/3), - CreateScoredDocumentHit(/*document_id=*/5), - CreateScoredDocumentHit(/*document_id=*/4)}; + scored_hit_1, scored_hit_0, scored_hit_2, scored_hit_4, scored_hit_3}; // Creates a ResultState with 5 ScoredDocumentHits. ResultState result_state(scored_document_hits, /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/true), - CreateResultSpec(/*num_per_page=*/5)); + CreateResultSpec(/*num_per_page=*/5), + document_store()); result_state.TruncateHitsTo(/*new_size=*/3); // The best 3 are left. 
EXPECT_THAT( - result_state.GetNextPage(), + result_state.GetNextPage(document_store()), ElementsAre( - EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/5)), EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/4)), - EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/3)))); + EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/3)), + EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/2)))); } -TEST(ResultStateTest, ShouldTruncateToZero) { +TEST_F(ResultStateTest, ShouldTruncateToZero) { + ScoredDocumentHit scored_hit_0 = AddScoredDocument(/*document_id=*/0); + ScoredDocumentHit scored_hit_1 = AddScoredDocument(/*document_id=*/1); + ScoredDocumentHit scored_hit_2 = AddScoredDocument(/*document_id=*/2); + ScoredDocumentHit scored_hit_3 = AddScoredDocument(/*document_id=*/3); + ScoredDocumentHit scored_hit_4 = AddScoredDocument(/*document_id=*/4); std::vector<ScoredDocumentHit> scored_document_hits = { - CreateScoredDocumentHit(/*document_id=*/2), - CreateScoredDocumentHit(/*document_id=*/1), - CreateScoredDocumentHit(/*document_id=*/3), - CreateScoredDocumentHit(/*document_id=*/5), - CreateScoredDocumentHit(/*document_id=*/4)}; + scored_hit_1, scored_hit_0, scored_hit_2, scored_hit_4, scored_hit_3}; // Creates a ResultState with 5 ScoredDocumentHits. 
ResultState result_state(scored_document_hits, /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/true), - CreateResultSpec(/*num_per_page=*/5)); + CreateResultSpec(/*num_per_page=*/5), + document_store()); result_state.TruncateHitsTo(/*new_size=*/0); - EXPECT_THAT(result_state.GetNextPage(), IsEmpty()); + EXPECT_THAT(result_state.GetNextPage(document_store()), IsEmpty()); } -TEST(ResultStateTest, ShouldNotTruncateToNegative) { +TEST_F(ResultStateTest, ShouldNotTruncateToNegative) { + ScoredDocumentHit scored_hit_0 = AddScoredDocument(/*document_id=*/0); + ScoredDocumentHit scored_hit_1 = AddScoredDocument(/*document_id=*/1); + ScoredDocumentHit scored_hit_2 = AddScoredDocument(/*document_id=*/2); + ScoredDocumentHit scored_hit_3 = AddScoredDocument(/*document_id=*/3); + ScoredDocumentHit scored_hit_4 = AddScoredDocument(/*document_id=*/4); std::vector<ScoredDocumentHit> scored_document_hits = { - CreateScoredDocumentHit(/*document_id=*/2), - CreateScoredDocumentHit(/*document_id=*/1), - CreateScoredDocumentHit(/*document_id=*/3), - CreateScoredDocumentHit(/*document_id=*/5), - CreateScoredDocumentHit(/*document_id=*/4)}; + scored_hit_1, scored_hit_0, scored_hit_2, scored_hit_4, scored_hit_3}; // Creates a ResultState with 5 ScoredDocumentHits. ResultState result_state(scored_document_hits, /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/true), - CreateResultSpec(/*num_per_page=*/5)); + CreateResultSpec(/*num_per_page=*/5), + document_store()); result_state.TruncateHitsTo(/*new_size=*/-1); // Results are not affected. 
EXPECT_THAT( - result_state.GetNextPage(), + result_state.GetNextPage(document_store()), ElementsAre( - EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/5)), EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/4)), EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/3)), EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/2)), - EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/1)))); + EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/1)), + EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/0)))); +} + +TEST_F(ResultStateTest, ResultGroupingShouldLimitResults) { + // Creates 2 documents and ensures the relationship in terms of document + // score is: document1 < document2 + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace", "uri/1") + .SetSchema("Document") + .SetScore(1) + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace", "uri/2") + .SetSchema("Document") + .SetScore(2) + .Build(); + + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store().Put(document1)); + ScoredDocumentHit scored_hit_1(document_id1, kSectionIdMaskNone, + document1.score()); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store().Put(document2)); + ScoredDocumentHit scored_hit_2(document_id2, kSectionIdMaskNone, + document2.score()); + std::vector<ScoredDocumentHit> scored_document_hits = {scored_hit_2, + scored_hit_1}; + + // Create a ResultSpec that limits "namespace" to a single result. + ResultSpecProto result_spec; + result_spec.set_num_per_page(5); + ResultSpecProto::ResultGrouping* result_grouping = + result_spec.add_result_groupings(); + result_grouping->set_max_results(1); + result_grouping->add_namespaces("namespace"); + + // Creates a ResultState with 2 ScoredDocumentHits. 
+ ResultState result_state(scored_document_hits, /*query_terms=*/{}, + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/true), + result_spec, document_store()); + + // Only the top ranked document in "namespace" (document2), should be + // returned. + EXPECT_THAT(result_state.GetNextPage(document_store()), + ElementsAre(EqualsScoredDocumentHit(scored_hit_2))); +} + +TEST_F(ResultStateTest, ResultGroupingDoesNotLimitOtherNamespaceResults) { + // Creates 4 documents and ensures the relationship in terms of document + // score is: document1 < document2 < document3 < document4 + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri/1") + .SetSchema("Document") + .SetScore(1) + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace1", "uri/2") + .SetSchema("Document") + .SetScore(2) + .Build(); + DocumentProto document3 = DocumentBuilder() + .SetKey("namespace2", "uri/3") + .SetSchema("Document") + .SetScore(3) + .Build(); + DocumentProto document4 = DocumentBuilder() + .SetKey("namespace2", "uri/4") + .SetSchema("Document") + .SetScore(4) + .Build(); + + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store().Put(document1)); + ScoredDocumentHit scored_hit_1(document_id1, kSectionIdMaskNone, + document1.score()); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store().Put(document2)); + ScoredDocumentHit scored_hit_2(document_id2, kSectionIdMaskNone, + document2.score()); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, + document_store().Put(document3)); + ScoredDocumentHit scored_hit_3(document_id3, kSectionIdMaskNone, + document3.score()); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4, + document_store().Put(document4)); + ScoredDocumentHit scored_hit_4(document_id4, kSectionIdMaskNone, + document4.score()); + std::vector<ScoredDocumentHit> scored_document_hits = { + scored_hit_4, scored_hit_3, scored_hit_2, scored_hit_1}; + + // Create a 
ResultSpec that limits "namespace1" to a single result, but + // doesn't limit "namespace2". + ResultSpecProto result_spec; + result_spec.set_num_per_page(5); + ResultSpecProto::ResultGrouping* result_grouping = + result_spec.add_result_groupings(); + result_grouping->set_max_results(1); + result_grouping->add_namespaces("namespace1"); + + // Creates a ResultState with 4 ScoredDocumentHits. + ResultState result_state(scored_document_hits, /*query_terms=*/{}, + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/true), + result_spec, document_store()); + + // Only the top ranked document in "namespace" (document2), should be + // returned. + EXPECT_THAT(result_state.GetNextPage(document_store()), + ElementsAre(EqualsScoredDocumentHit(scored_hit_4), + EqualsScoredDocumentHit(scored_hit_3), + EqualsScoredDocumentHit(scored_hit_2))); +} + +TEST_F(ResultStateTest, ResultGroupingNonexistentNamespaceShouldBeIgnored) { + // Creates 2 documents and ensures the relationship in terms of document + // score is: document1 < document2 + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace", "uri/1") + .SetSchema("Document") + .SetScore(1) + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace", "uri/2") + .SetSchema("Document") + .SetScore(2) + .Build(); + + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store().Put(document1)); + ScoredDocumentHit scored_hit_1(document_id1, kSectionIdMaskNone, + document1.score()); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store().Put(document2)); + ScoredDocumentHit scored_hit_2(document_id2, kSectionIdMaskNone, + document2.score()); + std::vector<ScoredDocumentHit> scored_document_hits = {scored_hit_2, + scored_hit_1}; + + // Create a ResultSpec that limits "namespace"+"nonExistentNamespace" to a + // single result. 
+ ResultSpecProto result_spec; + result_spec.set_num_per_page(5); + ResultSpecProto::ResultGrouping* result_grouping = + result_spec.add_result_groupings(); + result_grouping->set_max_results(1); + result_grouping->add_namespaces("namespace"); + result_grouping->add_namespaces("nonexistentNamespace"); + + // Creates a ResultState with 2 ScoredDocumentHits. + ResultState result_state(scored_document_hits, /*query_terms=*/{}, + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/true), + result_spec, document_store()); + + // Only the top ranked document in "namespace" (document2), should be + // returned. The presence of "nonexistentNamespace" in the same result + // grouping should have no effect. + EXPECT_THAT(result_state.GetNextPage(document_store()), + ElementsAre(EqualsScoredDocumentHit(scored_hit_2))); +} + +TEST_F(ResultStateTest, ResultGroupingMultiNamespaceGrouping) { + // Creates 6 documents and ensures the relationship in terms of document + // score is: document1 < document2 < document3 < document4 < document5 < + // document6 + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri/1") + .SetSchema("Document") + .SetScore(1) + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace1", "uri/2") + .SetSchema("Document") + .SetScore(2) + .Build(); + DocumentProto document3 = DocumentBuilder() + .SetKey("namespace2", "uri/3") + .SetSchema("Document") + .SetScore(3) + .Build(); + DocumentProto document4 = DocumentBuilder() + .SetKey("namespace2", "uri/4") + .SetSchema("Document") + .SetScore(4) + .Build(); + DocumentProto document5 = DocumentBuilder() + .SetKey("namespace3", "uri/5") + .SetSchema("Document") + .SetScore(5) + .Build(); + DocumentProto document6 = DocumentBuilder() + .SetKey("namespace3", "uri/6") + .SetSchema("Document") + .SetScore(6) + .Build(); + + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store().Put(document1)); + ScoredDocumentHit 
scored_hit_1(document_id1, kSectionIdMaskNone, + document1.score()); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store().Put(document2)); + ScoredDocumentHit scored_hit_2(document_id2, kSectionIdMaskNone, + document2.score()); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, + document_store().Put(document3)); + ScoredDocumentHit scored_hit_3(document_id3, kSectionIdMaskNone, + document3.score()); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4, + document_store().Put(document4)); + ScoredDocumentHit scored_hit_4(document_id4, kSectionIdMaskNone, + document4.score()); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5, + document_store().Put(document5)); + ScoredDocumentHit scored_hit_5(document_id5, kSectionIdMaskNone, + document5.score()); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id6, + document_store().Put(document6)); + ScoredDocumentHit scored_hit_6(document_id6, kSectionIdMaskNone, + document6.score()); + std::vector<ScoredDocumentHit> scored_document_hits = { + scored_hit_6, scored_hit_5, scored_hit_4, + scored_hit_3, scored_hit_2, scored_hit_1}; + + // Create a ResultSpec that limits "namespace1" to a single result and limits + // "namespace2"+"namespace3" to a total of two results. + ResultSpecProto result_spec; + result_spec.set_num_per_page(5); + ResultSpecProto::ResultGrouping* result_grouping = + result_spec.add_result_groupings(); + result_grouping->set_max_results(1); + result_grouping->add_namespaces("namespace1"); + result_grouping = result_spec.add_result_groupings(); + result_grouping->set_max_results(2); + result_grouping->add_namespaces("namespace2"); + result_grouping->add_namespaces("namespace3"); + + // Creates a ResultState with 4 ScoredDocumentHits. 
+ ResultState result_state(scored_document_hits, /*query_terms=*/{}, + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/true), + result_spec, document_store()); + + // Only the top-ranked result in "namespace1" (document2) should be returned. + // Only the top-ranked results across "namespace2" and "namespace3" + // (document6, document5) should be returned. + EXPECT_THAT(result_state.GetNextPage(document_store()), + ElementsAre(EqualsScoredDocumentHit(scored_hit_6), + EqualsScoredDocumentHit(scored_hit_5), + EqualsScoredDocumentHit(scored_hit_2))); +} + +TEST_F(ResultStateTest, ResultGroupingOnlyNonexistentNamespaces) { + // Creates 2 documents and ensures the relationship in terms of document + // score is: document1 < document2 + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace", "uri/1") + .SetSchema("Document") + .SetScore(1) + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace", "uri/2") + .SetSchema("Document") + .SetScore(2) + .Build(); + + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store().Put(document1)); + ScoredDocumentHit scored_hit_1(document_id1, kSectionIdMaskNone, + document1.score()); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store().Put(document2)); + ScoredDocumentHit scored_hit_2(document_id2, kSectionIdMaskNone, + document2.score()); + std::vector<ScoredDocumentHit> scored_document_hits = {scored_hit_2, + scored_hit_1}; + + // Create a ResultSpec that limits "nonexistentNamespace" to a single result. + // but doesn't limit "namespace" + ResultSpecProto result_spec; + result_spec.set_num_per_page(5); + ResultSpecProto::ResultGrouping* result_grouping = + result_spec.add_result_groupings(); + result_grouping->set_max_results(1); + result_grouping->add_namespaces("nonexistentNamespace"); + + // Creates a ResultState with 2 ScoredDocumentHits. 
+ ResultState result_state(scored_document_hits, /*query_terms=*/{}, + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/true), + result_spec, document_store()); + + // All documents in "namespace" should be returned. The presence of + // "nonexistentNamespace" should have no effect. + EXPECT_THAT(result_state.GetNextPage(document_store()), + ElementsAre(EqualsScoredDocumentHit(scored_hit_2), + EqualsScoredDocumentHit(scored_hit_1))); } } // namespace diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test-jni.cc b/icing/result/snippet-retriever-test-jni-layer.cc index 8392363..707d9ee 100644 --- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test-jni.cc +++ b/icing/result/snippet-retriever-test-jni-layer.cc @@ -21,12 +21,11 @@ JNIEnv* g_jenv = nullptr; extern "C" JNIEXPORT jboolean JNICALL -Java_icing_tokenization_reverse_1jni_ReverseJniLanguageSegmenterTest_testsMain( - JNIEnv* env, jclass ignored) { +Java_icing_jni_SnippetRetrieverJniTest_testsMain(JNIEnv* env, jclass ignored) { g_jenv = env; std::vector<char*> my_argv; - char arg[] = "reverse-jni-language-segmenter-test-lib"; + char arg[] = "jni-test-lib"; my_argv.push_back(arg); int argc = 1; char** argv = &(my_argv[0]); diff --git a/icing/result/snippet-retriever.cc b/icing/result/snippet-retriever.cc index d4a5f79..33b343e 100644 --- a/icing/result/snippet-retriever.cc +++ b/icing/result/snippet-retriever.cc @@ -15,6 +15,7 @@ #include "icing/result/snippet-retriever.h" #include <algorithm> +#include <iterator> #include <memory> #include <string> #include <string_view> @@ -25,9 +26,12 @@ #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/absl_ports/canonical_errors.h" +#include "icing/absl_ports/str_cat.h" +#include "icing/absl_ports/str_join.h" #include "icing/proto/term.pb.h" #include "icing/query/query-terms.h" #include "icing/schema/schema-store.h" +#include "icing/schema/section-manager.h" #include 
"icing/schema/section.h" #include "icing/store/document-filter-data.h" #include "icing/tokenization/language-segmenter.h" @@ -35,6 +39,7 @@ #include "icing/tokenization/tokenizer-factory.h" #include "icing/tokenization/tokenizer.h" #include "icing/transform/normalizer.h" +#include "icing/util/character-iterator.h" #include "icing/util/i18n-utils.h" #include "icing/util/status-macros.h" @@ -43,10 +48,47 @@ namespace lib { namespace { +const PropertyProto* GetProperty(const DocumentProto& document, + std::string_view property_name) { + for (const PropertyProto& property : document.properties()) { + if (property.name() == property_name) { + return &property; + } + } + return nullptr; +} + +inline std::string AddPropertyToPath(const std::string& current_path, + std::string_view property) { + if (current_path.empty()) { + return std::string(property); + } + return absl_ports::StrCat(current_path, kPropertySeparator, property); +} + +inline std::string AddIndexToPath(int values_size, int index, + const std::string& property_path) { + if (values_size == 1) { + return property_path; + } + return absl_ports::StrCat(property_path, kLBracket, std::to_string(index), + kRBracket); +} + class TokenMatcher { public: virtual ~TokenMatcher() = default; - virtual bool Matches(Token token) const = 0; + + // Returns a CharacterIterator pointing just past the end of the substring in + // token.text that matches a query term. Note that the utf* indices will be + // in relation to token.text's start. + // + // If there is no match, then it will construct a CharacterIterator with all + // of its indices set to -1. + // + // Ex. With an exact matcher, query terms=["foo","bar"] and token.text="bar", + // Matches will return a CharacterIterator(u8:3, u16:3, u32:3). 
+ virtual CharacterIterator Matches(Token token) const = 0; }; class TokenMatcherExact : public TokenMatcher { @@ -59,10 +101,17 @@ class TokenMatcherExact : public TokenMatcher { restricted_query_terms_(restricted_query_terms), normalizer_(normalizer) {} - bool Matches(Token token) const override { + CharacterIterator Matches(Token token) const override { std::string s = normalizer_.NormalizeTerm(token.text); - return (unrestricted_query_terms_.count(s) > 0) || - (restricted_query_terms_.count(s) > 0); + auto itr = unrestricted_query_terms_.find(s); + if (itr == unrestricted_query_terms_.end()) { + itr = restricted_query_terms_.find(s); + } + if (itr != unrestricted_query_terms_.end() && + itr != restricted_query_terms_.end()) { + return normalizer_.CalculateNormalizedMatchLength(token.text, *itr); + } + return CharacterIterator(token.text, -1, -1, -1); } private: @@ -81,22 +130,23 @@ class TokenMatcherPrefix : public TokenMatcher { restricted_query_terms_(restricted_query_terms), normalizer_(normalizer) {} - bool Matches(Token token) const override { + CharacterIterator Matches(Token token) const override { std::string s = normalizer_.NormalizeTerm(token.text); - if (std::any_of(unrestricted_query_terms_.begin(), - unrestricted_query_terms_.end(), - [&s](const std::string& term) { - return term.length() <= s.length() && - s.compare(0, term.length(), term) == 0; - })) { - return true; + for (const std::string& query_term : unrestricted_query_terms_) { + if (query_term.length() <= s.length() && + s.compare(0, query_term.length(), query_term) == 0) { + return normalizer_.CalculateNormalizedMatchLength(token.text, + query_term); + } + } + for (const std::string& query_term : restricted_query_terms_) { + if (query_term.length() <= s.length() && + s.compare(0, query_term.length(), query_term) == 0) { + return normalizer_.CalculateNormalizedMatchLength(token.text, + query_term); + } } - return std::any_of(restricted_query_terms_.begin(), - restricted_query_terms_.end(), 
- [&s](const std::string& term) { - return term.length() <= s.length() && - s.compare(0, term.length(), term) == 0; - }); + return CharacterIterator(token.text, -1, -1, -1); } private: @@ -124,110 +174,170 @@ libtextclassifier3::StatusOr<std::unique_ptr<TokenMatcher>> CreateTokenMatcher( } } -// Returns true if token matches any of the terms in query terms according to -// the provided match type. +// Finds the start position of a valid token that is after +// window_start_min_exclusive_utf32 // // Returns: // the position of the window start if successful // INTERNAL_ERROR - if a tokenizer error is encountered -libtextclassifier3::StatusOr<int> DetermineWindowStart( +libtextclassifier3::StatusOr<CharacterIterator> DetermineWindowStart( const ResultSpecProto::SnippetSpecProto& snippet_spec, - std::string_view value, int match_mid, Tokenizer::Iterator* iterator) { - int window_start_min = (match_mid - snippet_spec.max_window_bytes() / 2) - 1; - if (window_start_min < 0) { - return 0; - } - if (!iterator->ResetToTokenAfter(window_start_min)) { + std::string_view value, int window_start_min_exclusive_utf32, + Tokenizer::Iterator* iterator) { + if (!iterator->ResetToTokenAfter(window_start_min_exclusive_utf32)) { return absl_ports::InternalError( "Couldn't reset tokenizer to determine snippet window!"); } - return iterator->GetToken().text.data() - value.data(); + return iterator->CalculateTokenStart(); } // Increments window_end_exclusive so long as the character at the position // of window_end_exclusive is punctuation and does not exceed -// window_end_max_exclusive. -int IncludeTrailingPunctuation(std::string_view value, int window_end_exclusive, - int window_end_max_exclusive) { - while (window_end_exclusive < window_end_max_exclusive) { +// window_end_max_exclusive_utf32. 
+CharacterIterator IncludeTrailingPunctuation( + std::string_view value, CharacterIterator window_end_exclusive, + int window_end_max_exclusive_utf32) { + while (window_end_exclusive.utf32_index() < window_end_max_exclusive_utf32) { int char_len = 0; - if (!i18n_utils::IsPunctuationAt(value, window_end_exclusive, &char_len)) { - break; - } - if (window_end_exclusive + char_len > window_end_max_exclusive) { - // This is punctuation, but it goes beyond the window end max. Don't - // include it. + if (!i18n_utils::IsPunctuationAt(value, window_end_exclusive.utf8_index(), + &char_len)) { break; } // Expand window by char_len and check the next character. - window_end_exclusive += char_len; + window_end_exclusive.AdvanceToUtf32(window_end_exclusive.utf32_index() + 1); } return window_end_exclusive; } +// Finds the end position of a valid token that is before the +// window_end_max_exclusive_utf32. +// // Returns: // the position of the window end if successful // INTERNAL_ERROR - if a tokenizer error is encountered -libtextclassifier3::StatusOr<int> DetermineWindowEnd( +libtextclassifier3::StatusOr<CharacterIterator> DetermineWindowEnd( const ResultSpecProto::SnippetSpecProto& snippet_spec, - std::string_view value, int match_mid, Tokenizer::Iterator* iterator) { - int window_end_max_exclusive = - match_mid + snippet_spec.max_window_bytes() / 2; - if (window_end_max_exclusive >= value.length()) { - return value.length(); - } - if (!iterator->ResetToTokenBefore(window_end_max_exclusive)) { + std::string_view value, int window_end_max_exclusive_utf32, + Tokenizer::Iterator* iterator) { + if (!iterator->ResetToTokenBefore(window_end_max_exclusive_utf32)) { return absl_ports::InternalError( "Couldn't reset tokenizer to determine snippet window!"); } - int window_end_exclusive = iterator->GetToken().text.data() - value.data() + - iterator->GetToken().text.length(); - return IncludeTrailingPunctuation(value, window_end_exclusive, - window_end_max_exclusive); + 
ICING_ASSIGN_OR_RETURN(CharacterIterator end_exclusive, + iterator->CalculateTokenEndExclusive()); + return IncludeTrailingPunctuation(value, end_exclusive, + window_end_max_exclusive_utf32); } struct SectionData { std::string_view section_name; std::string_view section_subcontent; - // Identifies which subsection of the section content, section_subcontent has - // come from. - // Ex. "recipient.address" : - // ["foo@google.com", "bar@google.com", "baz@google.com"] - // The subcontent_index of "bar@google.com" is 1. - int subcontent_index; }; +// Creates a snippet match proto for the match pointed to by the iterator and +// char_iterator +// +// Returns: +// the position of the window start if successful +// INTERNAL_ERROR - if a tokenizer error is encountered and iterator is left +// in an invalid state +// ABORTED_ERROR - if an invalid utf-8 sequence is encountered libtextclassifier3::StatusOr<SnippetMatchProto> RetrieveMatch( const ResultSpecProto::SnippetSpecProto& snippet_spec, - const SectionData& value, Tokenizer::Iterator* iterator) { + const SectionData& value, Tokenizer::Iterator* iterator, + const CharacterIterator& char_iterator) { SnippetMatchProto snippet_match; - snippet_match.set_values_index(value.subcontent_index); - - Token match = iterator->GetToken(); - int match_pos = match.text.data() - value.section_subcontent.data(); - int match_mid = match_pos + match.text.length() / 2; - - snippet_match.set_exact_match_position(match_pos); - snippet_match.set_exact_match_bytes(match.text.length()); - - if (snippet_spec.max_window_bytes() > match.text.length()) { + ICING_ASSIGN_OR_RETURN(CharacterIterator start_itr, + iterator->CalculateTokenStart()); + ICING_ASSIGN_OR_RETURN(CharacterIterator end_itr, + iterator->CalculateTokenEndExclusive()); + + // When finding boundaries, we have a few cases: + // + // Case 1: + // If we have an odd length match an odd length window, the window surrounds + // the match perfectly. 
+ // match = "bar" in "foo bar baz" + // window = |---| + // + // Case 2: + // If we have an even length match with an even length window, the window + // surrounds the match perfectly. + // match = "baar" in "foo baar baz" + // window = |----| + // + // Case 3: + // If we have an odd length match with an even length window, we allocate + // that extra window byte to the beginning. + // match = "bar" in "foo bar baz" + // window = |----| + // + // Case 4: + // If we have an even length match with an odd length window, we allocate + // that extra window byte to the end. + // match = "baar" in "foo baar baz" + // window = |-----| + // + // We have do +1/-1 below to get the math to match up. + int match_pos_utf32 = start_itr.utf32_index(); + int match_len_utf32 = end_itr.utf32_index() - match_pos_utf32; + int match_mid_utf32 = match_pos_utf32 + match_len_utf32 / 2; + int window_start_min_exclusive_utf32 = + (match_mid_utf32 - snippet_spec.max_window_bytes() / 2) - 1; + int window_end_max_exclusive_utf32 = + match_mid_utf32 + (snippet_spec.max_window_bytes() + 1) / 2; + + snippet_match.set_exact_match_byte_position(start_itr.utf8_index()); + snippet_match.set_exact_match_utf16_position(start_itr.utf16_index()); + snippet_match.set_exact_match_byte_length(end_itr.utf8_index() - + start_itr.utf8_index()); + snippet_match.set_exact_match_utf16_length(end_itr.utf16_index() - + start_itr.utf16_index()); + + // Only include windows if it'll at least include the matched text. Otherwise, + // it'll just be an empty string anyways. + if (snippet_spec.max_window_bytes() >= match_len_utf32) { // Find the beginning of the window. ICING_ASSIGN_OR_RETURN( - int window_start, - DetermineWindowStart(snippet_spec, value.section_subcontent, match_mid, - iterator)); - snippet_match.set_window_position(window_start); + CharacterIterator window_start, + DetermineWindowStart(snippet_spec, value.section_subcontent, + window_start_min_exclusive_utf32, iterator)); + + // Check. 
Did we get fewer characters than we requested? If so, then add it + // on to the window_end. + int extra_window_space = + window_start.utf32_index() - 1 - window_start_min_exclusive_utf32; + window_end_max_exclusive_utf32 += extra_window_space; // Find the end of the window. ICING_ASSIGN_OR_RETURN( - int window_end_exclusive, - DetermineWindowEnd(snippet_spec, value.section_subcontent, match_mid, - iterator)); - snippet_match.set_window_bytes(window_end_exclusive - window_start); + CharacterIterator window_end, + DetermineWindowEnd(snippet_spec, value.section_subcontent, + window_end_max_exclusive_utf32, iterator)); + + // Check one more time. Did we get fewer characters than we requested? If + // so, then see if we can push the start back again. + extra_window_space = + window_end_max_exclusive_utf32 - window_end.utf32_index(); + if (extra_window_space > 0) { + window_start_min_exclusive_utf32 = + window_start.utf32_index() - 1 - extra_window_space; + ICING_ASSIGN_OR_RETURN( + window_start, + DetermineWindowStart(snippet_spec, value.section_subcontent, + window_start_min_exclusive_utf32, iterator)); + } + + snippet_match.set_window_byte_position(window_start.utf8_index()); + snippet_match.set_window_utf16_position(window_start.utf16_index()); + snippet_match.set_window_byte_length(window_end.utf8_index() - + window_start.utf8_index()); + snippet_match.set_window_utf16_length(window_end.utf16_index() - + window_start.utf16_index()); // DetermineWindowStart/End may change the position of the iterator. So, // reset the iterator back to the original position. - bool success = (match_pos > 0) ? iterator->ResetToTokenAfter(match_pos - 1) + bool success = (match_pos_utf32 > 0) ? 
iterator->ResetToTokenAfter(match_pos_utf32 - 1) : iterator->ResetToStart(); if (!success) { return absl_ports::InternalError( @@ -243,33 +353,142 @@ struct MatchOptions { int max_matches_remaining; }; -libtextclassifier3::StatusOr<SnippetProto::EntryProto> RetrieveMatches( - const TokenMatcher* matcher, const MatchOptions& match_options, - const SectionData& value, const Tokenizer* tokenizer) { - SnippetProto::EntryProto snippet_entry; - snippet_entry.set_property_name(std::string(value.section_name)); - ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> iterator, - tokenizer->Tokenize(value.section_subcontent)); - while (iterator->Advance()) { - if (snippet_entry.snippet_matches_size() >= - match_options.max_matches_remaining) { - break; +// Retrieves snippets in the string values of current_property. +// Tokenizer is provided to tokenize string content and matcher is provided to +// indicate when a token matches content in the query. +// +// current_property is the property with the string values to snippet. +// property_path is the path in the document to current_property. +// +// MatchOptions holds the snippet spec and number of desired matches remaining. +// Each call to GetEntriesFromProperty will decrement max_matches_remaining +// by the number of entries that it adds to snippet_proto. +// +// The SnippetEntries found for matched content will be added to snippet_proto. +void GetEntriesFromProperty(const PropertyProto* current_property, + const std::string& property_path, + const TokenMatcher* matcher, + const Tokenizer* tokenizer, + MatchOptions* match_options, + SnippetProto* snippet_proto) { + // We're at the end. Let's check our values. 
+ for (int i = 0; i < current_property->string_values_size(); ++i) { + SnippetProto::EntryProto snippet_entry; + snippet_entry.set_property_name(AddIndexToPath( + current_property->string_values_size(), /*index=*/i, property_path)); + std::string_view value = current_property->string_values(i); + std::unique_ptr<Tokenizer::Iterator> iterator = + tokenizer->Tokenize(value).ValueOrDie(); + CharacterIterator char_iterator(value); + while (iterator->Advance()) { + Token token = iterator->GetToken(); + CharacterIterator submatch_end = matcher->Matches(token); + // If the token matched a query term, then submatch_end will point to an + // actual position within token.text. + if (submatch_end.utf8_index() != -1) { + if (!char_iterator.AdvanceToUtf8(token.text.data() - value.data())) { + // We can't get the char_iterator to a valid position, so there's no + // way for us to provide valid utf-16 indices. There's nothing more we + // can do here, so just return whatever we've built up so far. + if (!snippet_entry.snippet_matches().empty()) { + *snippet_proto->add_entries() = std::move(snippet_entry); + } + return; + } + SectionData data = {property_path, value}; + auto match_or = RetrieveMatch(match_options->snippet_spec, data, + iterator.get(), char_iterator); + if (!match_or.ok()) { + if (absl_ports::IsAborted(match_or.status())) { + // Only an aborted. We can't get this match, but we might be able to + // retrieve others. Just continue. + continue; + } else { + // Probably an internal error. The tokenizer iterator is probably in + // an invalid state. There's nothing more we can do here, so just + // return whatever we've built up so far. + if (!snippet_entry.snippet_matches().empty()) { + *snippet_proto->add_entries() = std::move(snippet_entry); + } + return; + } + } + SnippetMatchProto match = std::move(match_or).ValueOrDie(); + // submatch_end refers to a position *within* token.text. 
+ // This, conveniently enough, means that index that submatch_end points + // to is the length of the submatch (because the submatch starts at 0 in + // token.text). + match.set_submatch_byte_length(submatch_end.utf8_index()); + match.set_submatch_utf16_length(submatch_end.utf16_index()); + // Add the values for the submatch. + snippet_entry.mutable_snippet_matches()->Add(std::move(match)); + + if (--match_options->max_matches_remaining <= 0) { + *snippet_proto->add_entries() = std::move(snippet_entry); + return; + } + } } - Token token = iterator->GetToken(); - if (matcher->Matches(token)) { - // If there was an error while retrieving the match, the tokenizer - // iterator is probably in an invalid state. There's nothing we can do - // here, so just return. - ICING_ASSIGN_OR_RETURN( - SnippetMatchProto match, - RetrieveMatch(match_options.snippet_spec, value, iterator.get())); - snippet_entry.mutable_snippet_matches()->Add(std::move(match)); + if (!snippet_entry.snippet_matches().empty()) { + *snippet_proto->add_entries() = std::move(snippet_entry); } } - if (snippet_entry.snippet_matches().empty()) { - return absl_ports::NotFoundError("No matches found in value!"); +} + +// Retrieves snippets in document from content at section_path. +// Tokenizer is provided to tokenize string content and matcher is provided to +// indicate when a token matches content in the query. +// +// section_path_index refers to the current property that is held by document. +// current_path is equivalent to the first section_path_index values in +// section_path, but with value indices present. +// +// For example, suppose that a hit appeared somewhere in the "bcc.emailAddress". +// The arguments for RetrieveSnippetForSection might be +// {section_path=["bcc", "emailAddress"], section_path_index=0, current_path=""} +// on the first call and +// {section_path=["bcc", "emailAddress"], section_path_index=1, +// current_path="bcc[1]"} on the second recursive call. 
+// +// MatchOptions holds the snippet spec and number of desired matches remaining. +// Each call to RetrieveSnippetForSection will decrement max_matches_remaining +// by the number of entries that it adds to snippet_proto. +// +// The SnippetEntries found for matched content will be added to snippet_proto. +void RetrieveSnippetForSection( + const DocumentProto& document, const TokenMatcher* matcher, + const Tokenizer* tokenizer, + const std::vector<std::string_view>& section_path, int section_path_index, + const std::string& current_path, MatchOptions* match_options, + SnippetProto* snippet_proto) { + std::string_view next_property_name = section_path.at(section_path_index); + const PropertyProto* current_property = + GetProperty(document, next_property_name); + if (current_property == nullptr) { + ICING_VLOG(1) << "No property " << next_property_name << " found at path " + << current_path; + return; + } + std::string property_path = + AddPropertyToPath(current_path, next_property_name); + if (section_path_index == section_path.size() - 1) { + // We're at the end. Let's check our values. + GetEntriesFromProperty(current_property, property_path, matcher, tokenizer, + match_options, snippet_proto); + } else { + // Still got more to go. Let's look through our subdocuments. + std::vector<SnippetProto::EntryProto> entries; + for (int i = 0; i < current_property->document_values_size(); ++i) { + std::string new_path = AddIndexToPath( + current_property->document_values_size(), /*index=*/i, property_path); + RetrieveSnippetForSection(current_property->document_values(i), matcher, + tokenizer, section_path, section_path_index + 1, + new_path, match_options, snippet_proto); + if (match_options->max_matches_remaining <= 0) { + break; + } + } } - return snippet_entry; } } // namespace @@ -304,6 +523,10 @@ SnippetProto SnippetRetriever::RetrieveSnippet( // Remove this section from the mask. 
section_id_mask &= ~(1u << section_id); + MatchOptions match_options = {snippet_spec}; + match_options.max_matches_remaining = + snippet_spec.num_matches_per_property(); + // Determine the section name and match type. auto section_metadata_or = schema_store_.GetSectionMetadata(type_id, section_id); @@ -311,7 +534,9 @@ SnippetProto SnippetRetriever::RetrieveSnippet( continue; } const SectionMetadata* metadata = section_metadata_or.ValueOrDie(); - MatchOptions match_options = {snippet_spec}; + std::vector<std::string_view> section_path = + absl_ports::StrSplit(metadata->path, kPropertySeparator); + // Match type must be as restrictive as possible. Prefix matches for a // snippet should only be included if both the query is Prefix and the // section has prefixes enabled. @@ -330,38 +555,18 @@ SnippetProto SnippetRetriever::RetrieveSnippet( if (!matcher_or.ok()) { continue; } - match_options.max_matches_remaining = - snippet_spec.num_matches_per_property(); + std::unique_ptr<TokenMatcher> matcher = std::move(matcher_or).ValueOrDie(); - // Retrieve values and snippet them. - auto values_or = - schema_store_.GetStringSectionContent(document, metadata->path); - if (!values_or.ok()) { - continue; - } auto tokenizer_or = tokenizer_factory::CreateIndexingTokenizer( metadata->tokenizer, &language_segmenter_); if (!tokenizer_or.ok()) { // If we couldn't create the tokenizer properly, just skip this section. continue; } - std::vector<std::string_view> values = values_or.ValueOrDie(); - for (int value_index = 0; value_index < values.size(); ++value_index) { - if (match_options.max_matches_remaining <= 0) { - break; - } - SectionData value = {metadata->path, values.at(value_index), value_index}; - auto entry_or = - RetrieveMatches(matcher_or.ValueOrDie().get(), match_options, value, - tokenizer_or.ValueOrDie().get()); - - // Drop any entries that encountered errors or didn't find any matches. 
- if (entry_or.ok()) { - match_options.max_matches_remaining -= - entry_or.ValueOrDie().snippet_matches_size(); - snippet_proto.mutable_entries()->Add(std::move(entry_or).ValueOrDie()); - } - } + std::unique_ptr<Tokenizer> tokenizer = std::move(tokenizer_or).ValueOrDie(); + RetrieveSnippetForSection( + document, matcher.get(), tokenizer.get(), section_path, + /*section_path_index=*/0, "", &match_options, &snippet_proto); } return snippet_proto; } diff --git a/icing/result/snippet-retriever_test.cc b/icing/result/snippet-retriever_test.cc index ecda400..ad70038 100644 --- a/icing/result/snippet-retriever_test.cc +++ b/icing/result/snippet-retriever_test.cc @@ -24,22 +24,26 @@ #include "icing/file/mock-filesystem.h" #include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/portable/equals-proto.h" +#include "icing/portable/platform.h" #include "icing/proto/document.pb.h" #include "icing/proto/schema.pb.h" #include "icing/proto/search.pb.h" +#include "icing/proto/term.pb.h" #include "icing/query/query-terms.h" +#include "icing/schema-builder.h" #include "icing/schema/schema-store.h" #include "icing/schema/section-manager.h" #include "icing/store/document-id.h" #include "icing/store/key-mapper.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" -#include "icing/testing/platform.h" +#include "icing/testing/jni-test-helpers.h" #include "icing/testing/snippet-helpers.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" #include "icing/tokenization/language-segmenter-factory.h" #include "icing/tokenization/language-segmenter.h" +#include "icing/transform/map/map-normalizer.h" #include "icing/transform/normalizer-factory.h" #include "icing/transform/normalizer.h" #include "unicode/uloc.h" @@ -49,10 +53,30 @@ namespace lib { namespace { +using ::testing::ElementsAre; using ::testing::Eq; using ::testing::IsEmpty; using ::testing::SizeIs; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL 
= + PropertyConfigProto_Cardinality_Code_OPTIONAL; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED = + PropertyConfigProto_Cardinality_Code_REPEATED; + +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = + StringIndexingConfig_TokenizerType_Code_PLAIN; + +constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; +constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX; + +std::vector<std::string_view> GetPropertyPaths(const SnippetProto& snippet) { + std::vector<std::string_view> paths; + for (const SnippetProto::EntryProto& entry : snippet.entries()) { + paths.push_back(entry.property_name()); + } + return paths; +} + class SnippetRetrieverTest : public testing::Test { protected: void SetUp() override { @@ -66,7 +90,9 @@ class SnippetRetrieverTest : public testing::Test { GetTestFilePath("icing/icu.dat"))); } - language_segmenter_factory::SegmenterOptions options(ULOC_US); + jni_cache_ = GetTestJniCache(); + language_segmenter_factory::SegmenterOptions options(ULOC_US, + jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( language_segmenter_, language_segmenter_factory::Create(std::move(options))); @@ -75,25 +101,22 @@ class SnippetRetrieverTest : public testing::Test { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); - SchemaProto schema; - SchemaTypeConfigProto* type_config = schema.add_types(); - type_config->set_schema_type("email"); - PropertyConfigProto* prop_config = type_config->add_properties(); - prop_config->set_property_name("subject"); - prop_config->set_data_type(PropertyConfigProto::DataType::STRING); - prop_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - prop_config->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::PREFIX); - prop_config->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - prop_config = type_config->add_properties(); - 
prop_config->set_property_name("body"); - prop_config->set_data_type(PropertyConfigProto::DataType::STRING); - prop_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - prop_config->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::EXACT_ONLY); - prop_config->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); + SchemaProto schema = + SchemaBuilder() + .AddType( + SchemaTypeConfigBuilder() + .SetType("email") + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); ICING_ASSERT_OK(schema_store_->SetSchema(schema)); ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create( @@ -121,6 +144,7 @@ class SnippetRetrieverTest : public testing::Test { std::unique_ptr<LanguageSegmenter> language_segmenter_; std::unique_ptr<SnippetRetriever> snippet_retriever_; std::unique_ptr<Normalizer> normalizer_; + std::unique_ptr<const JniCache> jni_cache_; ResultSpecProto::SnippetSpecProto snippet_spec_; std::string test_dir_; }; @@ -156,11 +180,65 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeSmallerThanMatch) { // "three". 
len=4, orig_window= "thre" snippet_spec_.set_max_window_bytes(4); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( - query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document, - section_mask); + query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); + EXPECT_THAT(snippet.entries(), SizeIs(1)); - EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0), - Eq("")); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("")); +} + +TEST_F(SnippetRetrieverTest, + SnippetingWindowMaxWindowSizeEqualToMatch_OddLengthMatch) { + DocumentProto document = + DocumentBuilder() + .SetKey("icing", "email/1") + .SetSchema("email") + .AddStringProperty("subject", "counting") + .AddStringProperty("body", "one two three four.... five") + .Build(); + + SectionIdMask section_mask = 0b00000011; + SectionRestrictQueryTermsMap query_terms{{"", {"three"}}}; + + // Window starts at the beginning of "three" and at the exact end of + // "three". len=5, orig_window= "three" + snippet_spec_.set_max_window_bytes(5); + SnippetProto snippet = snippet_retriever_->RetrieveSnippet( + query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); + + EXPECT_THAT(snippet.entries(), SizeIs(1)); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("three")); +} + +TEST_F(SnippetRetrieverTest, + SnippetingWindowMaxWindowSizeEqualToMatch_EvenLengthMatch) { + DocumentProto document = + DocumentBuilder() + .SetKey("icing", "email/1") + .SetSchema("email") + .AddStringProperty("subject", "counting") + .AddStringProperty("body", "one two three four.... 
five") + .Build(); + + SectionIdMask section_mask = 0b00000011; + SectionRestrictQueryTermsMap query_terms{{"", {"four"}}}; + + // Window starts at the beginning of "four" and at the exact end of + // "four". len=4, orig_window= "four" + snippet_spec_.set_max_window_bytes(4); + SnippetProto snippet = snippet_retriever_->RetrieveSnippet( + query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); + + EXPECT_THAT(snippet.entries(), SizeIs(1)); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("four")); } TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsInWhitespace) { @@ -175,16 +253,25 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsInWhitespace) { SectionIdMask section_mask = 0b00000011; SectionRestrictQueryTermsMap query_terms{{"", {"three"}}}; - // Window starts at the space between "one" and "two". Window ends in the - // middle of "four". - // len=14, orig_window=" two three fou" + // String: "one two three four.... five" + // ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 4 8 14 23 27 + // UTF-32 idx: 0 4 8 14 23 27 + // + // The window will be: + // 1. untrimmed, no-shifting window will be (2,17). + // 2. trimmed, no-shifting window [4,13) "two three" + // 3. 
trimmed, shifted window [4,18) "two three four" snippet_spec_.set_max_window_bytes(14); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( - query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document, - section_mask); + query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); + EXPECT_THAT(snippet.entries(), SizeIs(1)); - EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0), - Eq("two three")); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), + ElementsAre("two three four")); } TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsMidToken) { @@ -199,15 +286,25 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsMidToken) { SectionIdMask section_mask = 0b00000011; SectionRestrictQueryTermsMap query_terms{{"", {"three"}}}; - // Window starts in the middle of "one" and ends at the end of "four". - // len=16, orig_window="e two three four" + // String: "one two three four.... five" + // ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 4 8 14 23 27 + // UTF-32 idx: 0 4 8 14 23 27 + // + // The window will be: + // 1. untrimmed, no-shifting window will be (1,18). + // 2. trimmed, no-shifting window [4,18) "two three four" + // 3. trimmed, shifted window [4,20) "two three four.." 
snippet_spec_.set_max_window_bytes(16); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( - query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document, - section_mask); + query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); + EXPECT_THAT(snippet.entries(), SizeIs(1)); - EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0), - Eq("two three four")); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), + ElementsAre("two three four..")); } TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsInPunctuation) { @@ -226,15 +323,18 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsInPunctuation) { // len=20, orig_window="one two three four.." snippet_spec_.set_max_window_bytes(20); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( - query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document, - section_mask); + query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); + EXPECT_THAT(snippet.entries(), SizeIs(1)); - EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0), - Eq("one two three four..")); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), + ElementsAre("one two three four..")); } TEST_F(SnippetRetrieverTest, - SnippetingWindowMaxWindowEndsInMiddleOfMultiBytePunctuation) { + SnippetingWindowMaxWindowEndsMultiBytePunctuation) { DocumentProto document = DocumentBuilder() .SetKey("icing", "email/1") @@ -248,18 +348,21 @@ TEST_F(SnippetRetrieverTest, SectionRestrictQueryTermsMap query_terms{{"", {"in"}}}; // Window ends in the middle of all the punctuation and window starts at 0. 
- // len=26, orig_window="pside down in Australia\xC2" + // len=26, orig_window="pside down in Australia¿" snippet_spec_.set_max_window_bytes(24); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( - query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document, - section_mask); + query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); + EXPECT_THAT(snippet.entries(), SizeIs(1)); - EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0), - Eq("down in Australia")); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), + ElementsAre("down in Australia¿")); } TEST_F(SnippetRetrieverTest, - SnippetingWindowMaxWindowEndsInMultiBytePunctuation) { + SnippetingWindowMaxWindowBeyondMultiBytePunctuation) { DocumentProto document = DocumentBuilder() .SetKey("icing", "email/1") @@ -273,14 +376,17 @@ TEST_F(SnippetRetrieverTest, SectionRestrictQueryTermsMap query_terms{{"", {"in"}}}; // Window ends in the middle of all the punctuation and window starts at 0. 
- // len=26, orig_window="upside down in Australia\xC2\xBF" + // len=26, orig_window="upside down in Australia¿ " snippet_spec_.set_max_window_bytes(26); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( - query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document, - section_mask); + query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); + EXPECT_THAT(snippet.entries(), SizeIs(1)); - EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0), - Eq("upside down in Australia¿")); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), + ElementsAre("upside down in Australia¿")); } TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsBeforeValueStart) { @@ -295,15 +401,25 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsBeforeValueStart) { SectionIdMask section_mask = 0b00000011; SectionRestrictQueryTermsMap query_terms{{"", {"three"}}}; - // Window starts before 0. - // len=22, orig_window="one two three four..." + // String: "one two three four.... five" + // ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 4 8 14 23 27 + // UTF-32 idx: 0 4 8 14 23 27 + // + // The window will be: + // 1. untrimmed, no-shifting window will be (-2,21). + // 2. trimmed, no-shifting window [0,21) "one two three four..." + // 3. trimmed, shifted window [0,22) "one two three four...." 
snippet_spec_.set_max_window_bytes(22); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( - query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document, - section_mask); + query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); + EXPECT_THAT(snippet.entries(), SizeIs(1)); - EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0), - Eq("one two three four...")); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), + ElementsAre("one two three four....")); } TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsInWhitespace) { @@ -322,11 +438,14 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsInWhitespace) { // len=26, orig_window="one two three four.... " snippet_spec_.set_max_window_bytes(26); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( - query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document, - section_mask); + query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); + EXPECT_THAT(snippet.entries(), SizeIs(1)); - EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0), - Eq("one two three four....")); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), + ElementsAre("one two three four....")); } TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsMidToken) { @@ -341,15 +460,25 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsMidToken) { SectionIdMask section_mask = 0b00000011; SectionRestrictQueryTermsMap query_terms{{"", {"three"}}}; - // Window ends in the middle of "five" - // len=32, orig_window="one two three four.... fiv" + // String: "one two three four.... 
five" + // ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 4 8 14 23 27 + // UTF-32 idx: 0 4 8 14 23 27 + // + // The window will be: + // 1. untrimmed, no-shifting window will be ((-7,26). + // 2. trimmed, no-shifting window [0,26) "one two three four...." + // 3. trimmed, shifted window [0,27) "one two three four.... five" snippet_spec_.set_max_window_bytes(32); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( - query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document, - section_mask); + query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); + EXPECT_THAT(snippet.entries(), SizeIs(1)); - EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0), - Eq("one two three four....")); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), + ElementsAre("one two three four.... five")); } TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeEqualToValueSize) { @@ -368,11 +497,14 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeEqualToValueSize) { // len=34, orig_window="one two three four.... five" snippet_spec_.set_max_window_bytes(34); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( - query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document, - section_mask); + query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); + EXPECT_THAT(snippet.entries(), SizeIs(1)); - EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0), - Eq("one two three four.... five")); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), + ElementsAre("one two three four.... 
five")); } TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeLargerThanValueSize) { @@ -391,11 +523,150 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeLargerThanValueSize) { // len=36, orig_window="one two three four.... five" snippet_spec_.set_max_window_bytes(36); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( - query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document, - section_mask); + query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); + + EXPECT_THAT(snippet.entries(), SizeIs(1)); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), + ElementsAre("one two three four.... five")); +} + +TEST_F(SnippetRetrieverTest, SnippetingWindowMatchAtTextStart) { + DocumentProto document = + DocumentBuilder() + .SetKey("icing", "email/1") + .SetSchema("email") + .AddStringProperty("subject", "counting") + .AddStringProperty("body", "one two three four.... five six") + .Build(); + + SectionIdMask section_mask = 0b00000011; + SectionRestrictQueryTermsMap query_terms{{"", {"two"}}}; + + // String: "one two three four.... five six" + // ^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 4 8 14 23 28 31 + // UTF-32 idx: 0 4 8 14 23 28 31 + // + // Window size will go past the start of the window. + // The window will be: + // 1. untrimmed, no-shifting window will be (-10,19). + // 2. trimmed, no-shifting window [0,19) "one two three four." + // 3. trimmed, shifted window [0,27) "one two three four.... 
five" + snippet_spec_.set_max_window_bytes(28); + SnippetProto snippet = snippet_retriever_->RetrieveSnippet( + query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); + + EXPECT_THAT(snippet.entries(), SizeIs(1)); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), + ElementsAre("one two three four.... five")); +} + +TEST_F(SnippetRetrieverTest, SnippetingWindowMatchAtTextEnd) { + DocumentProto document = + DocumentBuilder() + .SetKey("icing", "email/1") + .SetSchema("email") + .AddStringProperty("subject", "counting") + .AddStringProperty("body", "one two three four.... five six") + .Build(); + + SectionIdMask section_mask = 0b00000011; + SectionRestrictQueryTermsMap query_terms{{"", {"five"}}}; + + // String: "one two three four.... five six" + // ^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 4 8 14 23 28 31 + // UTF-32 idx: 0 4 8 14 23 28 31 + // + // Window size will go past the end of the window. + // The window will be: + // 1. untrimmed, no-shifting window will be (10,39). + // 2. trimmed, no-shifting window [14,31) "four.... five six" + // 3. trimmed, shifted window [4,31) "two three four.... five six" + snippet_spec_.set_max_window_bytes(28); + SnippetProto snippet = snippet_retriever_->RetrieveSnippet( + query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); + EXPECT_THAT(snippet.entries(), SizeIs(1)); - EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0), - Eq("one two three four.... five")); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), + ElementsAre("two three four.... 
five six")); +} + +TEST_F(SnippetRetrieverTest, SnippetingWindowMatchAtTextStartShortText) { + DocumentProto document = + DocumentBuilder() + .SetKey("icing", "email/1") + .SetSchema("email") + .AddStringProperty("subject", "counting") + .AddStringProperty("body", "one two three four....") + .Build(); + + SectionIdMask section_mask = 0b00000011; + SectionRestrictQueryTermsMap query_terms{{"", {"two"}}}; + + // String: "one two three four...." + // ^ ^ ^ ^ ^ + // UTF-8 idx: 0 4 8 14 22 + // UTF-32 idx: 0 4 8 14 22 + // + // Window size will go past the start of the window. + // The window will be: + // 1. untrimmed, no-shifting window will be (-10,19). + // 2. trimmed, no-shifting window [0, 19) "one two three four." + // 3. trimmed, shifted window [0, 22) "one two three four...." + snippet_spec_.set_max_window_bytes(28); + SnippetProto snippet = snippet_retriever_->RetrieveSnippet( + query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); + + EXPECT_THAT(snippet.entries(), SizeIs(1)); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), + ElementsAre("one two three four....")); +} + +TEST_F(SnippetRetrieverTest, SnippetingWindowMatchAtTextEndShortText) { + DocumentProto document = + DocumentBuilder() + .SetKey("icing", "email/1") + .SetSchema("email") + .AddStringProperty("subject", "counting") + .AddStringProperty("body", "one two three four....") + .Build(); + + SectionIdMask section_mask = 0b00000011; + SectionRestrictQueryTermsMap query_terms{{"", {"four"}}}; + + // String: "one two three four...." + // ^ ^ ^ ^ ^ + // UTF-8 idx: 0 4 8 14 22 + // UTF-32 idx: 0 4 8 14 22 + // + // Window size will go past the start of the window. + // The window will be: + // 1. untrimmed, no-shifting window will be (1,30). + // 2. trimmed, no-shifting window [4, 22) "two three four...." + // 3. 
trimmed, shifted window [0, 22) "one two three four...." + snippet_spec_.set_max_window_bytes(28); + SnippetProto snippet = snippet_retriever_->RetrieveSnippet( + query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); + + EXPECT_THAT(snippet.entries(), SizeIs(1)); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), + ElementsAre("one two three four....")); } TEST_F(SnippetRetrieverTest, PrefixSnippeting) { @@ -409,14 +680,21 @@ TEST_F(SnippetRetrieverTest, PrefixSnippeting) { SectionIdMask section_mask = 0b00000011; SectionRestrictQueryTermsMap query_terms{{"", {"f"}}}; SnippetProto snippet = snippet_retriever_->RetrieveSnippet( - query_terms, TermMatchType::PREFIX, snippet_spec_, document, - section_mask); + query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask); // Check the snippets. 'f' should match prefix-enabled property 'subject', but // not exact-only property 'body' EXPECT_THAT(snippet.entries(), SizeIs(1)); - EXPECT_THAT(GetWindow(document, snippet, "subject", 0), Eq("subject foo")); - EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("foo")); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("subject")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), + ElementsAre("subject foo")); + EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("foo")); + + if (normalizer_factory::GetNormalizerName() == MapNormalizer::kName) { + EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("f")); + } } TEST_F(SnippetRetrieverTest, ExactSnippeting) { @@ -431,8 +709,7 @@ TEST_F(SnippetRetrieverTest, ExactSnippeting) { SectionIdMask section_mask = 0b00000011; SectionRestrictQueryTermsMap query_terms{{"", {"f"}}}; SnippetProto snippet = 
snippet_retriever_->RetrieveSnippet( - query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document, - section_mask); + query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); // Check the snippets EXPECT_THAT(snippet.entries(), IsEmpty()); @@ -452,13 +729,18 @@ TEST_F(SnippetRetrieverTest, SimpleSnippetingNoWindowing) { SectionIdMask section_mask = 0b00000011; SectionRestrictQueryTermsMap query_terms{{"", {"foo"}}}; SnippetProto snippet = snippet_retriever_->RetrieveSnippet( - query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document, - section_mask); + query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); // Check the snippets EXPECT_THAT(snippet.entries(), SizeIs(1)); - EXPECT_THAT(GetWindow(document, snippet, "subject", 0), IsEmpty()); - EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("foo")); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("subject")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("")); + EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("foo")); + if (normalizer_factory::GetNormalizerName() == MapNormalizer::kName) { + EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("foo")); + } } TEST_F(SnippetRetrieverTest, SnippetingMultipleMatches) { @@ -471,23 +753,53 @@ TEST_F(SnippetRetrieverTest, SnippetingMultipleMatches) { "Concerning the subject of foo, we need to begin " "considering our options regarding body bar.") .Build(); + // String: "Concerning the subject of foo, we need to begin considering " + // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 11 15 23 26 31 34 39 42 48 + // UTF-32 idx: 0 11 15 23 26 31 34 39 42 48 + // + // String ctd: "our options regarding body bar." 
+ // ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 60 64 72 82 87 91 + // UTF-32 idx: 60 64 72 82 87 91 SectionIdMask section_mask = 0b00000011; SectionRestrictQueryTermsMap query_terms{{"", {"foo", "bar"}}}; SnippetProto snippet = snippet_retriever_->RetrieveSnippet( - query_terms, TermMatchType::PREFIX, snippet_spec_, document, - section_mask); + query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask); // Check the snippets EXPECT_THAT(snippet.entries(), SizeIs(2)); - EXPECT_THAT(GetWindow(document, snippet, "subject", 0), Eq("subject foo")); - EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("foo")); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + // The first window will be: + // 1. untrimmed, no-shifting window will be (-6,59). + // 2. trimmed, no-shifting window [0, 59) "Concerning... considering". + // 3. trimmed, shifted window [0, 63) "Concerning... our" + // The second window will be: + // 1. untrimmed, no-shifting window will be (54,91). + // 2. trimmed, no-shifting window [60, 91) "our... bar.". + // 3. trimmed, shifted window [31, 91) "we... bar." 
EXPECT_THAT( - GetWindow(document, snippet, "body", 0), - Eq("Concerning the subject of foo, we need to begin considering")); - EXPECT_THAT(GetMatch(document, snippet, "body", 0), Eq("foo")); - EXPECT_THAT(GetWindow(document, snippet, "body", 1), - Eq("our options regarding body bar.")); - EXPECT_THAT(GetMatch(document, snippet, "body", 1), Eq("bar")); + GetWindows(content, snippet.entries(0)), + ElementsAre( + "Concerning the subject of foo, we need to begin considering our", + "we need to begin considering our options regarding body bar.")); + EXPECT_THAT(GetMatches(content, snippet.entries(0)), + ElementsAre("foo", "bar")); + if (normalizer_factory::GetNormalizerName() == MapNormalizer::kName) { + EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), + ElementsAre("foo", "bar")); + } + + EXPECT_THAT(snippet.entries(1).property_name(), Eq("subject")); + content = GetString(&document, snippet.entries(1).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(1)), + ElementsAre("subject foo")); + EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("foo")); + if (normalizer_factory::GetNormalizerName() == MapNormalizer::kName) { + EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), ElementsAre("foo")); + } } TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesSectionRestrict) { @@ -500,23 +812,47 @@ TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesSectionRestrict) { "Concerning the subject of foo, we need to begin " "considering our options regarding body bar.") .Build(); + // String: "Concerning the subject of foo, we need to begin considering " + // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 11 15 23 26 31 34 39 42 48 + // UTF-32 idx: 0 11 15 23 26 31 34 39 42 48 + // + // String ctd: "our options regarding body bar." 
+ // ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 60 64 72 82 87 91 + // UTF-32 idx: 60 64 72 82 87 91 + // // Section 1 "subject" is not in the section_mask, so no snippet information // from that section should be returned by the SnippetRetriever. SectionIdMask section_mask = 0b00000001; SectionRestrictQueryTermsMap query_terms{{"", {"foo", "bar"}}}; SnippetProto snippet = snippet_retriever_->RetrieveSnippet( - query_terms, TermMatchType::PREFIX, snippet_spec_, document, - section_mask); + query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask); // Check the snippets EXPECT_THAT(snippet.entries(), SizeIs(1)); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + // The first window will be: + // 1. untrimmed, no-shifting window will be (-6,59). + // 2. trimmed, no-shifting window [0, 59) "Concerning... considering". + // 3. trimmed, shifted window [0, 63) "Concerning... our" + // The second window will be: + // 1. untrimmed, no-shifting window will be (54,91). + // 2. trimmed, no-shifting window [60, 91) "our... bar.". + // 3. trimmed, shifted window [31, 91) "we... bar." 
EXPECT_THAT( - GetWindow(document, snippet, "body", 0), - Eq("Concerning the subject of foo, we need to begin considering")); - EXPECT_THAT(GetMatch(document, snippet, "body", 0), Eq("foo")); - EXPECT_THAT(GetWindow(document, snippet, "body", 1), - Eq("our options regarding body bar.")); - EXPECT_THAT(GetMatch(document, snippet, "body", 1), Eq("bar")); + GetWindows(content, snippet.entries(0)), + ElementsAre( + "Concerning the subject of foo, we need to begin considering our", + "we need to begin considering our options regarding body bar.")); + EXPECT_THAT(GetMatches(content, snippet.entries(0)), + ElementsAre("foo", "bar")); + if (normalizer_factory::GetNormalizerName() == MapNormalizer::kName) { + EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), + ElementsAre("foo", "bar")); + } } TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesSectionRestrictedTerm) { @@ -529,6 +865,15 @@ TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesSectionRestrictedTerm) { "Concerning the subject of foo, we need to begin " "considering our options regarding body bar.") .Build(); + // String: "Concerning the subject of foo, we need to begin considering " + // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 11 15 23 26 31 34 39 42 48 + // UTF-32 idx: 0 11 15 23 26 31 34 39 42 48 + // + // String ctd: "our options regarding body bar." 
+ // ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 60 64 72 82 87 91 + // UTF-32 idx: 60 64 72 82 87 91 SectionIdMask section_mask = 0b00000011; // "subject" should match in both sections, but "foo" is restricted to "body" // so it should only match in the 'body' section and not the 'subject' @@ -536,25 +881,42 @@ TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesSectionRestrictedTerm) { SectionRestrictQueryTermsMap query_terms{{"", {"subject"}}, {"body", {"foo"}}}; SnippetProto snippet = snippet_retriever_->RetrieveSnippet( - query_terms, TermMatchType::PREFIX, snippet_spec_, document, - section_mask); + query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask); // Check the snippets EXPECT_THAT(snippet.entries(), SizeIs(2)); - // 'subject' section should only have the one match for "subject". - EXPECT_THAT(GetWindow(document, snippet, "subject", 0), Eq("subject foo")); - EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("subject")); - EXPECT_THAT(GetWindow(document, snippet, "subject", 1), IsEmpty()); - EXPECT_THAT(GetMatch(document, snippet, "subject", 1), IsEmpty()); - - // 'body' section should have matches for "subject" and "foo". - EXPECT_THAT(GetWindow(document, snippet, "body", 0), - Eq("Concerning the subject of foo, we need to begin")); - EXPECT_THAT(GetMatch(document, snippet, "body", 0), Eq("subject")); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + // The first window will be: + // 1. untrimmed, no-shifting window will be (-15,50). + // 2. trimmed, no-shifting window [0, 47) "Concerning... begin". + // 3. trimmed, shifted window [0, 63) "Concerning... our" + // The second window will be: + // 1. untrimmed, no-shifting window will be (-6,59). + // 2. trimmed, no-shifting window [0, 59) "Concerning... considering". + // 3. trimmed, shifted window [0, 63) "Concerning... 
our" EXPECT_THAT( - GetWindow(document, snippet, "body", 1), - Eq("Concerning the subject of foo, we need to begin considering")); - EXPECT_THAT(GetMatch(document, snippet, "body", 1), Eq("foo")); + GetWindows(content, snippet.entries(0)), + ElementsAre( + "Concerning the subject of foo, we need to begin considering our", + "Concerning the subject of foo, we need to begin considering our")); + EXPECT_THAT(GetMatches(content, snippet.entries(0)), + ElementsAre("subject", "foo")); + if (normalizer_factory::GetNormalizerName() == MapNormalizer::kName) { + EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), + ElementsAre("subject", "foo")); + } + + EXPECT_THAT(snippet.entries(1).property_name(), Eq("subject")); + content = GetString(&document, snippet.entries(1).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(1)), + ElementsAre("subject foo")); + EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("subject")); + if (normalizer_factory::GetNormalizerName() == MapNormalizer::kName) { + EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), + ElementsAre("subject")); + } } TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesOneMatchPerProperty) { @@ -568,24 +930,48 @@ TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesOneMatchPerProperty) { "considering our options regarding body bar.") .Build(); + // String: "Concerning the subject of foo, we need to begin considering " + // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 11 15 23 26 31 34 39 42 48 + // UTF-32 idx: 0 11 15 23 26 31 34 39 42 48 + // + // String ctd: "our options regarding body bar." 
+ // ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 60 64 72 82 87 91 + // UTF-32 idx: 60 64 72 82 87 91 snippet_spec_.set_num_matches_per_property(1); SectionIdMask section_mask = 0b00000011; SectionRestrictQueryTermsMap query_terms{{"", {"foo", "bar"}}}; SnippetProto snippet = snippet_retriever_->RetrieveSnippet( - query_terms, TermMatchType::PREFIX, snippet_spec_, document, - section_mask); + query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask); // Check the snippets EXPECT_THAT(snippet.entries(), SizeIs(2)); - EXPECT_THAT(GetWindow(document, snippet, "subject", 0), Eq("subject foo")); - EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("foo")); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + // The window will be: + // 1. untrimmed, no-shifting window will be (-6,59). + // 2. trimmed, no-shifting window [0, 59) "Concerning... considering". + // 3. trimmed, shifted window [0, 63) "Concerning... 
our" EXPECT_THAT( - GetWindow(document, snippet, "body", 0), - Eq("Concerning the subject of foo, we need to begin considering")); - EXPECT_THAT(GetMatch(document, snippet, "body", 0), Eq("foo")); - EXPECT_THAT(GetWindow(document, snippet, "body", 1), IsEmpty()); - EXPECT_THAT(GetMatch(document, snippet, "body", 1), IsEmpty()); + GetWindows(content, snippet.entries(0)), + ElementsAre( + "Concerning the subject of foo, we need to begin considering our")); + EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("foo")); + if (normalizer_factory::GetNormalizerName() == MapNormalizer::kName) { + EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("foo")); + } + + EXPECT_THAT(snippet.entries(1).property_name(), Eq("subject")); + content = GetString(&document, snippet.entries(1).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(1)), + ElementsAre("subject foo")); + EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("foo")); + if (normalizer_factory::GetNormalizerName() == MapNormalizer::kName) { + EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), ElementsAre("foo")); + } } TEST_F(SnippetRetrieverTest, PrefixSnippetingNormalization) { @@ -599,12 +985,17 @@ TEST_F(SnippetRetrieverTest, PrefixSnippetingNormalization) { SectionIdMask section_mask = 0b00000011; SectionRestrictQueryTermsMap query_terms{{"", {"md"}}}; SnippetProto snippet = snippet_retriever_->RetrieveSnippet( - query_terms, TermMatchType::PREFIX, snippet_spec_, document, - section_mask); + query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask); EXPECT_THAT(snippet.entries(), SizeIs(1)); - EXPECT_THAT(GetWindow(document, snippet, "subject", 0), Eq("MDI team")); - EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("MDI")); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("subject")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), 
ElementsAre("MDI team")); + EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("MDI")); + if (normalizer_factory::GetNormalizerName() == MapNormalizer::kName) { + EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("MD")); + } } TEST_F(SnippetRetrieverTest, ExactSnippetingNormalization) { @@ -619,13 +1010,646 @@ TEST_F(SnippetRetrieverTest, ExactSnippetingNormalization) { SectionIdMask section_mask = 0b00000011; SectionRestrictQueryTermsMap query_terms{{"", {"zurich"}}}; SnippetProto snippet = snippet_retriever_->RetrieveSnippet( - query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document, - section_mask); + query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); EXPECT_THAT(snippet.entries(), SizeIs(1)); - EXPECT_THAT(GetWindow(document, snippet, "body", 0), - Eq("Some members are in Zürich.")); - EXPECT_THAT(GetMatch(document, snippet, "body", 0), Eq("Zürich")); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), + ElementsAre("Some members are in Zürich.")); + EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("Zürich")); + + if (normalizer_factory::GetNormalizerName() == MapNormalizer::kName) { + EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), + ElementsAre("Zürich")); + } +} + +TEST_F(SnippetRetrieverTest, SnippetingTestOneLevel) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("SingleLevelType") + .AddProperty( + PropertyConfigBuilder() + .SetName("X") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REPEATED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("Y") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REPEATED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("Z") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) 
+ .SetCardinality(CARDINALITY_REPEATED))) + .Build(); + ICING_ASSERT_OK(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/true)); + ICING_ASSERT_OK_AND_ASSIGN( + snippet_retriever_, + SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(), + normalizer_.get())); + + std::vector<std::string> string_values = {"marco", "polo", "marco", "polo"}; + DocumentProto document; + document.set_schema("SingleLevelType"); + PropertyProto* prop = document.add_properties(); + prop->set_name("X"); + for (const std::string& s : string_values) { + prop->add_string_values(s); + } + prop = document.add_properties(); + prop->set_name("Y"); + for (const std::string& s : string_values) { + prop->add_string_values(s); + } + prop = document.add_properties(); + prop->set_name("Z"); + for (const std::string& s : string_values) { + prop->add_string_values(s); + } + + SectionIdMask section_mask = 0b00000111; + SectionRestrictQueryTermsMap query_terms{{"", {"polo"}}}; + SnippetProto snippet = snippet_retriever_->RetrieveSnippet( + query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); + + EXPECT_THAT(snippet.entries(), SizeIs(6)); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("X[1]")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("polo")); + EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("polo")); + if (normalizer_factory::GetNormalizerName() == MapNormalizer::kName) { + EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), + ElementsAre("polo")); + } + + EXPECT_THAT(snippet.entries(1).property_name(), Eq("X[3]")); + content = GetString(&document, snippet.entries(1).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("polo")); + EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("polo")); + + if (normalizer_factory::GetNormalizerName() == MapNormalizer::kName) { 
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), + ElementsAre("polo")); + } + + EXPECT_THAT(GetPropertyPaths(snippet), + ElementsAre("X[1]", "X[3]", "Y[1]", "Y[3]", "Z[1]", "Z[3]")); +} + +TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevel) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("SingleLevelType") + .AddProperty( + PropertyConfigBuilder() + .SetName("X") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REPEATED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("Y") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REPEATED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("Z") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REPEATED))) + .AddType(SchemaTypeConfigBuilder() + .SetType("MultiLevelType") + .AddProperty(PropertyConfigBuilder() + .SetName("A") + .SetDataTypeDocument( + "SingleLevelType", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("B") + .SetDataTypeDocument( + "SingleLevelType", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("C") + .SetDataTypeDocument( + "SingleLevelType", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + ICING_ASSERT_OK(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/true)); + ICING_ASSERT_OK_AND_ASSIGN( + snippet_retriever_, + SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(), + normalizer_.get())); + + std::vector<std::string> string_values = {"marco", "polo", "marco", "polo"}; + DocumentProto subdocument; + PropertyProto* prop = subdocument.add_properties(); + prop->set_name("X"); + for (const std::string& s : string_values) { + prop->add_string_values(s); + } + prop = subdocument.add_properties(); + 
prop->set_name("Y"); + for (const std::string& s : string_values) { + prop->add_string_values(s); + } + prop = subdocument.add_properties(); + prop->set_name("Z"); + for (const std::string& s : string_values) { + prop->add_string_values(s); + } + + DocumentProto document; + document.set_schema("MultiLevelType"); + prop = document.add_properties(); + prop->set_name("A"); + *prop->add_document_values() = subdocument; + + prop = document.add_properties(); + prop->set_name("B"); + *prop->add_document_values() = subdocument; + + prop = document.add_properties(); + prop->set_name("C"); + *prop->add_document_values() = subdocument; + + SectionIdMask section_mask = 0b111111111; + SectionRestrictQueryTermsMap query_terms{{"", {"polo"}}}; + SnippetProto snippet = snippet_retriever_->RetrieveSnippet( + query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); + + EXPECT_THAT(snippet.entries(), SizeIs(18)); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("A.X[1]")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("polo")); + EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("polo")); + if (normalizer_factory::GetNormalizerName() == MapNormalizer::kName) { + EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), + ElementsAre("polo")); + } + + EXPECT_THAT(snippet.entries(1).property_name(), Eq("A.X[3]")); + content = GetString(&document, snippet.entries(1).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("polo")); + EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("polo")); + if (normalizer_factory::GetNormalizerName() == MapNormalizer::kName) { + EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), + ElementsAre("polo")); + } + + EXPECT_THAT( + GetPropertyPaths(snippet), + ElementsAre("A.X[1]", "A.X[3]", "A.Y[1]", "A.Y[3]", "A.Z[1]", "A.Z[3]", + "B.X[1]", "B.X[3]", "B.Y[1]", "B.Y[3]", 
"B.Z[1]", "B.Z[3]", + "C.X[1]", "C.X[3]", "C.Y[1]", "C.Y[3]", "C.Z[1]", "C.Z[3]")); +} + +TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevelRepeated) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("SingleLevelType") + .AddProperty( + PropertyConfigBuilder() + .SetName("X") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REPEATED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("Y") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REPEATED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("Z") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REPEATED))) + .AddType(SchemaTypeConfigBuilder() + .SetType("MultiLevelType") + .AddProperty(PropertyConfigBuilder() + .SetName("A") + .SetDataTypeDocument( + "SingleLevelType", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REPEATED)) + .AddProperty(PropertyConfigBuilder() + .SetName("B") + .SetDataTypeDocument( + "SingleLevelType", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REPEATED)) + .AddProperty(PropertyConfigBuilder() + .SetName("C") + .SetDataTypeDocument( + "SingleLevelType", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); + ICING_ASSERT_OK(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/true)); + ICING_ASSERT_OK_AND_ASSIGN( + snippet_retriever_, + SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(), + normalizer_.get())); + + std::vector<std::string> string_values = {"marco", "polo", "marco", "polo"}; + DocumentProto subdocument; + PropertyProto* prop = subdocument.add_properties(); + prop->set_name("X"); + for (const std::string& s : string_values) { + prop->add_string_values(s); + } + prop = subdocument.add_properties(); + prop->set_name("Y"); + for (const std::string& s : string_values) { + prop->add_string_values(s); + } + prop = 
subdocument.add_properties(); + prop->set_name("Z"); + for (const std::string& s : string_values) { + prop->add_string_values(s); + } + + DocumentProto document; + document.set_schema("MultiLevelType"); + prop = document.add_properties(); + prop->set_name("A"); + *prop->add_document_values() = subdocument; + *prop->add_document_values() = subdocument; + + prop = document.add_properties(); + prop->set_name("B"); + *prop->add_document_values() = subdocument; + *prop->add_document_values() = subdocument; + + prop = document.add_properties(); + prop->set_name("C"); + *prop->add_document_values() = subdocument; + *prop->add_document_values() = subdocument; + + SectionIdMask section_mask = 0b111111111; + SectionRestrictQueryTermsMap query_terms{{"", {"polo"}}}; + SnippetProto snippet = snippet_retriever_->RetrieveSnippet( + query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); + + EXPECT_THAT(snippet.entries(), SizeIs(36)); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("A[0].X[1]")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("polo")); + EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("polo")); + if (normalizer_factory::GetNormalizerName() == MapNormalizer::kName) { + EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), + ElementsAre("polo")); + } + + EXPECT_THAT(snippet.entries(1).property_name(), Eq("A[0].X[3]")); + content = GetString(&document, snippet.entries(1).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("polo")); + EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("polo")); + if (normalizer_factory::GetNormalizerName() == MapNormalizer::kName) { + EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), + ElementsAre("polo")); + } + + EXPECT_THAT(GetPropertyPaths(snippet), + ElementsAre("A[0].X[1]", "A[0].X[3]", "A[1].X[1]", "A[1].X[3]", + "A[0].Y[1]", 
"A[0].Y[3]", "A[1].Y[1]", "A[1].Y[3]", + "A[0].Z[1]", "A[0].Z[3]", "A[1].Z[1]", "A[1].Z[3]", + "B[0].X[1]", "B[0].X[3]", "B[1].X[1]", "B[1].X[3]", + "B[0].Y[1]", "B[0].Y[3]", "B[1].Y[1]", "B[1].Y[3]", + "B[0].Z[1]", "B[0].Z[3]", "B[1].Z[1]", "B[1].Z[3]", + "C[0].X[1]", "C[0].X[3]", "C[1].X[1]", "C[1].X[3]", + "C[0].Y[1]", "C[0].Y[3]", "C[1].Y[1]", "C[1].Y[3]", + "C[0].Z[1]", "C[0].Z[3]", "C[1].Z[1]", "C[1].Z[3]")); +} + +TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevelSingleValue) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("SingleLevelType") + .AddProperty( + PropertyConfigBuilder() + .SetName("X") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("Y") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("Z") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder() + .SetType("MultiLevelType") + .AddProperty(PropertyConfigBuilder() + .SetName("A") + .SetDataTypeDocument( + "SingleLevelType", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REPEATED)) + .AddProperty(PropertyConfigBuilder() + .SetName("B") + .SetDataTypeDocument( + "SingleLevelType", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REPEATED)) + .AddProperty(PropertyConfigBuilder() + .SetName("C") + .SetDataTypeDocument( + "SingleLevelType", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); + ICING_ASSERT_OK(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/true)); + ICING_ASSERT_OK_AND_ASSIGN( + snippet_retriever_, + SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(), + normalizer_.get())); + + DocumentProto subdocument; + PropertyProto* prop = 
subdocument.add_properties(); + prop->set_name("X"); + prop->add_string_values("polo"); + prop = subdocument.add_properties(); + prop->set_name("Y"); + prop->add_string_values("marco"); + prop = subdocument.add_properties(); + prop->set_name("Z"); + prop->add_string_values("polo"); + + DocumentProto document; + document.set_schema("MultiLevelType"); + prop = document.add_properties(); + prop->set_name("A"); + *prop->add_document_values() = subdocument; + *prop->add_document_values() = subdocument; + + prop = document.add_properties(); + prop->set_name("B"); + *prop->add_document_values() = subdocument; + *prop->add_document_values() = subdocument; + + prop = document.add_properties(); + prop->set_name("C"); + *prop->add_document_values() = subdocument; + *prop->add_document_values() = subdocument; + + SectionIdMask section_mask = 0b111111111; + SectionRestrictQueryTermsMap query_terms{{"", {"polo"}}}; + SnippetProto snippet = snippet_retriever_->RetrieveSnippet( + query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); + + EXPECT_THAT(snippet.entries(), SizeIs(12)); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("A[0].X")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("polo")); + EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("polo")); + if (normalizer_factory::GetNormalizerName() == MapNormalizer::kName) { + EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), + ElementsAre("polo")); + } + + EXPECT_THAT(snippet.entries(1).property_name(), Eq("A[1].X")); + content = GetString(&document, snippet.entries(1).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("polo")); + EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("polo")); + if (normalizer_factory::GetNormalizerName() == MapNormalizer::kName) { + EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), + 
ElementsAre("polo")); + } + + EXPECT_THAT( + GetPropertyPaths(snippet), + ElementsAre("A[0].X", "A[1].X", "A[0].Z", "A[1].Z", "B[0].X", "B[1].X", + "B[0].Z", "B[1].Z", "C[0].X", "C[1].X", "C[0].Z", "C[1].Z")); +} + +TEST_F(SnippetRetrieverTest, CJKSnippetMatchTest) { + // String: "我每天走路去上班。" + // ^ ^ ^ ^^ + // UTF8 idx: 0 3 9 15 18 + // UTF16 idx: 0 1 3 5 6 + // Breaks into segments: "我", "每天", "走路", "去", "上班" + constexpr std::string_view kChinese = "我每天走路去上班。"; + DocumentProto document = + DocumentBuilder() + .SetKey("icing", "email/1") + .SetSchema("email") + .AddStringProperty("subject", kChinese) + .AddStringProperty("body", + "Concerning the subject of foo, we need to begin " + "considering our options regarding body bar.") + .Build(); + + SectionIdMask section_mask = 0b00000011; + SectionRestrictQueryTermsMap query_terms{{"", {"走"}}}; + + SnippetProto snippet = snippet_retriever_->RetrieveSnippet( + query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask); + + // Ensure that one and only one property was matched and it was "body" + ASSERT_THAT(snippet.entries(), SizeIs(1)); + const SnippetProto::EntryProto* entry = &snippet.entries(0); + EXPECT_THAT(entry->property_name(), Eq("subject")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + + // Ensure that there is one and only one match within "subject" + ASSERT_THAT(entry->snippet_matches(), SizeIs(1)); + const SnippetMatchProto& match_proto = entry->snippet_matches(0); + + // Ensure that the match is correct. 
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("走路")); + if (normalizer_factory::GetNormalizerName() == MapNormalizer::kName) { + EXPECT_THAT(GetSubMatches(content, *entry), ElementsAre("走")); + } + + // Ensure that the utf-16 values are also as expected + EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(3)); + EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2)); + if (normalizer_factory::GetNormalizerName() == MapNormalizer::kName) { + EXPECT_THAT(match_proto.submatch_utf16_length(), Eq(1)); + } +} + +TEST_F(SnippetRetrieverTest, CJKSnippetWindowTest) { + language_segmenter_factory::SegmenterOptions options(ULOC_SIMPLIFIED_CHINESE, + jni_cache_.get()); + ICING_ASSERT_OK_AND_ASSIGN( + language_segmenter_, + language_segmenter_factory::Create(std::move(options))); + ICING_ASSERT_OK_AND_ASSIGN( + snippet_retriever_, + SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(), + normalizer_.get())); + + // String: "我每天走路去上班。" + // ^ ^ ^ ^^ + // UTF8 idx: 0 3 9 15 18 + // UTF16 idx: 0 1 3 5 6 + // UTF32 idx: 0 1 3 5 6 + // Breaks into segments: "我", "每天", "走路", "去", "上班" + constexpr std::string_view kChinese = "我每天走路去上班。"; + DocumentProto document = + DocumentBuilder() + .SetKey("icing", "email/1") + .SetSchema("email") + .AddStringProperty("subject", kChinese) + .AddStringProperty("body", + "Concerning the subject of foo, we need to begin " + "considering our options regarding body bar.") + .Build(); + + SectionIdMask section_mask = 0b00000011; + SectionRestrictQueryTermsMap query_terms{{"", {"走"}}}; + + // The window will be: + // 1. untrimmed, no-shifting window will be (0,7). + // 2. trimmed, no-shifting window [1, 6) "每天走路去". + // 3. 
trimmed, shifted window [0, 6) "我每天走路去" + snippet_spec_.set_max_window_bytes(6); + + SnippetProto snippet = snippet_retriever_->RetrieveSnippet( + query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask); + + // Ensure that one and only one property was matched and it was "body" + ASSERT_THAT(snippet.entries(), SizeIs(1)); + const SnippetProto::EntryProto* entry = &snippet.entries(0); + EXPECT_THAT(entry->property_name(), Eq("subject")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + + // Ensure that there is one and only one match within "subject" + ASSERT_THAT(entry->snippet_matches(), SizeIs(1)); + const SnippetMatchProto& match_proto = entry->snippet_matches(0); + + // Ensure that the match is correct. + EXPECT_THAT(GetWindows(content, *entry), ElementsAre("我每天走路去")); + + // Ensure that the utf-16 values are also as expected + EXPECT_THAT(match_proto.window_utf16_position(), Eq(0)); + EXPECT_THAT(match_proto.window_utf16_length(), Eq(6)); +} + +TEST_F(SnippetRetrieverTest, Utf16MultiCodeUnitSnippetMatchTest) { + // The following string has four-byte UTF-8 characters. Most importantly, it + // is also two code units in UTF-16. 
+ // String: "𐀀𐀁 𐀂𐀃 𐀄" + // ^ ^ ^ + // UTF8 idx: 0 9 18 + // UTF16 idx: 0 5 10 + // Breaks into segments: "𐀀𐀁", "𐀂𐀃", "𐀄" + constexpr std::string_view kText = "𐀀𐀁 𐀂𐀃 𐀄"; + DocumentProto document = + DocumentBuilder() + .SetKey("icing", "email/1") + .SetSchema("email") + .AddStringProperty("subject", kText) + .AddStringProperty("body", + "Concerning the subject of foo, we need to begin " + "considering our options regarding body bar.") + .Build(); + + SectionIdMask section_mask = 0b00000011; + SectionRestrictQueryTermsMap query_terms{{"", {"𐀂"}}}; + + SnippetProto snippet = snippet_retriever_->RetrieveSnippet( + query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask); + + // Ensure that one and only one property was matched and it was "body" + ASSERT_THAT(snippet.entries(), SizeIs(1)); + const SnippetProto::EntryProto* entry = &snippet.entries(0); + EXPECT_THAT(entry->property_name(), Eq("subject")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + + // Ensure that there is one and only one match within "subject" + ASSERT_THAT(entry->snippet_matches(), SizeIs(1)); + const SnippetMatchProto& match_proto = entry->snippet_matches(0); + + // Ensure that the match is correct. + EXPECT_THAT(GetMatches(content, *entry), ElementsAre("𐀂𐀃")); + if (normalizer_factory::GetNormalizerName() == MapNormalizer::kName) { + EXPECT_THAT(GetSubMatches(content, *entry), ElementsAre("𐀂")); + } + + // Ensure that the utf-16 values are also as expected + EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(5)); + EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(4)); + if (normalizer_factory::GetNormalizerName() == MapNormalizer::kName) { + EXPECT_THAT(match_proto.submatch_utf16_length(), Eq(2)); + } +} + +TEST_F(SnippetRetrieverTest, Utf16MultiCodeUnitWindowTest) { + // The following string has four-byte UTF-8 characters. Most importantly, it + // is also two code units in UTF-16. 
+ // String: "𐀀𐀁 𐀂𐀃 𐀄" + // ^ ^ ^ + // UTF8 idx: 0 9 18 + // UTF16 idx: 0 5 10 + // UTF32 idx: 0 3 6 + // Breaks into segments: "𐀀𐀁", "𐀂𐀃", "𐀄" + constexpr std::string_view kText = "𐀀𐀁 𐀂𐀃 𐀄"; + DocumentProto document = + DocumentBuilder() + .SetKey("icing", "email/1") + .SetSchema("email") + .AddStringProperty("subject", kText) + .AddStringProperty("body", + "Concerning the subject of foo, we need to begin " + "considering our options regarding body bar.") + .Build(); + + SectionIdMask section_mask = 0b00000011; + SectionRestrictQueryTermsMap query_terms{{"", {"𐀂"}}}; + + // Set a six character window. This will produce a window like this: + // String: "𐀀𐀁 𐀂𐀃 𐀄" + // ^ ^ + // UTF8 idx: 9 22 + // UTF16 idx: 5 12 + // UTF32 idx: 3 7 + snippet_spec_.set_max_window_bytes(6); + + SnippetProto snippet = snippet_retriever_->RetrieveSnippet( + query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask); + + // Ensure that one and only one property was matched and it was "body" + ASSERT_THAT(snippet.entries(), SizeIs(1)); + const SnippetProto::EntryProto* entry = &snippet.entries(0); + EXPECT_THAT(entry->property_name(), Eq("subject")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + + // Ensure that there is one and only one match within "subject" + ASSERT_THAT(entry->snippet_matches(), SizeIs(1)); + const SnippetMatchProto& match_proto = entry->snippet_matches(0); + + // Ensure that the match is correct. 
+ EXPECT_THAT(GetWindows(content, *entry), ElementsAre("𐀂𐀃 𐀄")); + + // Ensure that the utf-16 values are also as expected + EXPECT_THAT(match_proto.window_utf16_position(), Eq(5)); + EXPECT_THAT(match_proto.window_utf16_length(), Eq(7)); } } // namespace diff --git a/icing/schema-builder.h b/icing/schema-builder.h new file mode 100644 index 0000000..59ed7c5 --- /dev/null +++ b/icing/schema-builder.h @@ -0,0 +1,130 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef ICING_SCHEMA_BUILDER_H_ +#define ICING_SCHEMA_BUILDER_H_ + +#include <cstdint> +#include <initializer_list> +#include <string> +#include <string_view> +#include <utility> + +#include "icing/proto/schema.pb.h" + +namespace icing { +namespace lib { + +class PropertyConfigBuilder { + public: + PropertyConfigBuilder() = default; + explicit PropertyConfigBuilder(PropertyConfigProto property) + : property_(std::move(property)) {} + + PropertyConfigBuilder& SetName(std::string_view name) { + property_.set_property_name(std::string(name)); + return *this; + } + + PropertyConfigBuilder& SetDataType( + PropertyConfigProto::DataType::Code data_type) { + property_.set_data_type(data_type); + return *this; + } + + PropertyConfigBuilder& SetDataTypeString( + TermMatchType::Code match_type, + StringIndexingConfig::TokenizerType::Code tokenizer) { + property_.set_data_type(PropertyConfigProto::DataType::STRING); + property_.mutable_string_indexing_config()->set_term_match_type(match_type); + property_.mutable_string_indexing_config()->set_tokenizer_type(tokenizer); + return *this; + } + + PropertyConfigBuilder& SetDataTypeDocument(std::string_view schema_type, + bool index_nested_properties) { + property_.set_data_type(PropertyConfigProto::DataType::DOCUMENT); + property_.set_schema_type(std::string(schema_type)); + property_.mutable_document_indexing_config()->set_index_nested_properties( + index_nested_properties); + return *this; + } + + PropertyConfigBuilder& SetCardinality( + PropertyConfigProto::Cardinality::Code cardinality) { + property_.set_cardinality(cardinality); + return *this; + } + + PropertyConfigProto Build() const { return std::move(property_); } + + private: + PropertyConfigProto property_; +}; + +class SchemaTypeConfigBuilder { + public: + SchemaTypeConfigBuilder() = default; + SchemaTypeConfigBuilder(SchemaTypeConfigProto type_config) + : type_config_(std::move(type_config)) {} + + SchemaTypeConfigBuilder& SetType(std::string_view type) { + 
type_config_.set_schema_type(std::string(type)); + return *this; + } + + SchemaTypeConfigBuilder& SetVersion(int version) { + type_config_.set_version(version); + return *this; + } + + SchemaTypeConfigBuilder& AddProperty(PropertyConfigProto property) { + *type_config_.add_properties() = std::move(property); + return *this; + } + SchemaTypeConfigBuilder& AddProperty(PropertyConfigBuilder property_builder) { + *type_config_.add_properties() = property_builder.Build(); + return *this; + } + + SchemaTypeConfigProto Build() { return std::move(type_config_); } + + private: + SchemaTypeConfigProto type_config_; +}; + +class SchemaBuilder { + public: + SchemaBuilder() = default; + SchemaBuilder(SchemaProto schema) : schema_(std::move(schema)) {} + + SchemaBuilder& AddType(SchemaTypeConfigProto type) { + *schema_.add_types() = std::move(type); + return *this; + } + SchemaBuilder& AddType(SchemaTypeConfigBuilder type_builder) { + *schema_.add_types() = type_builder.Build(); + return *this; + } + + SchemaProto Build() { return std::move(schema_); } + + private: + SchemaProto schema_; +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_SCHEMA_BUILDER_H_ diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc index b43d2a4..3307638 100644 --- a/icing/schema/schema-store.cc +++ b/icing/schema/schema-store.cc @@ -104,7 +104,7 @@ std::unordered_set<SchemaTypeId> SchemaTypeIdsChanged( libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> SchemaStore::Create( const Filesystem* filesystem, const std::string& base_dir, - const Clock* clock, NativeInitializeStats* initialize_stats) { + const Clock* clock, InitializeStatsProto* initialize_stats) { ICING_RETURN_ERROR_IF_NULL(filesystem); ICING_RETURN_ERROR_IF_NULL(clock); @@ -122,7 +122,7 @@ SchemaStore::SchemaStore(const Filesystem* filesystem, std::string base_dir, schema_file_(*filesystem, MakeSchemaFilename(base_dir_)) {} SchemaStore::~SchemaStore() { - if (initialized_) { + if 
(has_schema_successfully_set_) { if (!PersistToDisk().ok()) { ICING_LOG(ERROR) << "Error persisting to disk in SchemaStore destructor"; } @@ -130,7 +130,7 @@ SchemaStore::~SchemaStore() { } libtextclassifier3::Status SchemaStore::Initialize( - NativeInitializeStats* initialize_stats) { + InitializeStatsProto* initialize_stats) { auto schema_proto_or = GetSchema(); if (absl_ports::IsNotFound(schema_proto_or.status())) { // Don't have an existing schema proto, that's fine @@ -139,6 +139,7 @@ libtextclassifier3::Status SchemaStore::Initialize( // Real error when trying to read the existing schema return schema_proto_or.status(); } + has_schema_successfully_set_ = true; if (!InitializeDerivedFiles().ok()) { ICING_VLOG(3) @@ -147,7 +148,7 @@ libtextclassifier3::Status SchemaStore::Initialize( std::unique_ptr<Timer> regenerate_timer = clock_.GetNewTimer(); if (initialize_stats != nullptr) { initialize_stats->set_schema_store_recovery_cause( - NativeInitializeStats::IO_ERROR); + InitializeStatsProto::IO_ERROR); } ICING_RETURN_IF_ERROR(RegenerateDerivedFiles()); if (initialize_stats != nullptr) { @@ -156,7 +157,6 @@ libtextclassifier3::Status SchemaStore::Initialize( } } - initialized_ = true; if (initialize_stats != nullptr) { initialize_stats->set_num_schema_types(type_config_map_.size()); } @@ -253,9 +253,12 @@ libtextclassifier3::Status SchemaStore::UpdateHeader(const Crc32& checksum) { header.magic = SchemaStore::Header::kMagic; header.checksum = checksum.Get(); + ScopedFd scoped_fd( + filesystem_.OpenForWrite(MakeHeaderFilename(base_dir_).c_str())); // This should overwrite the header. 
- if (!filesystem_.Write(MakeHeaderFilename(base_dir_).c_str(), &header, - sizeof(header))) { + if (!scoped_fd.is_valid() || + !filesystem_.Write(scoped_fd.get(), &header, sizeof(header)) || + !filesystem_.DataSync(scoped_fd.get())) { return absl_ports::InternalError(absl_ports::StrCat( "Failed to write SchemaStore header: ", MakeHeaderFilename(base_dir_))); } @@ -285,18 +288,11 @@ libtextclassifier3::Status SchemaStore::ResetSchemaTypeMapper() { libtextclassifier3::StatusOr<Crc32> SchemaStore::ComputeChecksum() const { Crc32 total_checksum; - - auto schema_proto_or = GetSchema(); - if (absl_ports::IsNotFound(schema_proto_or.status())) { + if (!has_schema_successfully_set_) { // Nothing to checksum return total_checksum; - } else if (!schema_proto_or.ok()) { - // Some real error. Pass it up - return schema_proto_or.status(); } - - // Guaranteed to have a schema proto now - const SchemaProto* schema_proto = schema_proto_or.ValueOrDie(); + ICING_ASSIGN_OR_RETURN(const SchemaProto* schema_proto, GetSchema()); Crc32 schema_checksum; schema_checksum.Append(schema_proto->SerializeAsString()); @@ -326,12 +322,18 @@ SchemaStore::SetSchema(const SchemaProto& new_schema, libtextclassifier3::StatusOr<const SchemaStore::SetSchemaResult> SchemaStore::SetSchema(SchemaProto&& new_schema, bool ignore_errors_and_delete_documents) { + ICING_ASSIGN_OR_RETURN(SchemaUtil::DependencyMap new_dependency_map, + SchemaUtil::Validate(new_schema)); + SetSchemaResult result; auto schema_proto_or = GetSchema(); if (absl_ports::IsNotFound(schema_proto_or.status())) { // We don't have a pre-existing schema, so anything is valid. 
result.success = true; + for (const SchemaTypeConfigProto& type_config : new_schema.types()) { + result.schema_types_new_by_name.insert(type_config.schema_type()); + } } else if (!schema_proto_or.ok()) { // Real error return schema_proto_or.status(); @@ -349,10 +351,14 @@ SchemaStore::SetSchema(SchemaProto&& new_schema, // Different schema, track the differences and see if we can still write it SchemaUtil::SchemaDelta schema_delta = - SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema); + SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema, + new_dependency_map); - // An incompatible index is fine, we can just reindex - result.index_incompatible = schema_delta.index_incompatible; + result.schema_types_new_by_name = std::move(schema_delta.schema_types_new); + result.schema_types_changed_fully_compatible_by_name = + std::move(schema_delta.schema_types_changed_fully_compatible); + result.schema_types_index_incompatible_by_name = + std::move(schema_delta.schema_types_index_incompatible); for (const auto& schema_type : schema_delta.schema_types_deleted) { // We currently don't support deletions, so mark this as not possible. 
@@ -390,6 +396,7 @@ SchemaStore::SetSchema(SchemaProto&& new_schema, // Write the schema (and potentially overwrite a previous schema) ICING_RETURN_IF_ERROR( schema_file_.Write(std::make_unique<SchemaProto>(new_schema))); + has_schema_successfully_set_ = true; ICING_RETURN_IF_ERROR(RegenerateDerivedFiles()); } @@ -399,14 +406,7 @@ SchemaStore::SetSchema(SchemaProto&& new_schema, libtextclassifier3::StatusOr<const SchemaTypeConfigProto*> SchemaStore::GetSchemaTypeConfig(std::string_view schema_type) const { - auto schema_proto_or = GetSchema(); - if (absl_ports::IsNotFound(schema_proto_or.status())) { - return absl_ports::FailedPreconditionError("Schema not set yet."); - } else if (!schema_proto_or.ok()) { - // Some other real error, pass it up - return schema_proto_or.status(); - } - + ICING_RETURN_IF_ERROR(CheckSchemaSet()); const auto& type_config_iter = type_config_map_.find(std::string(schema_type)); if (type_config_iter == type_config_map_.end()) { @@ -418,39 +418,42 @@ SchemaStore::GetSchemaTypeConfig(std::string_view schema_type) const { libtextclassifier3::StatusOr<SchemaTypeId> SchemaStore::GetSchemaTypeId( std::string_view schema_type) const { + ICING_RETURN_IF_ERROR(CheckSchemaSet()); return schema_type_mapper_->Get(schema_type); } libtextclassifier3::StatusOr<std::vector<std::string_view>> SchemaStore::GetStringSectionContent(const DocumentProto& document, std::string_view section_path) const { + ICING_RETURN_IF_ERROR(CheckSchemaSet()); return section_manager_->GetStringSectionContent(document, section_path); } libtextclassifier3::StatusOr<std::vector<std::string_view>> SchemaStore::GetStringSectionContent(const DocumentProto& document, SectionId section_id) const { + ICING_RETURN_IF_ERROR(CheckSchemaSet()); return section_manager_->GetStringSectionContent(document, section_id); } libtextclassifier3::StatusOr<const SectionMetadata*> SchemaStore::GetSectionMetadata(SchemaTypeId schema_type_id, SectionId section_id) const { + 
ICING_RETURN_IF_ERROR(CheckSchemaSet()); return section_manager_->GetSectionMetadata(schema_type_id, section_id); } libtextclassifier3::StatusOr<std::vector<Section>> SchemaStore::ExtractSections( const DocumentProto& document) const { + ICING_RETURN_IF_ERROR(CheckSchemaSet()); return section_manager_->ExtractSections(document); } libtextclassifier3::Status SchemaStore::PersistToDisk() { - if (schema_type_mapper_ != nullptr) { - // It's possible we haven't had a schema set yet, so SchemaTypeMapper hasn't - // been initialized and is still a nullptr - ICING_RETURN_IF_ERROR(schema_type_mapper_->PersistToDisk()); + if (!has_schema_successfully_set_) { + return libtextclassifier3::Status::OK; } - + ICING_RETURN_IF_ERROR(schema_type_mapper_->PersistToDisk()); // Write the header ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum()); ICING_RETURN_IF_ERROR(UpdateHeader(checksum)); @@ -458,5 +461,35 @@ libtextclassifier3::Status SchemaStore::PersistToDisk() { return libtextclassifier3::Status::OK; } +SchemaStoreStorageInfoProto SchemaStore::GetStorageInfo() const { + SchemaStoreStorageInfoProto storage_info; + int64_t directory_size = filesystem_.GetDiskUsage(base_dir_.c_str()); + if (directory_size != Filesystem::kBadFileSize) { + storage_info.set_schema_store_size(directory_size); + } else { + storage_info.set_schema_store_size(-1); + } + ICING_ASSIGN_OR_RETURN(const SchemaProto* schema, GetSchema(), storage_info); + storage_info.set_num_schema_types(schema->types_size()); + int total_sections = 0; + int num_types_sections_exhausted = 0; + for (const SchemaTypeConfigProto& type : schema->types()) { + auto sections_list_or = + section_manager_->GetMetadataList(type.schema_type()); + if (!sections_list_or.ok()) { + continue; + } + total_sections += sections_list_or.ValueOrDie()->size(); + if (sections_list_or.ValueOrDie()->size() == kMaxSectionId + 1) { + ++num_types_sections_exhausted; + } + } + + storage_info.set_num_total_sections(total_sections); + 
storage_info.set_num_schema_types_sections_exhausted( + num_types_sections_exhausted); + return storage_info; +} + } // namespace lib } // namespace icing diff --git a/icing/schema/schema-store.h b/icing/schema/schema-store.h index 3854704..b9be6c0 100644 --- a/icing/schema/schema-store.h +++ b/icing/schema/schema-store.h @@ -29,6 +29,7 @@ #include "icing/proto/document.pb.h" #include "icing/proto/logging.pb.h" #include "icing/proto/schema.pb.h" +#include "icing/proto/storage.pb.h" #include "icing/schema/schema-util.h" #include "icing/schema/section-manager.h" #include "icing/schema/section.h" @@ -67,9 +68,6 @@ class SchemaStore { // to file. bool success = false; - // Whether the new schema changes invalidate the index. - bool index_incompatible = false; - // SchemaTypeIds of schema types can be reassigned new SchemaTypeIds if: // 1. Schema types are added in the middle of the SchemaProto // 2. Schema types are removed from the middle of the SchemaProto @@ -99,6 +97,21 @@ class SchemaStore { // SchemaUtil::ComputeCompatibilityDelta. Represented by the SchemaTypeId // assigned to this SchemaTypeConfigProto in the *old* schema. std::unordered_set<SchemaTypeId> schema_types_incompatible_by_id; + + // Schema types that were added in the new schema. Represented by the + // `schema_type` field in the SchemaTypeConfigProto. + std::unordered_set<std::string> schema_types_new_by_name; + + // Schema types that were changed in a way that was backwards compatible and + // didn't invalidate the index. Represented by the `schema_type` field in + // the SchemaTypeConfigProto. + std::unordered_set<std::string> + schema_types_changed_fully_compatible_by_name; + + // Schema types that were changed in a way that was backwards compatible, + // but invalidated the index. Represented by the `schema_type` field in the + // SchemaTypeConfigProto. 
+ std::unordered_set<std::string> schema_types_index_incompatible_by_name; }; // Factory function to create a SchemaStore which does not take ownership @@ -115,7 +128,7 @@ class SchemaStore { // INTERNAL_ERROR on any IO errors static libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> Create( const Filesystem* filesystem, const std::string& base_dir, - const Clock* clock, NativeInitializeStats* initialize_stats = nullptr); + const Clock* clock, InitializeStatsProto* initialize_stats = nullptr); // Not copyable SchemaStore(const SchemaStore&) = delete; @@ -167,6 +180,7 @@ class SchemaStore { // // Returns: // SchemaTypeId on success + // FAILED_PRECONDITION if schema hasn't been set yet // NOT_FOUND_ERROR if we don't know about the schema type // INTERNAL_ERROR on IO error libtextclassifier3::StatusOr<SchemaTypeId> GetSchemaTypeId( @@ -176,6 +190,7 @@ class SchemaStore { // // Returns: // A string of content on success + // FAILED_PRECONDITION if schema hasn't been set yet // NOT_FOUND if: // 1. Property is optional and not found in the document // 2. 
section_path is invalid @@ -188,6 +203,7 @@ class SchemaStore { // // Returns: // A string of content on success + // FAILED_PRECONDITION if schema hasn't been set yet // INVALID_ARGUMENT if section id is invalid // NOT_FOUND if type config name of document not found libtextclassifier3::StatusOr<std::vector<std::string_view>> @@ -199,6 +215,7 @@ class SchemaStore { // // Returns: // pointer to SectionMetadata on success + // FAILED_PRECONDITION if schema hasn't been set yet // INVALID_ARGUMENT if schema type id or section is invalid libtextclassifier3::StatusOr<const SectionMetadata*> GetSectionMetadata( SchemaTypeId schema_type_id, SectionId section_id) const; @@ -209,6 +226,7 @@ class SchemaStore { // // Returns: // A list of sections on success + // FAILED_PRECONDITION if schema hasn't been set yet // NOT_FOUND if type config name of document not found libtextclassifier3::StatusOr<std::vector<Section>> ExtractSections( const DocumentProto& document) const; @@ -228,6 +246,12 @@ class SchemaStore { // INTERNAL_ERROR on compute error libtextclassifier3::StatusOr<Crc32> ComputeChecksum() const; + // Calculates the StorageInfo for the Schema Store. + // + // If an IO error occurs while trying to calculate the value for a field, then + // that field will be set to -1. + SchemaStoreStorageInfoProto GetStorageInfo() const; + private: // Use SchemaStore::Create instead. explicit SchemaStore(const Filesystem* filesystem, std::string base_dir, @@ -238,8 +262,7 @@ class SchemaStore { // Returns: // OK on success // INTERNAL_ERROR on IO error - libtextclassifier3::Status Initialize( - NativeInitializeStats* initialize_stats); + libtextclassifier3::Status Initialize(InitializeStatsProto* initialize_stats); // Creates sub-components and verifies the integrity of each sub-component. // @@ -275,16 +298,20 @@ class SchemaStore { // Returns any IO errors. 
libtextclassifier3::Status ResetSchemaTypeMapper(); + libtextclassifier3::Status CheckSchemaSet() const { + return has_schema_successfully_set_ + ? libtextclassifier3::Status::OK + : absl_ports::FailedPreconditionError("Schema not set yet."); + } + const Filesystem& filesystem_; const std::string base_dir_; const Clock& clock_; - // Used internally to indicate whether the class has been initialized. This is - // to guard against cases where the object has been created, but Initialize - // fails in the constructor. If we have successfully exited the constructor, - // then this field can be ignored. Clients of SchemaStore should not need to - // worry about this field. - bool initialized_ = false; + // Used internally to indicate whether the class has been successfully + // initialized with a valid schema. Will be false if Initialize failed or no + // schema has ever been set. + bool has_schema_successfully_set_ = false; // Cached schema FileBackedProto<SchemaProto> schema_file_; diff --git a/icing/schema/schema-store_test.cc b/icing/schema/schema-store_test.cc index 7df3dd9..be7170f 100644 --- a/icing/schema/schema-store_test.cc +++ b/icing/schema/schema-store_test.cc @@ -25,13 +25,15 @@ #include "icing/portable/equals-proto.h" #include "icing/proto/document.pb.h" #include "icing/proto/schema.pb.h" +#include "icing/schema-builder.h" #include "icing/schema/schema-util.h" #include "icing/schema/section-manager.h" #include "icing/schema/section.h" #include "icing/store/document-filter-data.h" #include "icing/testing/common-matchers.h" -#include "icing/testing/tmp-directory.h" #include "icing/testing/fake-clock.h" +#include "icing/testing/tmp-directory.h" +#include "icing/util/crc32.h" namespace icing { namespace lib { @@ -41,26 +43,39 @@ namespace { using ::icing::lib::portable_equals_proto::EqualsProto; using ::testing::ElementsAre; using ::testing::Eq; +using ::testing::Ge; using ::testing::Not; using ::testing::Pointee; +constexpr 
PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = + PropertyConfigProto_Cardinality_Code_OPTIONAL; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED = + PropertyConfigProto_Cardinality_Code_REPEATED; + +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = + StringIndexingConfig_TokenizerType_Code_PLAIN; + +constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; + +constexpr PropertyConfigProto_DataType_Code TYPE_STRING = + PropertyConfigProto_DataType_Code_STRING; +constexpr PropertyConfigProto_DataType_Code TYPE_DOUBLE = + PropertyConfigProto_DataType_Code_DOUBLE; + class SchemaStoreTest : public ::testing::Test { protected: SchemaStoreTest() : test_dir_(GetTestTempDir() + "/icing") { filesystem_.CreateDirectoryRecursively(test_dir_.c_str()); - auto type = schema_.add_types(); - type->set_schema_type("email"); - - // Add an indexed property so we generate section metadata on it - auto property = type->add_properties(); - property->set_property_name("subject"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - property->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::EXACT_ONLY); - property->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); + schema_ = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty( + // Add an indexed property so we generate section metadata on it + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); } void TearDown() override { @@ -74,8 +89,9 @@ class SchemaStoreTest : public ::testing::Test { }; TEST_F(SchemaStoreTest, CreationWithNullPointerShouldFail) { - EXPECT_THAT(SchemaStore::Create(/*filesystem=*/nullptr, test_dir_, &fake_clock_), - 
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); + EXPECT_THAT( + SchemaStore::Create(/*filesystem=*/nullptr, test_dir_, &fake_clock_), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); } TEST_F(SchemaStoreTest, CorruptSchemaError) { @@ -87,6 +103,7 @@ TEST_F(SchemaStoreTest, CorruptSchemaError) { // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -97,9 +114,10 @@ TEST_F(SchemaStoreTest, CorruptSchemaError) { // "Corrupt" the ground truth schema by adding new data to it. This will mess // up the checksum of the schema store - SchemaProto corrupt_schema; - auto type = corrupt_schema.add_types(); - type->set_schema_type("corrupted"); + SchemaProto corrupt_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("corrupted")) + .Build(); const std::string schema_file = absl_ports::StrCat(test_dir_, "/schema.pb"); const std::string serialized_schema = corrupt_schema.SerializeAsString(); @@ -121,6 +139,7 @@ TEST_F(SchemaStoreTest, RecoverCorruptDerivedFileOk) { // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -158,6 +177,7 @@ TEST_F(SchemaStoreTest, RecoverBadChecksumOk) { // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -190,7 
+210,36 @@ TEST_F(SchemaStoreTest, RecoverBadChecksumOk) { } TEST_F(SchemaStoreTest, CreateNoPreviousSchemaOk) { - EXPECT_THAT(SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_), IsOk()); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> store, + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); + + // The apis to retrieve information about the schema should fail gracefully. + EXPECT_THAT(store->GetSchema(), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + EXPECT_THAT(store->GetSchemaTypeConfig("foo"), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); + EXPECT_THAT(store->GetSchemaTypeId("foo"), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); + EXPECT_THAT(store->GetSectionMetadata(/*schema_type_id=*/0, /*section_id=*/0), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); + + // The apis to extract content from a document should fail gracefully. + DocumentProto doc; + PropertyProto* prop = doc.add_properties(); + prop->set_name("name"); + prop->add_string_values("foo bar baz"); + + EXPECT_THAT(store->GetStringSectionContent(doc, /*section_id=*/0), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); + EXPECT_THAT(store->GetStringSectionContent(doc, "name"), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); + EXPECT_THAT(store->ExtractSections(doc), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); + + // The apis to persist and checksum data should succeed. 
+ EXPECT_THAT(store->ComputeChecksum(), IsOkAndHolds(Crc32())); + EXPECT_THAT(store->PersistToDisk(), IsOk()); } TEST_F(SchemaStoreTest, CreateWithPreviousSchemaOk) { @@ -200,11 +249,13 @@ TEST_F(SchemaStoreTest, CreateWithPreviousSchemaOk) { SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); schema_store.reset(); - EXPECT_THAT(SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_), IsOk()); + EXPECT_THAT(SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_), + IsOk()); } TEST_F(SchemaStoreTest, MultipleCreateOk) { @@ -220,6 +271,7 @@ TEST_F(SchemaStoreTest, MultipleCreateOk) { SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); @@ -256,6 +308,7 @@ TEST_F(SchemaStoreTest, SetNewSchemaOk) { // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -271,6 +324,7 @@ TEST_F(SchemaStoreTest, SetSameSchemaOk) { // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -278,6 +332,8 @@ TEST_F(SchemaStoreTest, SetSameSchemaOk) { EXPECT_THAT(*actual_schema, EqualsProto(schema_)); // And one more for fun + result = SchemaStore::SetSchemaResult(); + result.success = true; 
EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); @@ -292,6 +348,7 @@ TEST_F(SchemaStoreTest, SetIncompatibleSchemaOk) { // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -302,6 +359,7 @@ TEST_F(SchemaStoreTest, SetIncompatibleSchemaOk) { schema_.clear_types(); // Set the incompatible schema + result = SchemaStore::SetSchemaResult(); result.success = false; result.schema_types_deleted_by_name.emplace("email"); result.schema_types_deleted_by_id.emplace(0); @@ -314,13 +372,14 @@ TEST_F(SchemaStoreTest, SetSchemaWithAddedTypeOk) { std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); - SchemaProto schema; - auto type = schema.add_types(); - type->set_schema_type("email"); + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert("email"); EXPECT_THAT(schema_store->SetSchema(schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -328,10 +387,14 @@ TEST_F(SchemaStoreTest, SetSchemaWithAddedTypeOk) { EXPECT_THAT(*actual_schema, EqualsProto(schema)); // Add a type, shouldn't affect the index or cached SchemaTypeIds - type = schema.add_types(); - type->set_schema_type("new_type"); + schema = SchemaBuilder(schema) + .AddType(SchemaTypeConfigBuilder().SetType("new_type")) + .Build(); // Set the compatible schema + result = SchemaStore::SetSchemaResult(); + result.success = true; + 
result.schema_types_new_by_name.insert("new_type"); EXPECT_THAT(schema_store->SetSchema(schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); @@ -343,15 +406,17 @@ TEST_F(SchemaStoreTest, SetSchemaWithDeletedTypeOk) { std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); - SchemaProto schema; - auto type = schema.add_types(); - type->set_schema_type("email"); - type = schema.add_types(); - type->set_schema_type("message"); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .AddType(SchemaTypeConfigBuilder().SetType("message")) + .Build(); // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert("email"); + result.schema_types_new_by_name.insert("message"); EXPECT_THAT(schema_store->SetSchema(schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -364,9 +429,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithDeletedTypeOk) { schema_store->GetSchemaTypeId("message")); // Remove "email" type, this also changes previous SchemaTypeIds - schema.Clear(); - type = schema.add_types(); - type->set_schema_type("message"); + schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("message")) + .Build(); SchemaStore::SetSchemaResult incompatible_result; incompatible_result.success = false; @@ -399,15 +464,17 @@ TEST_F(SchemaStoreTest, SetSchemaWithReorderedTypesOk) { std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); - SchemaProto schema; - auto type = schema.add_types(); - type->set_schema_type("email"); - type = schema.add_types(); - type->set_schema_type("message"); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .AddType(SchemaTypeConfigBuilder().SetType("message")) + 
.Build(); // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert("email"); + result.schema_types_new_by_name.insert("message"); EXPECT_THAT(schema_store->SetSchema(schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -415,14 +482,15 @@ TEST_F(SchemaStoreTest, SetSchemaWithReorderedTypesOk) { EXPECT_THAT(*actual_schema, EqualsProto(schema)); // Reorder the types - schema.clear_types(); - type = schema.add_types(); - type->set_schema_type("message"); - type = schema.add_types(); - type->set_schema_type("email"); + schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("message")) + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); // Since we assign SchemaTypeIds based on order in the SchemaProto, this will // cause SchemaTypeIds to change + result = SchemaStore::SetSchemaResult(); + result.success = true; result.old_schema_type_ids_changed.emplace(0); // Old SchemaTypeId of "email" result.old_schema_type_ids_changed.emplace( 1); // Old SchemaTypeId of "message" @@ -434,24 +502,25 @@ TEST_F(SchemaStoreTest, SetSchemaWithReorderedTypesOk) { EXPECT_THAT(*actual_schema, EqualsProto(schema)); } -TEST_F(SchemaStoreTest, SetSchemaThatRequiresReindexingOk) { +TEST_F(SchemaStoreTest, IndexedPropertyChangeRequiresReindexingOk) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); - SchemaProto schema; - auto type = schema.add_types(); - type->set_schema_type("email"); - - // Add an unindexed property - auto property = type->add_properties(); - property->set_property_name("subject"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty( + // 
Add an unindexed property + PropertyConfigBuilder() + .SetName("subject") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert("email"); EXPECT_THAT(schema_store->SetSchema(schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -459,40 +528,112 @@ TEST_F(SchemaStoreTest, SetSchemaThatRequiresReindexingOk) { EXPECT_THAT(*actual_schema, EqualsProto(schema)); // Make a previously unindexed property indexed - property = schema.mutable_types(0)->mutable_properties(0); - property->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::EXACT_ONLY); - property->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - - // With a new indexed property, we'll need to reindex - result.index_incompatible = true; + schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); // Set the compatible schema + result = SchemaStore::SetSchemaResult(); + result.success = true; + result.schema_types_index_incompatible_by_name.insert("email"); EXPECT_THAT(schema_store->SetSchema(schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); EXPECT_THAT(*actual_schema, EqualsProto(schema)); } -TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) { +TEST_F(SchemaStoreTest, IndexNestedDocumentsChangeRequiresReindexingOk) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); - SchemaProto schema; - auto type = schema.add_types(); - type->set_schema_type("email"); + // Make two schemas. 
One that sets index_nested_properties to false and one + // that sets it to true. + SchemaTypeConfigProto email_type_config = + SchemaTypeConfigBuilder() + .SetType("email") + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaProto no_nested_index_schema = + SchemaBuilder() + .AddType(email_type_config) + .AddType(SchemaTypeConfigBuilder().SetType("person").AddProperty( + PropertyConfigBuilder() + .SetName("emails") + .SetDataTypeDocument("email", + /*index_nested_properties=*/false) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); + + SchemaProto nested_index_schema = + SchemaBuilder() + .AddType(email_type_config) + .AddType(SchemaTypeConfigBuilder().SetType("person").AddProperty( + PropertyConfigBuilder() + .SetName("emails") + .SetDataTypeDocument("email", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); + + // Set schema with index_nested_properties=false to start. + SchemaStore::SetSchemaResult result; + result.success = true; + result.schema_types_new_by_name.insert("email"); + result.schema_types_new_by_name.insert("person"); + EXPECT_THAT(schema_store->SetSchema(no_nested_index_schema), + IsOkAndHolds(EqualsSetSchemaResult(result))); + ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, + schema_store->GetSchema()); + EXPECT_THAT(*actual_schema, EqualsProto(no_nested_index_schema)); + + // Set schema with index_nested_properties=true and confirm that the change to + // 'person' is index incompatible. 
+ result = SchemaStore::SetSchemaResult(); + result.success = true; + result.schema_types_index_incompatible_by_name.insert("person"); + EXPECT_THAT(schema_store->SetSchema(nested_index_schema), + IsOkAndHolds(EqualsSetSchemaResult(result))); + ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); + EXPECT_THAT(*actual_schema, EqualsProto(nested_index_schema)); - // Add a STRING property - auto property = type->add_properties(); - property->set_property_name("subject"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + // Set schema with index_nested_properties=false and confirm that the change + // to 'person' is index incompatible. + result = SchemaStore::SetSchemaResult(); + result.success = true; + result.schema_types_index_incompatible_by_name.insert("person"); + EXPECT_THAT(schema_store->SetSchema(no_nested_index_schema), + IsOkAndHolds(EqualsSetSchemaResult(result))); + ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); + EXPECT_THAT(*actual_schema, EqualsProto(no_nested_index_schema)); +} + +TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) { + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); + + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty( + // Add a STRING property + PropertyConfigBuilder() + .SetName("subject") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert("email"); EXPECT_THAT(schema_store->SetSchema(schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -503,8 +644,14 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) { 
schema_store->GetSchemaTypeId("email")); // Make a previously STRING property into DOUBLE - property = schema.mutable_types(0)->mutable_properties(0); - property->set_data_type(PropertyConfigProto::DataType::DOUBLE); + schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty( + // Add a STRING property + PropertyConfigBuilder() + .SetName("subject") + .SetDataType(TYPE_DOUBLE) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); SchemaStore::SetSchemaResult incompatible_result; incompatible_result.success = false; @@ -549,6 +696,8 @@ TEST_F(SchemaStoreTest, GetSchemaTypeId) { // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; + result.schema_types_new_by_name.insert(first_type); + result.schema_types_new_by_name.insert(second_type); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); @@ -570,9 +719,8 @@ TEST_F(SchemaStoreTest, ComputeChecksumSameBetweenCalls) { std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); - SchemaProto foo_schema; - auto type_config = foo_schema.add_types(); - type_config->set_schema_type("foo"); + SchemaProto foo_schema = + SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build(); ICING_EXPECT_OK(schema_store->SetSchema(foo_schema)); @@ -587,9 +735,8 @@ TEST_F(SchemaStoreTest, ComputeChecksumSameAcrossInstances) { std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); - SchemaProto foo_schema; - auto type_config = foo_schema.add_types(); - type_config->set_schema_type("foo"); + SchemaProto foo_schema = + SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build(); ICING_EXPECT_OK(schema_store->SetSchema(foo_schema)); @@ -608,20 +755,19 @@ TEST_F(SchemaStoreTest, ComputeChecksumChangesOnModification) { std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, test_dir_, 
&fake_clock_)); - SchemaProto foo_schema; - auto type_config = foo_schema.add_types(); - type_config->set_schema_type("foo"); + SchemaProto foo_schema = + SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build(); ICING_EXPECT_OK(schema_store->SetSchema(foo_schema)); ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, schema_store->ComputeChecksum()); // Modifying the SchemaStore changes the checksum - SchemaProto foo_bar_schema; - type_config = foo_bar_schema.add_types(); - type_config->set_schema_type("foo"); - type_config = foo_bar_schema.add_types(); - type_config->set_schema_type("bar"); + SchemaProto foo_bar_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("foo")) + .AddType(SchemaTypeConfigBuilder().SetType("bar")) + .Build(); ICING_EXPECT_OK(schema_store->SetSchema(foo_bar_schema)); @@ -642,9 +788,8 @@ TEST_F(SchemaStoreTest, PersistToDiskPreservesAcrossInstances) { std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); - SchemaProto schema; - auto type_config = schema.add_types(); - type_config->set_schema_type("foo"); + SchemaProto schema = + SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build(); ICING_EXPECT_OK(schema_store->SetSchema(schema)); @@ -656,8 +801,9 @@ TEST_F(SchemaStoreTest, PersistToDiskPreservesAcrossInstances) { EXPECT_THAT(*actual_schema, EqualsProto(schema)); // Modify the schema so that something different is persisted next time - type_config = schema.add_types(); - type_config->set_schema_type("bar"); + schema = SchemaBuilder(schema) + .AddType(SchemaTypeConfigBuilder().SetType("bar")) + .Build(); ICING_EXPECT_OK(schema_store->SetSchema(schema)); // Should also persist on destruction @@ -670,6 +816,58 @@ TEST_F(SchemaStoreTest, PersistToDiskPreservesAcrossInstances) { EXPECT_THAT(*actual_schema, EqualsProto(schema)); } +TEST_F(SchemaStoreTest, SchemaStoreStorageInfoProto) { + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> 
schema_store, + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); + + // Create a schema with two types: one simple type and one type that uses all + // 16 sections. + PropertyConfigProto prop = + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL) + .Build(); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty( + PropertyConfigBuilder(prop))) + .AddType( + SchemaTypeConfigBuilder() + .SetType("fullSectionsType") + .AddProperty(PropertyConfigBuilder(prop).SetName("prop0")) + .AddProperty(PropertyConfigBuilder(prop).SetName("prop1")) + .AddProperty(PropertyConfigBuilder(prop).SetName("prop2")) + .AddProperty(PropertyConfigBuilder(prop).SetName("prop3")) + .AddProperty(PropertyConfigBuilder(prop).SetName("prop4")) + .AddProperty(PropertyConfigBuilder(prop).SetName("prop5")) + .AddProperty(PropertyConfigBuilder(prop).SetName("prop6")) + .AddProperty(PropertyConfigBuilder(prop).SetName("prop7")) + .AddProperty(PropertyConfigBuilder(prop).SetName("prop8")) + .AddProperty(PropertyConfigBuilder(prop).SetName("prop9")) + .AddProperty(PropertyConfigBuilder(prop).SetName("prop10")) + .AddProperty(PropertyConfigBuilder(prop).SetName("prop11")) + .AddProperty(PropertyConfigBuilder(prop).SetName("prop12")) + .AddProperty(PropertyConfigBuilder(prop).SetName("prop13")) + .AddProperty(PropertyConfigBuilder(prop).SetName("prop14")) + .AddProperty(PropertyConfigBuilder(prop).SetName("prop15"))) + .Build(); + + SchemaStore::SetSchemaResult result; + result.success = true; + result.schema_types_new_by_name.insert("email"); + result.schema_types_new_by_name.insert("fullSectionsType"); + EXPECT_THAT(schema_store->SetSchema(schema), + IsOkAndHolds(EqualsSetSchemaResult(result))); + + SchemaStoreStorageInfoProto storage_info = schema_store->GetStorageInfo(); + EXPECT_THAT(storage_info.schema_store_size(), Ge(0)); + 
EXPECT_THAT(storage_info.num_schema_types(), Eq(2)); + EXPECT_THAT(storage_info.num_total_sections(), Eq(17)); + EXPECT_THAT(storage_info.num_schema_types_sections_exhausted(), Eq(1)); +} + } // namespace } // namespace lib diff --git a/icing/schema/schema-util.cc b/icing/schema/schema-util.cc index 49e7096..22bc3f6 100644 --- a/icing/schema/schema-util.cc +++ b/icing/schema/schema-util.cc @@ -37,6 +37,20 @@ namespace lib { namespace { +bool ArePropertiesEqual(const PropertyConfigProto& old_property, + const PropertyConfigProto& new_property) { + return old_property.property_name() == new_property.property_name() && + old_property.data_type() == new_property.data_type() && + old_property.schema_type() == new_property.schema_type() && + old_property.cardinality() == new_property.cardinality() && + old_property.string_indexing_config().term_match_type() == + new_property.string_indexing_config().term_match_type() && + old_property.string_indexing_config().tokenizer_type() == + new_property.string_indexing_config().tokenizer_type() && + old_property.document_indexing_config().index_nested_properties() == + new_property.document_indexing_config().index_nested_properties(); +} + bool IsCardinalityCompatible(const PropertyConfigProto& old_property, const PropertyConfigProto& new_property) { if (old_property.cardinality() < new_property.cardinality()) { @@ -95,43 +109,175 @@ bool IsTermMatchTypeCompatible(const StringIndexingConfig& old_indexed, } // namespace -libtextclassifier3::Status SchemaUtil::Validate(const SchemaProto& schema) { - // Tracks SchemaTypeConfigs that we've validated already. 
- std::unordered_set<std::string_view> known_schema_types; +libtextclassifier3::Status ExpandTranstiveDependencies( + const SchemaUtil::DependencyMap& child_to_direct_parent_map, + std::string_view type, + SchemaUtil::DependencyMap* expanded_child_to_parent_map, + std::unordered_set<std::string_view>* pending_expansions, + std::unordered_set<std::string_view>* orphaned_types) { + auto expanded_itr = expanded_child_to_parent_map->find(type); + if (expanded_itr != expanded_child_to_parent_map->end()) { + // We've already expanded this type. Just return. + return libtextclassifier3::Status::OK; + } + auto itr = child_to_direct_parent_map.find(type); + if (itr == child_to_direct_parent_map.end()) { + // It's an orphan. Just return. + orphaned_types->insert(type); + return libtextclassifier3::Status::OK; + } + pending_expansions->insert(type); + std::unordered_set<std::string_view> expanded_dependencies; + + // Add all of the direct parent dependencies. + expanded_dependencies.reserve(itr->second.size()); + expanded_dependencies.insert(itr->second.begin(), itr->second.end()); + + // Iterate through each direct parent and add their indirect parents. + for (std::string_view dep : itr->second) { + // 1. Check if we're in the middle of expanding this type - IOW there's a + // cycle! + if (pending_expansions->count(dep) > 0) { + return absl_ports::InvalidArgumentError( + absl_ports::StrCat("Infinite loop detected in type configs. '", type, + "' references itself.")); + } - // Tracks SchemaTypeConfigs that have been mentioned (by other - // SchemaTypeConfigs), but we haven't validated yet. - std::unordered_set<std::string_view> unknown_schema_types; + // 2. Expand this type as needed. + ICING_RETURN_IF_ERROR(ExpandTranstiveDependencies( + child_to_direct_parent_map, dep, expanded_child_to_parent_map, + pending_expansions, orphaned_types)); + if (orphaned_types->count(dep) > 0) { + // Dep is an orphan. Just skip to the next dep. 
+ continue; + } - // Tracks PropertyConfigs within a SchemaTypeConfig that we've validated - // already. - std::unordered_set<std::string_view> known_property_names; + // 3. Dep has been fully expanded. Add all of its dependencies to this + // type's dependencies. + auto dep_expanded_itr = expanded_child_to_parent_map->find(dep); + expanded_dependencies.reserve(expanded_dependencies.size() + + dep_expanded_itr->second.size()); + expanded_dependencies.insert(dep_expanded_itr->second.begin(), + dep_expanded_itr->second.end()); + } + expanded_child_to_parent_map->insert( + {type, std::move(expanded_dependencies)}); + pending_expansions->erase(type); + return libtextclassifier3::Status::OK; +} - // Tracks which schemas reference other schemas. This is used to detect - // infinite loops between indexed schema references (e.g. A -> B -> C -> A). - // We could get into an infinite loop while trying to assign section ids. - // - // The key is the "child" schema that is being referenced within another - // schema. - // The value is a set of all the direct/indirect "parent" schemas that - // reference the "child" schema. - // - // For example, if A has a nested document property of type B, then A is the - // "parent" and B is the "child" and so schema_references will contain - // schema_references[B] == {A}. - std::unordered_map<std::string_view, std::unordered_set<std::string_view>> - schema_references; +// Expands the dependencies represented by the child_to_direct_parent_map to +// also include indirect parents. +// +// Ex. Suppose we have a schema with four types A, B, C, D. A has a property of +// type B and B has a property of type C. C and D only have non-document +// properties. +// +// The child to direct parent dependency map for this schema would be: +// C -> B +// B -> A +// +// This function would expand it so that A is also present as an indirect parent +// of C. 
+libtextclassifier3::StatusOr<SchemaUtil::DependencyMap> +ExpandTranstiveDependencies( + const SchemaUtil::DependencyMap& child_to_direct_parent_map) { + SchemaUtil::DependencyMap expanded_child_to_parent_map; + + // Types that we are expanding. + std::unordered_set<std::string_view> pending_expansions; + + // Types that have no parents that depend on them. + std::unordered_set<std::string_view> orphaned_types; + for (const auto& kvp : child_to_direct_parent_map) { + ICING_RETURN_IF_ERROR(ExpandTranstiveDependencies( + child_to_direct_parent_map, kvp.first, &expanded_child_to_parent_map, + &pending_expansions, &orphaned_types)); + } + return expanded_child_to_parent_map; +} +// Builds a transitive child-parent dependency map. 'Orphaned' types (types with +// no parents) will not be present in the map. +// +// Ex. Suppose we have a schema with four types A, B, C, D. A has a property of +// type B and B has a property of type C. C and D only have non-document +// properties. +// +// The transitive child-parent dependency map for this schema would be: +// C -> A, B +// B -> A +// +// A and D would be considered orphaned properties because no type refers to +// them. +// +// RETURNS: +// On success, a transitive child-parent dependency map of all types in the +// schema. +// INVALID_ARGUMENT if the schema contains a cycle or an undefined type. +// ALREADY_EXISTS if a schema type is specified more than once in the schema +libtextclassifier3::StatusOr<SchemaUtil::DependencyMap> +BuildTransitiveDependencyGraph(const SchemaProto& schema) { + // Child to parent map. + SchemaUtil::DependencyMap child_to_direct_parent_map; + + // Add all first-order dependencies. 
+ std::unordered_set<std::string_view> known_types; + std::unordered_set<std::string_view> unknown_types; for (const auto& type_config : schema.types()) { std::string_view schema_type(type_config.schema_type()); - ICING_RETURN_IF_ERROR(ValidateSchemaType(schema_type)); - - // We can't have duplicate schema_types - if (!known_schema_types.insert(schema_type).second) { + if (known_types.count(schema_type) > 0) { return absl_ports::AlreadyExistsError(absl_ports::StrCat( "Field 'schema_type' '", schema_type, "' is already defined")); } - unknown_schema_types.erase(schema_type); + known_types.insert(schema_type); + unknown_types.erase(schema_type); + for (const auto& property_config : type_config.properties()) { + if (property_config.data_type() == + PropertyConfigProto::DataType::DOCUMENT) { + // Need to know what schema_type these Document properties should be + // validated against + std::string_view property_schema_type(property_config.schema_type()); + if (property_schema_type == schema_type) { + return absl_ports::InvalidArgumentError( + absl_ports::StrCat("Infinite loop detected in type configs. '", + schema_type, "' references itself.")); + } + if (known_types.count(property_schema_type) == 0) { + unknown_types.insert(property_schema_type); + } + auto itr = child_to_direct_parent_map.find(property_schema_type); + if (itr == child_to_direct_parent_map.end()) { + child_to_direct_parent_map.insert( + {property_schema_type, std::unordered_set<std::string_view>()}); + itr = child_to_direct_parent_map.find(property_schema_type); + } + itr->second.insert(schema_type); + } + } + } + if (!unknown_types.empty()) { + return absl_ports::InvalidArgumentError(absl_ports::StrCat( + "Undefined 'schema_type's: ", absl_ports::StrJoin(unknown_types, ","))); + } + return ExpandTranstiveDependencies(child_to_direct_parent_map); +} + +libtextclassifier3::StatusOr<SchemaUtil::DependencyMap> SchemaUtil::Validate( + const SchemaProto& schema) { + // 1. Build the dependency map. 
This will detect any cycles, non-existent or + // duplicate types in the schema. + ICING_ASSIGN_OR_RETURN(SchemaUtil::DependencyMap dependency_map, + BuildTransitiveDependencyGraph(schema)); + + // Tracks PropertyConfigs within a SchemaTypeConfig that we've validated + // already. + std::unordered_set<std::string_view> known_property_names; + + // 2. Validate the properties of each type. + for (const auto& type_config : schema.types()) { + std::string_view schema_type(type_config.schema_type()); + ICING_RETURN_IF_ERROR(ValidateSchemaType(schema_type)); // We only care about properties being unique within one type_config known_property_names.clear(); @@ -164,56 +310,6 @@ libtextclassifier3::Status SchemaUtil::Validate(const SchemaProto& schema) { "data_types in schema property '", schema_type, ".", property_name, "'")); } - - if (property_schema_type == schema_type) { - // The schema refers to itself. This also causes a infinite loop. - // - // TODO(b/171996137): When clients can opt out of indexing document - // properties, then we don't need to do this if the document property - // isn't indexed. We only care about infinite loops while we're trying - // to assign section ids for indexing. - return absl_ports::InvalidArgumentError( - absl_ports::StrCat("Infinite loop detected in type configs. '", - schema_type, "' references itself.")); - } - - // Need to make sure we eventually see/validate this schema_type - if (known_schema_types.count(property_schema_type) == 0) { - unknown_schema_types.insert(property_schema_type); - } - - // Start tracking the parent schemas that references this nested schema - // for infinite loop detection. - // - // TODO(b/171996137): When clients can opt out of indexing document - // properties, then we don't need to do this if the document property - // isn't indexed. We only care about infinite loops while we're trying - // to assign section ids for indexing. 
- std::unordered_set<std::string_view> parent_schemas; - parent_schemas.insert(schema_type); - - for (const auto& parent : parent_schemas) { - // Check for any indirect parents - auto indirect_parents_iter = schema_references.find(parent); - if (indirect_parents_iter == schema_references.end()) { - continue; - } - - // Our "parent" schema has parents as well. They're our indirect - // parents now. - for (const std::string_view& indirect_parent : - indirect_parents_iter->second) { - if (indirect_parent == property_schema_type) { - // We're our own indirect parent! Infinite loop found. - return absl_ports::InvalidArgumentError(absl_ports::StrCat( - "Infinite loop detected in type configs. '", - property_schema_type, "' references itself.")); - } - parent_schemas.insert(indirect_parent); - } - } - - schema_references.insert({property_schema_type, parent_schemas}); } ICING_RETURN_IF_ERROR(ValidateCardinality(property_config.cardinality(), @@ -227,15 +323,7 @@ libtextclassifier3::Status SchemaUtil::Validate(const SchemaProto& schema) { } } - // A Document property claimed to be of a schema_type that we never - // saw/validated - if (!unknown_schema_types.empty()) { - return absl_ports::UnknownError( - absl_ports::StrCat("Undefined 'schema_type's: ", - absl_ports::StrJoin(unknown_schema_types, ","))); - } - - return libtextclassifier3::Status::OK; + return dependency_map; } libtextclassifier3::Status SchemaUtil::ValidateSchemaType( @@ -355,9 +443,9 @@ SchemaUtil::ParsedPropertyConfigs SchemaUtil::ParsePropertyConfigs( } const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta( - const SchemaProto& old_schema, const SchemaProto& new_schema) { + const SchemaProto& old_schema, const SchemaProto& new_schema, + const DependencyMap& new_schema_dependency_map) { SchemaDelta schema_delta; - schema_delta.index_incompatible = false; TypeConfigMap new_type_config_map; BuildTypeConfigMap(new_schema, &new_type_config_map); @@ -385,7 +473,29 @@ const SchemaUtil::SchemaDelta 
SchemaUtil::ComputeCompatibilityDelta( // be reindexed. int32_t old_required_properties = 0; int32_t old_indexed_properties = 0; + + // If there is a different number of properties, then there must have been a + // change. + bool has_property_changed = + old_type_config.properties_size() != + new_schema_type_and_config->second.properties_size(); + bool is_incompatible = false; + bool is_index_incompatible = false; for (const auto& old_property_config : old_type_config.properties()) { + if (old_property_config.cardinality() == + PropertyConfigProto::Cardinality::REQUIRED) { + ++old_required_properties; + } + + // A non-default term_match_type indicates that this property is meant to + // be indexed. + bool is_indexed_property = + old_property_config.string_indexing_config().term_match_type() != + TermMatchType::UNKNOWN; + if (is_indexed_property) { + ++old_indexed_properties; + } + auto new_property_name_and_config = new_parsed_property_configs.property_config_map.find( old_property_config.property_name()); @@ -397,39 +507,35 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta( "Previously defined property type '", old_type_config.schema_type(), ".", old_property_config.property_name(), "' was not defined in new schema"); - schema_delta.schema_types_incompatible.insert( - old_type_config.schema_type()); + is_incompatible = true; + is_index_incompatible |= is_indexed_property; continue; } const PropertyConfigProto* new_property_config = new_property_name_and_config->second; + if (!has_property_changed && + !ArePropertiesEqual(old_property_config, *new_property_config)) { + // Finally found a property that changed. 
+ has_property_changed = true; + } if (!IsPropertyCompatible(old_property_config, *new_property_config)) { ICING_VLOG(1) << absl_ports::StrCat( "Property '", old_type_config.schema_type(), ".", old_property_config.property_name(), "' is incompatible."); - schema_delta.schema_types_incompatible.insert( - old_type_config.schema_type()); - } - - if (old_property_config.cardinality() == - PropertyConfigProto::Cardinality::REQUIRED) { - ++old_required_properties; - } - - // A non-default term_match_type indicates that this property is meant to - // be indexed. - if (old_property_config.string_indexing_config().term_match_type() != - TermMatchType::UNKNOWN) { - ++old_indexed_properties; + is_incompatible = true; } // Any change in the indexed property requires a reindexing if (!IsTermMatchTypeCompatible( old_property_config.string_indexing_config(), - new_property_config->string_indexing_config())) { - schema_delta.index_incompatible = true; + new_property_config->string_indexing_config()) || + old_property_config.document_indexing_config() + .index_nested_properties() != + new_property_config->document_indexing_config() + .index_nested_properties()) { + is_index_incompatible = true; } } @@ -444,8 +550,7 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta( "New schema '", old_type_config.schema_type(), "' has REQUIRED properties that are not " "present in the previously defined schema"); - schema_delta.schema_types_incompatible.insert( - old_type_config.schema_type()); + is_incompatible = true; } // If we've gained any new indexed properties, then the section ids may @@ -457,8 +562,59 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta( "Set of indexed properties in schema type '", old_type_config.schema_type(), "' has changed, required reindexing."); - schema_delta.index_incompatible = true; + is_index_incompatible = true; + } + + if (is_incompatible) { + // If this type is incompatible, then every type that depends on it might + // 
also be incompatible. Use the dependency map to mark those ones as + // incompatible too. + schema_delta.schema_types_incompatible.insert( + old_type_config.schema_type()); + auto parent_types_itr = + new_schema_dependency_map.find(old_type_config.schema_type()); + if (parent_types_itr != new_schema_dependency_map.end()) { + schema_delta.schema_types_incompatible.reserve( + schema_delta.schema_types_incompatible.size() + + parent_types_itr->second.size()); + schema_delta.schema_types_incompatible.insert( + parent_types_itr->second.begin(), parent_types_itr->second.end()); + } + } + + if (is_index_incompatible) { + // If this type is index incompatible, then every type that depends on it + // might also be index incompatible. Use the dependency map to mark those + // ones as index incompatible too. + schema_delta.schema_types_index_incompatible.insert( + old_type_config.schema_type()); + auto parent_types_itr = + new_schema_dependency_map.find(old_type_config.schema_type()); + if (parent_types_itr != new_schema_dependency_map.end()) { + schema_delta.schema_types_index_incompatible.reserve( + schema_delta.schema_types_index_incompatible.size() + + parent_types_itr->second.size()); + schema_delta.schema_types_index_incompatible.insert( + parent_types_itr->second.begin(), parent_types_itr->second.end()); + } } + + if (!is_incompatible && !is_index_incompatible && has_property_changed) { + schema_delta.schema_types_changed_fully_compatible.insert( + old_type_config.schema_type()); + } + + // Lastly, remove this type from the map. We know that this type can't + // come up in future iterations through the old schema types because the old + // type config has unique types. + new_type_config_map.erase(old_type_config.schema_type()); + } + + // Any types that are still present in the new_type_config_map are newly added + // types. 
+ schema_delta.schema_types_new.reserve(new_type_config_map.size()); + for (auto& kvp : new_type_config_map) { + schema_delta.schema_types_new.insert(std::move(kvp.first)); } return schema_delta; diff --git a/icing/schema/schema-util.h b/icing/schema/schema-util.h index 7b989a8..fa80b15 100644 --- a/icing/schema/schema-util.h +++ b/icing/schema/schema-util.h @@ -22,6 +22,7 @@ #include <unordered_set> #include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/proto/schema.pb.h" namespace icing { @@ -32,13 +33,14 @@ class SchemaUtil { using TypeConfigMap = std::unordered_map<std::string, const SchemaTypeConfigProto>; - struct SchemaDelta { - // Whether an indexing config has changed, requiring the index to be - // regenerated. We don't list out all the types that make the index - // incompatible because our index isn't optimized for that. It's much easier - // to reset the entire index and reindex every document. - bool index_incompatible = false; + // Maps from a child type to the parent types that depend on it. + // Ex. type A has a single property of type B + // The dependency map will be { { "B", { "A" } } } + using DependencyMap = + std::unordered_map<std::string_view, + std::unordered_set<std::string_view>>; + struct SchemaDelta { // Which schema types were present in the old schema, but were deleted from // the new schema. std::unordered_set<std::string> schema_types_deleted; @@ -47,10 +49,28 @@ class SchemaUtil { // could invalidate existing Documents of that schema type. std::unordered_set<std::string> schema_types_incompatible; + // Schema types that were added in the new schema. Represented by the + // `schema_type` field in the SchemaTypeConfigProto. + std::unordered_set<std::string> schema_types_new; + + // Schema types that were changed in a way that was backwards compatible and + // didn't invalidate the index. 
Represented by the `schema_type` field in + // the SchemaTypeConfigProto. + std::unordered_set<std::string> schema_types_changed_fully_compatible; + + // Schema types that were changed in a way that was backwards compatible, + // but invalidated the index. Represented by the `schema_type` field in the + // SchemaTypeConfigProto. + std::unordered_set<std::string> schema_types_index_incompatible; + bool operator==(const SchemaDelta& other) const { - return index_incompatible == other.index_incompatible && - schema_types_deleted == other.schema_types_deleted && - schema_types_incompatible == other.schema_types_incompatible; + return schema_types_deleted == other.schema_types_deleted && + schema_types_incompatible == other.schema_types_incompatible && + schema_types_new == other.schema_types_new && + schema_types_changed_fully_compatible == + other.schema_types_changed_fully_compatible && + schema_types_index_incompatible == + other.schema_types_index_incompatible; } }; @@ -90,10 +110,12 @@ class SchemaUtil { // document properties can be opted out of indexing. // // Returns: + // On success, a dependency map from each child types to all parent types + // that depend on it directly or indirectly. // ALREADY_EXISTS for case 1 and 2 // INVALID_ARGUMENT for 3-13 - // OK otherwise - static libtextclassifier3::Status Validate(const SchemaProto& schema); + static libtextclassifier3::StatusOr<DependencyMap> Validate( + const SchemaProto& schema); // Creates a mapping of schema type -> schema type config proto. The // type_config_map is cleared, and then each schema-type_config_proto pair is @@ -142,7 +164,8 @@ class SchemaUtil { // // Returns a SchemaDelta that captures the aforementioned differences. static const SchemaDelta ComputeCompatibilityDelta( - const SchemaProto& old_schema, const SchemaProto& new_schema); + const SchemaProto& old_schema, const SchemaProto& new_schema, + const DependencyMap& new_schema_dependency_map); // Validates the 'property_name' field. // 1. 
Can't be an empty string diff --git a/icing/schema/schema-util_test.cc b/icing/schema/schema-util_test.cc index 61a861c..26ef4c7 100644 --- a/icing/schema/schema-util_test.cc +++ b/icing/schema/schema-util_test.cc @@ -17,11 +17,13 @@ #include <cstdint> #include <string> #include <string_view> +#include <unordered_set> #include "gmock/gmock.h" #include "gtest/gtest.h" #include "icing/proto/schema.pb.h" #include "icing/proto/term.pb.h" +#include "icing/schema-builder.h" #include "icing/testing/common-matchers.h" namespace icing { @@ -33,691 +35,1286 @@ using ::testing::HasSubstr; // Properties/fields in a schema type constexpr char kEmailType[] = "EmailMessage"; +constexpr char kMessageType[] = "Text"; constexpr char kPersonType[] = "Person"; -class SchemaUtilTest : public ::testing::Test { - protected: - SchemaProto schema_proto_; - - static SchemaTypeConfigProto CreateSchemaTypeConfig( - const std::string_view schema_type, - const std::string_view nested_schema_type = "") { - SchemaTypeConfigProto type; - type.set_schema_type(std::string(schema_type)); - - auto string_property = type.add_properties(); - string_property->set_property_name("string"); - string_property->set_data_type(PropertyConfigProto::DataType::STRING); - string_property->set_cardinality( - PropertyConfigProto::Cardinality::REQUIRED); - - auto int_property = type.add_properties(); - int_property->set_property_name("int"); - int_property->set_data_type(PropertyConfigProto::DataType::INT64); - int_property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - auto double_property = type.add_properties(); - double_property->set_property_name("double"); - double_property->set_data_type(PropertyConfigProto::DataType::DOUBLE); - double_property->set_cardinality( - PropertyConfigProto::Cardinality::REPEATED); - - auto bool_property = type.add_properties(); - bool_property->set_property_name("boolean"); - bool_property->set_data_type(PropertyConfigProto::DataType::BOOLEAN); - 
bool_property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED); +constexpr PropertyConfigProto_DataType_Code TYPE_DOCUMENT = + PropertyConfigProto_DataType_Code_DOCUMENT; +constexpr PropertyConfigProto_DataType_Code TYPE_STRING = + PropertyConfigProto_DataType_Code_STRING; +constexpr PropertyConfigProto_DataType_Code TYPE_INT = + PropertyConfigProto_DataType_Code_INT64; +constexpr PropertyConfigProto_DataType_Code TYPE_DOUBLE = + PropertyConfigProto_DataType_Code_DOUBLE; + +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_UNKNOWN = + PropertyConfigProto_Cardinality_Code_UNKNOWN; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED = + PropertyConfigProto_Cardinality_Code_REQUIRED; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = + PropertyConfigProto_Cardinality_Code_OPTIONAL; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED = + PropertyConfigProto_Cardinality_Code_REPEATED; + +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_NONE = + StringIndexingConfig_TokenizerType_Code_NONE; +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = + StringIndexingConfig_TokenizerType_Code_PLAIN; + +constexpr TermMatchType_Code MATCH_UNKNOWN = TermMatchType_Code_UNKNOWN; +constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; +constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX; + +TEST(SchemaUtilTest, DependencyGraphAlphabeticalOrder) { + // Create a schema with the following dependencies: + // C + // / \ + // A - B E - F + // \ / + // D + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + 
.SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/true)) + .AddProperty( + PropertyConfigBuilder() + .SetName("d") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("D", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddProperty( + PropertyConfigBuilder() + .SetName("e") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("E", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_d = + SchemaTypeConfigBuilder() + .SetType("D") + .AddProperty( + PropertyConfigBuilder() + .SetName("e") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("E", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_e = + SchemaTypeConfigBuilder() + .SetType("E") + .AddProperty( + PropertyConfigBuilder() + .SetName("f") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("F", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_f = + SchemaTypeConfigBuilder() + .SetType("F") + .AddProperty(PropertyConfigBuilder() + .SetName("text") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)) + .Build(); + + // Provide these in alphabetical (also parent-child) order: A, B, C, D, E, F + SchemaProto schema = SchemaBuilder() + .AddType(type_a) + .AddType(type_b) + .AddType(type_c) + .AddType(type_d) + .AddType(type_e) + .AddType(type_f) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependencyMap d_map, + SchemaUtil::Validate(schema)); + EXPECT_THAT(d_map, testing::SizeIs(5)); + EXPECT_THAT(d_map["F"], + testing::UnorderedElementsAre("A", "B", "C", "D", "E")); + EXPECT_THAT(d_map["E"], testing::UnorderedElementsAre("A", "B", "C", "D")); + EXPECT_THAT(d_map["D"], testing::UnorderedElementsAre("A", "B")); + EXPECT_THAT(d_map["C"], testing::UnorderedElementsAre("A", "B")); + EXPECT_THAT(d_map["B"], 
testing::UnorderedElementsAre("A")); +} - auto bytes_property = type.add_properties(); - bytes_property->set_property_name("bytes"); - bytes_property->set_data_type(PropertyConfigProto::DataType::BYTES); - bytes_property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED); +TEST(SchemaUtilTest, DependencyGraphReverseAlphabeticalOrder) { + // Create a schema with the following dependencies: + // C + // / \ + // A - B E - F + // \ / + // D + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/true)) + .AddProperty( + PropertyConfigBuilder() + .SetName("d") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("D", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddProperty( + PropertyConfigBuilder() + .SetName("e") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("E", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_d = + SchemaTypeConfigBuilder() + .SetType("D") + .AddProperty( + PropertyConfigBuilder() + .SetName("e") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("E", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_e = + SchemaTypeConfigBuilder() + .SetType("E") + .AddProperty( + PropertyConfigBuilder() + .SetName("f") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("F", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_f = + SchemaTypeConfigBuilder() + .SetType("F") + .AddProperty(PropertyConfigBuilder() + .SetName("text") + 
.SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)) + .Build(); + + // Provide these in reverse alphabetical (also child-parent) order: + // F, E, D, C, B, A + SchemaProto schema = SchemaBuilder() + .AddType(type_f) + .AddType(type_e) + .AddType(type_d) + .AddType(type_c) + .AddType(type_b) + .AddType(type_a) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependencyMap d_map, + SchemaUtil::Validate(schema)); + EXPECT_THAT(d_map, testing::SizeIs(5)); + EXPECT_THAT(d_map["F"], + testing::UnorderedElementsAre("A", "B", "C", "D", "E")); + EXPECT_THAT(d_map["E"], testing::UnorderedElementsAre("A", "B", "C", "D")); + EXPECT_THAT(d_map["D"], testing::UnorderedElementsAre("A", "B")); + EXPECT_THAT(d_map["C"], testing::UnorderedElementsAre("A", "B")); + EXPECT_THAT(d_map["B"], testing::UnorderedElementsAre("A")); +} - if (!nested_schema_type.empty()) { - auto document_property = type.add_properties(); - document_property->set_property_name("document"); - document_property->set_data_type(PropertyConfigProto::DataType::DOCUMENT); - document_property->set_cardinality( - PropertyConfigProto::Cardinality::REPEATED); - document_property->set_schema_type(std::string(nested_schema_type)); - } +TEST(SchemaUtilTest, DependencyGraphMixedOrder) { + // Create a schema with the following dependencies: + // C + // / \ + // A - B E - F + // \ / + // D + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/true)) + .AddProperty( + PropertyConfigBuilder() + .SetName("d") + .SetCardinality(CARDINALITY_OPTIONAL) + 
.SetDataTypeDocument("D", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddProperty( + PropertyConfigBuilder() + .SetName("e") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("E", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_d = + SchemaTypeConfigBuilder() + .SetType("D") + .AddProperty( + PropertyConfigBuilder() + .SetName("e") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("E", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_e = + SchemaTypeConfigBuilder() + .SetType("E") + .AddProperty( + PropertyConfigBuilder() + .SetName("f") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("F", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_f = + SchemaTypeConfigBuilder() + .SetType("F") + .AddProperty(PropertyConfigBuilder() + .SetName("text") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)) + .Build(); + + // Provide these in a random order: C, E, F, A, B, D + SchemaProto schema = SchemaBuilder() + .AddType(type_c) + .AddType(type_e) + .AddType(type_f) + .AddType(type_a) + .AddType(type_b) + .AddType(type_d) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependencyMap d_map, + SchemaUtil::Validate(schema)); + EXPECT_THAT(d_map, testing::SizeIs(5)); + EXPECT_THAT(d_map["F"], + testing::UnorderedElementsAre("A", "B", "C", "D", "E")); + EXPECT_THAT(d_map["E"], testing::UnorderedElementsAre("A", "B", "C", "D")); + EXPECT_THAT(d_map["D"], testing::UnorderedElementsAre("A", "B")); + EXPECT_THAT(d_map["C"], testing::UnorderedElementsAre("A", "B")); + EXPECT_THAT(d_map["B"], testing::UnorderedElementsAre("A")); +} - return type; - } -}; +TEST(SchemaUtilTest, TopLevelCycle) { + // Create a schema with the following dependencies: + // A - B - B - B - B.... 
+ SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + + SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build(); + EXPECT_THAT(SchemaUtil::Validate(schema), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, + HasSubstr("Infinite loop"))); +} -TEST_F(SchemaUtilTest, EmptySchemaProtoIsValid) { - ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_)); +TEST(SchemaUtilTest, MultiLevelCycle) { + // Create a schema with the following dependencies: + // A - B - C - A - B - C - A ... + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddProperty( + PropertyConfigBuilder() + .SetName("a") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("A", /*index_nested_properties=*/true)) + .Build(); + + SchemaProto schema = + SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build(); + EXPECT_THAT(SchemaUtil::Validate(schema), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST_F(SchemaUtilTest, Valid_Nested) { - auto email_type = 
schema_proto_.add_types(); - *email_type = CreateSchemaTypeConfig(kEmailType, kPersonType); +TEST(SchemaUtilTest, NonExistentType) { + // Create a schema with the following dependencies: + // A - B - C - X (does not exist) + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddProperty( + PropertyConfigBuilder() + .SetName("x") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("X", /*index_nested_properties=*/true)) + .Build(); + + SchemaProto schema = + SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build(); + EXPECT_THAT(SchemaUtil::Validate(schema), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); +} - auto person_type = schema_proto_.add_types(); - *person_type = CreateSchemaTypeConfig(kPersonType); +TEST(SchemaUtilTest, EmptySchemaProtoIsValid) { + SchemaProto schema; + ICING_ASSERT_OK(SchemaUtil::Validate(schema)); +} - ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_)); +TEST(SchemaUtilTest, Valid_Nested) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty(PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeDocument( + kPersonType, + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REPEATED))) + .AddType(SchemaTypeConfigBuilder() + .SetType(kPersonType) + 
.AddProperty(PropertyConfigBuilder() + .SetName("name") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); + + ICING_ASSERT_OK(SchemaUtil::Validate(schema)); } -TEST_F(SchemaUtilTest, ClearedPropertyConfigsIsValid) { +TEST(SchemaUtilTest, ClearedPropertyConfigsIsValid) { // No property fields is technically ok, but probably not realistic. - auto type = schema_proto_.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); - type->clear_properties(); - - ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_)); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType(kEmailType)) + .Build(); + ICING_ASSERT_OK(SchemaUtil::Validate(schema)); } -TEST_F(SchemaUtilTest, ClearedSchemaTypeIsInvalid) { - auto type = schema_proto_.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); - type->clear_schema_type(); - - ASSERT_THAT(SchemaUtil::Validate(schema_proto_), +TEST(SchemaUtilTest, ClearedSchemaTypeIsInvalid) { + SchemaProto schema = + SchemaBuilder().AddType(SchemaTypeConfigBuilder()).Build(); + ASSERT_THAT(SchemaUtil::Validate(schema), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST_F(SchemaUtilTest, EmptySchemaTypeIsInvalid) { - auto type = schema_proto_.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); - type->set_schema_type(""); +TEST(SchemaUtilTest, EmptySchemaTypeIsInvalid) { + SchemaProto schema = + SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("")).Build(); - ASSERT_THAT(SchemaUtil::Validate(schema_proto_), + ASSERT_THAT(SchemaUtil::Validate(schema), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST_F(SchemaUtilTest, AnySchemaTypeOk) { - auto type = schema_proto_.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); - type->set_schema_type("abc123!@#$%^&*()_-+=[{]}|\\;:'\",<.>?你好"); +TEST(SchemaUtilTest, AnySchemaTypeOk) { + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType( + 
"abc123!@#$%^&*()_-+=[{]}|\\;:'\",<.>?你好")) + .Build(); - ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_)); + ICING_ASSERT_OK(SchemaUtil::Validate(schema)); } -TEST_F(SchemaUtilTest, ClearedPropertyNameIsInvalid) { - auto type = schema_proto_.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); - - auto property = type->add_properties(); - property->clear_property_name(); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - - ASSERT_THAT(SchemaUtil::Validate(schema_proto_), +TEST(SchemaUtilTest, ClearedPropertyNameIsInvalid) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("foo") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); + schema.mutable_types(0)->mutable_properties(0)->clear_property_name(); + ASSERT_THAT(SchemaUtil::Validate(schema), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST_F(SchemaUtilTest, EmptyPropertyNameIsInvalid) { - auto type = schema_proto_.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); - - auto property = type->add_properties(); - property->set_property_name(""); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - - ASSERT_THAT(SchemaUtil::Validate(schema_proto_), +TEST(SchemaUtilTest, EmptyPropertyNameIsInvalid) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); + + ASSERT_THAT(SchemaUtil::Validate(schema), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST_F(SchemaUtilTest, NonAlphanumericPropertyNameIsInvalid) { - auto type = schema_proto_.add_types(); - *type = 
CreateSchemaTypeConfig(kEmailType); - - auto property = type->add_properties(); - property->set_property_name("_"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - - ASSERT_THAT(SchemaUtil::Validate(schema_proto_), +TEST(SchemaUtilTest, NonAlphanumericPropertyNameIsInvalid) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("a_b") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); + + ASSERT_THAT(SchemaUtil::Validate(schema), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST_F(SchemaUtilTest, AlphanumericPropertyNameOk) { - auto type = schema_proto_.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); - - auto property = type->add_properties(); - property->set_property_name("abc123"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - - ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_)); +TEST(SchemaUtilTest, AlphanumericPropertyNameOk) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("abc123") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); + + ICING_ASSERT_OK(SchemaUtil::Validate(schema)); } -TEST_F(SchemaUtilTest, DuplicatePropertyNameIsInvalid) { - auto type = schema_proto_.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); - - auto first_property = type->add_properties(); - first_property->set_property_name("DuplicatedProperty"); - first_property->set_data_type(PropertyConfigProto::DataType::STRING); - first_property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - - auto second_property = type->add_properties(); - 
second_property->set_property_name("DuplicatedProperty"); - second_property->set_data_type(PropertyConfigProto::DataType::STRING); - second_property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - - ASSERT_THAT(SchemaUtil::Validate(schema_proto_), +TEST(SchemaUtilTest, DuplicatePropertyNameIsInvalid) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("DuplicatedProperty") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty(PropertyConfigBuilder() + .SetName("DuplicatedProperty") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); + ASSERT_THAT(SchemaUtil::Validate(schema), StatusIs(libtextclassifier3::StatusCode::ALREADY_EXISTS)); } -TEST_F(SchemaUtilTest, ClearedDataTypeIsInvalid) { - auto type = schema_proto_.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); - - auto property = type->add_properties(); - property->set_property_name("NewProperty"); - property->clear_data_type(); - property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - - ASSERT_THAT(SchemaUtil::Validate(schema_proto_), +TEST(SchemaUtilTest, ClearedDataTypeIsInvalid) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("NewProperty") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); + schema.mutable_types(0)->mutable_properties(0)->clear_data_type(); + ASSERT_THAT(SchemaUtil::Validate(schema), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST_F(SchemaUtilTest, UnknownDataTypeIsInvalid) { - auto type = schema_proto_.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); - - auto property = type->add_properties(); - property->set_property_name("NewProperty"); - property->set_data_type(PropertyConfigProto::DataType::UNKNOWN); - 
property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - - ASSERT_THAT(SchemaUtil::Validate(schema_proto_), +TEST(SchemaUtilTest, UnknownDataTypeIsInvalid) { + SchemaProto schema = + SchemaBuilder() + .AddType( + SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty( + PropertyConfigBuilder() + .SetName("NewProperty") + .SetDataType(PropertyConfigProto::DataType::UNKNOWN) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); + ASSERT_THAT(SchemaUtil::Validate(schema), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST_F(SchemaUtilTest, ClearedCardinalityIsInvalid) { - auto type = schema_proto_.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); - - auto property = type->add_properties(); - property->set_property_name("NewProperty"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->clear_cardinality(); - - ASSERT_THAT(SchemaUtil::Validate(schema_proto_), +TEST(SchemaUtilTest, ClearedCardinalityIsInvalid) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("NewProperty") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); + schema.mutable_types(0)->mutable_properties(0)->clear_cardinality(); + ASSERT_THAT(SchemaUtil::Validate(schema), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST_F(SchemaUtilTest, UnknownCardinalityIsInvalid) { - auto type = schema_proto_.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); - - auto property = type->add_properties(); - property->set_property_name("NewProperty"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::UNKNOWN); - - ASSERT_THAT(SchemaUtil::Validate(schema_proto_), +TEST(SchemaUtilTest, UnknownCardinalityIsInvalid) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + 
.SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("NewProperty") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_UNKNOWN))) + .Build(); + ASSERT_THAT(SchemaUtil::Validate(schema), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST_F(SchemaUtilTest, ClearedPropertySchemaTypeIsInvalid) { - auto type = schema_proto_.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); - - auto property = type->add_properties(); - property->set_property_name("NewProperty"); - property->set_data_type(PropertyConfigProto::DataType::DOCUMENT); - property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED); - property->clear_schema_type(); - - ASSERT_THAT(SchemaUtil::Validate(schema_proto_), +TEST(SchemaUtilTest, ClearedPropertySchemaTypeIsInvalid) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("NewProperty") + .SetDataType(TYPE_DOCUMENT) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); + ASSERT_THAT(SchemaUtil::Validate(schema), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST_F(SchemaUtilTest, Invalid_EmptyPropertySchemaType) { - auto type = schema_proto_.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); - - auto property = type->add_properties(); - property->set_property_name("NewProperty"); - property->set_data_type(PropertyConfigProto::DataType::DOCUMENT); - property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED); - property->set_schema_type(""); - - ASSERT_THAT(SchemaUtil::Validate(schema_proto_), +TEST(SchemaUtilTest, Invalid_EmptyPropertySchemaType) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("NewProperty") + .SetDataTypeDocument( + /*schema_type=*/"", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); + + 
ASSERT_THAT(SchemaUtil::Validate(schema), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST_F(SchemaUtilTest, NoMatchingSchemaTypeIsInvalid) { - auto type = schema_proto_.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); - - auto property = type->add_properties(); - property->set_property_name("NewProperty"); - property->set_data_type(PropertyConfigProto::DataType::DOCUMENT); - property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED); - property->set_schema_type("NewSchemaType"); - - ASSERT_THAT(SchemaUtil::Validate(schema_proto_), - StatusIs(libtextclassifier3::StatusCode::UNKNOWN, +TEST(SchemaUtilTest, NoMatchingSchemaTypeIsInvalid) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("NewProperty") + .SetDataTypeDocument( + /*schema_type=*/"NewSchemaType", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); + + ASSERT_THAT(SchemaUtil::Validate(schema), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, HasSubstr("Undefined 'schema_type'"))); } -TEST_F(SchemaUtilTest, NewOptionalPropertyIsCompatible) { +TEST(SchemaUtilTest, NewOptionalPropertyIsCompatible) { // Configure old schema - SchemaProto old_schema; - auto type = old_schema.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); + SchemaProto old_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("prop1") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); // Configure new schema with an optional field, not considered incompatible // since it's fine if old data doesn't have this optional field - SchemaProto new_schema_with_optional; - type = new_schema_with_optional.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); - - auto property = type->add_properties(); - 
property->set_property_name("NewOptional"); - property->set_data_type(PropertyConfigProto::DataType::DOUBLE); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + SchemaProto new_schema_with_optional = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("prop1") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty(PropertyConfigBuilder() + .SetName("NewOptional") + .SetDataType(TYPE_DOUBLE) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); SchemaUtil::SchemaDelta schema_delta; - EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, - new_schema_with_optional), + schema_delta.schema_types_changed_fully_compatible.insert(kEmailType); + SchemaUtil::DependencyMap no_dependencies_map; + EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta( + old_schema, new_schema_with_optional, no_dependencies_map), Eq(schema_delta)); } -TEST_F(SchemaUtilTest, NewRequiredPropertyIsIncompatible) { +TEST(SchemaUtilTest, NewRequiredPropertyIsIncompatible) { // Configure old schema - SchemaProto old_schema; - auto type = old_schema.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); + SchemaProto old_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("prop1") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); // Configure new schema with a required field, considered incompatible since // old data won't have this required field - SchemaProto new_schema_with_required; - type = new_schema_with_required.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); - - auto property = type->add_properties(); - property->set_property_name("NewRequired"); - property->set_data_type(PropertyConfigProto::DataType::DOUBLE); - property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); + SchemaProto new_schema_with_required = + 
SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("prop1") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty(PropertyConfigBuilder() + .SetName("NewRequired") + .SetDataType(TYPE_DOUBLE) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); SchemaUtil::SchemaDelta schema_delta; schema_delta.schema_types_incompatible.emplace(kEmailType); - EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, - new_schema_with_required), + SchemaUtil::DependencyMap no_dependencies_map; + EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta( + old_schema, new_schema_with_required, no_dependencies_map), Eq(schema_delta)); } -TEST_F(SchemaUtilTest, NewSchemaMissingPropertyIsIncompatible) { +TEST(SchemaUtilTest, NewSchemaMissingPropertyIsIncompatible) { // Configure old schema - SchemaProto old_schema; - auto type = old_schema.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); - - auto property = type->add_properties(); - property->set_property_name("OldOptional"); - property->set_data_type(PropertyConfigProto::DataType::INT64); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + SchemaProto old_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("prop1") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty(PropertyConfigBuilder() + .SetName("OldOptional") + .SetDataType(TYPE_INT) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); // Configure new schema, new schema needs to at least have all the // previously defined properties - SchemaProto new_schema; - type = new_schema.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); + SchemaProto new_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("prop1") + .SetDataType(TYPE_STRING) + 
.SetCardinality(CARDINALITY_REQUIRED))) + .Build(); SchemaUtil::SchemaDelta schema_delta; schema_delta.schema_types_incompatible.emplace(kEmailType); - EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema), + SchemaUtil::DependencyMap no_dependencies_map; + EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema, + no_dependencies_map), Eq(schema_delta)); } -TEST_F(SchemaUtilTest, CompatibilityOfDifferentCardinalityOk) { +TEST(SchemaUtilTest, CompatibilityOfDifferentCardinalityOk) { // Configure less restrictive schema based on cardinality - SchemaProto less_restrictive_schema; - auto type = less_restrictive_schema.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); - - auto property = type->add_properties(); - property->set_property_name("Property"); - property->set_data_type(PropertyConfigProto::DataType::INT64); - property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED); + SchemaProto less_restrictive_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("Property") + .SetDataType(TYPE_INT) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); // Configure more restrictive schema based on cardinality - SchemaProto more_restrictive_schema; - type = more_restrictive_schema.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); - - property = type->add_properties(); - property->set_property_name("Property"); - property->set_data_type(PropertyConfigProto::DataType::INT64); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - // We can't have a new schema be less restrictive, REQUIRED->OPTIONAL + SchemaProto more_restrictive_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("Property") + .SetDataType(TYPE_INT) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + // We can't have a new schema be more restrictive, 
REPEATED->OPTIONAL SchemaUtil::SchemaDelta incompatible_schema_delta; incompatible_schema_delta.schema_types_incompatible.emplace(kEmailType); + SchemaUtil::DependencyMap no_dependencies_map; EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta( /*old_schema=*/less_restrictive_schema, - /*new_schema=*/more_restrictive_schema), + /*new_schema=*/more_restrictive_schema, no_dependencies_map), Eq(incompatible_schema_delta)); - // We can have the new schema be more restrictive, OPTIONAL->REPEATED; + // We can have the new schema be less restrictive, OPTIONAL->REPEATED; SchemaUtil::SchemaDelta compatible_schema_delta; + compatible_schema_delta.schema_types_changed_fully_compatible.insert( + kEmailType); EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta( /*old_schema=*/more_restrictive_schema, - /*new_schema=*/less_restrictive_schema), + /*new_schema=*/less_restrictive_schema, no_dependencies_map), Eq(compatible_schema_delta)); } -TEST_F(SchemaUtilTest, DifferentDataTypeIsIncompatible) { +TEST(SchemaUtilTest, DifferentDataTypeIsIncompatible) { // Configure old schema, with an int64_t property - SchemaProto old_schema; - auto type = old_schema.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); - - auto property = type->add_properties(); - property->set_property_name("Property"); - property->set_data_type(PropertyConfigProto::DataType::INT64); - property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED); + SchemaProto old_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("Property") + .SetDataType(TYPE_INT) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); // Configure new schema, with a double property - SchemaProto new_schema; - type = new_schema.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); - - property = type->add_properties(); - property->set_property_name("Property"); - property->set_data_type(PropertyConfigProto::DataType::DOUBLE); - 
property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED); + SchemaProto new_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("Property") + .SetDataType(TYPE_DOUBLE) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); SchemaUtil::SchemaDelta schema_delta; schema_delta.schema_types_incompatible.emplace(kEmailType); - EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema), + SchemaUtil::DependencyMap no_dependencies_map; + EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema, + no_dependencies_map), Eq(schema_delta)); } -TEST_F(SchemaUtilTest, DifferentSchemaTypeIsIncompatible) { +TEST(SchemaUtilTest, DifferentSchemaTypeIsIncompatible) { // Configure old schema, where Property is supposed to be a Person type - SchemaProto old_schema; - auto type = old_schema.add_types(); - *type = CreateSchemaTypeConfig(kPersonType); - - *type = CreateSchemaTypeConfig(kEmailType); - auto property = type->add_properties(); - property->set_property_name("Property"); - property->set_data_type(PropertyConfigProto::DataType::DOCUMENT); - property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED); - property->set_schema_type(kPersonType); + SchemaProto old_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kPersonType) + .AddProperty(PropertyConfigBuilder() + .SetName("prop") + .SetDataType(TYPE_INT) + .SetCardinality(CARDINALITY_REPEATED))) + .AddType(SchemaTypeConfigBuilder() + .SetType(kMessageType) + .AddProperty(PropertyConfigBuilder() + .SetName("prop") + .SetDataType(TYPE_INT) + .SetCardinality(CARDINALITY_REPEATED))) + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("Property") + .SetDataTypeDocument( + kPersonType, + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); // Configure new schema, where Property is 
supposed to be an Email type - SchemaProto new_schema; - type = new_schema.add_types(); - *type = CreateSchemaTypeConfig(kPersonType); - - *type = CreateSchemaTypeConfig(kEmailType); - property = type->add_properties(); - property->set_property_name("Property"); - property->set_data_type(PropertyConfigProto::DataType::DOCUMENT); - property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED); - property->set_schema_type(kEmailType); + SchemaProto new_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kPersonType) + .AddProperty(PropertyConfigBuilder() + .SetName("prop") + .SetDataType(TYPE_INT) + .SetCardinality(CARDINALITY_REPEATED))) + .AddType(SchemaTypeConfigBuilder() + .SetType(kMessageType) + .AddProperty(PropertyConfigBuilder() + .SetName("prop") + .SetDataType(TYPE_INT) + .SetCardinality(CARDINALITY_REPEATED))) + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("Property") + .SetDataTypeDocument( + kMessageType, + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); SchemaUtil::SchemaDelta schema_delta; schema_delta.schema_types_incompatible.emplace(kEmailType); - EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema), - Eq(schema_delta)); + // kEmailType depends on kMessageType + SchemaUtil::DependencyMap dependencies_map = {{kMessageType, {kEmailType}}}; + SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta( + old_schema, new_schema, dependencies_map); + EXPECT_THAT(actual, Eq(schema_delta)); + EXPECT_THAT(actual.schema_types_incompatible, + testing::ElementsAre(kEmailType)); + EXPECT_THAT(actual.schema_types_deleted, testing::IsEmpty()); } -TEST_F(SchemaUtilTest, ChangingIndexedPropertiesMakesIndexIncompatible) { +TEST(SchemaUtilTest, ChangingIndexedPropertiesMakesIndexIncompatible) { // Configure old schema - SchemaProto old_schema; - auto old_type = old_schema.add_types(); - *old_type = 
CreateSchemaTypeConfig(kEmailType, kPersonType); - - auto old_property = old_type->add_properties(); - old_property->set_property_name("Property"); - old_property->set_data_type(PropertyConfigProto::DataType::STRING); - old_property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + SchemaProto schema_with_indexed_property = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kPersonType) + .AddProperty( + PropertyConfigBuilder() + .SetName("Property") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); // Configure new schema - SchemaProto new_schema; - auto new_type = new_schema.add_types(); - *new_type = CreateSchemaTypeConfig(kEmailType, kPersonType); - - auto new_property = new_type->add_properties(); - new_property->set_property_name("Property"); - new_property->set_data_type(PropertyConfigProto::DataType::STRING); - new_property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + SchemaProto schema_with_unindexed_property = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kPersonType) + .AddProperty( + PropertyConfigBuilder() + .SetName("Property") + .SetDataTypeString(MATCH_UNKNOWN, TOKENIZER_NONE) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); SchemaUtil::SchemaDelta schema_delta; - schema_delta.index_incompatible = true; + schema_delta.schema_types_index_incompatible.insert(kPersonType); // New schema gained a new indexed property. 
- old_property->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::UNKNOWN); - new_property->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::EXACT_ONLY); - EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema), + SchemaUtil::DependencyMap no_dependencies_map; + EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta( + schema_with_indexed_property, schema_with_unindexed_property, + no_dependencies_map), Eq(schema_delta)); // New schema lost an indexed property. - old_property->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::EXACT_ONLY); - new_property->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::UNKNOWN); - EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema), + EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta( + schema_with_indexed_property, schema_with_unindexed_property, + no_dependencies_map), Eq(schema_delta)); } -TEST_F(SchemaUtilTest, AddingNewIndexedPropertyMakesIndexIncompatible) { +TEST(SchemaUtilTest, AddingNewIndexedPropertyMakesIndexIncompatible) { // Configure old schema - SchemaProto old_schema; - auto old_type = old_schema.add_types(); - *old_type = CreateSchemaTypeConfig(kEmailType, kPersonType); - - auto old_property = old_type->add_properties(); - old_property->set_property_name("Property"); - old_property->set_data_type(PropertyConfigProto::DataType::STRING); - old_property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + SchemaProto old_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kPersonType) + .AddProperty( + PropertyConfigBuilder() + .SetName("Property") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); // Configure new schema - SchemaProto new_schema; - auto new_type = new_schema.add_types(); - *new_type = CreateSchemaTypeConfig(kEmailType, kPersonType); - - auto new_property = new_type->add_properties(); - 
new_property->set_property_name("Property"); - new_property->set_data_type(PropertyConfigProto::DataType::STRING); - new_property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - new_property = new_type->add_properties(); - new_property->set_property_name("NewIndexedProperty"); - new_property->set_data_type(PropertyConfigProto::DataType::STRING); - new_property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - new_property->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::EXACT_ONLY); + SchemaProto new_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kPersonType) + .AddProperty( + PropertyConfigBuilder() + .SetName("Property") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("NewIndexedProperty") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); SchemaUtil::SchemaDelta schema_delta; - schema_delta.index_incompatible = true; - EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema), + schema_delta.schema_types_index_incompatible.insert(kPersonType); + SchemaUtil::DependencyMap no_dependencies_map; + EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema, + no_dependencies_map), Eq(schema_delta)); } -TEST_F(SchemaUtilTest, AddingTypeIsCompatible) { +TEST(SchemaUtilTest, AddingTypeIsCompatible) { // Can add a new type, existing data isn't incompatible, since none of them // are of this new schema type - SchemaProto old_schema; - auto type = old_schema.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); - - SchemaProto new_schema; - type = new_schema.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); - type = new_schema.add_types(); - *type = CreateSchemaTypeConfig(kPersonType); + SchemaProto old_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kPersonType) + 
.AddProperty( + PropertyConfigBuilder() + .SetName("Property") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + SchemaProto new_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kPersonType) + .AddProperty( + PropertyConfigBuilder() + .SetName("Property") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty( + PropertyConfigBuilder() + .SetName("Property") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); SchemaUtil::SchemaDelta schema_delta; - EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema), + schema_delta.schema_types_new.insert(kEmailType); + SchemaUtil::DependencyMap no_dependencies_map; + EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema, + no_dependencies_map), Eq(schema_delta)); } -TEST_F(SchemaUtilTest, DeletingTypeIsNoted) { +TEST(SchemaUtilTest, DeletingTypeIsNoted) { // Can't remove an old type, new schema needs to at least have all the // previously defined schema otherwise the Documents of the missing schema // are invalid - SchemaProto old_schema; - auto type = old_schema.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); - type = old_schema.add_types(); - *type = CreateSchemaTypeConfig(kPersonType); - - SchemaProto new_schema; - type = new_schema.add_types(); - *type = CreateSchemaTypeConfig(kEmailType); + SchemaProto old_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kPersonType) + .AddProperty( + PropertyConfigBuilder() + .SetName("Property") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty( + PropertyConfigBuilder() + .SetName("Property") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + 
.SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + SchemaProto new_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty( + PropertyConfigBuilder() + .SetName("Property") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); SchemaUtil::SchemaDelta schema_delta; schema_delta.schema_types_deleted.emplace(kPersonType); - EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema), + SchemaUtil::DependencyMap no_dependencies_map; + EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema, + no_dependencies_map), Eq(schema_delta)); } -TEST_F(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTermMatchType) { - SchemaProto schema; - auto* type = schema.add_types(); - type->set_schema_type("MyType"); +TEST(SchemaUtilTest, DeletingPropertyAndChangingProperty) { + SchemaProto old_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("Property1") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("Property2") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); + + // Remove Property2 and make Property1 indexed now. Removing Property2 should + // be incompatible. 
+ SchemaProto new_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty( + PropertyConfigBuilder() + .SetName("Property1") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + SchemaUtil::SchemaDelta schema_delta; + schema_delta.schema_types_incompatible.emplace(kEmailType); + schema_delta.schema_types_index_incompatible.emplace(kEmailType); + SchemaUtil::DependencyMap no_dependencies_map; + SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta( + old_schema, new_schema, no_dependencies_map); + EXPECT_THAT(actual, Eq(schema_delta)); +} + +TEST(SchemaUtilTest, IndexNestedDocumentsIndexIncompatible) { + // Make two schemas. One that sets index_nested_properties to false and one + // that sets it to true. + SchemaTypeConfigProto email_type_config = + SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaProto no_nested_index_schema = + SchemaBuilder() + .AddType(email_type_config) + .AddType(SchemaTypeConfigBuilder() + .SetType(kPersonType) + .AddProperty(PropertyConfigBuilder() + .SetName("emails") + .SetDataTypeDocument( + kEmailType, + /*index_nested_properties=*/false) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); + + SchemaProto nested_index_schema = + SchemaBuilder() + .AddType(email_type_config) + .AddType(SchemaTypeConfigBuilder() + .SetType(kPersonType) + .AddProperty( + PropertyConfigBuilder() + .SetName("emails") + .SetDataTypeDocument( + kEmailType, /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); + + // Going from index_nested_properties=false to index_nested_properties=true + // should make kPersonType index_incompatible. kEmailType should be + // unaffected. 
+ SchemaUtil::SchemaDelta schema_delta; + schema_delta.schema_types_index_incompatible.emplace(kPersonType); + SchemaUtil::DependencyMap dependencies_map = {{kEmailType, {kPersonType}}}; + SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta( + no_nested_index_schema, nested_index_schema, dependencies_map); + EXPECT_THAT(actual, Eq(schema_delta)); + + // Going from index_nested_properties=true to index_nested_properties=false + // should also make kPersonType index_incompatible. kEmailType should be + // unaffected. + actual = SchemaUtil::ComputeCompatibilityDelta( + nested_index_schema, no_nested_index_schema, dependencies_map); + EXPECT_THAT(actual, Eq(schema_delta)); +} - auto* prop = type->add_properties(); - prop->set_property_name("Foo"); - prop->set_data_type(PropertyConfigProto::DataType::STRING); - prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - prop->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); +TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTermMatchType) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty( + PropertyConfigBuilder() + .SetName("Foo") + .SetDataTypeString(MATCH_UNKNOWN, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); // Error if we don't set a term match type EXPECT_THAT(SchemaUtil::Validate(schema), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); // Passes once we set a term match type - prop->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::EXACT_ONLY); + schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty( + PropertyConfigBuilder() + .SetName("Foo") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); EXPECT_THAT(SchemaUtil::Validate(schema), IsOk()); } -TEST_F(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTokenizer) 
{ - SchemaProto schema; - auto* type = schema.add_types(); - type->set_schema_type("MyType"); - - auto* prop = type->add_properties(); - prop->set_property_name("Foo"); - prop->set_data_type(PropertyConfigProto::DataType::STRING); - prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - prop->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::EXACT_ONLY); +TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTokenizer) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty( + PropertyConfigBuilder() + .SetName("Foo") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_NONE) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); // Error if we don't set a tokenizer type EXPECT_THAT(SchemaUtil::Validate(schema), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); // Passes once we set a tokenizer type - prop->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); + schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty( + PropertyConfigBuilder() + .SetName("Foo") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); EXPECT_THAT(SchemaUtil::Validate(schema), IsOk()); } -TEST_F(SchemaUtilTest, MultipleReferencesToSameNestedSchemaOk) { - SchemaProto schema; - - // Create a parent schema - auto type = schema.add_types(); - type->set_schema_type("ParentSchema"); - - // Create multiple references to the same child schema - auto property = type->add_properties(); - property->set_property_name("ChildProperty1"); - property->set_data_type(PropertyConfigProto::DataType::DOCUMENT); - property->set_schema_type("ChildSchema"); - property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED); - - property = type->add_properties(); - property->set_property_name("ChildProperty2"); - property->set_data_type(PropertyConfigProto::DataType::DOCUMENT); - 
property->set_schema_type("ChildSchema"); - property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED); - - // Create a child schema - type = schema.add_types(); - type->set_schema_type("ChildSchema"); +TEST(SchemaUtilTest, MultipleReferencesToSameNestedSchemaOk) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("ChildSchema")) + .AddType(SchemaTypeConfigBuilder() + .SetType("ParentSchema") + .AddProperty(PropertyConfigBuilder() + .SetName("ChildProperty1") + .SetDataTypeDocument( + "ChildSchema", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REPEATED)) + .AddProperty(PropertyConfigBuilder() + .SetName("ChildProperty2") + .SetDataTypeDocument( + "ChildSchema", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); EXPECT_THAT(SchemaUtil::Validate(schema), IsOk()); } -TEST_F(SchemaUtilTest, InvalidSelfReference) { - SchemaProto schema; - +TEST(SchemaUtilTest, InvalidSelfReference) { // Create a schema with a self-reference cycle in it: OwnSchema -> OwnSchema - auto type = schema.add_types(); - type->set_schema_type("OwnSchema"); - - // Reference a child schema, so far so good - auto property = type->add_properties(); - property->set_property_name("NestedDocument"); - property->set_data_type(PropertyConfigProto::DataType::DOCUMENT); - property->set_schema_type("OwnSchema"); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("OwnSchema") + .AddProperty(PropertyConfigBuilder() + .SetName("NestedDocument") + .SetDataTypeDocument( + "OwnSchema", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); EXPECT_THAT(SchemaUtil::Validate(schema), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, HasSubstr("Infinite loop"))); } -TEST_F(SchemaUtilTest, InvalidSelfReferenceEvenWithOtherProperties) { - SchemaProto schema; - 
+TEST(SchemaUtilTest, InvalidSelfReferenceEvenWithOtherProperties) { // Create a schema with a self-reference cycle in it: OwnSchema -> OwnSchema - auto type = schema.add_types(); - type->set_schema_type("OwnSchema"); - - // Reference a child schema, so far so good - auto property = type->add_properties(); - property->set_property_name("NestedDocument"); - property->set_data_type(PropertyConfigProto::DataType::DOCUMENT); - property->set_schema_type("OwnSchema"); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - property = type->add_properties(); - property->set_property_name("SomeString"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - property->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::PREFIX); - property->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("OwnSchema") + .AddProperty(PropertyConfigBuilder() + .SetName("NestedDocument") + .SetDataTypeDocument( + "OwnSchema", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("SomeString") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); EXPECT_THAT(SchemaUtil::Validate(schema), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, HasSubstr("Infinite loop"))); } -TEST_F(SchemaUtilTest, InvalidInfiniteLoopTwoDegrees) { - SchemaProto schema; - +TEST(SchemaUtilTest, InvalidInfiniteLoopTwoDegrees) { // Create a schema for the parent schema - auto type = schema.add_types(); - type->set_schema_type("A"); - - // Reference schema B, so far so good - auto property = type->add_properties(); - property->set_property_name("NestedDocument"); - 
property->set_data_type(PropertyConfigProto::DataType::DOCUMENT); - property->set_schema_type("B"); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - // Create the child schema - type = schema.add_types(); - type->set_schema_type("B"); - - // Reference the schema A, causing an infinite loop of references. - property = type->add_properties(); - property->set_property_name("NestedDocument"); - property->set_data_type(PropertyConfigProto::DataType::DOCUMENT); - property->set_schema_type("A"); - property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED); + SchemaProto schema = + SchemaBuilder() + .AddType( + SchemaTypeConfigBuilder() + .SetType("A") + // Reference schema B, so far so good + .AddProperty(PropertyConfigBuilder() + .SetName("NestedDocument") + .SetDataTypeDocument( + "B", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL))) + // Create the child schema + .AddType( + SchemaTypeConfigBuilder() + .SetType("B") + // Reference the schema A, causing an infinite loop of + // references. 
+ .AddProperty(PropertyConfigBuilder() + .SetName("NestedDocument") + .SetDataTypeDocument( + "A", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); // Two degrees of referencing: A -> B -> A EXPECT_THAT(SchemaUtil::Validate(schema), @@ -725,41 +1322,40 @@ TEST_F(SchemaUtilTest, InvalidInfiniteLoopTwoDegrees) { HasSubstr("Infinite loop"))); } -TEST_F(SchemaUtilTest, InvalidInfiniteLoopThreeDegrees) { - SchemaProto schema; - - // Create a schema for the parent schema - auto type = schema.add_types(); - type->set_schema_type("A"); - - // Reference schema B , so far so good - auto property = type->add_properties(); - property->set_property_name("NestedDocument"); - property->set_data_type(PropertyConfigProto::DataType::DOCUMENT); - property->set_schema_type("B"); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - // Create the child schema - type = schema.add_types(); - type->set_schema_type("B"); - - // Reference schema C, so far so good - property = type->add_properties(); - property->set_property_name("NestedDocument"); - property->set_data_type(PropertyConfigProto::DataType::DOCUMENT); - property->set_schema_type("C"); - property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED); - - // Create the child schema - type = schema.add_types(); - type->set_schema_type("C"); - - // Reference schema A, no good - property = type->add_properties(); - property->set_property_name("NestedDocument"); - property->set_data_type(PropertyConfigProto::DataType::DOCUMENT); - property->set_schema_type("A"); - property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED); +TEST(SchemaUtilTest, InvalidInfiniteLoopThreeDegrees) { + SchemaProto schema = + SchemaBuilder() + // Create a schema for the parent schema + .AddType( + SchemaTypeConfigBuilder() + .SetType("A") + // Reference schema B, so far so good + .AddProperty(PropertyConfigBuilder() + .SetName("NestedDocument") + .SetDataTypeDocument( + "B", 
/*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL))) + // Create the child schema + .AddType( + SchemaTypeConfigBuilder() + .SetType("B") + // Reference schema C, so far so good + .AddProperty(PropertyConfigBuilder() + .SetName("NestedDocument") + .SetDataTypeDocument( + "C", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REPEATED))) + // Create the child schema + .AddType( + SchemaTypeConfigBuilder() + .SetType("C") + // Reference schema C, so far so good + .AddProperty(PropertyConfigBuilder() + .SetName("NestedDocument") + .SetDataTypeDocument( + "A", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); // Three degrees of referencing: A -> B -> C -> A EXPECT_THAT(SchemaUtil::Validate(schema), diff --git a/icing/schema/section-manager.cc b/icing/schema/section-manager.cc index a10e9b9..a0893e6 100644 --- a/icing/schema/section-manager.cc +++ b/icing/schema/section-manager.cc @@ -165,16 +165,6 @@ std::vector<std::string_view> GetStringPropertyContent( return values; } -// Helper function to get metadata list of a type config -libtextclassifier3::StatusOr<std::vector<SectionMetadata>> GetMetadataList( - const KeyMapper<SchemaTypeId>& schema_type_mapper, - const std::vector<std::vector<SectionMetadata>>& section_metadata_cache, - const std::string& type_config_name) { - ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id, - schema_type_mapper.Get(type_config_name)); - return section_metadata_cache.at(schema_type_id); -} - } // namespace SectionManager::SectionManager( @@ -263,18 +253,16 @@ SectionManager::GetStringSectionContent(const DocumentProto& document, "Section id %d is greater than the max value %d", section_id, kMaxSectionId)); } - ICING_ASSIGN_OR_RETURN( - const std::vector<SectionMetadata>& metadata_list, - GetMetadataList(schema_type_mapper_, section_metadata_cache_, - document.schema())); - if (section_id >= metadata_list.size()) { + ICING_ASSIGN_OR_RETURN(const 
std::vector<SectionMetadata>* metadata_list, + GetMetadataList(document.schema())); + if (section_id >= metadata_list->size()) { return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( "Section with id %d doesn't exist in type config %s", section_id, document.schema().c_str())); } // The index of metadata list is the same as the section id, so we can use // section id as the index. - return GetStringSectionContent(document, metadata_list[section_id].path); + return GetStringSectionContent(document, metadata_list->at(section_id).path); } libtextclassifier3::StatusOr<const SectionMetadata*> @@ -300,12 +288,10 @@ SectionManager::GetSectionMetadata(SchemaTypeId schema_type_id, libtextclassifier3::StatusOr<std::vector<Section>> SectionManager::ExtractSections(const DocumentProto& document) const { - ICING_ASSIGN_OR_RETURN( - const std::vector<SectionMetadata>& metadata_list, - GetMetadataList(schema_type_mapper_, section_metadata_cache_, - document.schema())); + ICING_ASSIGN_OR_RETURN(const std::vector<SectionMetadata>* metadata_list, + GetMetadataList(document.schema())); std::vector<Section> sections; - for (const auto& section_metadata : metadata_list) { + for (const auto& section_metadata : *metadata_list) { auto section_content_or = GetStringSectionContent(document, section_metadata.path); // Adds to result vector if section is found in document @@ -317,5 +303,12 @@ SectionManager::ExtractSections(const DocumentProto& document) const { return sections; } +libtextclassifier3::StatusOr<const std::vector<SectionMetadata>*> +SectionManager::GetMetadataList(const std::string& type_config_name) const { + ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id, + schema_type_mapper_.Get(type_config_name)); + return §ion_metadata_cache_.at(schema_type_id); +} + } // namespace lib } // namespace icing diff --git a/icing/schema/section-manager.h b/icing/schema/section-manager.h index 191a169..51eb133 100644 --- a/icing/schema/section-manager.h +++ 
b/icing/schema/section-manager.h @@ -30,7 +30,9 @@ namespace icing { namespace lib { -inline constexpr char kPropertySeparator[] = "."; +inline constexpr std::string_view kPropertySeparator = "."; +inline constexpr std::string_view kLBracket = "["; +inline constexpr std::string_view kRBracket = "]"; // This class provides section-related operations. It assigns sections according // to type configs and extracts section / sections from documents. @@ -94,6 +96,12 @@ class SectionManager { libtextclassifier3::StatusOr<std::vector<Section>> ExtractSections( const DocumentProto& document) const; + // Returns: + // - On success, the section metadatas for the specified type + // - NOT_FOUND if the type config name is not present in the schema + libtextclassifier3::StatusOr<const std::vector<SectionMetadata>*> + GetMetadataList(const std::string& type_config_name) const; + private: // Use SectionManager::Create() to instantiate explicit SectionManager( diff --git a/icing/schema/section-manager_test.cc b/icing/schema/section-manager_test.cc index 15d9a19..3dcc5a9 100644 --- a/icing/schema/section-manager_test.cc +++ b/icing/schema/section-manager_test.cc @@ -20,7 +20,6 @@ #include "gtest/gtest.h" #include "icing/document-builder.h" #include "icing/file/filesystem.h" -#include "icing/proto/schema.proto.h" #include "icing/proto/schema.pb.h" #include "icing/proto/term.pb.h" #include "icing/schema/schema-util.h" diff --git a/icing/scoring/bm25f-calculator.cc b/icing/scoring/bm25f-calculator.cc index 7495e98..4822d7f 100644 --- a/icing/scoring/bm25f-calculator.cc +++ b/icing/scoring/bm25f-calculator.cc @@ -42,24 +42,25 @@ constexpr float k1_ = 1.2f; constexpr float b_ = 0.7f; // TODO(b/158603900): add tests for Bm25fCalculator -Bm25fCalculator::Bm25fCalculator(const DocumentStore *document_store) +Bm25fCalculator::Bm25fCalculator(const DocumentStore* document_store) : document_store_(document_store) {} // During initialization, Bm25fCalculator iterates through // hit-iterators 
for each query term to pre-compute n(q_i) for each corpus under // consideration. void Bm25fCalculator::PrepareToScore( - std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>> - *query_term_iterators) { + std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>* + query_term_iterators) { Clear(); TermId term_id = 0; - for (auto &iter : *query_term_iterators) { - const std::string &term = iter.first; + for (auto& iter : *query_term_iterators) { + const std::string& term = iter.first; if (term_id_map_.find(term) != term_id_map_.end()) { continue; } term_id_map_[term] = ++term_id; - DocHitInfoIterator *term_it = iter.second.get(); + DocHitInfoIterator* term_it = iter.second.get(); + while (term_it->Advance().ok()) { auto status_or = document_store_->GetDocumentAssociatedScoreData( term_it->doc_hit_info().document_id()); @@ -89,8 +90,8 @@ void Bm25fCalculator::Clear() { // where IDF(q_i) is the Inverse Document Frequency (IDF) weight of the query // term q_i in the corpus with document D, and tf(q_i, D) is the weighted and // normalized term frequency of query term q_i in the document D. 
-float Bm25fCalculator::ComputeScore(const DocHitInfoIterator *query_it, - const DocHitInfo &hit_info, +float Bm25fCalculator::ComputeScore(const DocHitInfoIterator* query_it, + const DocHitInfo& hit_info, double default_score) { auto status_or = document_store_->GetDocumentAssociatedScoreData(hit_info.document_id()); @@ -103,7 +104,7 @@ float Bm25fCalculator::ComputeScore(const DocHitInfoIterator *query_it, query_it->PopulateMatchedTermsStats(&matched_terms_stats); float score = 0; - for (const TermMatchInfo &term_match_info : matched_terms_stats) { + for (const TermMatchInfo& term_match_info : matched_terms_stats) { float idf_weight = GetCorpusIdfWeightForTerm(term_match_info.term, data.corpus_id()); float normalized_tf = @@ -186,8 +187,8 @@ float Bm25fCalculator::GetCorpusAvgDocLength(CorpusId corpus_id) { // |D| is the #tokens in D, avgdl is the average document length in the corpus, // k1 and b are smoothing parameters. float Bm25fCalculator::ComputedNormalizedTermFrequency( - const TermMatchInfo &term_match_info, const DocHitInfo &hit_info, - const DocumentAssociatedScoreData &data) { + const TermMatchInfo& term_match_info, const DocHitInfo& hit_info, + const DocumentAssociatedScoreData& data) { uint32_t dl = data.length_in_tokens(); float avgdl = GetCorpusAvgDocLength(data.corpus_id()); float f_q = @@ -204,7 +205,7 @@ float Bm25fCalculator::ComputedNormalizedTermFrequency( // Note: once we support section weights, we should update this function to // compute the weighted term frequency. 
float Bm25fCalculator::ComputeTermFrequencyForMatchedSections( - CorpusId corpus_id, const TermMatchInfo &term_match_info) const { + CorpusId corpus_id, const TermMatchInfo& term_match_info) const { float sum = 0.0f; SectionIdMask sections = term_match_info.section_ids_mask; while (sections != 0) { diff --git a/icing/scoring/scorer.cc b/icing/scoring/scorer.cc index b7e1b92..a4734b4 100644 --- a/icing/scoring/scorer.cc +++ b/icing/scoring/scorer.cc @@ -89,6 +89,7 @@ class RelevanceScoreScorer : public Scorer { if (!query_it) { return default_score_; } + return static_cast<double>( bm25f_calculator_->ComputeScore(query_it, hit_info, default_score_)); } @@ -122,11 +123,11 @@ class UsageScorer : public Scorer { case ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT: return usage_scores.usage_type3_count; case ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP: - return usage_scores.usage_type1_last_used_timestamp_s; + return usage_scores.usage_type1_last_used_timestamp_s * 1000.0; case ScoringSpecProto::RankingStrategy::USAGE_TYPE2_LAST_USED_TIMESTAMP: - return usage_scores.usage_type2_last_used_timestamp_s; + return usage_scores.usage_type2_last_used_timestamp_s * 1000.0; case ScoringSpecProto::RankingStrategy::USAGE_TYPE3_LAST_USED_TIMESTAMP: - return usage_scores.usage_type3_last_used_timestamp_s; + return usage_scores.usage_type3_last_used_timestamp_s * 1000.0; default: // This shouldn't happen if this scorer is used correctly. 
return default_score_; diff --git a/icing/scoring/scorer_test.cc b/icing/scoring/scorer_test.cc index b114515..8b89514 100644 --- a/icing/scoring/scorer_test.cc +++ b/icing/scoring/scorer_test.cc @@ -25,6 +25,7 @@ #include "icing/proto/document.pb.h" #include "icing/proto/schema.pb.h" #include "icing/proto/scoring.pb.h" +#include "icing/schema-builder.h" #include "icing/schema/schema-store.h" #include "icing/store/document-id.h" #include "icing/store/document-store.h" @@ -38,6 +39,12 @@ namespace lib { namespace { using ::testing::Eq; +constexpr PropertyConfigProto_DataType_Code TYPE_STRING = + PropertyConfigProto_DataType_Code_STRING; + +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED = + PropertyConfigProto_Cardinality_Code_REQUIRED; + class ScorerTest : public testing::Test { protected: ScorerTest() @@ -64,13 +71,14 @@ class ScorerTest : public testing::Test { document_store_ = std::move(create_result.document_store); // Creates a simple email schema - SchemaProto test_email_schema; - auto type_config = test_email_schema.add_types(); - type_config->set_schema_type("email"); - auto subject = type_config->add_properties(); - subject->set_property_name("subject"); - subject->set_data_type(PropertyConfigProto::DataType::STRING); - subject->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); + SchemaProto test_email_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); ICING_ASSERT_OK(schema_store_->SetSchema(test_email_schema)); } @@ -87,6 +95,10 @@ class ScorerTest : public testing::Test { const FakeClock& fake_clock2() { return fake_clock2_; } + void SetFakeClock1Time(int64_t new_time) { + fake_clock1_.SetSystemTimeMilliseconds(new_time); + } + private: const std::string test_dir_; const std::string doc_store_dir_; @@ -115,7 +127,7 @@ TEST_F(ScorerTest, 
CreationWithNullPointerShouldFail) { StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); } -TEST_F(ScorerTest, ShouldGetDefaultScore) { +TEST_F(ScorerTest, ShouldGetDefaultScoreIfDocumentDoesntExist) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer, Scorer::Create(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, @@ -127,6 +139,66 @@ TEST_F(ScorerTest, ShouldGetDefaultScore) { EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(10)); } +TEST_F(ScorerTest, ShouldGetDefaultScoreIfDocumentIsDeleted) { + // Creates a test document with a provided score + DocumentProto test_document = DocumentBuilder() + .SetKey("icing", "email/1") + .SetSchema("email") + .AddStringProperty("subject", "subject foo") + .SetScore(42) + .Build(); + + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, + document_store()->Put(test_document)); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<Scorer> scorer, + Scorer::Create(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, + /*default_score=*/10, document_store())); + + DocHitInfo docHitInfo = DocHitInfo(document_id); + + // The document's score is returned + EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(42)); + + // Delete the document and check that the caller-provided default score is + // returned + EXPECT_THAT(document_store()->Delete(document_id), IsOk()); + EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(10)); +} + +TEST_F(ScorerTest, ShouldGetDefaultScoreIfDocumentIsExpired) { + // Creates a test document with a provided score + int64_t creation_time = fake_clock1().GetSystemTimeMilliseconds(); + int64_t ttl = 100; + DocumentProto test_document = DocumentBuilder() + .SetKey("icing", "email/1") + .SetSchema("email") + .AddStringProperty("subject", "subject foo") + .SetScore(42) + .SetCreationTimestampMs(creation_time) + .SetTtlMs(ttl) + .Build(); + + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, + document_store()->Put(test_document)); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<Scorer> scorer, + 
Scorer::Create(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, + /*default_score=*/10, document_store())); + + DocHitInfo docHitInfo = DocHitInfo(document_id); + + // The document's score is returned since the document hasn't expired yet. + EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(42)); + + // Expire the document and check that the caller-provided default score is + // returned + SetFakeClock1Time(creation_time + ttl + 10); + EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(10)); +} + TEST_F(ScorerTest, ShouldGetDefaultDocumentScore) { // Creates a test document with the default document score 0 DocumentProto test_document = @@ -389,7 +461,7 @@ TEST_F(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType1) { /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000, UsageReport::USAGE_TYPE1); ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1_time1)); - EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(1)); + EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(1000)); EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0)); EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0)); @@ -398,7 +470,7 @@ TEST_F(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType1) { /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000, UsageReport::USAGE_TYPE1); ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1_time5)); - EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(5)); + EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(5000)); EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0)); EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0)); @@ -407,7 +479,7 @@ TEST_F(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType1) { /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/3000, UsageReport::USAGE_TYPE1); ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1_time3)); - EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(5)); + EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(5000)); EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0)); 
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0)); } @@ -450,7 +522,7 @@ TEST_F(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType2) { UsageReport::USAGE_TYPE2); ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type2_time1)); EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0)); - EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(1)); + EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(1000)); EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0)); // Report usage with timestamp = 5000ms, score should be updated. @@ -459,7 +531,7 @@ TEST_F(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType2) { UsageReport::USAGE_TYPE2); ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type2_time5)); EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0)); - EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(5)); + EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(5000)); EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0)); // Report usage with timestamp = 3000ms, score should not be updated. @@ -468,7 +540,7 @@ TEST_F(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType2) { UsageReport::USAGE_TYPE2); ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type2_time3)); EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0)); - EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(5)); + EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(5000)); EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0)); } @@ -511,7 +583,7 @@ TEST_F(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType3) { ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type3_time1)); EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0)); EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0)); - EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(1)); + EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(1000)); // Report usage with timestamp = 5000ms, score should be updated. 
UsageReport usage_report_type3_time5 = CreateUsageReport( @@ -520,7 +592,7 @@ TEST_F(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType3) { ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type3_time5)); EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0)); EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0)); - EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(5)); + EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(5000)); // Report usage with timestamp = 3000ms, score should not be updated. UsageReport usage_report_type3_time3 = CreateUsageReport( @@ -529,7 +601,7 @@ TEST_F(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType3) { ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type3_time3)); EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0)); EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0)); - EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(5)); + EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(5000)); } TEST_F(ScorerTest, NoScorerShouldAlwaysReturnDefaultScore) { @@ -557,6 +629,37 @@ TEST_F(ScorerTest, NoScorerShouldAlwaysReturnDefaultScore) { EXPECT_THAT(scorer->GetScore(docHitInfo3), Eq(111)); } +TEST_F(ScorerTest, ShouldScaleUsageTimestampScoreForMaxTimestamp) { + DocumentProto test_document = + DocumentBuilder() + .SetKey("icing", "email/1") + .SetSchema("email") + .AddStringProperty("subject", "subject foo") + .SetCreationTimestampMs(fake_clock1().GetSystemTimeMilliseconds()) + .Build(); + + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, + document_store()->Put(test_document)); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<Scorer> scorer1, + Scorer::Create( + ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP, + /*default_score=*/0, document_store())); + DocHitInfo docHitInfo = DocHitInfo(document_id); + + // Create usage report for the maximum allowable timestamp. 
+ UsageReport usage_report_type1 = CreateUsageReport( + /*name_space=*/"icing", /*uri=*/"email/1", + /*timestamp_ms=*/std::numeric_limits<uint32_t>::max() * 1000.0, + UsageReport::USAGE_TYPE1); + + double max_int_usage_timestamp_score = + std::numeric_limits<uint32_t>::max() * 1000.0; + ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1)); + EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(max_int_usage_timestamp_score)); +} + } // namespace } // namespace lib diff --git a/icing/scoring/scoring-processor_test.cc b/icing/scoring/scoring-processor_test.cc index 65eecd1..125e2a7 100644 --- a/icing/scoring/scoring-processor_test.cc +++ b/icing/scoring/scoring-processor_test.cc @@ -24,6 +24,7 @@ #include "icing/proto/document.pb.h" #include "icing/proto/schema.pb.h" #include "icing/proto/scoring.pb.h" +#include "icing/schema-builder.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" #include "icing/testing/tmp-directory.h" @@ -36,6 +37,12 @@ using ::testing::ElementsAre; using ::testing::IsEmpty; using ::testing::SizeIs; +constexpr PropertyConfigProto_DataType_Code TYPE_STRING = + PropertyConfigProto_DataType_Code_STRING; + +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = + PropertyConfigProto_Cardinality_Code_OPTIONAL; + class ScoringProcessorTest : public testing::Test { protected: ScoringProcessorTest() @@ -60,14 +67,14 @@ class ScoringProcessorTest : public testing::Test { document_store_ = std::move(create_result.document_store); // Creates a simple email schema - SchemaProto test_email_schema; - auto type_config = test_email_schema.add_types(); - type_config->set_schema_type("email"); - auto subject = type_config->add_properties(); - subject->set_property_name("subject"); - subject->set_data_type(PropertyConfigProto::DataType::STRING); - subject->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - + SchemaProto test_email_schema = + SchemaBuilder() + 
.AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); ICING_ASSERT_OK(schema_store_->SetSchema(test_email_schema)); } @@ -603,9 +610,9 @@ TEST_F(ScoringProcessorTest, ShouldScoreByUsageTimestamp) { DocHitInfo doc_hit_info2(document_id2); DocHitInfo doc_hit_info3(document_id3); ScoredDocumentHit scored_document_hit1(document_id1, kSectionIdMaskNone, - /*score=*/1); + /*score=*/1000); ScoredDocumentHit scored_document_hit2(document_id2, kSectionIdMaskNone, - /*score=*/5); + /*score=*/5000); ScoredDocumentHit scored_document_hit3(document_id3, kSectionIdMaskNone, /*score=*/0); diff --git a/icing/store/document-log-creator.cc b/icing/store/document-log-creator.cc new file mode 100644 index 0000000..5e0426e --- /dev/null +++ b/icing/store/document-log-creator.cc @@ -0,0 +1,196 @@ +// Copyright (C) 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "icing/store/document-log-creator.h" + +#include <memory> +#include <string> +#include <utility> + +#include "icing/text_classifier/lib3/utils/base/logging.h" +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/absl_ports/annotate.h" +#include "icing/absl_ports/canonical_errors.h" +#include "icing/absl_ports/str_cat.h" +#include "icing/file/file-backed-proto-log.h" +#include "icing/file/filesystem.h" +#include "icing/file/portable-file-backed-proto-log.h" +#include "icing/proto/document_wrapper.pb.h" +#include "icing/util/logging.h" +#include "icing/util/status-macros.h" + +namespace icing { +namespace lib { + +namespace { + +// Used in DocumentId mapper to mark a document as deleted +constexpr char kDocumentLogFilename[] = "document_log"; + +std::string DocumentLogFilenameV0() { + // Originally only had this one version, no suffix. + return kDocumentLogFilename; +} + +std::string DocumentLogFilenameV1() { + return absl_ports::StrCat(kDocumentLogFilename, "_v1"); +} + +std::string MakeDocumentLogFilenameV0(const std::string& base_dir) { + return absl_ports::StrCat(base_dir, "/", DocumentLogFilenameV0()); +} + +std::string MakeDocumentLogFilenameV1(const std::string& base_dir) { + return absl_ports::StrCat(base_dir, "/", DocumentLogFilenameV1()); +} + +} // namespace + +std::string DocumentLogCreator::GetDocumentLogFilename() { + // This should always return the latest version of the document log in use. + // The current latest version is V1. 
+ return DocumentLogFilenameV1(); +} + +libtextclassifier3::StatusOr<DocumentLogCreator::CreateResult> +DocumentLogCreator::Create(const Filesystem* filesystem, + const std::string& base_dir) { + bool v0_exists = + filesystem->FileExists(MakeDocumentLogFilenameV0(base_dir).c_str()); + bool v1_exists = + filesystem->FileExists(MakeDocumentLogFilenameV1(base_dir).c_str()); + + bool regen_derived_files = false; + if (v0_exists && !v1_exists) { + ICING_RETURN_IF_ERROR(MigrateFromV0ToV1(filesystem, base_dir)); + + // Need to regenerate derived files since documents may be written to a + // different file offset in the log. + regen_derived_files = true; + } else if (!v1_exists) { + // First time initializing a v1 log. There are no existing derived files at + // this point, so we should generate some. "regenerate" here also means + // "generate for the first time", i.e. we shouldn't expect there to be any + // existing derived files. + regen_derived_files = true; + } + + ICING_ASSIGN_OR_RETURN( + PortableFileBackedProtoLog<DocumentWrapper>::CreateResult + log_create_result, + PortableFileBackedProtoLog<DocumentWrapper>::Create( + filesystem, MakeDocumentLogFilenameV1(base_dir), + PortableFileBackedProtoLog<DocumentWrapper>::Options( + /*compress_in=*/true))); + + CreateResult create_result = {std::move(log_create_result), + regen_derived_files}; + return create_result; +} + +libtextclassifier3::Status DocumentLogCreator::MigrateFromV0ToV1( + const Filesystem* filesystem, const std::string& base_dir) { + ICING_VLOG(1) << "Migrating from v0 to v1 document log."; + + // Our v0 proto log was non-portable, create it so we can read protos out from + // it. 
+ auto v0_create_result_or = FileBackedProtoLog<DocumentWrapper>::Create( + filesystem, MakeDocumentLogFilenameV0(base_dir), + FileBackedProtoLog<DocumentWrapper>::Options( + /*compress_in=*/true)); + if (!v0_create_result_or.ok()) { + return absl_ports::Annotate( + v0_create_result_or.status(), + "Failed to initialize v0 document log while migrating."); + return v0_create_result_or.status(); + } + FileBackedProtoLog<DocumentWrapper>::CreateResult v0_create_result = + std::move(v0_create_result_or).ValueOrDie(); + std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> v0_proto_log = + std::move(v0_create_result.proto_log); + + // Create a v1 portable proto log that we will write our protos to. + auto v1_create_result_or = + PortableFileBackedProtoLog<DocumentWrapper>::Create( + filesystem, MakeDocumentLogFilenameV1(base_dir), + PortableFileBackedProtoLog<DocumentWrapper>::Options( + /*compress_in=*/true)); + if (!v1_create_result_or.ok()) { + return absl_ports::Annotate( + v1_create_result_or.status(), + "Failed to initialize v1 document log while migrating."); + } + PortableFileBackedProtoLog<DocumentWrapper>::CreateResult v1_create_result = + std::move(v1_create_result_or).ValueOrDie(); + std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> v1_proto_log = + std::move(v1_create_result.proto_log); + + // Dummy empty document to be used when copying over deleted documents. + DocumentProto empty_document; + + // Start reading out from the old log and putting them in the new log. + auto iterator = v0_proto_log->GetIterator(); + auto iterator_status = iterator.Advance(); + while (iterator_status.ok()) { + libtextclassifier3::StatusOr<DocumentWrapper> document_wrapper_or = + v0_proto_log->ReadProto(iterator.GetOffset()); + + bool deleted_document = false; + DocumentWrapper document_wrapper; + if (absl_ports::IsNotFound(document_wrapper_or.status())) { + // Proto was erased, we can skip copying this into our new log. 
+ *document_wrapper.mutable_document() = empty_document; + deleted_document = true; + } else if (!document_wrapper_or.ok()) { + // Some real error, pass up + return document_wrapper_or.status(); + } else { + document_wrapper = std::move(document_wrapper_or).ValueOrDie(); + } + + auto offset_or = v1_proto_log->WriteProto(document_wrapper); + if (!offset_or.ok()) { + return absl_ports::Annotate( + offset_or.status(), + "Failed to write proto to v1 document log while migrating."); + } + + // If the original document was deleted, erase the proto we just wrote. + // We do this to maintain the document_ids, i.e. we still want document_id 2 + // to point to a deleted document even though we may not have the document + // contents anymore. DocumentStore guarantees that the document_ids don't + // change unless an Optimize is triggered. + if (deleted_document) { + int64_t offset = offset_or.ValueOrDie(); + auto erased_status = v1_proto_log->EraseProto(offset); + if (!erased_status.ok()) { + return absl_ports::Annotate( + erased_status, + "Failed to erase proto in v1 document log while migrating."); + } + } + + iterator_status = iterator.Advance(); + } + + // Close out our file log pointers. + v0_proto_log.reset(); + v1_proto_log.reset(); + + return libtextclassifier3::Status::OK; +} + +} // namespace lib +} // namespace icing diff --git a/icing/store/document-log-creator.h b/icing/store/document-log-creator.h new file mode 100644 index 0000000..51cf497 --- /dev/null +++ b/icing/store/document-log-creator.h @@ -0,0 +1,77 @@ +// Copyright (C) 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_STORE_DOCUMENT_LOG_CREATOR_H_ +#define ICING_STORE_DOCUMENT_LOG_CREATOR_H_ + +#include <string> + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/file/filesystem.h" +#include "icing/file/portable-file-backed-proto-log.h" +#include "icing/proto/document_wrapper.pb.h" + +namespace icing { +namespace lib { + +// Handles creation of the document log and any underlying migrations that may +// be necessary. +class DocumentLogCreator { + public: + struct CreateResult { + // The create result passed up from the PortableFileBackedProtoLog::Create. + // Contains the document log. + PortableFileBackedProtoLog<DocumentWrapper>::CreateResult log_create_result; + + // Whether the caller needs to also regenerate/generate any derived files + // based off of the initialized document log. + bool regen_derived_files; + }; + + // Creates the document log in the base_dir. Will create one if it doesn't + // already exist. + // + // This also handles any potential migrations from old document log versions. + // At the end of this call, the most up-to-date log will be returned and will + // be usable. + // + // Returns: + // CreateResult on success. + // INTERNAL on any I/O error. + static libtextclassifier3::StatusOr<DocumentLogCreator::CreateResult> Create( + const Filesystem* filesystem, const std::string& base_dir); + + // Returns the filename of the document log, without any directory prefixes. + // Used mainly for testing purposes. 
+ static std::string GetDocumentLogFilename(); + + private: + // Handles migrating a v0 document log (not portable) to a v1 document log + // (portable). This will initialize the log in the beginning, and close it + // when migration is done. Callers will need to reinitialize the log on their + // own. + // + // Returns: + // OK on success. + // INVALID_ARGUMENT if some invalid option was passed to the document log. + // INTERNAL on I/O error. + static libtextclassifier3::Status MigrateFromV0ToV1( + const Filesystem* filesystem, const std::string& base_dir); +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_STORE_DOCUMENT_LOG_CREATOR_H_ diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc index 72bf736..226a96b 100644 --- a/icing/store/document-store.cc +++ b/icing/store/document-store.cc @@ -19,6 +19,7 @@ #include <memory> #include <string> #include <string_view> +#include <unordered_map> #include <utility> #include <vector> @@ -32,18 +33,22 @@ #include "icing/file/file-backed-vector.h" #include "icing/file/filesystem.h" #include "icing/file/memory-mapped-file.h" +#include "icing/file/portable-file-backed-proto-log.h" #include "icing/legacy/core/icing-string-util.h" #include "icing/proto/document.pb.h" #include "icing/proto/document_wrapper.pb.h" #include "icing/proto/logging.pb.h" +#include "icing/proto/storage.pb.h" #include "icing/schema/schema-store.h" #include "icing/store/corpus-associated-scoring-data.h" #include "icing/store/corpus-id.h" #include "icing/store/document-associated-score-data.h" #include "icing/store/document-filter-data.h" #include "icing/store/document-id.h" +#include "icing/store/document-log-creator.h" #include "icing/store/key-mapper.h" #include "icing/store/namespace-id.h" +#include "icing/store/usage-store.h" #include "icing/tokenization/language-segmenter.h" #include "icing/util/clock.h" #include "icing/util/crc32.h" @@ -59,7 +64,6 @@ namespace { // Used in DocumentId mapper to mark a document as 
deleted constexpr int64_t kDocDeletedFlag = -1; -constexpr char kDocumentLogFilename[] = "document_log"; constexpr char kDocumentIdMapperFilename[] = "document_id_mapper"; constexpr char kDocumentStoreHeaderFilename[] = "document_store_header"; constexpr char kScoreCacheFilename[] = "score_cache"; @@ -69,7 +73,9 @@ constexpr char kNamespaceMapperFilename[] = "namespace_mapper"; constexpr char kUsageStoreDirectoryName[] = "usage_store"; constexpr char kCorpusIdMapperFilename[] = "corpus_mapper"; -constexpr int32_t kUriMapperMaxSize = 12 * 1024 * 1024; // 12 MiB +// Determined through manual testing to allow for 1 million uris. 1 million +// because we allow up to 1 million DocumentIds. +constexpr int32_t kUriMapperMaxSize = 36 * 1024 * 1024; // 36 MiB // 384 KiB for a KeyMapper would allow each internal array to have a max of // 128 KiB for storage. @@ -82,33 +88,6 @@ DocumentWrapper CreateDocumentWrapper(DocumentProto&& document) { return document_wrapper; } -DocumentWrapper CreateDocumentTombstone(std::string_view document_namespace, - std::string_view document_uri) { - DocumentWrapper document_wrapper; - document_wrapper.set_deleted(true); - DocumentProto* document = document_wrapper.mutable_document(); - document->set_namespace_(std::string(document_namespace)); - document->set_uri(std::string(document_uri)); - return document_wrapper; -} - -DocumentWrapper CreateNamespaceTombstone(std::string_view document_namespace) { - DocumentWrapper document_wrapper; - document_wrapper.set_deleted(true); - DocumentProto* document = document_wrapper.mutable_document(); - document->set_namespace_(std::string(document_namespace)); - return document_wrapper; -} - -DocumentWrapper CreateSchemaTypeTombstone( - std::string_view document_schema_type) { - DocumentWrapper document_wrapper; - document_wrapper.set_deleted(true); - DocumentProto* document = document_wrapper.mutable_document(); - document->set_schema(std::string(document_schema_type)); - return document_wrapper; -} - 
std::string MakeHeaderFilename(const std::string& base_dir) { return absl_ports::StrCat(base_dir, "/", kDocumentStoreHeaderFilename); } @@ -117,10 +96,6 @@ std::string MakeDocumentIdMapperFilename(const std::string& base_dir) { return absl_ports::StrCat(base_dir, "/", kDocumentIdMapperFilename); } -std::string MakeDocumentLogFilename(const std::string& base_dir) { - return absl_ports::StrCat(base_dir, "/", kDocumentLogFilename); -} - std::string MakeScoreCacheFilename(const std::string& base_dir) { return absl_ports::StrCat(base_dir, "/", kScoreCacheFilename); } @@ -203,20 +178,20 @@ DocumentStore::DocumentStore(const Filesystem* filesystem, libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put( const DocumentProto& document, int32_t num_tokens, - NativePutDocumentStats* put_document_stats) { + PutDocumentStatsProto* put_document_stats) { return Put(DocumentProto(document), num_tokens, put_document_stats); } libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put( DocumentProto&& document, int32_t num_tokens, - NativePutDocumentStats* put_document_stats) { + PutDocumentStatsProto* put_document_stats) { document.mutable_internal_fields()->set_length_in_tokens(num_tokens); return InternalPut(document, put_document_stats); } DocumentStore::~DocumentStore() { if (initialized_) { - if (!PersistToDisk().ok()) { + if (!PersistToDisk(PersistType::FULL).ok()) { ICING_LOG(ERROR) << "Error persisting to disk in DocumentStore destructor"; } @@ -226,15 +201,18 @@ DocumentStore::~DocumentStore() { libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create( const Filesystem* filesystem, const std::string& base_dir, const Clock* clock, const SchemaStore* schema_store, - NativeInitializeStats* initialize_stats) { + bool force_recovery_and_revalidate_documents, + InitializeStatsProto* initialize_stats) { ICING_RETURN_ERROR_IF_NULL(filesystem); ICING_RETURN_ERROR_IF_NULL(clock); ICING_RETURN_ERROR_IF_NULL(schema_store); auto document_store = 
std::unique_ptr<DocumentStore>( new DocumentStore(filesystem, base_dir, clock, schema_store)); - ICING_ASSIGN_OR_RETURN(DataLoss data_loss, - document_store->Initialize(initialize_stats)); + ICING_ASSIGN_OR_RETURN( + DataLoss data_loss, + document_store->Initialize(force_recovery_and_revalidate_documents, + initialize_stats)); CreateResult create_result; create_result.document_store = std::move(document_store); @@ -243,42 +221,57 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create( } libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize( - NativeInitializeStats* initialize_stats) { - auto create_result_or = FileBackedProtoLog<DocumentWrapper>::Create( - filesystem_, MakeDocumentLogFilename(base_dir_), - FileBackedProtoLog<DocumentWrapper>::Options( - /*compress_in=*/true)); + bool force_recovery_and_revalidate_documents, + InitializeStatsProto* initialize_stats) { + auto create_result_or = DocumentLogCreator::Create(filesystem_, base_dir_); + // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN // that can support error logging. if (!create_result_or.ok()) { ICING_LOG(ERROR) << create_result_or.status().error_message() - << "\nFailed to initialize DocumentLog"; + << "\nFailed to initialize DocumentLog."; return create_result_or.status(); } - FileBackedProtoLog<DocumentWrapper>::CreateResult create_result = + DocumentLogCreator::CreateResult create_result = std::move(create_result_or).ValueOrDie(); - document_log_ = std::move(create_result.proto_log); - if (create_result.has_data_loss()) { - ICING_LOG(WARNING) - << "Data loss in document log, regenerating derived files."; - if (initialize_stats != nullptr) { + document_log_ = std::move(create_result.log_create_result.proto_log); + + if (create_result.regen_derived_files || + force_recovery_and_revalidate_documents || + create_result.log_create_result.has_data_loss()) { + // We can't rely on any existing derived files. Recreate them from scratch. 
+ // Currently happens if: + // 1) This is a new log and we don't have derived files yet + // 2) Client wanted us to force a regeneration. + // 3) Log has some data loss, can't rely on existing derived data. + if (create_result.log_create_result.has_data_loss() && + initialize_stats != nullptr) { + ICING_LOG(WARNING) + << "Data loss in document log, regenerating derived files."; initialize_stats->set_document_store_recovery_cause( - NativeInitializeStats::DATA_LOSS); + InitializeStatsProto::DATA_LOSS); - if (create_result.data_loss == DataLoss::PARTIAL) { + if (create_result.log_create_result.data_loss == DataLoss::PARTIAL) { // Ground truth is partially lost. initialize_stats->set_document_store_data_status( - NativeInitializeStats::PARTIAL_LOSS); + InitializeStatsProto::PARTIAL_LOSS); } else { // Ground truth is completely lost. initialize_stats->set_document_store_data_status( - NativeInitializeStats::COMPLETE_LOSS); + InitializeStatsProto::COMPLETE_LOSS); } } + std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer(); - libtextclassifier3::Status status = RegenerateDerivedFiles(); - if (initialize_stats != nullptr) { + libtextclassifier3::Status status = + RegenerateDerivedFiles(force_recovery_and_revalidate_documents); + if (initialize_stats != nullptr && + (force_recovery_and_revalidate_documents || + create_result.log_create_result.has_data_loss())) { + // Only consider it a recovery if the client forced a recovery or there + // was data loss. Otherwise, this could just be the first time we're + // initializing and generating derived files. 
initialize_stats->set_document_store_recovery_latency_ms( document_recovery_timer->GetElapsedMilliseconds()); } @@ -288,17 +281,16 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize( return status; } } else { - if (!InitializeDerivedFiles().ok()) { + if (!InitializeExistingDerivedFiles().ok()) { ICING_VLOG(1) << "Couldn't find derived files or failed to initialize them, " "regenerating derived files for DocumentStore."; - if (initialize_stats != nullptr) { - initialize_stats->set_document_store_recovery_cause( - NativeInitializeStats::IO_ERROR); - } std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer(); - libtextclassifier3::Status status = RegenerateDerivedFiles(); - if (initialize_stats != nullptr) { + libtextclassifier3::Status status = RegenerateDerivedFiles( + /*force_recovery_and_revalidate_documents*/ false); + if (initialize_stats != nullptr && num_documents() > 0) { + initialize_stats->set_document_store_recovery_cause( + InitializeStatsProto::IO_ERROR); initialize_stats->set_document_store_recovery_latency_ms( document_recovery_timer->GetElapsedMilliseconds()); } @@ -315,10 +307,10 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize( initialize_stats->set_num_documents(document_id_mapper_->num_elements()); } - return create_result.data_loss; + return create_result.log_create_result.data_loss; } -libtextclassifier3::Status DocumentStore::InitializeDerivedFiles() { +libtextclassifier3::Status DocumentStore::InitializeExistingDerivedFiles() { if (!HeaderExists()) { // Without a header, we don't know if things are consistent between each // other so the caller should just regenerate everything from ground @@ -404,7 +396,8 @@ libtextclassifier3::Status DocumentStore::InitializeDerivedFiles() { return libtextclassifier3::Status::OK; } -libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles() { +libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles( + bool revalidate_documents) { 
ICING_RETURN_IF_ERROR(ResetDocumentKeyMapper()); ICING_RETURN_IF_ERROR(ResetDocumentIdMapper()); ICING_RETURN_IF_ERROR(ResetDocumentAssociatedScoreCache()); @@ -438,148 +431,80 @@ libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles() { DocumentWrapper document_wrapper = std::move(document_wrapper_or).ValueOrDie(); - if (document_wrapper.deleted()) { - if (!document_wrapper.document().uri().empty()) { - // Individual document deletion. - auto document_id_or = - GetDocumentId(document_wrapper.document().namespace_(), - document_wrapper.document().uri()); - // Updates document_id mapper with deletion - if (document_id_or.ok()) { - ICING_RETURN_IF_ERROR(document_id_mapper_->Set( - document_id_or.ValueOrDie(), kDocDeletedFlag)); - } else if (!absl_ports::IsNotFound(document_id_or.status())) { - // Real error - return absl_ports::Annotate( - document_id_or.status(), - absl_ports::StrCat("Failed to find document id. namespace: ", - document_wrapper.document().namespace_(), - ", uri: ", document_wrapper.document().uri())); - } - } else if (!document_wrapper.document().namespace_().empty()) { - // Namespace deletion. - ICING_ASSIGN_OR_RETURN( - NamespaceId namespace_id, - namespace_mapper_->Get(document_wrapper.document().namespace_())); - // Tombstone indicates it's a soft delete. - ICING_RETURN_IF_ERROR(BatchDelete(namespace_id, kInvalidSchemaTypeId, - /*soft_delete=*/true)); - } else if (!document_wrapper.document().schema().empty()) { - // SchemaType deletion. - auto schema_type_id_or = schema_store_->GetSchemaTypeId( - document_wrapper.document().schema()); - - if (schema_type_id_or.ok()) { - // Tombstone indicates it's a soft delete. - ICING_RETURN_IF_ERROR(BatchDelete(kInvalidNamespaceId, - schema_type_id_or.ValueOrDie(), - /*soft_delete=*/true)); - } else { - // The deleted schema type doesn't have a SchemaTypeId we can refer - // to in the FilterCache. - // - // TODO(cassiewang): We could avoid reading out all the documents. 
- // When we see a schema type doesn't have a SchemaTypeId, assign the - // unknown schema type a unique, temporary SchemaTypeId and store - // that in the FilterCache. Then, when we see the schema type - // tombstone here, we can look up its temporary SchemaTypeId and - // just iterate through the FilterCache to mark those documents as - // deleted. - int size = document_id_mapper_->num_elements(); - for (DocumentId document_id = 0; document_id < size; document_id++) { - auto document_or = Get(document_id); - if (absl_ports::IsNotFound(document_or.status())) { - // Skip nonexistent documents - continue; - } else if (!document_or.ok()) { - // Real error, pass up - return absl_ports::Annotate( - document_or.status(), - IcingStringUtil::StringPrintf( - "Failed to retrieve Document for DocumentId %d", - document_id)); - } - - // Guaranteed to have a document now. - DocumentProto document = document_or.ValueOrDie(); - - if (document.schema() == document_wrapper.document().schema()) { - ICING_RETURN_IF_ERROR( - document_id_mapper_->Set(document_id, kDocDeletedFlag)); - } - } - } - } else { - return absl_ports::InternalError( - "Encountered an invalid tombstone during recovery!"); + // Revalidate that this document is still compatible if requested. + if (revalidate_documents) { + if (!document_validator_.Validate(document_wrapper.document()).ok()) { + // Document is no longer valid with the current schema. 
Mark as + // deleted + DocumentId new_document_id = document_id_mapper_->num_elements(); + ICING_RETURN_IF_ERROR(document_log_->EraseProto(iterator.GetOffset())); + ICING_RETURN_IF_ERROR(ClearDerivedData(new_document_id)); + continue; } + } + // Updates key mapper and document_id mapper with the new document + DocumentId new_document_id = document_id_mapper_->num_elements(); + ICING_RETURN_IF_ERROR(document_key_mapper_->Put( + MakeFingerprint(document_wrapper.document().namespace_(), + document_wrapper.document().uri()), + new_document_id)); + ICING_RETURN_IF_ERROR( + document_id_mapper_->Set(new_document_id, iterator.GetOffset())); + + SchemaTypeId schema_type_id; + auto schema_type_id_or = + schema_store_->GetSchemaTypeId(document_wrapper.document().schema()); + if (absl_ports::IsNotFound(schema_type_id_or.status())) { + // Didn't find a SchemaTypeId. This means that the DocumentStore and + // the SchemaStore are out of sync. But DocumentStore can't do + // anything about it so just ignore this for now. This should be + // detected/handled by the owner of DocumentStore. Set it to some + // arbitrary invalid value for now, it'll get updated to the correct + // ID later. + schema_type_id = -1; + } else if (!schema_type_id_or.ok()) { + // Real error. Pass it up + return schema_type_id_or.status(); } else { - // Updates key mapper and document_id mapper with the new document - DocumentId new_document_id = document_id_mapper_->num_elements(); - ICING_RETURN_IF_ERROR(document_key_mapper_->Put( - MakeFingerprint(document_wrapper.document().namespace_(), - document_wrapper.document().uri()), - new_document_id)); - ICING_RETURN_IF_ERROR( - document_id_mapper_->Set(new_document_id, iterator.GetOffset())); - - SchemaTypeId schema_type_id; - auto schema_type_id_or = - schema_store_->GetSchemaTypeId(document_wrapper.document().schema()); - if (absl_ports::IsNotFound(schema_type_id_or.status())) { - // Didn't find a SchemaTypeId. 
This means that the DocumentStore and - // the SchemaStore are out of sync. But DocumentStore can't do - // anything about it so just ignore this for now. This should be - // detected/handled by the owner of DocumentStore. Set it to some - // arbitrary invalid value for now, it'll get updated to the correct - // ID later. - schema_type_id = -1; - } else if (!schema_type_id_or.ok()) { - // Real error. Pass it up - return schema_type_id_or.status(); - } else { - // We're guaranteed that SchemaTypeId is valid now - schema_type_id = schema_type_id_or.ValueOrDie(); - } + // We're guaranteed that SchemaTypeId is valid now + schema_type_id = schema_type_id_or.ValueOrDie(); + } - ICING_ASSIGN_OR_RETURN( - NamespaceId namespace_id, - namespace_mapper_->GetOrPut(document_wrapper.document().namespace_(), - namespace_mapper_->num_keys())); + ICING_ASSIGN_OR_RETURN( + NamespaceId namespace_id, + namespace_mapper_->GetOrPut(document_wrapper.document().namespace_(), + namespace_mapper_->num_keys())); - // Update corpus maps - std::string corpus = - MakeFingerprint(document_wrapper.document().namespace_(), - document_wrapper.document().schema()); - ICING_ASSIGN_OR_RETURN( - CorpusId corpusId, - corpus_mapper_->GetOrPut(corpus, corpus_mapper_->num_keys())); + // Update corpus maps + std::string corpus = + MakeFingerprint(document_wrapper.document().namespace_(), + document_wrapper.document().schema()); + ICING_ASSIGN_OR_RETURN( + CorpusId corpusId, + corpus_mapper_->GetOrPut(corpus, corpus_mapper_->num_keys())); - ICING_ASSIGN_OR_RETURN(CorpusAssociatedScoreData scoring_data, - GetCorpusAssociatedScoreDataToUpdate(corpusId)); - scoring_data.AddDocument( - document_wrapper.document().internal_fields().length_in_tokens()); + ICING_ASSIGN_OR_RETURN(CorpusAssociatedScoreData scoring_data, + GetCorpusAssociatedScoreDataToUpdate(corpusId)); + scoring_data.AddDocument( + document_wrapper.document().internal_fields().length_in_tokens()); - ICING_RETURN_IF_ERROR( - 
UpdateCorpusAssociatedScoreCache(corpusId, scoring_data)); - - ICING_RETURN_IF_ERROR(UpdateDocumentAssociatedScoreCache( - new_document_id, - DocumentAssociatedScoreData( - corpusId, document_wrapper.document().score(), - document_wrapper.document().creation_timestamp_ms(), - document_wrapper.document() - .internal_fields() - .length_in_tokens()))); - - int64_t expiration_timestamp_ms = CalculateExpirationTimestampMs( - document_wrapper.document().creation_timestamp_ms(), - document_wrapper.document().ttl_ms()); - - ICING_RETURN_IF_ERROR(UpdateFilterCache( - new_document_id, DocumentFilterData(namespace_id, schema_type_id, - expiration_timestamp_ms))); - } + ICING_RETURN_IF_ERROR( + UpdateCorpusAssociatedScoreCache(corpusId, scoring_data)); + + ICING_RETURN_IF_ERROR(UpdateDocumentAssociatedScoreCache( + new_document_id, + DocumentAssociatedScoreData( + corpusId, document_wrapper.document().score(), + document_wrapper.document().creation_timestamp_ms(), + document_wrapper.document().internal_fields().length_in_tokens()))); + + int64_t expiration_timestamp_ms = CalculateExpirationTimestampMs( + document_wrapper.document().creation_timestamp_ms(), + document_wrapper.document().ttl_ms()); + + ICING_RETURN_IF_ERROR(UpdateFilterCache( + new_document_id, DocumentFilterData(namespace_id, schema_type_id, + expiration_timestamp_ms))); iterator_status = iterator.Advance(); } @@ -788,6 +713,11 @@ libtextclassifier3::StatusOr<Crc32> DocumentStore::ComputeChecksum() const { } Crc32 corpus_score_cache_checksum = std::move(checksum_or).ValueOrDie(); + // NOTE: We purposely don't include usage_store checksum here because we can't + // regenerate it from ground truth documents. If it gets corrupted, we'll just + // clear all usage reports, but we shouldn't throw everything else in the + // document store out. 
+ total_checksum.Append(std::to_string(document_log_checksum.Get())); total_checksum.Append(std::to_string(document_key_mapper_checksum.Get())); total_checksum.Append(std::to_string(document_id_mapper_checksum.Get())); @@ -819,8 +749,11 @@ libtextclassifier3::Status DocumentStore::UpdateHeader(const Crc32& checksum) { header.checksum = checksum.Get(); // This should overwrite the header. - if (!filesystem_->Write(MakeHeaderFilename(base_dir_).c_str(), &header, - sizeof(header))) { + ScopedFd sfd( + filesystem_->OpenForWrite(MakeHeaderFilename(base_dir_).c_str())); + if (!sfd.is_valid() || + !filesystem_->Write(sfd.get(), &header, sizeof(header)) || + !filesystem_->DataSync(sfd.get())) { return absl_ports::InternalError(absl_ports::StrCat( "Failed to write DocStore header: ", MakeHeaderFilename(base_dir_))); } @@ -828,7 +761,7 @@ libtextclassifier3::Status DocumentStore::UpdateHeader(const Crc32& checksum) { } libtextclassifier3::StatusOr<DocumentId> DocumentStore::InternalPut( - DocumentProto& document, NativePutDocumentStats* put_document_stats) { + DocumentProto& document, PutDocumentStatsProto* put_document_stats) { std::unique_ptr<Timer> put_timer = clock_.GetNewTimer(); ICING_RETURN_IF_ERROR(document_validator_.Validate(document)); @@ -874,6 +807,12 @@ libtextclassifier3::StatusOr<DocumentId> DocumentStore::InternalPut( // Creates a new document id, updates key mapper and document_id mapper DocumentId new_document_id = document_id_mapper_->num_elements(); + if (!IsDocumentIdValid(new_document_id)) { + return absl_ports::ResourceExhaustedError( + "Exceeded maximum number of documents. 
Try calling Optimize to reclaim " + "some space."); + } + ICING_RETURN_IF_ERROR(document_key_mapper_->Put( MakeFingerprint(name_space, uri), new_document_id)); ICING_RETURN_IF_ERROR(document_id_mapper_->Set(new_document_id, file_offset)); @@ -909,18 +848,20 @@ libtextclassifier3::StatusOr<DocumentId> DocumentStore::InternalPut( expiration_timestamp_ms))); if (old_document_id_or.ok()) { + // The old document exists, copy over the usage scores and delete the old + // document. DocumentId old_document_id = old_document_id_or.ValueOrDie(); - auto offset_or = DoesDocumentExistAndGetFileOffset(old_document_id); - if (offset_or.ok()) { - // The old document exists, copy over the usage scores. - ICING_RETURN_IF_ERROR( - usage_store_->CloneUsageScores(/*from_document_id=*/old_document_id, - /*to_document_id=*/new_document_id)); - - // Hard delete the old document. - ICING_RETURN_IF_ERROR( - HardDelete(old_document_id, offset_or.ValueOrDie())); + ICING_RETURN_IF_ERROR( + usage_store_->CloneUsageScores(/*from_document_id=*/old_document_id, + /*to_document_id=*/new_document_id)); + + // Delete the old document. It's fine if it's not found since it might have + // been deleted previously. + auto delete_status = Delete(old_document_id); + if (!delete_status.ok() && !absl_ports::IsNotFound(delete_status)) { + // Real error, pass it up. + return delete_status; } } @@ -939,7 +880,7 @@ libtextclassifier3::StatusOr<DocumentProto> DocumentStore::Get( // existing Status. 
auto document_id_or = GetDocumentId(name_space, uri); if (absl_ports::IsNotFound(document_id_or.status())) { - ICING_LOG(ERROR) << document_id_or.status().error_message(); + ICING_VLOG(1) << document_id_or.status().error_message(); return libtextclassifier3::Status( document_id_or.status().CanonicalCode(), IcingStringUtil::StringPrintf("Document (%s, %s) not found.", @@ -962,8 +903,16 @@ libtextclassifier3::StatusOr<DocumentProto> DocumentStore::Get( libtextclassifier3::StatusOr<DocumentProto> DocumentStore::Get( DocumentId document_id, bool clear_internal_fields) const { - ICING_ASSIGN_OR_RETURN(int64_t document_log_offset, - DoesDocumentExistAndGetFileOffset(document_id)); + ICING_RETURN_IF_ERROR(DoesDocumentExistWithStatus(document_id)); + + auto document_log_offset_or = document_id_mapper_->Get(document_id); + if (!document_log_offset_or.ok()) { + // Since we've just checked that our document_id is valid a few lines + // above, there's no reason this should fail and an error should never + // happen. + return absl_ports::InternalError("Failed to find document offset."); + } + int64_t document_log_offset = *document_log_offset_or.ValueOrDie(); // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN // that can support error logging. 
@@ -1014,7 +963,7 @@ std::vector<std::string> DocumentStore::GetAllNamespaces() const { } const DocumentFilterData* data = status_or_data.ValueOrDie(); - if (DoesDocumentExist(document_id)) { + if (InternalDoesDocumentExist(document_id)) { existing_namespace_ids.insert(data->namespace_id()); } } @@ -1027,45 +976,78 @@ std::vector<std::string> DocumentStore::GetAllNamespaces() const { return existing_namespaces; } -libtextclassifier3::StatusOr<int64_t> -DocumentStore::DoesDocumentExistAndGetFileOffset(DocumentId document_id) const { +bool DocumentStore::DoesDocumentExist(DocumentId document_id) const { if (!IsDocumentIdValid(document_id)) { - return absl_ports::InvalidArgumentError( - IcingStringUtil::StringPrintf("DocumentId %d is invalid", document_id)); + return false; } - auto file_offset_or = document_id_mapper_->Get(document_id); + if (document_id >= document_id_mapper_->num_elements()) { + // Somehow got an validly constructed document_id that the document store + // doesn't know about + return false; + } + + return InternalDoesDocumentExist(document_id); +} - bool deleted = - file_offset_or.ok() && *file_offset_or.ValueOrDie() == kDocDeletedFlag; - if (deleted || absl_ports::IsOutOfRange(file_offset_or.status())) { - // Document has been deleted or doesn't exist - return absl_ports::NotFoundError( - IcingStringUtil::StringPrintf("Document %d not found", document_id)); +libtextclassifier3::Status DocumentStore::DoesDocumentExistWithStatus( + DocumentId document_id) const { + if (!IsDocumentIdValid(document_id)) { + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "Document id '%d' invalid.", document_id)); } - ICING_ASSIGN_OR_RETURN(const DocumentFilterData* filter_data, - filter_cache_->Get(document_id)); - if (clock_.GetSystemTimeMilliseconds() >= - filter_data->expiration_timestamp_ms()) { - // Past the expiration time, so also return NOT FOUND since it *shouldn't* - // exist anymore. 
- return absl_ports::NotFoundError( - IcingStringUtil::StringPrintf("Document %d not found", document_id)); + if (document_id >= document_id_mapper_->num_elements()) { + // Somehow got a validly constructed document_id that the document store + // doesn't know about. + return absl_ports::NotFoundError(IcingStringUtil::StringPrintf( + "Unknown document id '%d'.", document_id)); } - ICING_RETURN_IF_ERROR(file_offset_or.status()); - return *file_offset_or.ValueOrDie(); + if (!InternalDoesDocumentExist(document_id)) { + return absl_ports::NotFoundError(IcingStringUtil::StringPrintf( + "Document id '%d' doesn't exist", document_id)); + }; + return libtextclassifier3::Status::OK; +} + +bool DocumentStore::InternalDoesDocumentExist(DocumentId document_id) const { + return !IsDeleted(document_id) && !IsExpired(document_id); } -bool DocumentStore::DoesDocumentExist(DocumentId document_id) const { - // If we can successfully get the document log offset, the document exists. - return DoesDocumentExistAndGetFileOffset(document_id).ok(); +bool DocumentStore::IsDeleted(DocumentId document_id) const { + auto file_offset_or = document_id_mapper_->Get(document_id); + if (!file_offset_or.ok()) { + // This would only happen if document_id is out of range of the + // document_id_mapper, meaning we got some invalid document_id. Callers + // should already have checked that their document_id is valid or used + // DoesDocumentExist(WithStatus). Regardless, return true since the + // document doesn't exist. + return true; + } + int64_t file_offset = *file_offset_or.ValueOrDie(); + return file_offset == kDocDeletedFlag; +} + +bool DocumentStore::IsExpired(DocumentId document_id) const { + auto filter_data_or = filter_cache_->Get(document_id); + if (!filter_data_or.ok()) { + // This would only happen if document_id is out of range of the + // filter_cache, meaning we got some invalid document_id. 
Callers should + // already have checked that their document_id is valid or used + // DoesDocumentExist(WithStatus). Regardless, return true since the + // document doesn't exist. + return true; + } + const DocumentFilterData* filter_data = filter_data_or.ValueOrDie(); + + // Check if it's past the expiration time + return clock_.GetSystemTimeMilliseconds() >= + filter_data->expiration_timestamp_ms(); } libtextclassifier3::Status DocumentStore::Delete( - const std::string_view name_space, const std::string_view uri, - bool soft_delete) { + const std::string_view name_space, const std::string_view uri) { // Try to get the DocumentId first auto document_id_or = GetDocumentId(name_space, uri); if (!document_id_or.ok()) { @@ -1074,69 +1056,18 @@ libtextclassifier3::Status DocumentStore::Delete( absl_ports::StrCat("Failed to delete Document. namespace: ", name_space, ", uri: ", uri)); } - - // Check if the DocumentId's Document still exists. - DocumentId document_id = document_id_or.ValueOrDie(); - auto file_offset_or = DoesDocumentExistAndGetFileOffset(document_id); - if (!file_offset_or.ok()) { - return absl_ports::Annotate( - file_offset_or.status(), - absl_ports::StrCat("Failed to delete Document. namespace: ", name_space, - ", uri: ", uri)); - } - - if (soft_delete) { - return SoftDelete(name_space, uri, document_id); - } else { - return HardDelete(document_id, file_offset_or.ValueOrDie()); - } + return Delete(document_id_or.ValueOrDie()); } -libtextclassifier3::Status DocumentStore::Delete(DocumentId document_id, - bool soft_delete) { - // Copy out the document to get namespace and uri. 
- ICING_ASSIGN_OR_RETURN(int64_t document_log_offset, - DoesDocumentExistAndGetFileOffset(document_id)); - - if (soft_delete) { - auto document_wrapper_or = document_log_->ReadProto(document_log_offset); - if (!document_wrapper_or.ok()) { - ICING_LOG(ERROR) << document_wrapper_or.status().error_message() - << "Failed to read from document log"; - return document_wrapper_or.status(); - } - DocumentWrapper document_wrapper = - std::move(document_wrapper_or).ValueOrDie(); +libtextclassifier3::Status DocumentStore::Delete(DocumentId document_id) { + ICING_RETURN_IF_ERROR(DoesDocumentExistWithStatus(document_id)); - return SoftDelete(document_wrapper.document().namespace_(), - document_wrapper.document().uri(), document_id); - } else { - return HardDelete(document_id, document_log_offset); + auto document_log_offset_or = document_id_mapper_->Get(document_id); + if (!document_log_offset_or.ok()) { + return absl_ports::InternalError("Failed to find document offset."); } -} + int64_t document_log_offset = *document_log_offset_or.ValueOrDie(); -// TODO(b/169969469): Consider removing SoftDelete(). -libtextclassifier3::Status DocumentStore::SoftDelete( - std::string_view name_space, std::string_view uri, DocumentId document_id) { - // Update ground truth first. - // Mark the document as deleted by appending a tombstone of it and actually - // remove it from file later in Optimize() - // TODO(b/144458732): Implement a more robust version of - // ICING_RETURN_IF_ERROR that can support error logging. - libtextclassifier3::Status status = - document_log_->WriteProto(CreateDocumentTombstone(name_space, uri)) - .status(); - if (!status.ok()) { - return absl_ports::Annotate( - status, absl_ports::StrCat("Failed to delete Document. 
namespace:", - name_space, ", uri: ", uri)); - } - - return document_id_mapper_->Set(document_id, kDocDeletedFlag); -} - -libtextclassifier3::Status DocumentStore::HardDelete( - DocumentId document_id, int64_t document_log_offset) { // Erases document proto. ICING_RETURN_IF_ERROR(document_log_->EraseProto(document_log_offset)); return ClearDerivedData(document_id); @@ -1154,7 +1085,12 @@ libtextclassifier3::StatusOr<CorpusId> DocumentStore::GetCorpusId( libtextclassifier3::StatusOr<DocumentAssociatedScoreData> DocumentStore::GetDocumentAssociatedScoreData(DocumentId document_id) const { - auto score_data_or = score_cache_->Get(document_id); + if (!DoesDocumentExist(document_id)) { + return absl_ports::NotFoundError(IcingStringUtil::StringPrintf( + "Can't get usage scores, document id '%d' doesn't exist", document_id)); + } + + auto score_data_or = score_cache_->GetCopy(document_id); if (!score_data_or.ok()) { ICING_LOG(ERROR) << " while trying to access DocumentId " << document_id << " from score_cache_"; @@ -1162,7 +1098,7 @@ DocumentStore::GetDocumentAssociatedScoreData(DocumentId document_id) const { } DocumentAssociatedScoreData document_associated_score_data = - *std::move(score_data_or).ValueOrDie(); + std::move(score_data_or).ValueOrDie(); if (document_associated_score_data.document_score() < 0) { // An negative / invalid score means that the score data has been deleted. 
return absl_ports::NotFoundError("Document score data not found."); @@ -1172,13 +1108,13 @@ DocumentStore::GetDocumentAssociatedScoreData(DocumentId document_id) const { libtextclassifier3::StatusOr<CorpusAssociatedScoreData> DocumentStore::GetCorpusAssociatedScoreData(CorpusId corpus_id) const { - auto score_data_or = corpus_score_cache_->Get(corpus_id); + auto score_data_or = corpus_score_cache_->GetCopy(corpus_id); if (!score_data_or.ok()) { return score_data_or.status(); } CorpusAssociatedScoreData corpus_associated_score_data = - *std::move(score_data_or).ValueOrDie(); + std::move(score_data_or).ValueOrDie(); return corpus_associated_score_data; } @@ -1200,23 +1136,28 @@ DocumentStore::GetCorpusAssociatedScoreDataToUpdate(CorpusId corpus_id) const { libtextclassifier3::StatusOr<DocumentFilterData> DocumentStore::GetDocumentFilterData(DocumentId document_id) const { - auto filter_data_or = filter_cache_->Get(document_id); + if (!DoesDocumentExist(document_id)) { + return absl_ports::NotFoundError(IcingStringUtil::StringPrintf( + "Can't get filter data, document id '%d' doesn't exist", document_id)); + } + + auto filter_data_or = filter_cache_->GetCopy(document_id); if (!filter_data_or.ok()) { ICING_LOG(ERROR) << " while trying to access DocumentId " << document_id << " from filter_cache_"; return filter_data_or.status(); } DocumentFilterData document_filter_data = - *std::move(filter_data_or).ValueOrDie(); - if (document_filter_data.namespace_id() == kInvalidNamespaceId) { - // An invalid namespace id means that the filter data has been deleted. 
- return absl_ports::NotFoundError("Document filter data not found."); - } + std::move(filter_data_or).ValueOrDie(); return document_filter_data; } libtextclassifier3::StatusOr<UsageStore::UsageScores> DocumentStore::GetUsageScores(DocumentId document_id) const { + if (!DoesDocumentExist(document_id)) { + return absl_ports::NotFoundError(IcingStringUtil::StringPrintf( + "Can't get usage scores, document id '%d' doesn't exist", document_id)); + } return usage_store_->GetUsageScores(document_id); } @@ -1225,11 +1166,22 @@ libtextclassifier3::Status DocumentStore::ReportUsage( ICING_ASSIGN_OR_RETURN(DocumentId document_id, GetDocumentId(usage_report.document_namespace(), usage_report.document_uri())); + // We can use the internal version here because we got our document_id from + // our internal data structures. We would have thrown some error if the + // namespace and/or uri were incorrect. + if (!InternalDoesDocumentExist(document_id)) { + // Document was probably deleted or expired. + return absl_ports::NotFoundError(absl_ports::StrCat( + "Couldn't report usage on a nonexistent document: (namespace: '", + usage_report.document_namespace(), "', uri: '", + usage_report.document_uri(), "')")); + } + return usage_store_->AddUsageReport(usage_report, document_id); } DocumentStore::DeleteByGroupResult DocumentStore::DeleteByNamespace( - std::string_view name_space, bool soft_delete) { + std::string_view name_space) { DeleteByGroupResult result; auto namespace_id_or = namespace_mapper_->Get(name_space); if (!namespace_id_or.ok()) { @@ -1239,26 +1191,7 @@ DocumentStore::DeleteByGroupResult DocumentStore::DeleteByNamespace( return result; } NamespaceId namespace_id = namespace_id_or.ValueOrDie(); - - if (soft_delete) { - // To delete an entire namespace, we append a tombstone that only contains - // the deleted bit and the name of the deleted namespace. - // TODO(b/144458732): Implement a more robust version of - // ICING_RETURN_IF_ERROR that can support error logging. 
- libtextclassifier3::Status status = - document_log_->WriteProto(CreateNamespaceTombstone(name_space)) - .status(); - if (!status.ok()) { - ICING_LOG(ERROR) << status.error_message() - << "Failed to delete namespace. namespace = " - << name_space; - result.status = std::move(status); - return result; - } - } - - auto num_deleted_or = - BatchDelete(namespace_id, kInvalidSchemaTypeId, soft_delete); + auto num_deleted_or = BatchDelete(namespace_id, kInvalidSchemaTypeId); if (!num_deleted_or.ok()) { result.status = std::move(num_deleted_or).status(); return result; @@ -1277,7 +1210,7 @@ DocumentStore::DeleteByGroupResult DocumentStore::DeleteByNamespace( } DocumentStore::DeleteByGroupResult DocumentStore::DeleteBySchemaType( - std::string_view schema_type, bool soft_delete) { + std::string_view schema_type) { DeleteByGroupResult result; auto schema_type_id_or = schema_store_->GetSchemaTypeId(schema_type); if (!schema_type_id_or.ok()) { @@ -1288,26 +1221,7 @@ DocumentStore::DeleteByGroupResult DocumentStore::DeleteBySchemaType( return result; } SchemaTypeId schema_type_id = schema_type_id_or.ValueOrDie(); - - if (soft_delete) { - // To soft-delete an entire schema type, we append a tombstone that only - // contains the deleted bit and the name of the deleted schema type. - // TODO(b/144458732): Implement a more robust version of - // ICING_RETURN_IF_ERROR that can support error logging. - libtextclassifier3::Status status = - document_log_->WriteProto(CreateSchemaTypeTombstone(schema_type)) - .status(); - if (!status.ok()) { - ICING_LOG(ERROR) << status.error_message() - << "Failed to delete schema_type. 
schema_type = " - << schema_type; - result.status = std::move(status); - return result; - } - } - - auto num_deleted_or = - BatchDelete(kInvalidNamespaceId, schema_type_id, soft_delete); + auto num_deleted_or = BatchDelete(kInvalidNamespaceId, schema_type_id); if (!num_deleted_or.ok()) { result.status = std::move(num_deleted_or).status(); return result; @@ -1324,7 +1238,7 @@ DocumentStore::DeleteByGroupResult DocumentStore::DeleteBySchemaType( } libtextclassifier3::StatusOr<int> DocumentStore::BatchDelete( - NamespaceId namespace_id, SchemaTypeId schema_type_id, bool soft_delete) { + NamespaceId namespace_id, SchemaTypeId schema_type_id) { // Tracks if there were any existing documents with this namespace that we // will mark as deleted. int num_updated_documents = 0; @@ -1356,37 +1270,27 @@ libtextclassifier3::StatusOr<int> DocumentStore::BatchDelete( continue; } - // The document has the desired namespace and schema type, it either exists - // or has been soft-deleted / expired. - if (soft_delete) { - if (DoesDocumentExist(document_id)) { - ++num_updated_documents; - } - - // docid_mapper_->Set can only fail if document_id is < 0 - // or >= docid_mapper_->num_elements. So the only possible way to get an - // error here would be if filter_cache_->num_elements > - // docid_mapper_->num_elements, which SHOULD NEVER HAPPEN. - ICING_RETURN_IF_ERROR( - document_id_mapper_->Set(document_id, kDocDeletedFlag)); - } else { - // Hard delete. - libtextclassifier3::Status delete_status = - Delete(document_id, /*soft_delete=*/false); - if (absl_ports::IsNotFound(delete_status)) { - continue; - } else if (!delete_status.ok()) { - // Real error, pass up. - return delete_status; - } - ++num_updated_documents; + // The document has the desired namespace and schema type, it either + // exists or has expired. 
+ libtextclassifier3::Status delete_status = Delete(document_id); + if (absl_ports::IsNotFound(delete_status)) { + continue; + } else if (!delete_status.ok()) { + // Real error, pass up. + return delete_status; } + ++num_updated_documents; } return num_updated_documents; } -libtextclassifier3::Status DocumentStore::PersistToDisk() { +libtextclassifier3::Status DocumentStore::PersistToDisk( + PersistType::Code persist_type) { + if (persist_type == PersistType::LITE) { + // only persist the document log. + return document_log_->PersistToDisk(); + } ICING_RETURN_IF_ERROR(document_log_->PersistToDisk()); ICING_RETURN_IF_ERROR(document_key_mapper_->PersistToDisk()); ICING_RETURN_IF_ERROR(document_id_mapper_->PersistToDisk()); @@ -1404,30 +1308,139 @@ libtextclassifier3::Status DocumentStore::PersistToDisk() { return libtextclassifier3::Status::OK; } -libtextclassifier3::StatusOr<int64_t> DocumentStore::GetDiskUsage() const { - ICING_ASSIGN_OR_RETURN(const int64_t document_log_disk_usage, - document_log_->GetDiskUsage()); - ICING_ASSIGN_OR_RETURN(const int64_t document_key_mapper_disk_usage, - document_key_mapper_->GetDiskUsage()); - ICING_ASSIGN_OR_RETURN(const int64_t document_id_mapper_disk_usage, - document_id_mapper_->GetDiskUsage()); - ICING_ASSIGN_OR_RETURN(const int64_t score_cache_disk_usage, - score_cache_->GetDiskUsage()); - ICING_ASSIGN_OR_RETURN(const int64_t filter_cache_disk_usage, - filter_cache_->GetDiskUsage()); - ICING_ASSIGN_OR_RETURN(const int64_t namespace_mapper_disk_usage, - namespace_mapper_->GetDiskUsage()); - ICING_ASSIGN_OR_RETURN(const int64_t corpus_mapper_disk_usage, - corpus_mapper_->GetDiskUsage()); - ICING_ASSIGN_OR_RETURN(const int64_t corpus_score_cache_disk_usage, - corpus_score_cache_->GetDiskUsage()); - - int64_t disk_usage = document_log_disk_usage + - document_key_mapper_disk_usage + - document_id_mapper_disk_usage + score_cache_disk_usage + - filter_cache_disk_usage + namespace_mapper_disk_usage + - corpus_mapper_disk_usage + 
corpus_score_cache_disk_usage; - return disk_usage; +int64_t GetValueOrDefault(const libtextclassifier3::StatusOr<int64_t>& value_or, + int64_t default_value) { + return (value_or.ok()) ? value_or.ValueOrDie() : default_value; +} + +DocumentStorageInfoProto DocumentStore::GetMemberStorageInfo() const { + DocumentStorageInfoProto storage_info; + storage_info.set_document_log_size( + GetValueOrDefault(document_log_->GetDiskUsage(), -1)); + storage_info.set_key_mapper_size( + GetValueOrDefault(document_key_mapper_->GetDiskUsage(), -1)); + storage_info.set_document_id_mapper_size( + GetValueOrDefault(document_id_mapper_->GetDiskUsage(), -1)); + storage_info.set_score_cache_size( + GetValueOrDefault(score_cache_->GetDiskUsage(), -1)); + storage_info.set_filter_cache_size( + GetValueOrDefault(filter_cache_->GetDiskUsage(), -1)); + storage_info.set_namespace_id_mapper_size( + GetValueOrDefault(namespace_mapper_->GetDiskUsage(), -1)); + storage_info.set_corpus_mapper_size( + GetValueOrDefault(corpus_mapper_->GetDiskUsage(), -1)); + storage_info.set_corpus_score_cache_size( + GetValueOrDefault(corpus_score_cache_->GetDiskUsage(), -1)); + return storage_info; +} + +DocumentStorageInfoProto DocumentStore::CalculateDocumentStatusCounts( + DocumentStorageInfoProto storage_info) const { + int total_num_alive = 0; + int total_num_expired = 0; + int total_num_deleted = 0; + std::unordered_map<NamespaceId, std::string> namespace_id_to_namespace = + namespace_mapper_->GetValuesToKeys(); + std::unordered_map<std::string, NamespaceStorageInfoProto> + namespace_to_storage_info; + + for (DocumentId document_id = 0; + document_id < document_id_mapper_->num_elements(); ++document_id) { + // Check if it's deleted first. + if (IsDeleted(document_id)) { + // We don't have the namespace id of hard deleted documents anymore, so + // we can't add to our namespace storage info. 
+ ++total_num_deleted; + continue; + } + + // At this point, the document is either alive or expired, we can get + // namespace info for it. + auto filter_data_or = filter_cache_->Get(document_id); + if (!filter_data_or.ok()) { + ICING_VLOG(1) << "Error trying to get filter data for document store " + "storage info counts."; + continue; + } + const DocumentFilterData* filter_data = filter_data_or.ValueOrDie(); + auto itr = namespace_id_to_namespace.find(filter_data->namespace_id()); + if (itr == namespace_id_to_namespace.end()) { + ICING_VLOG(1) << "Error trying to find namespace for document store " + "storage info counts."; + continue; + } + const std::string& name_space = itr->second; + + // Always set the namespace, if the NamespaceStorageInfoProto didn't exist + // before, we'll get back a default instance of it. + NamespaceStorageInfoProto& namespace_storage_info = + namespace_to_storage_info[name_space]; + namespace_storage_info.set_namespace_(name_space); + + // Get usage scores + auto usage_scores_or = usage_store_->GetUsageScores(document_id); + if (!usage_scores_or.ok()) { + ICING_VLOG(1) << "Error trying to get usage scores for document store " + "storage info counts."; + continue; + } + UsageStore::UsageScores usage_scores = usage_scores_or.ValueOrDie(); + + // Update our stats + if (IsExpired(document_id)) { + ++total_num_expired; + namespace_storage_info.set_num_expired_documents( + namespace_storage_info.num_expired_documents() + 1); + if (usage_scores.usage_type1_count > 0) { + namespace_storage_info.set_num_expired_documents_usage_type1( + namespace_storage_info.num_expired_documents_usage_type1() + 1); + } + if (usage_scores.usage_type2_count > 0) { + namespace_storage_info.set_num_expired_documents_usage_type2( + namespace_storage_info.num_expired_documents_usage_type2() + 1); + } + if (usage_scores.usage_type3_count > 0) { + namespace_storage_info.set_num_expired_documents_usage_type3( + 
namespace_storage_info.num_expired_documents_usage_type3() + 1); + } + } else { + ++total_num_alive; + namespace_storage_info.set_num_alive_documents( + namespace_storage_info.num_alive_documents() + 1); + if (usage_scores.usage_type1_count > 0) { + namespace_storage_info.set_num_alive_documents_usage_type1( + namespace_storage_info.num_alive_documents_usage_type1() + 1); + } + if (usage_scores.usage_type2_count > 0) { + namespace_storage_info.set_num_alive_documents_usage_type2( + namespace_storage_info.num_alive_documents_usage_type2() + 1); + } + if (usage_scores.usage_type3_count > 0) { + namespace_storage_info.set_num_alive_documents_usage_type3( + namespace_storage_info.num_alive_documents_usage_type3() + 1); + } + } + } + + for (auto& itr : namespace_to_storage_info) { + storage_info.mutable_namespace_storage_info()->Add(std::move(itr.second)); + } + storage_info.set_num_alive_documents(total_num_alive); + storage_info.set_num_deleted_documents(total_num_deleted); + storage_info.set_num_expired_documents(total_num_expired); + return storage_info; +} + +DocumentStorageInfoProto DocumentStore::GetStorageInfo() const { + DocumentStorageInfoProto storage_info = GetMemberStorageInfo(); + int64_t directory_size = filesystem_->GetDiskUsage(base_dir_.c_str()); + if (directory_size != Filesystem::kBadFileSize) { + storage_info.set_document_store_size(directory_size); + } else { + storage_info.set_document_store_size(-1); + } + storage_info.set_num_namespaces(namespace_mapper_->num_keys()); + return CalculateDocumentStatusCounts(std::move(storage_info)); } libtextclassifier3::Status DocumentStore::UpdateSchemaStore( @@ -1486,50 +1499,19 @@ libtextclassifier3::Status DocumentStore::OptimizedUpdateSchemaStore( schema_store_ = schema_store; document_validator_.UpdateSchemaStore(schema_store); - // Append a tombstone for each deleted schema type. 
This way, we don't have - // to read out each document, check if the schema type has been deleted, and - // append a tombstone per-document. - for (const auto& schema_type : - set_schema_result.schema_types_deleted_by_name) { - // TODO(b/144458732): Implement a more robust version of - // ICING_RETURN_IF_ERROR that can support error logging. - libtextclassifier3::Status status = - document_log_->WriteProto(CreateSchemaTypeTombstone(schema_type)) - .status(); - if (!status.ok()) { - ICING_LOG(ERROR) << status.error_message() - << "Failed to delete schema_type. schema_type = " - << schema_type; - return status; - } - } - int size = document_id_mapper_->num_elements(); for (DocumentId document_id = 0; document_id < size; document_id++) { - auto exists_or = DoesDocumentExistAndGetFileOffset(document_id); - if (absl_ports::IsNotFound(exists_or.status())) { + if (!InternalDoesDocumentExist(document_id)) { // Skip nonexistent documents continue; - } else if (!exists_or.ok()) { - // Real error, pass up - return absl_ports::Annotate( - exists_or.status(), - IcingStringUtil::StringPrintf("Failed to retrieve DocumentId %d", - document_id)); } // Guaranteed that the document exists now. ICING_ASSIGN_OR_RETURN(const DocumentFilterData* filter_data, filter_cache_->Get(document_id)); - if (set_schema_result.schema_types_deleted_by_id.count( - filter_data->schema_type_id()) != 0) { - // We already created a tombstone for this deleted type. Just update the - // derived files now. - ICING_RETURN_IF_ERROR( - document_id_mapper_->Set(document_id, kDocDeletedFlag)); - continue; - } + bool delete_document = set_schema_result.schema_types_deleted_by_id.count( + filter_data->schema_type_id()) != 0; // Check if we need to update the FilterCache entry for this document. It // may have been assigned a different SchemaTypeId in the new SchemaStore. 
@@ -1553,17 +1535,17 @@ libtextclassifier3::Status DocumentStore::OptimizedUpdateSchemaStore( filter_cache_->mutable_array()[document_id].set_schema_type_id( schema_type_id); } - if (revalidate_document) { - if (!document_validator_.Validate(document).ok()) { - // Document is no longer valid with the new SchemaStore. Mark as - // deleted - auto delete_status = Delete(document.namespace_(), document.uri()); - if (!delete_status.ok() && !absl_ports::IsNotFound(delete_status)) { - // Real error, pass up - return delete_status; - } - } + delete_document = !document_validator_.Validate(document).ok(); + } + } + + if (delete_document) { + // Document is no longer valid with the new SchemaStore. Mark as deleted + auto delete_status = Delete(document_id); + if (!delete_status.ok() && !absl_ports::IsNotFound(delete_status)) { + // Real error, pass up + return delete_status; } } } @@ -1577,7 +1559,8 @@ libtextclassifier3::Status DocumentStore::Optimize() { } libtextclassifier3::Status DocumentStore::OptimizeInto( - const std::string& new_directory, const LanguageSegmenter* lang_segmenter) { + const std::string& new_directory, const LanguageSegmenter* lang_segmenter, + OptimizeStatsProto* stats) { // Validates directory if (new_directory == base_dir_) { return absl_ports::InvalidArgumentError( @@ -1592,10 +1575,17 @@ libtextclassifier3::Status DocumentStore::OptimizeInto( // Writes all valid docs into new document store (new directory) int size = document_id_mapper_->num_elements(); + int num_deleted = 0; + int num_expired = 0; + UsageStore::UsageScores default_usage; for (DocumentId document_id = 0; document_id < size; document_id++) { auto document_or = Get(document_id, /*clear_internal_fields=*/false); if (absl_ports::IsNotFound(document_or.status())) { - // Skip nonexistent documents + if (IsDeleted(document_id)) { + ++num_deleted; + } else if (IsExpired(document_id)) { + ++num_expired; + } continue; } else if (!document_or.ok()) { // Real error, pass up @@ -1636,12 
+1626,21 @@ libtextclassifier3::Status DocumentStore::OptimizeInto( // Copy over usage scores. ICING_ASSIGN_OR_RETURN(UsageStore::UsageScores usage_scores, usage_store_->GetUsageScores(document_id)); - DocumentId new_document_id = new_document_id_or.ValueOrDie(); - ICING_RETURN_IF_ERROR( - new_doc_store->SetUsageScores(new_document_id, usage_scores)); + if (!(usage_scores == default_usage)) { + // If the usage scores for this document are the default (no usage), then + // don't bother setting it. No need to possibly allocate storage if + // there's nothing interesting to store. + DocumentId new_document_id = new_document_id_or.ValueOrDie(); + ICING_RETURN_IF_ERROR( + new_doc_store->SetUsageScores(new_document_id, usage_scores)); + } } - - ICING_RETURN_IF_ERROR(new_doc_store->PersistToDisk()); + if (stats != nullptr) { + stats->set_num_original_documents(size); + stats->set_num_deleted_documents(num_deleted); + stats->set_num_expired_documents(num_expired); + } + ICING_RETURN_IF_ERROR(new_doc_store->PersistToDisk(PersistType::FULL)); return libtextclassifier3::Status::OK; } @@ -1653,7 +1652,7 @@ DocumentStore::GetOptimizeInfo() const { int32_t num_documents = document_id_mapper_->num_elements(); for (DocumentId document_id = kMinDocumentId; document_id < num_documents; ++document_id) { - if (!DoesDocumentExist(document_id)) { + if (!InternalDoesDocumentExist(document_id)) { ++optimize_info.optimizable_docs; } @@ -1691,10 +1690,10 @@ DocumentStore::GetOptimizeInfo() const { ICING_ASSIGN_OR_RETURN(const int64_t document_key_mapper_size, document_key_mapper_->GetElementsSize()); - // We don't include the namespace_mapper or the corpus_mapper because it's not - // clear if we could recover any space even if Optimize were called. Deleting - // 100s of documents could still leave a few documents of a namespace, and - // then there would be no change. 
+ // We don't include the namespace_mapper or the corpus_mapper because it's + // not clear if we could recover any space even if Optimize were called. + // Deleting 100s of documents could still leave a few documents of a + // namespace, and then there would be no change. int64_t total_size = document_log_file_size + document_key_mapper_size + document_id_mapper_file_size + score_cache_file_size + @@ -1724,8 +1723,8 @@ libtextclassifier3::Status DocumentStore::UpdateFilterCache( libtextclassifier3::Status DocumentStore::ClearDerivedData( DocumentId document_id) { // We intentionally leave the data in key_mapper_ because locating that data - // requires fetching namespace and uri. Leaving data in key_mapper_ should be - // fine because the data is hashed. + // requires fetching namespace and uri. Leaving data in key_mapper_ should + // be fine because the data is hashed. ICING_RETURN_IF_ERROR(document_id_mapper_->Set(document_id, kDocDeletedFlag)); diff --git a/icing/store/document-store.h b/icing/store/document-store.h index b2908f0..c85c989 100644 --- a/icing/store/document-store.h +++ b/icing/store/document-store.h @@ -26,9 +26,13 @@ #include "icing/file/file-backed-proto-log.h" #include "icing/file/file-backed-vector.h" #include "icing/file/filesystem.h" +#include "icing/file/portable-file-backed-proto-log.h" #include "icing/proto/document.pb.h" #include "icing/proto/document_wrapper.pb.h" #include "icing/proto/logging.pb.h" +#include "icing/proto/optimize.pb.h" +#include "icing/proto/persist.pb.h" +#include "icing/proto/storage.pb.h" #include "icing/schema/schema-store.h" #include "icing/store/corpus-associated-scoring-data.h" #include "icing/store/corpus-id.h" @@ -106,6 +110,11 @@ class DocumentStore { // previously initialized with this directory, it will reload the files saved // by the last instance. // + // force_recovery_and_revalidate_documents=true will pre-emptively throw out + // the derived files and validate each document while recreating them. 
This + // can be used to indicate that the schema (and type ids) may have changed and + // those changes might not have been applied to the document store. + // // If initialize_stats is present, the fields related to DocumentStore will be // populated. // @@ -122,7 +131,8 @@ class DocumentStore { static libtextclassifier3::StatusOr<DocumentStore::CreateResult> Create( const Filesystem* filesystem, const std::string& base_dir, const Clock* clock, const SchemaStore* schema_store, - NativeInitializeStats* initialize_stats = nullptr); + bool force_recovery_and_revalidate_documents = false, + InitializeStatsProto* initialize_stats = nullptr); // Returns the maximum DocumentId that the DocumentStore has assigned. If // there has not been any DocumentIds assigned, i.e. the DocumentStore is @@ -146,16 +156,17 @@ class DocumentStore { // // Returns: // A newly generated document id on success + // RESOURCE_EXHAUSED if exceeds maximum number of allowed documents // FAILED_PRECONDITION if schema hasn't been set yet // NOT_FOUND if the schema_type or a property config of the document doesn't // exist in schema // INTERNAL_ERROR on IO error libtextclassifier3::StatusOr<DocumentId> Put( const DocumentProto& document, int32_t num_tokens = 0, - NativePutDocumentStats* put_document_stats = nullptr); + PutDocumentStatsProto* put_document_stats = nullptr); libtextclassifier3::StatusOr<DocumentId> Put( DocumentProto&& document, int32_t num_tokens = 0, - NativePutDocumentStats* put_document_stats = nullptr); + PutDocumentStatsProto* put_document_stats = nullptr); // Finds and returns the document identified by the given key (namespace + // uri). If 'clear_internal_fields' is true, document level data that's @@ -189,18 +200,21 @@ class DocumentStore { // Check if a document exists. Existence means it hasn't been deleted and it // hasn't expired yet. 
// + // NOTE: This should be used when callers don't care about error messages, + // expect documents to be deleted/not found, or in frequently called code + // paths that could cause performance issues. A signficant amount of CPU + // cycles can be saved if we don't construct strings and create new Status + // objects on the heap. See b/185822483. + // // Returns: // boolean whether a document exists or not bool DoesDocumentExist(DocumentId document_id) const; // Deletes the document identified by the given namespace and uri. The - // document proto will be marked as deleted if 'soft_delete' is true, - // otherwise the document proto will be erased immediately. + // document proto will be erased immediately. // // NOTE: - // 1. The soft deletion uses less CPU power, it can be applied on - // non-sensitive data. - // 2. Space is not reclaimed for deleted documents until Optimize() is + // Space is not reclaimed for deleted documents until Optimize() is // called. // // Returns: @@ -208,26 +222,21 @@ class DocumentStore { // NOT_FOUND if no document exists with namespace, uri // INTERNAL_ERROR on IO error libtextclassifier3::Status Delete(std::string_view name_space, - std::string_view uri, - bool soft_delete = false); + std::string_view uri); - // Deletes the document identified by the given document_id. The - // document proto will be marked as deleted if 'soft_delete' is true, - // otherwise the document proto will be erased immediately. + // Deletes the document identified by the given document_id. The document + // proto will be erased immediately. // // NOTE: - // 1. If possible, please use the other method Delete(name_space, uri, - // soft_delete) for soft deletes because we need namespace and uri to - // perform soft deletes. - // 2. Space is not reclaimed for deleted documents until Optimize() is + // Space is not reclaimed for deleted documents until Optimize() is // called. // // Returns: // OK on success + // NOT_FOUND if the document doesn't exist (i.e. 
deleted or expired) // INTERNAL_ERROR on IO error // INVALID_ARGUMENT if document_id is invalid. - libtextclassifier3::Status Delete(DocumentId document_id, - bool soft_delete = false); + libtextclassifier3::Status Delete(DocumentId document_id); // Returns the NamespaceId of the string namespace // @@ -250,16 +259,9 @@ class DocumentStore { // Returns the DocumentAssociatedScoreData of the document specified by the // DocumentId. // - // NOTE: This does not check if the document exists and will return the - // DocumentFilterData of the document even if it has been deleted. Users - // should check DoesDocumentExist(document_id) if they only want existing - // documents' DocumentFilterData. - // // Returns: // DocumentAssociatedScoreData on success - // OUT_OF_RANGE if document_id is negative or exceeds previously seen - // DocumentIds - // NOT_FOUND if no score data is found + // NOT_FOUND if the document or the score data is not found libtextclassifier3::StatusOr<DocumentAssociatedScoreData> GetDocumentAssociatedScoreData(DocumentId document_id) const; @@ -279,16 +281,11 @@ class DocumentStore { // Returns the DocumentFilterData of the document specified by the DocumentId. // - // NOTE: This does not check if the document exists and will return the - // DocumentFilterData of the document even if it has been deleted. Users - // should check DoesDocumentExist(document_id) if they only want existing - // documents' DocumentFilterData. - // // Returns: // DocumentFilterData on success // OUT_OF_RANGE if document_id is negative or exceeds previously seen // DocumentIds - // NOT_FOUND if no filter data is found + // NOT_FOUND if the document or the filter data is not found libtextclassifier3::StatusOr<DocumentFilterData> GetDocumentFilterData( DocumentId document_id) const; @@ -296,8 +293,8 @@ class DocumentStore { // // Returns: // UsageScores on success + // NOT_FOUND if document_id no longer exists. 
// INVALID_ARGUMENT if document_id is invalid - // INTERNAL_ERROR on I/O errors libtextclassifier3::StatusOr<UsageStore::UsageScores> GetUsageScores( DocumentId document_id) const; @@ -311,56 +308,43 @@ class DocumentStore { libtextclassifier3::Status ReportUsage(const UsageReport& usage_report); // Deletes all documents belonging to the given namespace. The documents will - // be marked as deleted if 'soft_delete' is true, otherwise they will be - // erased immediately. + // be erased immediately. // // NOTE: - // 1. The soft deletion uses less CPU power, it can be applied on - // non-sensitive data. - // 2. Space is not reclaimed for deleted documents until Optimize() is + // Space is not reclaimed for deleted documents until Optimize() is // called. // // Returns: // OK on success // NOT_FOUND if namespace doesn't exist // INTERNAL_ERROR on IO error - DeleteByGroupResult DeleteByNamespace(std::string_view name_space, - bool soft_delete = false); + DeleteByGroupResult DeleteByNamespace(std::string_view name_space); // Deletes all documents belonging to the given schema type. The documents - // will be marked as deleted if 'soft_delete' is true, otherwise they will be - // erased immediately. + // will be erased immediately. // // NOTE: - // 1. The soft deletion uses less CPU power, it can be applied on - // non-sensitive data. - // 2. Space is not reclaimed for deleted documents until Optimize() is + // Space is not reclaimed for deleted documents until Optimize() is // called. // // Returns: // OK on success // NOT_FOUND if schema_type doesn't exist // INTERNAL_ERROR on IO error - DeleteByGroupResult DeleteBySchemaType(std::string_view schema_type, - bool soft_delete = false); + DeleteByGroupResult DeleteBySchemaType(std::string_view schema_type); // Syncs all the data and metadata changes to disk. 
// // Returns: // OK on success // INTERNAL on I/O error - libtextclassifier3::Status PersistToDisk(); + libtextclassifier3::Status PersistToDisk(PersistType::Code persist_type); - // Calculates and returns the disk usage in bytes. Rounds up to the nearest - // block size. - // - // Returns: - // Disk usage on success - // INTERNAL_ERROR on IO error + // Calculates the StorageInfo for the Document Store. // - // TODO(tjbarron): consider returning a struct which has the breakdown of each - // component. - libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const; + // If an IO error occurs while trying to calculate the value for a field, then + // that field will be set to -1. + DocumentStorageInfoProto GetStorageInfo() const; // Update any derived data off of the SchemaStore with the new SchemaStore. // This may include pointers, SchemaTypeIds, etc. @@ -407,6 +391,8 @@ class DocumentStore { // reassigned so any files / classes that are based on old document ids may be // outdated. // + // stats will be set if non-null. + // // NOTE: The tasks in this method are too expensive to be executed in // real-time. The caller should decide how frequently and when to call this // method based on device usage. @@ -416,8 +402,8 @@ class DocumentStore { // INVALID_ARGUMENT if new_directory is same as current base directory // INTERNAL_ERROR on IO error libtextclassifier3::Status OptimizeInto( - const std::string& new_directory, - const LanguageSegmenter* lang_segmenter); + const std::string& new_directory, const LanguageSegmenter* lang_segmenter, + OptimizeStatsProto* stats = nullptr); // Calculates status for a potential Optimize call. Includes how many docs // there are vs how many would be optimized away. And also includes an @@ -454,7 +440,7 @@ class DocumentStore { // A log used to store all documents, it serves as a ground truth of doc // store. key_mapper_ and document_id_mapper_ can be regenerated from it. 
- std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> document_log_; + std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log_; // Key (namespace + uri) to DocumentId mapping std::unique_ptr<KeyMapper<DocumentId>> document_key_mapper_; @@ -508,16 +494,22 @@ class DocumentStore { bool initialized_ = false; libtextclassifier3::StatusOr<DataLoss> Initialize( - NativeInitializeStats* initialize_stats); + bool force_recovery_and_revalidate_documents, + InitializeStatsProto* initialize_stats); // Creates sub-components and verifies the integrity of each sub-component. + // This assumes that the the underlying files already exist, and will return + // an error if it doesn't find what it's expecting. // // Returns an error if subcomponents failed to initialize successfully. // INTERNAL_ERROR on IO error - libtextclassifier3::Status InitializeDerivedFiles(); + libtextclassifier3::Status InitializeExistingDerivedFiles(); // Re-generates all files derived from the ground truth: the document log. // + // revalidate_documents=true will also cause each document to be revalidated + // the schema as it is read out of the document log. + // // NOTE: if this function fails, the only thing we can do is to retry it until // it succeeds or prevent the initialization of a DocumentStore. The // DocumentStore object wouldn't work reliably if this fails. @@ -528,7 +520,7 @@ class DocumentStore { // document_id // mapper. // 3. Create header and store the updated combined checksum - libtextclassifier3::Status RegenerateDerivedFiles(); + libtextclassifier3::Status RegenerateDerivedFiles(bool revalidate_documents); // Resets the unique_ptr to the document_key_mapper, deletes the underlying // file, and re-creates a new instance of the document_key_mapper . @@ -576,8 +568,8 @@ class DocumentStore { // if it doesn't exist. bool HeaderExists(); - // Update and replace the header file. Creates the header file if it doesn't - // exist. 
+ // Update, replace and persist the header file. Creates the header file if it + // doesn't exist. // // Returns: // OK on success @@ -586,14 +578,13 @@ class DocumentStore { libtextclassifier3::StatusOr<DocumentId> InternalPut( DocumentProto& document, - NativePutDocumentStats* put_document_stats = nullptr); + PutDocumentStatsProto* put_document_stats = nullptr); // Helper function to do batch deletes. Documents with the given // "namespace_id" and "schema_type_id" will be deleted. If callers don't need // to specify the namespace or schema type, pass in kInvalidNamespaceId or - // kInvalidSchemaTypeId. The document protos will be marked as deleted if - // 'soft_delete' is true, otherwise the document protos with their derived - // data will be erased / cleared immediately. + // kInvalidSchemaTypeId. The document protos with their derived data will be + // erased / cleared immediately. // // NOTE: Space is not reclaimed in the derived files until Optimize() is // called. @@ -602,28 +593,7 @@ class DocumentStore { // Number of documents that were actually updated to be deleted // INTERNAL_ERROR on IO error libtextclassifier3::StatusOr<int> BatchDelete(NamespaceId namespace_id, - SchemaTypeId schema_type_id, - bool soft_delete); - - // Marks the document identified by the given name_space, uri and document_id - // as deleted, to be removed later during Optimize(). - // - // Returns: - // OK on success - // INTERNAL_ERROR on IO error - libtextclassifier3::Status SoftDelete(std::string_view name_space, - std::string_view uri, - DocumentId document_id); - - // Erases the document at the given document_log_offset from the document_log - // and clears the derived data identified by the given document_id. The space - // will be reclaimed later during Optimize(). 
- // - // Returns: - // OK on success - // INTERNAL_ERROR on IO error - libtextclassifier3::Status HardDelete(DocumentId document_id, - int64_t document_log_offset); + SchemaTypeId schema_type_id); // Helper method to find a DocumentId that is associated with the given // namespace and uri. @@ -654,22 +624,46 @@ class DocumentStore { libtextclassifier3::StatusOr<CorpusAssociatedScoreData> GetCorpusAssociatedScoreDataToUpdate(CorpusId corpus_id) const; - // Helper method to validate the document id and return the file offset of the - // associated document in document_log_. - // - // This can be a more informative call than just DoesDocumentExist because it - // can return more status errors on whether the Document actually doesn't - // exist or if there was an internal error while accessing files. + // Check if a document exists. Existence means it hasn't been deleted and it + // hasn't expired yet. // // Returns: - // The file offset on success + // OK if the document exists // INVALID_ARGUMENT if document_id is less than 0 or greater than the // maximum value // NOT_FOUND if the document doesn't exist (i.e. deleted or expired) // INTERNAL_ERROR on IO error - libtextclassifier3::StatusOr<int64_t> DoesDocumentExistAndGetFileOffset( + libtextclassifier3::Status DoesDocumentExistWithStatus( DocumentId document_id) const; + // Check if a document exists. Existence means it hasn't been deleted and it + // hasn't expired yet. + // + // This is for internal-use only because we assume that the document_id is + // already valid. If you're unsure if the document_id is valid, use + // DoesDocumentExist(document_id) instead, which will perform those additional + // checks. + // + // Returns: + // boolean whether a document exists or not + bool InternalDoesDocumentExist(DocumentId document_id) const; + + // Checks if a document has been deleted + // + // This is for internal-use only because we assume that the document_id is + // already valid. 
If you're unsure if the document_id is valid, use + // DoesDocumentExist(document_id) instead, which will perform those additional + // checks. + bool IsDeleted(DocumentId document_id) const; + + // Checks if a document has expired. + // + // This is for internal-use only because we assume that the document_id is + // already valid. If you're unsure if the document_id is valid, use + // DoesDocumentExist(document_id) instead, which will perform those additional + // checks. + bool IsExpired(DocumentId document_id) const; + // Updates the entry in the score cache for document_id. libtextclassifier3::Status UpdateDocumentAssociatedScoreCache( DocumentId document_id, const DocumentAssociatedScoreData& score_data); @@ -688,6 +682,20 @@ class DocumentStore { // Sets usage scores for the given document. libtextclassifier3::Status SetUsageScores( DocumentId document_id, const UsageStore::UsageScores& usage_scores); + + // Returns: + // - on success, a DocumentStorageInfoProto with the fields relating to the + // size of Document Store member variables populated. + // - INTERNAL on failure to get file size + DocumentStorageInfoProto GetMemberStorageInfo() const; + + // Returns: + // - on success, the storage_info that was passed in but with the number of + // alive, deleted and expired documents also set. + // - OUT_OF_RANGE, this should never happen. This could only be returned if + // the document_id_mapper somehow became larger than the filter cache. + DocumentStorageInfoProto CalculateDocumentStatusCounts( + DocumentStorageInfoProto storage_info) const; }; } // namespace lib diff --git a/icing/store/document-store_benchmark.cc b/icing/store/document-store_benchmark.cc new file mode 100644 index 0000000..77da928 --- /dev/null +++ b/icing/store/document-store_benchmark.cc @@ -0,0 +1,330 @@ +// Copyright (C) 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <unistd.h> + +#include <fstream> +#include <iostream> +#include <memory> +#include <ostream> +#include <random> +#include <sstream> +#include <stdexcept> +#include <string> +#include <string_view> +#include <unordered_set> +#include <vector> + +#include "testing/base/public/benchmark.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/document-builder.h" +#include "icing/file/filesystem.h" +#include "icing/proto/document.pb.h" +#include "icing/proto/persist.pb.h" +#include "icing/proto/schema.pb.h" +#include "icing/schema-builder.h" +#include "icing/schema/schema-store.h" +#include "icing/store/document-store.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/tmp-directory.h" +#include "icing/util/clock.h" + +// Run on a Linux workstation: +// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt +// //icing/store:document-store_benchmark +// +// $ blaze-bin/icing/store/document-store_benchmark +// --benchmarks=all --benchmark_memory_usage +// +// Run on an Android device: +// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1" +// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt +// //icing/store:document-store_benchmark +// +// $ adb push blaze-bin/icing/store/document-store_benchmark +// /data/local/tmp/ +// +// $ adb shell /data/local/tmp/document-store_benchmark +// --benchmarks=all + +namespace icing { +namespace lib { + +namespace { + +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = + PropertyConfigProto_Cardinality_Code_OPTIONAL; + +constexpr 
StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = + StringIndexingConfig_TokenizerType_Code_PLAIN; + +constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; + +class DestructibleDirectory { + public: + explicit DestructibleDirectory(const Filesystem& filesystem, + const std::string& dir) + : filesystem_(filesystem), dir_(dir) { + filesystem_.CreateDirectoryRecursively(dir_.c_str()); + } + ~DestructibleDirectory() { + filesystem_.DeleteDirectoryRecursively(dir_.c_str()); + } + + private: + Filesystem filesystem_; + std::string dir_; +}; + +DocumentProto CreateDocument(const std::string namespace_, + const std::string uri) { + return DocumentBuilder() + .SetKey(namespace_, uri) + .SetSchema("email") + .AddStringProperty("subject", "subject foo") + .AddStringProperty("body", "body bar") + .Build(); +} + +SchemaProto CreateSchema() { + return SchemaBuilder() + .AddType( + SchemaTypeConfigBuilder() + .SetType("email") + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); +} + +std::unique_ptr<SchemaStore> CreateSchemaStore(Filesystem filesystem, + const std::string directory, + const Clock* clock) { + const std::string schema_store_dir = directory + "/schema"; + filesystem.CreateDirectoryRecursively(schema_store_dir.data()); + std::unique_ptr<SchemaStore> schema_store = + SchemaStore::Create(&filesystem, schema_store_dir, clock).ValueOrDie(); + + auto set_schema_status = schema_store->SetSchema(CreateSchema()); + if (!set_schema_status.ok()) { + ICING_LOG(ERROR) << set_schema_status.status().error_message(); + } + + return schema_store; +} + +void BM_DoesDocumentExistBenchmark(benchmark::State& state) { + Filesystem filesystem; + Clock clock; + + std::string directory = GetTestTempDir() 
+ "/icing"; + DestructibleDirectory ddir(filesystem, directory); + + std::string document_store_dir = directory + "/store"; + std::unique_ptr<SchemaStore> schema_store = + CreateSchemaStore(filesystem, directory, &clock); + + filesystem.CreateDirectoryRecursively(document_store_dir.data()); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem, document_store_dir, &clock, + schema_store.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + + int max_document_id = 300000; + for (int i = 0; i < max_document_id; ++i) { + // Put and delete a lot of documents to fill up our derived files with + // stuff. + ICING_ASSERT_OK(document_store->Put( + CreateDocument("namespace", /*uri=*/std::to_string(i)))); + document_store->Delete("namespace", /*uri=*/std::to_string(i)); + } + + std::default_random_engine random; + std::uniform_int_distribution<> dist(1, max_document_id); + for (auto s : state) { + // Check random document ids to see if they exist. Hopefully to simulate + // page faulting in different sections of our mmapped derived files. 
+ int document_id = dist(random); + benchmark::DoNotOptimize(document_store->DoesDocumentExist(document_id)); + } +} +BENCHMARK(BM_DoesDocumentExistBenchmark); + +void BM_Put(benchmark::State& state) { + Filesystem filesystem; + Clock clock; + + std::string directory = GetTestTempDir() + "/icing"; + DestructibleDirectory ddir(filesystem, directory); + + std::string document_store_dir = directory + "/store"; + std::unique_ptr<SchemaStore> schema_store = + CreateSchemaStore(filesystem, directory, &clock); + + filesystem.CreateDirectoryRecursively(document_store_dir.data()); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem, document_store_dir, &clock, + schema_store.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + + DocumentProto document = CreateDocument("namespace", "uri"); + + for (auto s : state) { + // It's ok that this is the same document over and over. We'll create a new + // document_id for it and still insert the proto into the underlying log. 
+ benchmark::DoNotOptimize(document_store->Put(document)); + } +} +BENCHMARK(BM_Put); + +void BM_GetSameDocument(benchmark::State& state) { + Filesystem filesystem; + Clock clock; + + std::string directory = GetTestTempDir() + "/icing"; + DestructibleDirectory ddir(filesystem, directory); + + std::string document_store_dir = directory + "/store"; + std::unique_ptr<SchemaStore> schema_store = + CreateSchemaStore(filesystem, directory, &clock); + + filesystem.CreateDirectoryRecursively(document_store_dir.data()); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem, document_store_dir, &clock, + schema_store.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + + ICING_ASSERT_OK(document_store->Put(CreateDocument("namespace", "uri"))); + + for (auto s : state) { + benchmark::DoNotOptimize(document_store->Get("namespace", "uri")); + } +} +BENCHMARK(BM_GetSameDocument); + +void BM_Delete(benchmark::State& state) { + Filesystem filesystem; + Clock clock; + + std::string directory = GetTestTempDir() + "/icing"; + DestructibleDirectory ddir(filesystem, directory); + + std::string document_store_dir = directory + "/store"; + std::unique_ptr<SchemaStore> schema_store = + CreateSchemaStore(filesystem, directory, &clock); + + filesystem.CreateDirectoryRecursively(document_store_dir.data()); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem, document_store_dir, &clock, + schema_store.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + + DocumentProto document = CreateDocument("namespace", "uri"); + + for (auto s : state) { + state.PauseTiming(); + ICING_ASSERT_OK(document_store->Put(document)); + state.ResumeTiming(); + + benchmark::DoNotOptimize(document_store->Delete("namespace", "uri")); + } +} +BENCHMARK(BM_Delete); + +void BM_Create(benchmark::State& 
state) { + Filesystem filesystem; + Clock clock; + + std::string directory = GetTestTempDir() + "/icing"; + std::string document_store_dir = directory + "/store"; + + std::unique_ptr<SchemaStore> schema_store = + CreateSchemaStore(filesystem, directory, &clock); + + // Create an initial document store and put some data in. + { + DestructibleDirectory ddir(filesystem, directory); + + filesystem.CreateDirectoryRecursively(document_store_dir.data()); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem, document_store_dir, &clock, + schema_store.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + + DocumentProto document = CreateDocument("namespace", "uri"); + ICING_ASSERT_OK(document_store->Put(document)); + ICING_ASSERT_OK(document_store->PersistToDisk(PersistType::FULL)); + } + + // Recreating it with some content to checksum over. + DestructibleDirectory ddir(filesystem, directory); + + filesystem.CreateDirectoryRecursively(document_store_dir.data()); + + for (auto s : state) { + benchmark::DoNotOptimize(DocumentStore::Create( + &filesystem, document_store_dir, &clock, schema_store.get())); + } +} +BENCHMARK(BM_Create); + +void BM_ComputeChecksum(benchmark::State& state) { + Filesystem filesystem; + Clock clock; + + std::string directory = GetTestTempDir() + "/icing"; + DestructibleDirectory ddir(filesystem, directory); + + std::string document_store_dir = directory + "/store"; + std::unique_ptr<SchemaStore> schema_store = + CreateSchemaStore(filesystem, directory, &clock); + + filesystem.CreateDirectoryRecursively(document_store_dir.data()); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem, document_store_dir, &clock, + schema_store.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + + DocumentProto document = CreateDocument("namespace", 
"uri"); + ICING_ASSERT_OK(document_store->Put(document)); + ICING_ASSERT_OK(document_store->PersistToDisk(PersistType::LITE)); + + for (auto s : state) { + benchmark::DoNotOptimize(document_store->ComputeChecksum()); + } +} +BENCHMARK(BM_ComputeChecksum); + +} // namespace + +} // namespace lib +} // namespace icing diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc index 7754373..a506eea 100644 --- a/icing/store/document-store_test.cc +++ b/icing/store/document-store_test.cc @@ -15,10 +15,12 @@ #include "icing/store/document-store.h" #include <cstdint> +#include <filesystem> #include <limits> #include <memory> #include <string> +#include "icing/text_classifier/lib3/utils/base/status.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include "icing/absl_ports/str_cat.h" @@ -29,17 +31,20 @@ #include "icing/file/mock-filesystem.h" #include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/portable/equals-proto.h" +#include "icing/portable/platform.h" #include "icing/proto/document.pb.h" #include "icing/proto/schema.pb.h" +#include "icing/proto/storage.pb.h" +#include "icing/schema-builder.h" #include "icing/schema/schema-store.h" #include "icing/store/corpus-associated-scoring-data.h" #include "icing/store/corpus-id.h" #include "icing/store/document-filter-data.h" #include "icing/store/document-id.h" +#include "icing/store/document-log-creator.h" #include "icing/store/namespace-id.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" -#include "icing/testing/platform.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" #include "icing/tokenization/language-segmenter-factory.h" @@ -55,6 +60,7 @@ namespace { using ::icing::lib::portable_equals_proto::EqualsProto; using ::testing::_; using ::testing::Eq; +using ::testing::Ge; using ::testing::Gt; using ::testing::HasSubstr; using ::testing::IsEmpty; @@ -64,6 +70,32 @@ using ::testing::Not; using ::testing::Return; 
using ::testing::UnorderedElementsAre; +const NamespaceStorageInfoProto& GetNamespaceStorageInfo( + const DocumentStorageInfoProto& storage_info, + const std::string& name_space) { + for (const NamespaceStorageInfoProto& namespace_storage_info : + storage_info.namespace_storage_info()) { + if (namespace_storage_info.namespace_() == name_space) { + return namespace_storage_info; + } + } + // Didn't find our namespace, fail the test. + EXPECT_TRUE(false) << "Failed to find namespace '" << name_space + << "' in DocumentStorageInfoProto."; + return std::move(NamespaceStorageInfoProto()); +} + +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = + PropertyConfigProto_Cardinality_Code_OPTIONAL; + +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = + StringIndexingConfig_TokenizerType_Code_PLAIN; + +constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; + +constexpr PropertyConfigProto_DataType_Code TYPE_INT = + PropertyConfigProto_DataType_Code_INT64; + UsageReport CreateUsageReport(std::string name_space, std::string uri, int64 timestamp_ms, UsageReport::UsageType usage_type) { @@ -75,6 +107,22 @@ UsageReport CreateUsageReport(std::string name_space, std::string uri, return usage_report; } +PortableFileBackedProtoLog<DocumentWrapper>::Header ReadDocumentLogHeader( + Filesystem filesystem, const std::string& file_path) { + PortableFileBackedProtoLog<DocumentWrapper>::Header header; + filesystem.PRead(file_path.c_str(), &header, + sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header), + /*offset=*/0); + return header; +} + +void WriteDocumentLogHeader( + Filesystem filesystem, const std::string& file_path, + PortableFileBackedProtoLog<DocumentWrapper>::Header& header) { + filesystem.Write(file_path.c_str(), &header, + sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header)); +} + class DocumentStoreTest : public ::testing::Test { protected: DocumentStoreTest() @@ -124,28 +172,22 @@ class DocumentStoreTest : 
public ::testing::Test { filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str()); filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()); - SchemaProto schema; - auto type_config = schema.add_types(); - type_config->set_schema_type("email"); - - auto subject = type_config->add_properties(); - subject->set_property_name("subject"); - subject->set_data_type(PropertyConfigProto::DataType::STRING); - subject->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - subject->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::EXACT_ONLY); - subject->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - - auto body = type_config->add_properties(); - body->set_property_name("body"); - body->set_data_type(PropertyConfigProto::DataType::STRING); - body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - body->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::EXACT_ONLY); - body->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - + SchemaProto schema = + SchemaBuilder() + .AddType( + SchemaTypeConfigBuilder() + .SetType("email") + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); @@ -161,6 +203,19 @@ class DocumentStoreTest : public ::testing::Test { filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); } + void CorruptDocStoreHeaderChecksumFile() { + // Change the DocStore's header combined checksum so that it won't match the + // recalculated checksum on initialization. 
This will force a regeneration + // of derived files from ground truth. + const std::string header_file = + absl_ports::StrCat(document_store_dir_, "/document_store_header"); + DocumentStore::Header header; + header.magic = DocumentStore::Header::kMagic; + header.checksum = 10; // Arbitrary garbage checksum + filesystem_.DeleteFile(header_file.c_str()); + filesystem_.Write(header_file.c_str(), &header, sizeof(header)); + } + const Filesystem filesystem_; const std::string test_dir_; FakeClock fake_clock_; @@ -290,7 +345,7 @@ TEST_F(DocumentStoreTest, PutSameKey) { EXPECT_THAT(doc_store->Put(document3), IsOkAndHolds(Not(document_id1))); } -TEST_F(DocumentStoreTest, IsDocumentExisting) { +TEST_F(DocumentStoreTest, IsDocumentExistingWithoutStatus) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, @@ -322,7 +377,7 @@ TEST_F(DocumentStoreTest, IsDocumentExisting) { IsFalse()); } -TEST_F(DocumentStoreTest, GetSoftDeletedDocumentNotFound) { +TEST_F(DocumentStoreTest, GetDeletedDocumentNotFound) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, @@ -336,29 +391,7 @@ TEST_F(DocumentStoreTest, GetSoftDeletedDocumentNotFound) { IsOkAndHolds(EqualsProto(test_document1_))); ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(), - test_document1_.uri(), - /*soft_delete=*/true)); - EXPECT_THAT( - document_store->Get(test_document1_.namespace_(), test_document1_.uri()), - StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); -} - -TEST_F(DocumentStoreTest, GetHardDeletedDocumentNotFound) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> document_store = - std::move(create_result.document_store); - - 
ICING_EXPECT_OK(document_store->Put(DocumentProto(test_document1_))); - EXPECT_THAT( - document_store->Get(test_document1_.namespace_(), test_document1_.uri()), - IsOkAndHolds(EqualsProto(test_document1_))); - - ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(), - test_document1_.uri(), - /*soft_delete=*/false)); + test_document1_.uri())); EXPECT_THAT( document_store->Get(test_document1_.namespace_(), test_document1_.uri()), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); @@ -436,16 +469,20 @@ TEST_F(DocumentStoreTest, DeleteNonexistentDocumentNotFound) { // Validates that deleting something non-existing won't append anything to // ground truth - int64_t ground_truth_size_before = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); + int64_t document_log_size_before = filesystem_.GetFileSize( + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); EXPECT_THAT( document_store->Delete("nonexistent_namespace", "nonexistent_uri"), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); - int64_t ground_truth_size_after = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); - EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after)); + int64_t document_log_size_after = filesystem_.GetFileSize( + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); + EXPECT_THAT(document_log_size_before, Eq(document_log_size_after)); } TEST_F(DocumentStoreTest, DeleteAlreadyDeletedDocumentNotFound) { @@ -468,7 +505,7 @@ TEST_F(DocumentStoreTest, DeleteAlreadyDeletedDocumentNotFound) { StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } -TEST_F(DocumentStoreTest, SoftDeleteByNamespaceOk) { +TEST_F(DocumentStoreTest, DeleteByNamespaceOk) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, document_store_dir_, 
&fake_clock_, @@ -499,7 +536,7 @@ TEST_F(DocumentStoreTest, SoftDeleteByNamespaceOk) { // DELETE namespace.1. document1 and document 4 should be deleted. document2 // and document3 should still be retrievable. DocumentStore::DeleteByGroupResult group_result = - doc_store->DeleteByNamespace("namespace.1", /*soft_delete=*/true); + doc_store->DeleteByNamespace("namespace.1"); EXPECT_THAT(group_result.status, IsOk()); EXPECT_THAT(group_result.num_docs_deleted, Eq(2)); EXPECT_THAT(doc_store->Get(document1.namespace_(), document1.uri()), @@ -512,51 +549,7 @@ TEST_F(DocumentStoreTest, SoftDeleteByNamespaceOk) { StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } -TEST_F(DocumentStoreTest, HardDeleteByNamespaceOk) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - DocumentProto document1 = test_document1_; - document1.set_namespace_("namespace.1"); - document1.set_uri("uri1"); - ICING_ASSERT_OK(doc_store->Put(document1)); - - DocumentProto document2 = test_document1_; - document2.set_namespace_("namespace.2"); - document2.set_uri("uri1"); - ICING_ASSERT_OK(doc_store->Put(document2)); - - DocumentProto document3 = test_document1_; - document3.set_namespace_("namespace.3"); - document3.set_uri("uri1"); - ICING_ASSERT_OK(doc_store->Put(document3)); - - DocumentProto document4 = test_document1_; - document4.set_namespace_("namespace.1"); - document4.set_uri("uri2"); - ICING_ASSERT_OK(doc_store->Put(document4)); - - // DELETE namespace.1. document1 and document 4 should be deleted. document2 - // and document3 should still be retrievable. 
- DocumentStore::DeleteByGroupResult group_result = - doc_store->DeleteByNamespace("namespace.1", /*soft_delete=*/false); - EXPECT_THAT(group_result.status, IsOk()); - EXPECT_THAT(group_result.num_docs_deleted, Eq(2)); - EXPECT_THAT(doc_store->Get(document1.namespace_(), document1.uri()), - StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); - EXPECT_THAT(doc_store->Get(document2.namespace_(), document2.uri()), - IsOkAndHolds(EqualsProto(document2))); - EXPECT_THAT(doc_store->Get(document3.namespace_(), document3.uri()), - IsOkAndHolds(EqualsProto(document3))); - EXPECT_THAT(doc_store->Get(document4.namespace_(), document4.uri()), - StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); -} - -TEST_F(DocumentStoreTest, SoftDeleteByNamespaceNonexistentNamespaceNotFound) { +TEST_F(DocumentStoreTest, DeleteByNamespaceNonexistentNamespaceNotFound) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, @@ -566,45 +559,22 @@ TEST_F(DocumentStoreTest, SoftDeleteByNamespaceNonexistentNamespaceNotFound) { // Validates that deleting something non-existing won't append anything to // ground truth - int64_t ground_truth_size_before = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); + int64_t document_log_size_before = filesystem_.GetFileSize( + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); - EXPECT_THAT(doc_store - ->DeleteByNamespace("nonexistent_namespace", - /*soft_delete=*/true) - .status, + EXPECT_THAT(doc_store->DeleteByNamespace("nonexistent_namespace").status, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); - int64_t ground_truth_size_after = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); - EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after)); + int64_t document_log_size_after = filesystem_.GetFileSize( + 
absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); + EXPECT_THAT(document_log_size_before, Eq(document_log_size_after)); } -TEST_F(DocumentStoreTest, HardDeleteByNamespaceNonexistentNamespaceNotFound) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - // Validates that deleting something non-existing won't append anything to - // ground truth - int64_t ground_truth_size_before = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); - - EXPECT_THAT(doc_store - ->DeleteByNamespace("nonexistent_namespace", - /*soft_delete=*/false) - .status, - StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); - - int64_t ground_truth_size_after = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); - EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after)); -} - -TEST_F(DocumentStoreTest, SoftDeleteByNamespaceNoExistingDocumentsNotFound) { +TEST_F(DocumentStoreTest, DeleteByNamespaceNoExistingDocumentsNotFound) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, @@ -619,33 +589,9 @@ TEST_F(DocumentStoreTest, SoftDeleteByNamespaceNoExistingDocumentsNotFound) { // At this point, there are no existing documents with the namespace, even // though Icing's derived files know about this namespace. We should still // return NOT_FOUND since nothing existing has this namespace. 
- EXPECT_THAT(document_store - ->DeleteByNamespace(test_document1_.namespace_(), - /*soft_delete=*/true) - .status, - StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); -} - -TEST_F(DocumentStoreTest, HardDeleteByNamespaceNoExistingDocumentsNotFound) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> document_store = - std::move(create_result.document_store); - - ICING_EXPECT_OK(document_store->Put(test_document1_)); - ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(), - test_document1_.uri())); - - // At this point, there are no existing documents with the namespace, even - // though Icing's derived files know about this namespace. We should still - // return NOT_FOUND since nothing existing has this namespace. - EXPECT_THAT(document_store - ->DeleteByNamespace(test_document1_.namespace_(), - /*soft_delete=*/false) - .status, - StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + EXPECT_THAT( + document_store->DeleteByNamespace(test_document1_.namespace_()).status, + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) { @@ -665,7 +611,7 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) { document4.set_namespace_("namespace.1"); document4.set_uri("uri2"); - int64_t ground_truth_size_before; + int64_t document_log_size_before; { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, @@ -686,21 +632,13 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) { EXPECT_THAT(group_result.status, IsOk()); EXPECT_THAT(group_result.num_docs_deleted, Eq(2)); - ground_truth_size_before = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); + document_log_size_before = filesystem_.GetFileSize( + absl_ports::StrCat(document_store_dir_, "/", + 
DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); } // Destructors should update checksum and persist all data to file. - // Change the DocStore's header combined checksum so that it won't match the - // recalculated checksum on initialization. This will force a regeneration of - // derived files from ground truth. - const std::string header_file = - absl_ports::StrCat(document_store_dir_, "/document_store_header"); - DocumentStore::Header header; - header.magic = DocumentStore::Header::kMagic; - header.checksum = 10; // Arbitrary garbage checksum - filesystem_.DeleteFile(header_file.c_str()); - filesystem_.Write(header_file.c_str(), &header, sizeof(header)); - + CorruptDocStoreHeaderChecksumFile(); // Successfully recover from a corrupt derived file issue. ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, @@ -710,9 +648,11 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) { std::move(create_result.document_store); // Make sure we didn't add anything to the ground truth after we recovered. 
- int64_t ground_truth_size_after = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); - EXPECT_EQ(ground_truth_size_before, ground_truth_size_after); + int64_t document_log_size_after = filesystem_.GetFileSize( + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); + EXPECT_EQ(document_log_size_before, document_log_size_after); EXPECT_THAT(doc_store->Get(document1.namespace_(), document1.uri()), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); @@ -724,101 +664,13 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) { StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } -TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeOk) { - SchemaProto schema; - auto type_config = schema.add_types(); - type_config->set_schema_type("email"); - type_config = schema.add_types(); - type_config->set_schema_type("message"); - type_config = schema.add_types(); - type_config->set_schema_type("person"); - - std::string schema_store_dir = schema_store_dir_ + "_custom"; - filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str()); - filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str()); - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_)); - - ICING_ASSERT_OK(schema_store->SetSchema(schema)); - - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store.get())); - std::unique_ptr<DocumentStore> document_store = - std::move(create_result.document_store); - - DocumentProto email_document_1 = DocumentBuilder() - .SetKey("namespace1", "1") - .SetSchema("email") - .SetCreationTimestampMs(1) - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_1_document_id, - document_store->Put(email_document_1)); - - DocumentProto email_document_2 = DocumentBuilder() - .SetKey("namespace2", 
"2") - .SetSchema("email") - .SetCreationTimestampMs(1) - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_2_document_id, - document_store->Put(email_document_2)); - - DocumentProto message_document = DocumentBuilder() - .SetKey("namespace", "3") - .SetSchema("message") - .SetCreationTimestampMs(1) - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id, - document_store->Put(message_document)); - - DocumentProto person_document = DocumentBuilder() - .SetKey("namespace", "4") - .SetSchema("person") - .SetCreationTimestampMs(1) - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId person_document_id, - document_store->Put(person_document)); - - // Delete the "email" type and ensure that it works across both - // email_document's namespaces. And that other documents aren't affected. - DocumentStore::DeleteByGroupResult group_result = - document_store->DeleteBySchemaType("email", /*soft_delete=*/true); - EXPECT_THAT(group_result.status, IsOk()); - EXPECT_THAT(group_result.num_docs_deleted, Eq(2)); - EXPECT_THAT(document_store->Get(email_1_document_id), - StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); - EXPECT_THAT(document_store->Get(email_2_document_id), - StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); - EXPECT_THAT(document_store->Get(message_document_id), - IsOkAndHolds(EqualsProto(message_document))); - EXPECT_THAT(document_store->Get(person_document_id), - IsOkAndHolds(EqualsProto(person_document))); - - // Delete the "message" type and check that other documents aren't affected - group_result = - document_store->DeleteBySchemaType("message", /*soft_delete=*/true); - EXPECT_THAT(group_result.status, IsOk()); - EXPECT_THAT(group_result.num_docs_deleted, Eq(1)); - EXPECT_THAT(document_store->Get(email_1_document_id), - StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); - EXPECT_THAT(document_store->Get(email_2_document_id), - StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); - 
EXPECT_THAT(document_store->Get(message_document_id), - StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); - EXPECT_THAT(document_store->Get(person_document_id), - IsOkAndHolds(EqualsProto(person_document))); -} - -TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeOk) { - SchemaProto schema; - auto type_config = schema.add_types(); - type_config->set_schema_type("email"); - type_config = schema.add_types(); - type_config->set_schema_type("message"); - type_config = schema.add_types(); - type_config->set_schema_type("person"); +TEST_F(DocumentStoreTest, DeleteBySchemaTypeOk) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .AddType(SchemaTypeConfigBuilder().SetType("message")) + .AddType(SchemaTypeConfigBuilder().SetType("person")) + .Build(); std::string schema_store_dir = schema_store_dir_ + "_custom"; filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str()); @@ -871,7 +723,7 @@ TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeOk) { // Delete the "email" type and ensure that it works across both // email_document's namespaces. And that other documents aren't affected. 
DocumentStore::DeleteByGroupResult group_result = - document_store->DeleteBySchemaType("email", /*soft_delete=*/true); + document_store->DeleteBySchemaType("email"); EXPECT_THAT(group_result.status, IsOk()); EXPECT_THAT(group_result.num_docs_deleted, Eq(2)); EXPECT_THAT(document_store->Get(email_1_document_id), @@ -884,8 +736,7 @@ TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeOk) { IsOkAndHolds(EqualsProto(person_document))); // Delete the "message" type and check that other documents aren't affected - group_result = - document_store->DeleteBySchemaType("message", /*soft_delete=*/true); + group_result = document_store->DeleteBySchemaType("message"); EXPECT_THAT(group_result.status, IsOk()); EXPECT_THAT(group_result.num_docs_deleted, Eq(1)); EXPECT_THAT(document_store->Get(email_1_document_id), @@ -898,32 +749,7 @@ TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeOk) { IsOkAndHolds(EqualsProto(person_document))); } -TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeNonexistentSchemaTypeNotFound) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> document_store = - std::move(create_result.document_store); - - // Validates that deleting something non-existing won't append anything to - // ground truth - int64_t ground_truth_size_before = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); - - EXPECT_THAT(document_store - ->DeleteBySchemaType("nonexistent_type", - /*soft_delete=*/true) - .status, - StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); - - int64_t ground_truth_size_after = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); - - EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after)); -} - -TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeNonexistentSchemaTypeNotFound) { +TEST_F(DocumentStoreTest, 
DeleteBySchemaTypeNonexistentSchemaTypeNotFound) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, @@ -933,41 +759,23 @@ TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeNonexistentSchemaTypeNotFound) { // Validates that deleting something non-existing won't append anything to // ground truth - int64_t ground_truth_size_before = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); + int64_t document_log_size_before = filesystem_.GetFileSize( + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); - EXPECT_THAT(document_store - ->DeleteBySchemaType("nonexistent_type", - /*soft_delete=*/false) - .status, + EXPECT_THAT(document_store->DeleteBySchemaType("nonexistent_type").status, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); - int64_t ground_truth_size_after = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); - - EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after)); -} - -TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeNoExistingDocumentsNotFound) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> document_store = - std::move(create_result.document_store); - - ICING_EXPECT_OK(document_store->Put(test_document1_)); - ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(), - test_document1_.uri())); + int64_t document_log_size_after = filesystem_.GetFileSize( + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); - EXPECT_THAT(document_store - ->DeleteBySchemaType(test_document1_.schema(), - /*soft_delete=*/true) - .status, - StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + 
EXPECT_THAT(document_log_size_before, Eq(document_log_size_after)); } -TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeNoExistingDocumentsNotFound) { +TEST_F(DocumentStoreTest, DeleteBySchemaTypeNoExistingDocumentsNotFound) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, @@ -979,19 +787,17 @@ TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeNoExistingDocumentsNotFound) { ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(), test_document1_.uri())); - EXPECT_THAT(document_store - ->DeleteBySchemaType(test_document1_.schema(), - /*soft_delete=*/false) - .status, - StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + EXPECT_THAT( + document_store->DeleteBySchemaType(test_document1_.schema()).status, + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) { - SchemaProto schema; - auto type_config = schema.add_types(); - type_config->set_schema_type("email"); - type_config = schema.add_types(); - type_config->set_schema_type("message"); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .AddType(SchemaTypeConfigBuilder().SetType("message")) + .Build(); std::string schema_store_dir = schema_store_dir_ + "_custom"; filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str()); @@ -1016,7 +822,7 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) { .SetSchema("message") .SetCreationTimestampMs(1) .Build(); - int64_t ground_truth_size_before; + int64_t document_log_size_before; { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, @@ -1036,21 +842,13 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) { EXPECT_THAT(group_result.status, IsOk()); EXPECT_THAT(group_result.num_docs_deleted, Eq(1)); - ground_truth_size_before = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); + 
document_log_size_before = filesystem_.GetFileSize( + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); } // Destructors should update checksum and persist all data to file. - // Change the DocumentStore's header combined checksum so that it won't match - // the recalculated checksum on initialization. This will force a regeneration - // of derived files from ground truth. - const std::string header_file = - absl_ports::StrCat(document_store_dir_, "/document_store_header"); - DocumentStore::Header header; - header.magic = DocumentStore::Header::kMagic; - header.checksum = 10; // Arbitrary garbage checksum - filesystem_.DeleteFile(header_file.c_str()); - filesystem_.Write(header_file.c_str(), &header, sizeof(header)); - + CorruptDocStoreHeaderChecksumFile(); // Successfully recover from a corrupt derived file issue. ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, @@ -1060,9 +858,11 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) { std::move(create_result.document_store); // Make sure we didn't add anything to the ground truth after we recovered. 
- int64_t ground_truth_size_after = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); - EXPECT_EQ(ground_truth_size_before, ground_truth_size_after); + int64_t document_log_size_after = filesystem_.GetFileSize( + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); + EXPECT_EQ(document_log_size_before, document_log_size_after); EXPECT_THAT(document_store->Get(email_document_id), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); @@ -1070,12 +870,25 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) { IsOkAndHolds(EqualsProto(message_document))); } +TEST_F(DocumentStoreTest, PutDeleteThenPut) { + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + ICING_EXPECT_OK(doc_store->Put(test_document1_)); + ICING_EXPECT_OK( + doc_store->Delete(test_document1_.namespace_(), test_document1_.uri())); + ICING_EXPECT_OK(doc_store->Put(test_document1_)); +} + TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) { - SchemaProto schema; - auto type_config = schema.add_types(); - type_config->set_schema_type("email"); - type_config = schema.add_types(); - type_config->set_schema_type("message"); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .AddType(SchemaTypeConfigBuilder().SetType("message")) + .Build(); std::string schema_store_dir = schema_store_dir_ + "_custom"; filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str()); @@ -1100,7 +913,7 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) { .SetSchema("message") .SetCreationTimestampMs(1) .Build(); - int64_t ground_truth_size_before; + int64_t document_log_size_before; { ICING_ASSERT_OK_AND_ASSIGN( 
DocumentStore::CreateResult create_result, @@ -1125,25 +938,18 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) { EXPECT_THAT(document_store->Get(message_document_id), IsOkAndHolds(EqualsProto(message_document))); - ground_truth_size_before = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); + document_log_size_before = filesystem_.GetFileSize( + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); } // Destructors should update checksum and persist all data to file. - // Change the DocumentStore's header combined checksum so that it won't match - // the recalculated checksum on initialization. This will force a regeneration - // of derived files from ground truth. - const std::string header_file = - absl_ports::StrCat(document_store_dir_, "/document_store_header"); - DocumentStore::Header header; - header.magic = DocumentStore::Header::kMagic; - header.checksum = 10; // Arbitrary garbage checksum - filesystem_.DeleteFile(header_file.c_str()); - filesystem_.Write(header_file.c_str(), &header, sizeof(header)); - - SchemaProto new_schema; - type_config = new_schema.add_types(); - type_config->set_schema_type("message"); + CorruptDocStoreHeaderChecksumFile(); + SchemaProto new_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("message")) + .Build(); ICING_EXPECT_OK(schema_store->SetSchema( new_schema, /*ignore_errors_and_delete_documents=*/true)); @@ -1156,9 +962,11 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) { std::move(create_result.document_store); // Make sure we didn't add anything to the ground truth after we recovered. 
- int64_t ground_truth_size_after = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); - EXPECT_EQ(ground_truth_size_before, ground_truth_size_after); + int64_t document_log_size_after = filesystem_.GetFileSize( + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); + EXPECT_EQ(document_log_size_before, document_log_size_after); EXPECT_THAT(document_store->Get(email_document_id), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); @@ -1202,7 +1010,9 @@ TEST_F(DocumentStoreTest, OptimizeInto) { ICING_ASSERT_OK(doc_store->Put(document2)); ICING_ASSERT_OK(doc_store->Put(document3)); - std::string original_document_log = document_store_dir_ + "/document_log"; + std::string original_document_log = absl_ports::StrCat( + document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename()); + int64_t original_size = filesystem_.GetFileSize(original_document_log.c_str()); @@ -1213,7 +1023,8 @@ TEST_F(DocumentStoreTest, OptimizeInto) { HasSubstr("directory is the same"))); std::string optimized_dir = document_store_dir_ + "_optimize"; - std::string optimized_document_log = optimized_dir + "/document_log"; + std::string optimized_document_log = + optimized_dir + "/" + DocumentLogCreator::GetDocumentLogFilename(); // Validates that the optimized document log has the same size if nothing is // deleted @@ -1301,8 +1112,8 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromDataLoss) { DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); const std::string serialized_document = document.SerializeAsString(); - const std::string document_log_file = - absl_ports::StrCat(document_store_dir_, "/document_log"); + const std::string document_log_file = absl_ports::StrCat( + document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename()); int64_t file_size = filesystem_.GetFileSize(document_log_file.c_str()); filesystem_.PWrite(document_log_file.c_str(), 
file_size, serialized_document.data(), serialized_document.size()); @@ -1467,17 +1278,7 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromBadChecksum) { IsOkAndHolds(EqualsProto(test_document2_))); } - // Change the DocStore's header combined checksum so that it won't match the - // recalculated checksum on initialization. This will force a regeneration of - // derived files from ground truth. - const std::string header_file = - absl_ports::StrCat(document_store_dir_, "/document_store_header"); - DocumentStore::Header header; - header.magic = DocumentStore::Header::kMagic; - header.checksum = 10; // Arbitrary garbage checksum - filesystem_.DeleteFile(header_file.c_str()); - filesystem_.Write(header_file.c_str(), &header, sizeof(header)); - + CorruptDocStoreHeaderChecksumFile(); // Successfully recover from a corrupt derived file issue. ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, @@ -1507,7 +1308,7 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromBadChecksum) { /*num_docs=*/1, /*sum_length_in_tokens=*/4))); } -TEST_F(DocumentStoreTest, GetDiskUsage) { +TEST_F(DocumentStoreTest, GetStorageInfo) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, @@ -1515,8 +1316,8 @@ TEST_F(DocumentStoreTest, GetDiskUsage) { std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); - ICING_ASSERT_OK_AND_ASSIGN(int64_t empty_doc_store_size, - doc_store->GetDiskUsage()); + DocumentStorageInfoProto doc_store_storage_info = doc_store->GetStorageInfo(); + int64_t empty_doc_store_size = doc_store_storage_info.document_store_size(); EXPECT_THAT(empty_doc_store_size, Gt(0)); DocumentProto document = DocumentBuilder() @@ -1525,15 +1326,16 @@ TEST_F(DocumentStoreTest, GetDiskUsage) { .AddStringProperty("subject", "foo") .Build(); - // Since our GetDiskUsage can only get sizes in increments of block_size, we + // Since GetStorageInfo can only get sizes in 
increments of block_size, we // need to insert enough documents so the disk usage will increase by at least // 1 block size. The number 100 is a bit arbitrary, gotten from manually // testing. for (int i = 0; i < 100; ++i) { ICING_ASSERT_OK(doc_store->Put(document)); } - EXPECT_THAT(doc_store->GetDiskUsage(), - IsOkAndHolds(Gt(empty_doc_store_size))); + doc_store_storage_info = doc_store->GetStorageInfo(); + EXPECT_THAT(doc_store_storage_info.document_store_size(), + Gt(empty_doc_store_size)); // Bad file system MockFilesystem mock_filesystem; @@ -1546,8 +1348,8 @@ TEST_F(DocumentStoreTest, GetDiskUsage) { std::unique_ptr<DocumentStore> doc_store_with_mock_filesystem = std::move(create_result.document_store); - EXPECT_THAT(doc_store_with_mock_filesystem->GetDiskUsage(), - StatusIs(libtextclassifier3::StatusCode::INTERNAL)); + doc_store_storage_info = doc_store_with_mock_filesystem->GetStorageInfo(); + EXPECT_THAT(doc_store_storage_info.document_store_size(), Eq(-1)); } TEST_F(DocumentStoreTest, MaxDocumentId) { @@ -1838,7 +1640,7 @@ TEST_F(DocumentStoreTest, GetDocumentAssociatedScoreDataSameCorpus) { /*length_in_tokens=*/7))); } -TEST_F(DocumentStoreTest, GetCorpusAssociatedScoreDataDifferentCorpus) { +TEST_F(DocumentStoreTest, GetDocumentAssociatedScoreDataDifferentCorpus) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, @@ -1882,7 +1684,7 @@ TEST_F(DocumentStoreTest, GetCorpusAssociatedScoreDataDifferentCorpus) { /*length_in_tokens=*/7))); } -TEST_F(DocumentStoreTest, NonexistentDocumentAssociatedScoreDataOutOfRange) { +TEST_F(DocumentStoreTest, NonexistentDocumentAssociatedScoreDataNotFound) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, @@ -1891,10 +1693,10 @@ TEST_F(DocumentStoreTest, NonexistentDocumentAssociatedScoreDataOutOfRange) { 
std::move(create_result.document_store); EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(/*document_id=*/0), - StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } -TEST_F(DocumentStoreTest, SoftDeletionDoesNotClearFilterCache) { +TEST_F(DocumentStoreTest, NonexistentDocumentFilterDataNotFound) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, @@ -1902,22 +1704,11 @@ TEST_F(DocumentStoreTest, SoftDeletionDoesNotClearFilterCache) { std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, - doc_store->Put(test_document1_)); - - EXPECT_THAT( - doc_store->GetDocumentFilterData(document_id), - IsOkAndHolds(DocumentFilterData( - /*namespace_id=*/0, - /*schema_type_id=*/0, - /*expiration_timestamp_ms=*/document1_expiration_timestamp_))); - - ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/true)); - // Associated entry of the deleted document is removed. - EXPECT_THAT(doc_store->GetDocumentFilterData(document_id).status(), IsOk()); + EXPECT_THAT(doc_store->GetDocumentFilterData(/*document_id=*/0), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } -TEST_F(DocumentStoreTest, HardDeleteClearsFilterCache) { +TEST_F(DocumentStoreTest, DeleteClearsFilterCache) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, @@ -1935,36 +1726,13 @@ TEST_F(DocumentStoreTest, HardDeleteClearsFilterCache) { /*schema_type_id=*/0, /*expiration_timestamp_ms=*/document1_expiration_timestamp_))); - ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/false)); + ICING_ASSERT_OK(doc_store->Delete("icing", "email/1")); // Associated entry of the deleted document is removed. 
EXPECT_THAT(doc_store->GetDocumentFilterData(document_id), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } -TEST_F(DocumentStoreTest, SoftDeletionDoesNotClearScoreCache) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, - doc_store->Put(test_document1_, /*num_tokens=*/4)); - - EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id), - IsOkAndHolds(DocumentAssociatedScoreData( - /*corpus_id=*/0, /*document_score=*/document1_score_, - /*creation_timestamp_ms=*/document1_creation_timestamp_, - /*length_in_tokens=*/4))); - - ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/true)); - // Associated entry of the deleted document is removed. - EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id).status(), - IsOk()); -} - -TEST_F(DocumentStoreTest, HardDeleteClearsScoreCache) { +TEST_F(DocumentStoreTest, DeleteClearsScoreCache) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, @@ -1982,13 +1750,13 @@ TEST_F(DocumentStoreTest, HardDeleteClearsScoreCache) { /*creation_timestamp_ms=*/document1_creation_timestamp_, /*length_in_tokens=*/4))); - ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/false)); + ICING_ASSERT_OK(doc_store->Delete("icing", "email/1")); // Associated entry of the deleted document is removed. 
EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } -TEST_F(DocumentStoreTest, SoftDeleteDoesNotClearUsageScores) { +TEST_F(DocumentStoreTest, DeleteShouldPreventUsageScores) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, @@ -2010,15 +1778,21 @@ TEST_F(DocumentStoreTest, SoftDeleteDoesNotClearUsageScores) { ASSERT_THAT(doc_store->GetUsageScores(document_id), IsOkAndHolds(expected_scores)); - // Soft delete the document. - ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/true)); + // Delete the document. + ICING_ASSERT_OK(doc_store->Delete("icing", "email/1")); + + // Can't report or get usage scores on the deleted document + ASSERT_THAT( + doc_store->ReportUsage(usage_report_type1), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND, + HasSubstr("Couldn't report usage on a nonexistent document"))); - // The scores should be the same. 
ASSERT_THAT(doc_store->GetUsageScores(document_id), - IsOkAndHolds(expected_scores)); + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND, + HasSubstr("Can't get usage scores"))); } -TEST_F(DocumentStoreTest, HardDeleteShouldClearUsageScores) { +TEST_F(DocumentStoreTest, ExpirationShouldPreventUsageScores) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, @@ -2026,8 +1800,20 @@ TEST_F(DocumentStoreTest, HardDeleteShouldClearUsageScores) { std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, - doc_store->Put(test_document1_)); + DocumentProto document = DocumentBuilder() + .SetKey("icing", "email/1") + .SetSchema("email") + .AddStringProperty("subject", "subject foo") + .AddStringProperty("body", "body bar") + .SetScore(document1_score_) + .SetCreationTimestampMs(10) + .SetTtlMs(100) + .Build(); + + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, doc_store->Put(document)); + + // Some arbitrary time before the document's creation time (10) + ttl (100) + fake_clock_.SetSystemTimeMilliseconds(109); // Report usage with type 1. UsageReport usage_report_type1 = CreateUsageReport( @@ -2040,13 +1826,18 @@ TEST_F(DocumentStoreTest, HardDeleteShouldClearUsageScores) { ASSERT_THAT(doc_store->GetUsageScores(document_id), IsOkAndHolds(expected_scores)); - // Hard delete the document. - ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/false)); + // Some arbitrary time past the document's creation time (10) + ttl (100) + fake_clock_.SetSystemTimeMilliseconds(200); + + // Can't report or get usage scores on the expired document + ASSERT_THAT( + doc_store->ReportUsage(usage_report_type1), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND, + HasSubstr("Couldn't report usage on a nonexistent document"))); - // The scores should be cleared. 
- expected_scores.usage_type1_count = 0; ASSERT_THAT(doc_store->GetUsageScores(document_id), - IsOkAndHolds(expected_scores)); + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND, + HasSubstr("Can't get usage scores"))); } TEST_F(DocumentStoreTest, @@ -2231,7 +2022,7 @@ TEST_F(DocumentStoreTest, ComputeChecksumSameAcrossInstances) { EXPECT_THAT(document_store->ComputeChecksum(), IsOkAndHolds(checksum)); } -TEST_F(DocumentStoreTest, ComputeChecksumChangesOnModification) { +TEST_F(DocumentStoreTest, ComputeChecksumChangesOnNewDocument) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, @@ -2247,6 +2038,24 @@ TEST_F(DocumentStoreTest, ComputeChecksumChangesOnModification) { IsOkAndHolds(Not(Eq(checksum)))); } +TEST_F(DocumentStoreTest, ComputeChecksumDoesntChangeOnNewUsage) { + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + + ICING_EXPECT_OK(document_store->Put(test_document1_)); + ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, document_store->ComputeChecksum()); + + UsageReport usage_report = + CreateUsageReport(test_document1_.namespace_(), test_document1_.uri(), + /*timestamp_ms=*/1000, UsageReport::USAGE_TYPE1); + ICING_EXPECT_OK(document_store->ReportUsage(usage_report)); + EXPECT_THAT(document_store->ComputeChecksum(), IsOkAndHolds(Eq(checksum))); +} + TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) { const std::string schema_store_dir = schema_store_dir_ + "_custom"; @@ -2275,11 +2084,11 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_)); - SchemaProto schema; - auto 
type_config = schema.add_types(); - type_config->set_schema_type("email"); - type_config = schema.add_types(); - type_config->set_schema_type("message"); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .AddType(SchemaTypeConfigBuilder().SetType("message")) + .Build(); ICING_EXPECT_OK(schema_store->SetSchema(schema)); ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id, @@ -2320,16 +2129,7 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) { message_expiration_timestamp = message_data.expiration_timestamp_ms(); } // Everything destructs and commits changes to file - // Change the DocumentStore's header combined checksum so that it won't match - // the recalculated checksum on initialization. This will force a regeneration - // of derived files from ground truth. - const std::string header_file = - absl_ports::StrCat(document_store_dir_, "/document_store_header"); - DocumentStore::Header header; - header.magic = DocumentStore::Header::kMagic; - header.checksum = 10; // Arbitrary garbage checksum - filesystem_.DeleteFile(header_file.c_str()); - filesystem_.Write(header_file.c_str(), &header, sizeof(header)); + CorruptDocStoreHeaderChecksumFile(); // Change the schema so that we don't know of the Document's type anymore. 
// Since we can't set backwards incompatible changes, we do some file-level @@ -2340,9 +2140,10 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_)); - SchemaProto schema; - auto type_config = schema.add_types(); - type_config->set_schema_type("email"); + + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_EXPECT_OK(schema_store->SetSchema(schema)); ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id, @@ -2388,11 +2189,11 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) { filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str()); // Set a schema - SchemaProto schema; - auto type_config = schema.add_types(); - type_config->set_schema_type("email"); - type_config = schema.add_types(); - type_config->set_schema_type("message"); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .AddType(SchemaTypeConfigBuilder().SetType("message")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, @@ -2440,11 +2241,10 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) { // Rearrange the schema types. Since SchemaTypeId is assigned based on order, // this should change the SchemaTypeIds. 
- schema.clear_types(); - type_config = schema.add_types(); - type_config->set_schema_type("message"); - type_config = schema.add_types(); - type_config->set_schema_type("email"); + schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("message")) + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_EXPECT_OK(schema_store->SetSchema(schema)); @@ -2475,18 +2275,14 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreDeletesInvalidDocuments) { filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str()); // Set a schema - SchemaProto schema; - auto type_config = schema.add_types(); - type_config->set_schema_type("email"); - - auto property_config = type_config->add_properties(); - property_config->set_property_name("subject"); - property_config->set_data_type(PropertyConfigProto::DataType::STRING); - property_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - property_config->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::EXACT_ONLY); - property_config->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, @@ -2553,11 +2349,11 @@ TEST_F(DocumentStoreTest, filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str()); // Set a schema - SchemaProto schema; - auto type_config = schema.add_types(); - type_config->set_schema_type("email"); - type_config = schema.add_types(); - type_config->set_schema_type("message"); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .AddType(SchemaTypeConfigBuilder().SetType("message")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( 
std::unique_ptr<SchemaStore> schema_store, @@ -2597,9 +2393,10 @@ TEST_F(DocumentStoreTest, EXPECT_THAT(document_store->Get(message_document_id), IsOkAndHolds(EqualsProto(message_document))); - SchemaProto new_schema; - type_config = new_schema.add_types(); - type_config->set_schema_type("message"); + SchemaProto new_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("message")) + .Build(); ICING_EXPECT_OK( schema_store->SetSchema(new_schema, @@ -2622,11 +2419,11 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) { filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str()); // Set a schema - SchemaProto schema; - auto type_config = schema.add_types(); - type_config->set_schema_type("email"); - type_config = schema.add_types(); - type_config->set_schema_type("message"); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .AddType(SchemaTypeConfigBuilder().SetType("message")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, @@ -2674,11 +2471,10 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) { // Rearrange the schema types. Since SchemaTypeId is assigned based on order, // this should change the SchemaTypeIds. 
- schema.clear_types(); - type_config = schema.add_types(); - type_config->set_schema_type("message"); - type_config = schema.add_types(); - type_config->set_schema_type("email"); + schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("message")) + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN(SchemaStore::SetSchemaResult set_schema_result, schema_store->SetSchema(schema)); @@ -2711,18 +2507,14 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreDeletesInvalidDocuments) { filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str()); // Set a schema - SchemaProto schema; - auto type_config = schema.add_types(); - type_config->set_schema_type("email"); - - auto property_config = type_config->add_properties(); - property_config->set_property_name("subject"); - property_config->set_data_type(PropertyConfigProto::DataType::STRING); - property_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - property_config->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::EXACT_ONLY); - property_config->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, @@ -2792,11 +2584,11 @@ TEST_F(DocumentStoreTest, filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str()); // Set a schema - SchemaProto schema; - auto type_config = schema.add_types(); - type_config->set_schema_type("email"); - type_config = schema.add_types(); - type_config->set_schema_type("message"); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + 
.AddType(SchemaTypeConfigBuilder().SetType("message")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, @@ -2836,9 +2628,10 @@ TEST_F(DocumentStoreTest, EXPECT_THAT(document_store->Get(message_document_id), IsOkAndHolds(EqualsProto(message_document))); - SchemaProto new_schema; - type_config = new_schema.add_types(); - type_config->set_schema_type("message"); + SchemaProto new_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("message")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( SchemaStore::SetSchemaResult set_schema_result, @@ -3126,17 +2919,7 @@ TEST_F(DocumentStoreTest, UsageScoresShouldNotBeClearedOnChecksumMismatch) { IsOkAndHolds(expected_scores)); } - // Change the DocStore's header combined checksum so that it won't match the - // recalculated checksum on initialization. This will force a regeneration of - // derived files from ground truth. - const std::string header_file = - absl_ports::StrCat(document_store_dir_, "/document_store_header"); - DocumentStore::Header header; - header.magic = DocumentStore::Header::kMagic; - header.checksum = 10; // Arbitrary garbage checksum - filesystem_.DeleteFile(header_file.c_str()); - filesystem_.Write(header_file.c_str(), &header, sizeof(header)); - + CorruptDocStoreHeaderChecksumFile(); // Successfully recover from a corrupt derived file issue. 
ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, @@ -3181,8 +2964,8 @@ TEST_F(DocumentStoreTest, UsageScoresShouldBeAvailableAfterDataLoss) { DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); const std::string serialized_document = document.SerializeAsString(); - const std::string document_log_file = - absl_ports::StrCat(document_store_dir_, "/document_log"); + const std::string document_log_file = absl_ports::StrCat( + document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename()); int64_t file_size = filesystem_.GetFileSize(document_log_file.c_str()); filesystem_.PWrite(document_log_file.c_str(), file_size, serialized_document.data(), serialized_document.size()); @@ -3235,45 +3018,6 @@ TEST_F(DocumentStoreTest, UsageScoresShouldBeCopiedOverToUpdatedDocument) { IsOkAndHolds(expected_scores)); } -TEST_F(DocumentStoreTest, - UsageScoresShouldNotBeCopiedOverFromOldSoftDeletedDocs) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> document_store = - std::move(create_result.document_store); - - ICING_ASSERT_OK_AND_ASSIGN( - DocumentId document_id, - document_store->Put(DocumentProto(test_document1_))); - - // Report usage with type 1. - UsageReport usage_report_type1 = CreateUsageReport( - /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0, - UsageReport::USAGE_TYPE1); - ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1)); - - UsageStore::UsageScores expected_scores; - ++expected_scores.usage_type1_count; - ASSERT_THAT(document_store->GetUsageScores(document_id), - IsOkAndHolds(expected_scores)); - - // Soft delete the doc. - ICING_ASSERT_OK(document_store->Delete(document_id, /*soft_delete=*/true)); - - // Put the same document. 
- ICING_ASSERT_OK_AND_ASSIGN( - DocumentId updated_document_id, - document_store->Put(DocumentProto(test_document1_))); - // We should get a different document id. - ASSERT_THAT(updated_document_id, Not(Eq(document_id))); - - // Usage scores should be cleared. - EXPECT_THAT(document_store->GetUsageScores(updated_document_id), - IsOkAndHolds(UsageStore::UsageScores())); -} - TEST_F(DocumentStoreTest, UsageScoresShouldPersistOnOptimize) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, @@ -3344,7 +3088,9 @@ TEST_F(DocumentStoreTest, DetectPartialDataLoss) { const std::string serialized_document = document.SerializeAsString(); const std::string document_log_file = - absl_ports::StrCat(document_store_dir_, "/document_log"); + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str(); int64_t file_size = filesystem_.GetFileSize(document_log_file.c_str()); filesystem_.PWrite(document_log_file.c_str(), file_size, serialized_document.data(), serialized_document.size()); @@ -3361,8 +3107,8 @@ TEST_F(DocumentStoreTest, DetectPartialDataLoss) { TEST_F(DocumentStoreTest, DetectCompleteDataLoss) { int64_t corruptible_offset; - const std::string document_log_file = - absl_ports::StrCat(document_store_dir_, "/document_log"); + const std::string document_log_file = absl_ports::StrCat( + document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename()); { // Can put and delete fine. ICING_ASSERT_OK_AND_ASSIGN( @@ -3389,8 +3135,30 @@ TEST_F(DocumentStoreTest, DetectCompleteDataLoss) { // "Corrupt" the persisted content written in the log. We can't recover if // the persisted data was corrupted. 
std::string corruption = "abc"; - filesystem_.PWrite(document_log_file.c_str(), /*offset=*/corruptible_offset, - corruption.data(), corruption.size()); + filesystem_.PWrite(document_log_file.c_str(), + /*offset=*/corruptible_offset, corruption.data(), + corruption.size()); + + { + // "Corrupt" the content written in the log. Make the corrupt document + // smaller than our original one so we don't accidentally write past our + // file. + DocumentProto document = + DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build(); + std::string serialized_document = document.SerializeAsString(); + ASSERT_TRUE(filesystem_.PWrite( + document_log_file.c_str(), corruptible_offset, + serialized_document.data(), serialized_document.size())); + + PortableFileBackedProtoLog<DocumentWrapper>::Header header = + ReadDocumentLogHeader(filesystem_, document_log_file); + + // Set dirty bit to true to reflect that something changed in the log. + header.SetDirtyFlag(true); + header.SetHeaderChecksum(header.CalculateHeaderChecksum()); + + WriteDocumentLogHeader(filesystem_, document_log_file, header); + } // Successfully recover from a data loss issue. ICING_ASSERT_OK_AND_ASSIGN( @@ -3402,54 +3170,699 @@ TEST_F(DocumentStoreTest, DetectCompleteDataLoss) { ASSERT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE)); } +// TODO(b/185845269) Re-enable this test by copying over a full valid set of +// document store files. Right now this test only includes the score_cache and +// the document store header. +// +// This causes a problem now because this cl changes behavior to not consider an +// InitializeExistingDerivedFiles failure to be a recovery if there is nothing +// to recover because the doocument store is empty. 
+#define DISABLE_BACKWARDS_COMPAT_TEST +#ifndef DISABLE_BACKWARDS_COMPAT_TEST TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) { - // The directory testdata/v0/document_store contains only the scoring_cache - // and the document_store_header (holding the crc for the scoring_cache). If - // the current code is compatible with the format of the v0 scoring_cache, - // then an empty document store should be initialized, but the non-empty - // scoring_cache should be retained. - // The current document-asscoiated-score-data has a new field with respect to - // the ones stored in testdata/v0, hence the document store's initialization - // requires regenerating its derived files. + // The directory testdata/score_cache_without_length_in_tokens/document_store + // contains only the scoring_cache and the document_store_header (holding the + // crc for the scoring_cache). If the current code is compatible with the + // format of the v0 scoring_cache, then an empty document store should be + // initialized, but the non-empty scoring_cache should be retained. The + // current document-asscoiated-score-data has a new field with respect to the + // ones stored in testdata/score_cache_Without_length_in_tokens, hence the + // document store's initialization requires regenerating its derived files. 
// Create dst directory ASSERT_THAT(filesystem_.CreateDirectory(document_store_dir_.c_str()), true); // Get src files - std::string document_store_v0; + std::string document_store_without_length_in_tokens; if (IsAndroidPlatform() || IsIosPlatform()) { - document_store_v0 = GetTestFilePath( - "icing/testdata/v0/document_store_android_ios_compatible"); + document_store_without_length_in_tokens = GetTestFilePath( + "icing/testdata/score_cache_without_length_in_tokens/" + "document_store_android_ios_compatible"); } else { - document_store_v0 = - GetTestFilePath("icing/testdata/v0/document_store"); + document_store_without_length_in_tokens = GetTestFilePath( + "icing/testdata/score_cache_without_length_in_tokens/" + "document_store"); } std::vector<std::string> document_store_files; Filesystem filesystem; - filesystem.ListDirectory(document_store_v0.c_str(), &document_store_files); + filesystem.ListDirectory(document_store_without_length_in_tokens.c_str(), + &document_store_files); - VLOG(1) << "Copying files " << document_store_v0 << ' ' - << document_store_files.size(); + ICING_LOG(INFO) << "Copying files " << document_store_without_length_in_tokens + << ' ' << document_store_files.size(); for (size_t i = 0; i != document_store_files.size(); i++) { - std::string src = - absl_ports::StrCat(document_store_v0, "/", document_store_files[i]); + std::string src = absl_ports::StrCat( + document_store_without_length_in_tokens, "/", document_store_files[i]); std::string dst = absl_ports::StrCat(document_store_dir_, "/", document_store_files[i]); ASSERT_THAT(filesystem_.CopyFile(src.c_str(), dst.c_str()), true); } - NativeInitializeStats initializeStats; + InitializeStatsProto initialize_stats; ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get(), &initializeStats)); + schema_store_.get(), + /*force_recovery_and_revalidate_documents=*/false, + &initialize_stats)); 
std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); // The store_cache trigger regeneration because its element size is // inconsistent: expected 20 (current new size), actual 12 (as per the v0 // score_cache). - EXPECT_TRUE(initializeStats.has_document_store_recovery_cause()); + EXPECT_TRUE(initialize_stats.has_document_store_recovery_cause()); +} +#endif // DISABLE_BACKWARDS_COMPAT_TEST + +TEST_F(DocumentStoreTest, DocumentStoreStorageInfo) { + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + // Add three documents. + DocumentProto document1 = test_document1_; + document1.set_namespace_("namespace.1"); + document1.set_uri("uri1"); + ICING_ASSERT_OK(doc_store->Put(document1)); + + DocumentProto document2 = test_document1_; + document2.set_namespace_("namespace.1"); + document2.set_uri("uri2"); + document2.set_creation_timestamp_ms(fake_clock_.GetSystemTimeMilliseconds()); + document2.set_ttl_ms(100); + ICING_ASSERT_OK(doc_store->Put(document2)); + + DocumentProto document3 = test_document1_; + document3.set_namespace_("namespace.1"); + document3.set_uri("uri3"); + ICING_ASSERT_OK(doc_store->Put(document3)); + + DocumentProto document4 = test_document1_; + document4.set_namespace_("namespace.2"); + document4.set_uri("uri1"); + ICING_ASSERT_OK(doc_store->Put(document4)); + + // Report usage with type 1 on document1 + UsageReport usage_report_type1 = CreateUsageReport( + /*name_space=*/"namespace.1", /*uri=*/"uri1", /*timestamp_ms=*/1000, + UsageReport::USAGE_TYPE1); + ICING_ASSERT_OK(doc_store->ReportUsage(usage_report_type1)); + + // Report usage with type 2 on document2 + UsageReport usage_report_type2 = CreateUsageReport( + /*name_space=*/"namespace.1", /*uri=*/"uri2", /*timestamp_ms=*/1000, + UsageReport::USAGE_TYPE2); + 
ICING_ASSERT_OK(doc_store->ReportUsage(usage_report_type2)); + + // Report usage with type 3 on document3 + UsageReport usage_report_type3 = CreateUsageReport( + /*name_space=*/"namespace.1", /*uri=*/"uri3", /*timestamp_ms=*/1000, + UsageReport::USAGE_TYPE3); + ICING_ASSERT_OK(doc_store->ReportUsage(usage_report_type3)); + + // Report usage with type 1 on document4 + usage_report_type1 = CreateUsageReport( + /*name_space=*/"namespace.2", /*uri=*/"uri1", /*timestamp_ms=*/1000, + UsageReport::USAGE_TYPE1); + ICING_ASSERT_OK(doc_store->ReportUsage(usage_report_type1)); + + // Delete the first doc. + ICING_ASSERT_OK(doc_store->Delete(document1.namespace_(), document1.uri())); + + // Expire the second doc. + fake_clock_.SetSystemTimeMilliseconds(document2.creation_timestamp_ms() + + document2.ttl_ms() + 1); + + // Check high level info + DocumentStorageInfoProto storage_info = doc_store->GetStorageInfo(); + EXPECT_THAT(storage_info.num_alive_documents(), Eq(2)); + EXPECT_THAT(storage_info.num_deleted_documents(), Eq(1)); + EXPECT_THAT(storage_info.num_expired_documents(), Eq(1)); + EXPECT_THAT(storage_info.document_store_size(), Ge(0)); + EXPECT_THAT(storage_info.document_log_size(), Ge(0)); + EXPECT_THAT(storage_info.key_mapper_size(), Ge(0)); + EXPECT_THAT(storage_info.document_id_mapper_size(), Ge(0)); + EXPECT_THAT(storage_info.score_cache_size(), Ge(0)); + EXPECT_THAT(storage_info.filter_cache_size(), Ge(0)); + EXPECT_THAT(storage_info.corpus_mapper_size(), Ge(0)); + EXPECT_THAT(storage_info.corpus_score_cache_size(), Ge(0)); + EXPECT_THAT(storage_info.namespace_id_mapper_size(), Ge(0)); + EXPECT_THAT(storage_info.num_namespaces(), Eq(2)); + + // Check per-namespace info + EXPECT_THAT(storage_info.namespace_storage_info_size(), Eq(2)); + + NamespaceStorageInfoProto namespace_storage_info = + GetNamespaceStorageInfo(storage_info, "namespace.1"); + EXPECT_THAT(namespace_storage_info.num_alive_documents(), Eq(1)); + 
EXPECT_THAT(namespace_storage_info.num_expired_documents(), Eq(1)); + EXPECT_THAT(namespace_storage_info.num_alive_documents_usage_type1(), Eq(0)); + EXPECT_THAT(namespace_storage_info.num_alive_documents_usage_type2(), Eq(0)); + EXPECT_THAT(namespace_storage_info.num_alive_documents_usage_type3(), Eq(1)); + EXPECT_THAT(namespace_storage_info.num_expired_documents_usage_type1(), + Eq(0)); + EXPECT_THAT(namespace_storage_info.num_expired_documents_usage_type2(), + Eq(1)); + EXPECT_THAT(namespace_storage_info.num_expired_documents_usage_type3(), + Eq(0)); + + namespace_storage_info = GetNamespaceStorageInfo(storage_info, "namespace.2"); + EXPECT_THAT(namespace_storage_info.num_alive_documents(), Eq(1)); + EXPECT_THAT(namespace_storage_info.num_expired_documents(), Eq(0)); + EXPECT_THAT(namespace_storage_info.num_alive_documents_usage_type1(), Eq(1)); + EXPECT_THAT(namespace_storage_info.num_alive_documents_usage_type2(), Eq(0)); + EXPECT_THAT(namespace_storage_info.num_alive_documents_usage_type3(), Eq(0)); + EXPECT_THAT(namespace_storage_info.num_expired_documents_usage_type1(), + Eq(0)); + EXPECT_THAT(namespace_storage_info.num_expired_documents_usage_type2(), + Eq(0)); + EXPECT_THAT(namespace_storage_info.num_expired_documents_usage_type3(), + Eq(0)); +} + +TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) { + // Start fresh and set the schema with one type. 
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); + filesystem_.CreateDirectoryRecursively(test_dir_.c_str()); + filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str()); + filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()); + + SchemaTypeConfigProto email_type_config = + SchemaTypeConfigBuilder() + .SetType("email") + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaProto schema = SchemaBuilder().AddType(email_type_config).Build(); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ASSERT_THAT(schema_store->SetSchema(schema), IsOk()); + // The typeid for "email" should be 0. + ASSERT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0)); + + DocumentId docid = kInvalidDocumentId; + { + // Create the document store the first time and add an email document. 
+ ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + DocumentProto doc = + DocumentBuilder() + .SetKey("icing", "email/1") + .SetSchema("email") + .AddStringProperty("subject", "subject foo") + .AddStringProperty("body", "body bar") + .SetScore(document1_score_) + .SetCreationTimestampMs( + document1_creation_timestamp_) // A random timestamp + .SetTtlMs(document1_ttl_) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(doc)); + ICING_ASSERT_OK_AND_ASSIGN(DocumentFilterData filter_data, + doc_store->GetDocumentFilterData(docid)); + + ASSERT_THAT(filter_data.schema_type_id(), Eq(0)); + } + + // Add another type to the schema before the email type. + schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("alarm") + .AddProperty( + PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("time") + .SetDataType(TYPE_INT) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(email_type_config) + .Build(); + ASSERT_THAT(schema_store->SetSchema(schema), IsOk()); + // Adding a new type should cause ids to be reassigned. Ids are assigned in + // order of appearance so 'alarm' should be 0 and 'email' should be 1. 
+ ASSERT_THAT(schema_store->GetSchemaTypeId("alarm"), IsOkAndHolds(0)); + ASSERT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(1)); + + { + // Create the document store the second time and force recovery + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create( + &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(), + /*force_recovery_and_revalidate_documents=*/true)); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + // Ensure that the type id of the email document has been correctly updated. + ICING_ASSERT_OK_AND_ASSIGN(DocumentFilterData filter_data, + doc_store->GetDocumentFilterData(docid)); + ASSERT_THAT(filter_data.schema_type_id(), Eq(1)); + } +} + +TEST_F(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) { + // Start fresh and set the schema with one type. + filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); + filesystem_.CreateDirectoryRecursively(test_dir_.c_str()); + filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str()); + filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()); + + SchemaTypeConfigProto email_type_config = + SchemaTypeConfigBuilder() + .SetType("email") + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaProto schema = SchemaBuilder().AddType(email_type_config).Build(); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ASSERT_THAT(schema_store->SetSchema(schema), IsOk()); + // The typeid for "email" should be 0. 
+ ASSERT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0)); + + DocumentId docid = kInvalidDocumentId; + { + // Create the document store the first time and add an email document. + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + DocumentProto doc = + DocumentBuilder() + .SetKey("icing", "email/1") + .SetSchema("email") + .AddStringProperty("subject", "subject foo") + .AddStringProperty("body", "body bar") + .SetScore(document1_score_) + .SetCreationTimestampMs( + document1_creation_timestamp_) // A random timestamp + .SetTtlMs(document1_ttl_) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(doc)); + ICING_ASSERT_OK_AND_ASSIGN(DocumentFilterData filter_data, + doc_store->GetDocumentFilterData(docid)); + + ASSERT_THAT(filter_data.schema_type_id(), Eq(0)); + } + + // Add another type to the schema. + schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("alarm") + .AddProperty( + PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("time") + .SetDataType(TYPE_INT) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(email_type_config) + .Build(); + ASSERT_THAT(schema_store->SetSchema(schema), IsOk()); + // Adding a new type should cause ids to be reassigned. Ids are assigned in + // order of appearance so 'alarm' should be 0 and 'email' should be 1. + ASSERT_THAT(schema_store->GetSchemaTypeId("alarm"), IsOkAndHolds(0)); + ASSERT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(1)); + + { + // Create the document store the second time. Don't force recovery. 
+ ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create( + &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(), + /*force_recovery_and_revalidate_documents=*/false)); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + // Check that the type id of the email document has not been updated. + ICING_ASSERT_OK_AND_ASSIGN(DocumentFilterData filter_data, + doc_store->GetDocumentFilterData(docid)); + ASSERT_THAT(filter_data.schema_type_id(), Eq(0)); + } +} + +TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) { + // Start fresh and set the schema with one type. + filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); + filesystem_.CreateDirectoryRecursively(test_dir_.c_str()); + filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str()); + filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()); + + SchemaTypeConfigProto email_type_config = + SchemaTypeConfigBuilder() + .SetType("email") + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaProto schema = SchemaBuilder().AddType(email_type_config).Build(); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ASSERT_THAT(schema_store->SetSchema(schema), IsOk()); + + DocumentProto docWithBody = + DocumentBuilder() + .SetKey("icing", "email/1") + .SetSchema("email") + .AddStringProperty("subject", "subject foo") + .AddStringProperty("body", "body bar") + .SetScore(document1_score_) + .SetCreationTimestampMs( + document1_creation_timestamp_) // A random timestamp + .SetTtlMs(document1_ttl_) + .Build(); + DocumentProto 
docWithoutBody = + DocumentBuilder() + .SetKey("icing", "email/2") + .SetSchema("email") + .AddStringProperty("subject", "subject foo") + .SetScore(document1_score_) + .SetCreationTimestampMs( + document1_creation_timestamp_) // A random timestamp + .SetTtlMs(document1_ttl_) + .Build(); + + { + // Create the document store the first time and add two email documents: one + // that has the 'body' section and one that doesn't. + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + DocumentId docid = kInvalidDocumentId; + ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithBody)); + ASSERT_NE(docid, kInvalidDocumentId); + docid = kInvalidDocumentId; + ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithoutBody)); + ASSERT_NE(docid, kInvalidDocumentId); + + ASSERT_THAT(doc_store->Get(docWithBody.namespace_(), docWithBody.uri()), + IsOkAndHolds(EqualsProto(docWithBody))); + ASSERT_THAT( + doc_store->Get(docWithoutBody.namespace_(), docWithoutBody.uri()), + IsOkAndHolds(EqualsProto(docWithoutBody))); + } + + // Delete the 'body' property from the 'email' type, making all pre-existing + // documents with the 'body' property invalid. 
+ email_type_config = + SchemaTypeConfigBuilder() + .SetType("email") + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + schema = SchemaBuilder().AddType(email_type_config).Build(); + ASSERT_THAT(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/true), + IsOk()); + + { + // Create the document store the second time and force recovery + CorruptDocStoreHeaderChecksumFile(); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create( + &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(), + /*force_recovery_and_revalidate_documents=*/true)); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + ASSERT_THAT(doc_store->Get(docWithBody.namespace_(), docWithBody.uri()), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + ASSERT_THAT( + doc_store->Get(docWithoutBody.namespace_(), docWithoutBody.uri()), + IsOkAndHolds(EqualsProto(docWithoutBody))); + } +} + +TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) { + // Start fresh and set the schema with one type. 
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); + filesystem_.CreateDirectoryRecursively(test_dir_.c_str()); + filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str()); + filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()); + + SchemaTypeConfigProto email_type_config = + SchemaTypeConfigBuilder() + .SetType("email") + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaProto schema = SchemaBuilder().AddType(email_type_config).Build(); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ASSERT_THAT(schema_store->SetSchema(schema), IsOk()); + + DocumentProto docWithBody = + DocumentBuilder() + .SetKey("icing", "email/1") + .SetSchema("email") + .AddStringProperty("subject", "subject foo") + .AddStringProperty("body", "body bar") + .SetScore(document1_score_) + .SetCreationTimestampMs( + document1_creation_timestamp_) // A random timestamp + .SetTtlMs(document1_ttl_) + .Build(); + DocumentProto docWithoutBody = + DocumentBuilder() + .SetKey("icing", "email/2") + .SetSchema("email") + .AddStringProperty("subject", "subject foo") + .SetScore(document1_score_) + .SetCreationTimestampMs( + document1_creation_timestamp_) // A random timestamp + .SetTtlMs(document1_ttl_) + .Build(); + + { + // Create the document store the first time and add two email documents: one + // that has the 'body' section and one that doesn't. 
+ ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + DocumentId docid = kInvalidDocumentId; + ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithBody)); + ASSERT_NE(docid, kInvalidDocumentId); + docid = kInvalidDocumentId; + ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithoutBody)); + ASSERT_NE(docid, kInvalidDocumentId); + + ASSERT_THAT(doc_store->Get(docWithBody.namespace_(), docWithBody.uri()), + IsOkAndHolds(EqualsProto(docWithBody))); + ASSERT_THAT( + doc_store->Get(docWithoutBody.namespace_(), docWithoutBody.uri()), + IsOkAndHolds(EqualsProto(docWithoutBody))); + } + + // Delete the 'body' property from the 'email' type, making all pre-existing + // documents with the 'body' property invalid. + email_type_config = + SchemaTypeConfigBuilder() + .SetType("email") + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + schema = SchemaBuilder().AddType(email_type_config).Build(); + ASSERT_THAT(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/true), + IsOk()); + + { + // Corrupt the document store header checksum so that we will perform + // recovery, but without revalidation. 
+ CorruptDocStoreHeaderChecksumFile(); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create( + &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(), + /*force_recovery_and_revalidate_documents=*/false)); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + ASSERT_THAT(doc_store->Get(docWithBody.namespace_(), docWithBody.uri()), + IsOkAndHolds(EqualsProto(docWithBody))); + ASSERT_THAT( + doc_store->Get(docWithoutBody.namespace_(), docWithoutBody.uri()), + IsOkAndHolds(EqualsProto(docWithoutBody))); + } +} + +#ifndef DISABLE_BACKWARDS_COMPAT_TEST +TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) { + // Set up schema. + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("email") + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + std::string schema_store_dir = schema_store_dir_ + "_migrate"; + filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str()); + filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str()); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_)); + + ASSERT_THAT(schema_store->SetSchema(schema), IsOk()); + + // Create dst directory that we'll initialize the DocumentStore over. 
+ std::string document_store_dir = document_store_dir_ + "_migrate"; + ASSERT_THAT( + filesystem_.DeleteDirectoryRecursively(document_store_dir.c_str()), true); + ASSERT_THAT( + filesystem_.CreateDirectoryRecursively(document_store_dir.c_str()), true); + + // Copy the testdata files into our DocumentStore directory + std::string document_store_without_portable_log; + if (IsAndroidX86()) { + document_store_without_portable_log = GetTestFilePath( + "icing/testdata/not_portable_log/" + "icing_search_engine_android_x86/document_dir"); + } else if (IsAndroidArm()) { + document_store_without_portable_log = GetTestFilePath( + "icing/testdata/not_portable_log/" + "icing_search_engine_android_arm/document_dir"); + } else if (IsIosPlatform()) { + document_store_without_portable_log = GetTestFilePath( + "icing/testdata/not_portable_log/" + "icing_search_engine_ios/document_dir"); + } else { + document_store_without_portable_log = GetTestFilePath( + "icing/testdata/not_portable_log/" + "icing_search_engine_linux/document_dir"); + } + + ASSERT_TRUE(filesystem_.CopyDirectory( + document_store_without_portable_log.c_str(), document_store_dir.c_str(), + /*recursive=*/true)); + + // Initialize the DocumentStore over our copied files. + InitializeStatsProto initialize_stats; + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, document_store_dir, &fake_clock_, + schema_store.get(), + /*force_recovery_and_revalidate_documents=*/false, + &initialize_stats)); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + + // These are the documents that are stored in the testdata files. Do not + // change unless you're also updating the testdata files. 
+ DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "foo") + .AddStringProperty("body", "bar") + .Build(); + + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace1", "uri2") + .SetSchema("email") + .SetCreationTimestampMs(20) + .SetScore(321) + .AddStringProperty("body", "baz bat") + .Build(); + + DocumentProto document3 = DocumentBuilder() + .SetKey("namespace2", "uri1") + .SetSchema("email") + .SetCreationTimestampMs(30) + .SetScore(123) + .AddStringProperty("subject", "phoo") + .Build(); + + // Check that we didn't lose anything. A migration also doesn't technically + // count as a recovery. + EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE)); + EXPECT_FALSE(initialize_stats.has_document_store_recovery_cause()); + + // Document 1 and 3 were put normally, and document 2 was deleted in our + // testdata files. + // + // Check by namespace, uri + EXPECT_THAT(document_store->Get(document1.namespace_(), document1.uri()), + IsOkAndHolds(EqualsProto(document1))); + EXPECT_THAT(document_store->Get(document2.namespace_(), document2.uri()), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + EXPECT_THAT(document_store->Get(document3.namespace_(), document3.uri()), + IsOkAndHolds(EqualsProto(document3))); + + // Check by document_id + EXPECT_THAT(document_store->Get(/*document_id=*/0), + IsOkAndHolds(EqualsProto(document1))); + EXPECT_THAT(document_store->Get(/*document_id=*/1), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + EXPECT_THAT(document_store->Get(/*document_id=*/2), + IsOkAndHolds(EqualsProto(document3))); } +#endif // DISABLE_BACKWARDS_COMPAT_TEST } // namespace diff --git a/icing/store/usage-store.cc b/icing/store/usage-store.cc index 54896dc..546067d 100644 --- a/icing/store/usage-store.cc +++ b/icing/store/usage-store.cc @@ -74,6 +74,9 @@ libtextclassifier3::Status UsageStore::AddUsageReport(const UsageReport& 
report, "Document id %d is invalid.", document_id)); } + // We don't need a copy here because we'll set the value at the same index. + // This won't unintentionally grow the underlying file since we already have + // enough space for the current index. auto usage_scores_or = usage_score_cache_->Get(document_id); // OutOfRange means that the mapper hasn't seen this document id before, it's @@ -159,7 +162,7 @@ UsageStore::GetUsageScores(DocumentId document_id) { "Document id %d is invalid.", document_id)); } - auto usage_scores_or = usage_score_cache_->Get(document_id); + auto usage_scores_or = usage_score_cache_->GetCopy(document_id); if (absl_ports::IsOutOfRange(usage_scores_or.status())) { // No usage scores found. Return the default scores. return UsageScores(); @@ -168,7 +171,7 @@ UsageStore::GetUsageScores(DocumentId document_id) { return usage_scores_or.status(); } - return *std::move(usage_scores_or).ValueOrDie(); + return std::move(usage_scores_or).ValueOrDie(); } libtextclassifier3::Status UsageStore::SetUsageScores( @@ -193,10 +196,10 @@ libtextclassifier3::Status UsageStore::CloneUsageScores( "to_document_id %d is invalid.", to_document_id)); } - auto usage_scores_or = usage_score_cache_->Get(from_document_id); + auto usage_scores_or = usage_score_cache_->GetCopy(from_document_id); if (usage_scores_or.ok()) { return usage_score_cache_->Set(to_document_id, - *std::move(usage_scores_or).ValueOrDie()); + std::move(usage_scores_or).ValueOrDie()); } else if (absl_ports::IsOutOfRange(usage_scores_or.status())) { // No usage scores found. Set default scores to to_document_id. 
return usage_score_cache_->Set(to_document_id, UsageScores()); @@ -218,6 +221,10 @@ libtextclassifier3::StatusOr<int64_t> UsageStore::GetElementsFileSize() const { return usage_score_cache_->GetElementsFileSize(); } +libtextclassifier3::StatusOr<int64_t> UsageStore::GetDiskUsage() const { + return usage_score_cache_->GetDiskUsage(); +} + libtextclassifier3::Status UsageStore::TruncateTo(DocumentId num_documents) { if (num_documents >= usage_score_cache_->num_elements()) { // No need to truncate diff --git a/icing/store/usage-store.h b/icing/store/usage-store.h index b7de970..fd77df4 100644 --- a/icing/store/usage-store.h +++ b/icing/store/usage-store.h @@ -157,6 +157,14 @@ class UsageStore { // INTERNAL_ERROR on IO error libtextclassifier3::StatusOr<int64_t> GetElementsFileSize() const; + // Calculates and returns the disk usage in bytes. Rounds up to the nearest + // block size. + // + // Returns: + // Disk usage on success + // INTERNAL_ERROR on IO error + libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const; + // Resizes the storage so that only the usage scores of and before // last_document_id are stored. // diff --git a/icing/store/usage-store_test.cc b/icing/store/usage-store_test.cc index 220c226..b2dbe4b 100644 --- a/icing/store/usage-store_test.cc +++ b/icing/store/usage-store_test.cc @@ -577,6 +577,41 @@ TEST_F(UsageStoreTest, GetElementsFileSize) { IsOkAndHolds(Gt(empty_file_size))); } +TEST_F(UsageStoreTest, GetDiskUsageEmpty) { + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store, + UsageStore::Create(&filesystem_, test_dir_)); + + // There's some internal metadata, so our disk usage will round up to 1 block. 
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t empty_disk_usage, + usage_store->GetDiskUsage()); + EXPECT_THAT(empty_disk_usage, Gt(0)); +} + +TEST_F(UsageStoreTest, GetDiskUsageNonEmpty) { + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store, + UsageStore::Create(&filesystem_, test_dir_)); + + // There's some internal metadata, so our disk usage will round up to 1 block. + ICING_ASSERT_OK_AND_ASSIGN(int64_t empty_disk_usage, + usage_store->GetDiskUsage()); + + // Since our GetDiskUsage can only get sizes in increments of block_size, we + // need to insert enough usage reports so the disk usage will increase by at + // least 1 block size. The number 200 is a bit arbitrary, gotten from manually + // testing. + UsageReport usage_report = CreateUsageReport( + "namespace", "uri", /*timestamp_ms=*/1000, UsageReport::USAGE_TYPE1); + for (int i = 0; i < 200; ++i) { + usage_store->AddUsageReport(usage_report, /*document_id=*/i); + } + + // We need to persist since iOS won't see the new disk allocations until after + // everything gets written. 
+ usage_store->PersistToDisk(); + + EXPECT_THAT(usage_store->GetDiskUsage(), IsOkAndHolds(Gt(empty_disk_usage))); +} + } // namespace } // namespace lib diff --git a/icing/testing/common-matchers.h b/icing/testing/common-matchers.h index b7f54ba..f83fe0a 100644 --- a/icing/testing/common-matchers.h +++ b/icing/testing/common-matchers.h @@ -25,7 +25,6 @@ #include "icing/absl_ports/str_join.h" #include "icing/index/hit/doc-hit-info.h" #include "icing/legacy/core/icing-string-util.h" -#include "icing/proto/search.proto.h" #include "icing/proto/search.pb.h" #include "icing/schema/schema-store.h" #include "icing/schema/section.h" @@ -122,7 +121,6 @@ MATCHER_P(EqualsSetSchemaResult, expected, "") { const SchemaStore::SetSchemaResult& actual = arg; if (actual.success == expected.success && - actual.index_incompatible == expected.index_incompatible && actual.old_schema_type_ids_changed == expected.old_schema_type_ids_changed && actual.schema_types_deleted_by_name == @@ -132,7 +130,12 @@ MATCHER_P(EqualsSetSchemaResult, expected, "") { actual.schema_types_incompatible_by_name == expected.schema_types_incompatible_by_name && actual.schema_types_incompatible_by_id == - expected.schema_types_incompatible_by_id) { + expected.schema_types_incompatible_by_id && + actual.schema_types_new_by_name == expected.schema_types_new_by_name && + actual.schema_types_changed_fully_compatible_by_name == + expected.schema_types_changed_fully_compatible_by_name && + actual.schema_types_index_incompatible_by_name == + expected.schema_types_index_incompatible_by_name) { return true; } @@ -192,37 +195,82 @@ MATCHER_P(EqualsSetSchemaResult, expected, "") { absl_ports::NumberFormatter()), "]"); + // Format schema_types_new_by_name + std::string actual_schema_types_new_by_name = absl_ports::StrCat( + "[", absl_ports::StrJoin(actual.schema_types_new_by_name, ","), "]"); + + std::string expected_schema_types_new_by_name = absl_ports::StrCat( + "[", absl_ports::StrJoin(expected.schema_types_new_by_name, 
","), "]"); + + // Format schema_types_changed_fully_compatible_by_name + std::string actual_schema_types_changed_fully_compatible_by_name = + absl_ports::StrCat( + "[", + absl_ports::StrJoin( + actual.schema_types_changed_fully_compatible_by_name, ","), + "]"); + + std::string expected_schema_types_changed_fully_compatible_by_name = + absl_ports::StrCat( + "[", + absl_ports::StrJoin( + expected.schema_types_changed_fully_compatible_by_name, ","), + "]"); + + // Format schema_types_deleted_by_id + std::string actual_schema_types_index_incompatible_by_name = + absl_ports::StrCat( + "[", + absl_ports::StrJoin(actual.schema_types_index_incompatible_by_name, + ","), + "]"); + + std::string expected_schema_types_index_incompatible_by_name = + absl_ports::StrCat( + "[", + absl_ports::StrJoin(expected.schema_types_index_incompatible_by_name, + ","), + "]"); + *result_listener << IcingStringUtil::StringPrintf( "\nExpected {\n" "\tsuccess=%d,\n" - "\tindex_incompatible=%d,\n" "\told_schema_type_ids_changed=%s,\n" "\tschema_types_deleted_by_name=%s,\n" "\tschema_types_deleted_by_id=%s,\n" "\tschema_types_incompatible_by_name=%s,\n" "\tschema_types_incompatible_by_id=%s\n" + "\tschema_types_new_by_name=%s,\n" + "\tschema_types_index_incompatible_by_name=%s,\n" + "\tschema_types_changed_fully_compatible_by_name=%s\n" "}\n" "Actual {\n" "\tsuccess=%d,\n" - "\tindex_incompatible=%d,\n" "\told_schema_type_ids_changed=%s,\n" "\tschema_types_deleted_by_name=%s,\n" "\tschema_types_deleted_by_id=%s,\n" "\tschema_types_incompatible_by_name=%s,\n" "\tschema_types_incompatible_by_id=%s\n" + "\tschema_types_new_by_name=%s,\n" + "\tschema_types_index_incompatible_by_name=%s,\n" + "\tschema_types_changed_fully_compatible_by_name=%s\n" "}\n", - expected.success, expected.index_incompatible, - expected_old_schema_type_ids_changed.c_str(), + expected.success, expected_old_schema_type_ids_changed.c_str(), expected_schema_types_deleted_by_name.c_str(), 
expected_schema_types_deleted_by_id.c_str(), expected_schema_types_incompatible_by_name.c_str(), - expected_schema_types_incompatible_by_id.c_str(), actual.success, - actual.index_incompatible, actual_old_schema_type_ids_changed.c_str(), + expected_schema_types_incompatible_by_id.c_str(), + expected_schema_types_new_by_name.c_str(), + expected_schema_types_changed_fully_compatible_by_name.c_str(), + expected_schema_types_index_incompatible_by_name.c_str(), actual.success, + actual_old_schema_type_ids_changed.c_str(), actual_schema_types_deleted_by_name.c_str(), actual_schema_types_deleted_by_id.c_str(), actual_schema_types_incompatible_by_name.c_str(), - actual_schema_types_incompatible_by_id.c_str()); - + actual_schema_types_incompatible_by_id.c_str(), + actual_schema_types_new_by_name.c_str(), + actual_schema_types_changed_fully_compatible_by_name.c_str(), + actual_schema_types_index_incompatible_by_name.c_str()); return false; } @@ -267,7 +315,7 @@ std::string StatusCodeToString(libtextclassifier3::StatusCode code) { } } -string ProtoStatusCodeToString(StatusProto::Code code) { +std::string ProtoStatusCodeToString(StatusProto::Code code) { switch (code) { case StatusProto::OK: return "OK"; @@ -376,14 +424,22 @@ MATCHER_P2(ProtoStatusIs, status_code, error_matcher, "") { return ExplainMatchResult(error_matcher, arg.message(), result_listener); } -MATCHER_P(EqualsSearchResultIgnoreStats, expected, "") { +MATCHER_P(EqualsSearchResultIgnoreStatsAndScores, expected, "") { SearchResultProto actual_copy = arg; actual_copy.clear_query_stats(); actual_copy.clear_debug_info(); + for (SearchResultProto::ResultProto& result : + *actual_copy.mutable_results()) { + result.clear_score(); + } SearchResultProto expected_copy = expected; expected_copy.clear_query_stats(); expected_copy.clear_debug_info(); + for (SearchResultProto::ResultProto& result : + *expected_copy.mutable_results()) { + result.clear_score(); + } return ExplainMatchResult(testing::EqualsProto(expected_copy), 
actual_copy, result_listener); } diff --git a/icing/testing/jni-test-helpers.h b/icing/testing/jni-test-helpers.h index adc469a..67a98c3 100644 --- a/icing/testing/jni-test-helpers.h +++ b/icing/testing/jni-test-helpers.h @@ -15,6 +15,8 @@ #ifndef ICING_TESTING_JNI_TEST_HELPERS_H_ #define ICING_TESTING_JNI_TEST_HELPERS_H_ +#include <memory> + #include "icing/jni/jni-cache.h" #ifdef ICING_REVERSE_JNI_SEGMENTATION diff --git a/icing/testing/schema-generator.h b/icing/testing/schema-generator.h index 78430cc..12133f5 100644 --- a/icing/testing/schema-generator.h +++ b/icing/testing/schema-generator.h @@ -18,7 +18,6 @@ #include <random> #include <string> -#include "icing/proto/schema.proto.h" #include "icing/proto/schema.pb.h" namespace icing { diff --git a/icing/testing/snippet-helpers.cc b/icing/testing/snippet-helpers.cc index fde0004..7a71987 100644 --- a/icing/testing/snippet-helpers.cc +++ b/icing/testing/snippet-helpers.cc @@ -17,28 +17,37 @@ #include <algorithm> #include <string_view> +#include "icing/absl_ports/str_join.h" #include "icing/proto/search.pb.h" +#include "icing/schema/section-manager.h" namespace icing { namespace lib { -const SnippetMatchProto* GetSnippetMatch(const SnippetProto& snippet_proto, - const std::string& property_name, - int snippet_index) { - auto iterator = std::find_if( - snippet_proto.entries().begin(), snippet_proto.entries().end(), - [&property_name](const SnippetProto::EntryProto& entry) { - return entry.property_name() == property_name; - }); - if (iterator == snippet_proto.entries().end() || - iterator->snippet_matches_size() <= snippet_index) { - return nullptr; +namespace { + +// Returns the property index and the property name with the index removed. 
+// Examples: +// GetPropertyIndex("foo") will return ["foo", 0] +// GetPropertyIndex("foo[5]") will return ["foo", 5] +std::pair<std::string_view, int> GetPropertyIndex(std::string_view property) { + size_t l_bracket = property.find(kLBracket); + if (l_bracket == std::string_view::npos || l_bracket >= property.length()) { + return {property, 0}; + } + size_t r_bracket = property.find(kRBracket, l_bracket); + if (r_bracket == std::string_view::npos || r_bracket - l_bracket < 2) { + return {property, 0}; } - return &iterator->snippet_matches(snippet_index); + std::string index_string = + std::string(property.substr(l_bracket + 1, r_bracket - l_bracket - 1)); + return {property.substr(0, l_bracket), std::stoi(index_string)}; } +} // namespace + const PropertyProto* GetProperty(const DocumentProto& document, - const std::string& property_name) { + std::string_view property_name) { const PropertyProto* property = nullptr; for (const PropertyProto& prop : document.properties()) { if (prop.name() == property_name) { @@ -48,32 +57,65 @@ const PropertyProto* GetProperty(const DocumentProto& document, return property; } -std::string GetWindow(const DocumentProto& document, - const SnippetProto& snippet_proto, - const std::string& property_name, int snippet_index) { - const SnippetMatchProto* match = - GetSnippetMatch(snippet_proto, property_name, snippet_index); - const PropertyProto* property = GetProperty(document, property_name); - if (match == nullptr || property == nullptr) { - return ""; +std::vector<std::string_view> GetWindows( + std::string_view content, const SnippetProto::EntryProto& snippet_proto) { + std::vector<std::string_view> windows; + for (const SnippetMatchProto& match : snippet_proto.snippet_matches()) { + windows.push_back(content.substr(match.window_byte_position(), + match.window_byte_length())); + } + return windows; +} + +std::vector<std::string_view> GetMatches( + std::string_view content, const SnippetProto::EntryProto& snippet_proto) { + 
std::vector<std::string_view> matches; + for (const SnippetMatchProto& match : snippet_proto.snippet_matches()) { + matches.push_back(content.substr(match.exact_match_byte_position(), + match.exact_match_byte_length())); } - std::string_view value = property->string_values(match->values_index()); - return std::string( - value.substr(match->window_position(), match->window_bytes())); + return matches; } -std::string GetMatch(const DocumentProto& document, - const SnippetProto& snippet_proto, - const std::string& property_name, int snippet_index) { - const SnippetMatchProto* match = - GetSnippetMatch(snippet_proto, property_name, snippet_index); - const PropertyProto* property = GetProperty(document, property_name); - if (match == nullptr || property == nullptr) { - return ""; +std::vector<std::string_view> GetSubMatches( + std::string_view content, const SnippetProto::EntryProto& snippet_proto) { + std::vector<std::string_view> matches; + for (const SnippetMatchProto& match : snippet_proto.snippet_matches()) { + matches.push_back(content.substr(match.exact_match_byte_position(), + match.submatch_byte_length())); + } + return matches; +} + +std::string_view GetString(const DocumentProto* document, + std::string_view property_path) { + std::vector<std::string_view> properties = + absl_ports::StrSplit(property_path, kPropertySeparator); + for (int i = 0; i < properties.size(); ++i) { + std::string_view property = properties.at(i); + int property_index; + std::tie(property, property_index) = GetPropertyIndex(property); + const PropertyProto* prop = GetProperty(*document, property); + if (prop == nullptr) { + // requested property doesn't exist in the document. Return empty string. + return ""; + } + if (i == properties.size() - 1) { + // The last property. Get the string_value + if (prop->string_values_size() - 1 < property_index) { + // The requested string doesn't exist. Return empty string. 
+ return ""; + } + return prop->string_values(property_index); + } else if (prop->document_values_size() - 1 < property_index) { + // The requested subproperty doesn't exist. return an empty string. + return ""; + } else { + // Go to the next subproperty. + document = &prop->document_values(property_index); + } } - std::string_view value = property->string_values(match->values_index()); - return std::string( - value.substr(match->exact_match_position(), match->exact_match_bytes())); + return ""; } } // namespace lib diff --git a/icing/testing/snippet-helpers.h b/icing/testing/snippet-helpers.h index 124e421..73b2ce2 100644 --- a/icing/testing/snippet-helpers.h +++ b/icing/testing/snippet-helpers.h @@ -23,36 +23,36 @@ namespace icing { namespace lib { -// Retrieve pointer to the snippet_index'th SnippetMatchProto within the -// EntryProto identified by property_name within snippet_proto. -// Returns nullptr -// - if there is no EntryProto within snippet_proto corresponding to -// property_name. -// - if there is no SnippetMatchProto at snippet_index within the EntryProto -const SnippetMatchProto* GetSnippetMatch(const SnippetProto& snippet_proto, - const std::string& property_name, - int snippet_index); - // Retrieve pointer to the PropertyProto identified by property_name. // Returns nullptr if no such property exists. +// +// NOTE: This function does not handle nesting or indexes. "foo.bar" will return +// a nullptr even if document contains a property called "foo" that contains a +// subproperty called "bar". const PropertyProto* GetProperty(const DocumentProto& document, const std::string& property_name); -// Retrieves the window defined by the SnippetMatchProto returned by -// GetSnippetMatch(snippet_proto, property_name, snippet_index) for the property -// returned by GetProperty(document, property_name). -// Returns "" if no such property, snippet or window exists. 
-std::string GetWindow(const DocumentProto& document, - const SnippetProto& snippet_proto, - const std::string& property_name, int snippet_index); - -// Retrieves the match defined by the SnippetMatchProto returned by -// GetSnippetMatch(snippet_proto, property_name, snippet_index) for the property -// returned by GetProperty(document, property_name). -// Returns "" if no such property or snippet exists. -std::string GetMatch(const DocumentProto& document, - const SnippetProto& snippet_proto, - const std::string& property_name, int snippet_index); +// Retrieves all windows defined by the snippet_proto for the content. +std::vector<std::string_view> GetWindows( + std::string_view content, const SnippetProto::EntryProto& snippet_proto); + +// Retrieves all matches defined by the snippet_proto for the content. +std::vector<std::string_view> GetMatches( + std::string_view content, const SnippetProto::EntryProto& snippet_proto); + +// Retrieves all submatches defined by the snippet_proto for the content. +std::vector<std::string_view> GetSubMatches( + std::string_view content, const SnippetProto::EntryProto& snippet_proto); + +// Retrieves the string value held in the document corresponding to the +// property_path. +// Example: +// - GetString(doc, "foo") will retrieve the first string value in the +// property "foo" in document or an empty string if it doesn't exist. +// - GetString(doc, "foo[1].bar[2]") will retrieve the third string value in +// the subproperty "bar" of the second document value in the property "foo". 
+std::string_view GetString(const DocumentProto* document, + std::string_view property_path); } // namespace lib } // namespace icing diff --git a/icing/tokenization/icu/icu-language-segmenter.cc b/icing/tokenization/icu/icu-language-segmenter.cc index 74d22cd..cb31441 100644 --- a/icing/tokenization/icu/icu-language-segmenter.cc +++ b/icing/tokenization/icu/icu-language-segmenter.cc @@ -25,6 +25,7 @@ #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/absl_ports/canonical_errors.h" #include "icing/legacy/core/icing-string-util.h" +#include "icing/util/character-iterator.h" #include "icing/util/i18n-utils.h" #include "icing/util/status-macros.h" #include "unicode/ubrk.h" @@ -101,59 +102,149 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator { return text_.substr(term_start_index_, term_length); } - libtextclassifier3::StatusOr<int32_t> ResetToTermStartingAfter( + libtextclassifier3::StatusOr<CharacterIterator> CalculateTermStart() + override { + if (!offset_iterator_.MoveToUtf8(term_start_index_)) { + return absl_ports::AbortedError( + "Could not retrieve valid utf8 character!"); + } + return offset_iterator_; + } + + libtextclassifier3::StatusOr<CharacterIterator> CalculateTermEndExclusive() + override { + if (!offset_iterator_.MoveToUtf8(term_end_index_exclusive_)) { + return absl_ports::AbortedError( + "Could not retrieve valid utf8 character!"); + } + return offset_iterator_; + } + + libtextclassifier3::StatusOr<int32_t> ResetToTermStartingAfterUtf32( int32_t offset) override { - if (offset < 0 || offset >= text_.length()) { + if (offset < 0) { + // Very simple. The first term start after a negative offset is the first + // term. So just reset to start and Advance. + return ResetToStartUtf32(); + } + + // 1. Find the unicode character that contains the byte at offset. + if (!offset_iterator_.MoveToUtf32(offset)) { + // An error occurred. 
Mark as DONE + if (offset_iterator_.utf8_index() != text_.length()) { + // We returned false for some reason other than hitting the end. This is + // a real error. Just return. + MarkAsDone(); + return absl_ports::AbortedError( + "Could not retrieve valid utf8 character!"); + } + } + if (offset_iterator_.utf8_index() == text_.length()) { return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( - "Illegal offset provided! Offset %d is not within bounds of string " - "of length %zu", - offset, text_.length())); + "Illegal offset provided! Offset utf-32:%d, utf-8:%d is not within " + "bounds of string of length %zu", + offset_iterator_.utf32_index(), offset_iterator_.utf8_index(), + text_.length())); } - term_start_index_ = ubrk_following(break_iterator_, offset); - if (term_start_index_ == UBRK_DONE) { + + // 2. We've got the unicode character containing byte offset. Now, we need + // to point to the segment that starts after this character. + int following_utf8_index = + ubrk_following(break_iterator_, offset_iterator_.utf8_index()); + if (following_utf8_index == UBRK_DONE) { MarkAsDone(); return absl_ports::NotFoundError(IcingStringUtil::StringPrintf( "No segments begin after provided offset %d.", offset)); } - term_end_index_exclusive_ = ubrk_next(break_iterator_); - if (term_end_index_exclusive_ == UBRK_DONE) { - MarkAsDone(); + term_end_index_exclusive_ = following_utf8_index; + + // 3. The term_end_exclusive_ points to the start of the term that we want + // to return. We need to Advance so that term_start_ will now point to this + // term. 
+ if (!Advance()) { return absl_ports::NotFoundError(IcingStringUtil::StringPrintf( "No segments begin after provided offset %d.", offset)); } - if (!IsValidSegment()) { - if (!Advance()) { - return absl_ports::NotFoundError(IcingStringUtil::StringPrintf( - "No segments begin after provided offset %d.", offset)); - } + if (!offset_iterator_.MoveToUtf8(term_start_index_)) { + return absl_ports::AbortedError( + "Could not retrieve valid utf8 character!"); } - return term_start_index_; + return offset_iterator_.utf32_index(); } - libtextclassifier3::StatusOr<int32_t> ResetToTermEndingBefore( + libtextclassifier3::StatusOr<int32_t> ResetToTermEndingBeforeUtf32( int32_t offset) override { - if (offset < 0 || offset >= text_.length()) { + if (offset < 0) { return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( "Illegal offset provided! Offset %d is not within bounds of string " "of length %zu", offset, text_.length())); } - ICING_RETURN_IF_ERROR(ResetToTermStartingBefore(offset)); - if (term_end_index_exclusive_ > offset) { - // This term ends after offset. So we need to get the term just before - // this one. - ICING_RETURN_IF_ERROR(ResetToTermStartingBefore(term_start_index_)); + + if (!offset_iterator_.MoveToUtf32(offset)) { + // An error occurred. Mark as DONE + if (offset_iterator_.utf8_index() != text_.length()) { + // We returned false for some reason other than hitting the end. This is + // a real error. Just return. + MarkAsDone(); + return absl_ports::AbortedError( + "Could not retrieve valid utf8 character!"); + } + // If it returned false because we hit the end. Then that's fine. We'll + // just treat it as if the request was for the end. + } + + // 2. We've got the unicode character containing byte offset. Now, we need + // to point to the segment that ends before this character. + int starting_utf8_index = + ubrk_preceding(break_iterator_, offset_iterator_.utf8_index()); + if (starting_utf8_index == UBRK_DONE) { + // Rewind the end indices. 
+ MarkAsDone(); + return absl_ports::NotFoundError(IcingStringUtil::StringPrintf( + "No segments end before provided offset %d.", offset)); } - return term_start_index_; + term_start_index_ = starting_utf8_index; + + // 3. We've correctly set the start index and the iterator currently points + // to that position. Now we need to find the correct end position and + // advance the iterator to that position. + int ending_utf8_index = ubrk_next(break_iterator_); + if (ending_utf8_index == UBRK_DONE) { + // This shouldn't ever happen. + MarkAsDone(); + return absl_ports::AbortedError(IcingStringUtil::StringPrintf( + "No segments end before provided offset %d.", offset)); + } + term_end_index_exclusive_ = ending_utf8_index; + + // 4. The start and end indices point to a segment, but we need to ensure + // that this segment is 1) valid and 2) ends before offset. Otherwise, we'll + // need a segment prior to this one. + CharacterIterator term_start_iterator = offset_iterator_; + if (!term_start_iterator.MoveToUtf8(term_start_index_)) { + return absl_ports::AbortedError( + "Could not retrieve valid utf8 character!"); + } + if (term_end_index_exclusive_ > offset_iterator_.utf8_index() || + !IsValidSegment()) { + return ResetToTermEndingBeforeUtf32(term_start_iterator.utf32_index()); + } + return term_start_iterator.utf32_index(); } - libtextclassifier3::StatusOr<int32_t> ResetToStart() override { + libtextclassifier3::StatusOr<int32_t> ResetToStartUtf32() override { term_start_index_ = 0; term_end_index_exclusive_ = 0; if (!Advance()) { - return absl_ports::NotFoundError(""); + return absl_ports::NotFoundError( + "Unable to find any valid terms in text."); + } + if (!offset_iterator_.MoveToUtf8(term_start_index_)) { + return absl_ports::AbortedError( + "Could not retrieve valid utf8 character!"); } - return term_start_index_; + return offset_iterator_.utf32_index(); } private: @@ -163,6 +254,7 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator { 
text_(text), locale_(locale), u_text_(UTEXT_INITIALIZER), + offset_iterator_(text), term_start_index_(0), term_end_index_exclusive_(0) {} @@ -232,6 +324,15 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator { // utext_close() must be called after using. UText u_text_; + // Offset iterator. This iterator is not guaranteed to point to any particular + // character, but is guaranteed to point to a valid UTF character sequence. + // + // This iterator is used to save some amount of linear traversal when seeking + // to a specific UTF-32 offset. Each function that uses it could just create + // a CharacterIterator starting at the beginning of the text and traverse + // forward from there. + CharacterIterator offset_iterator_; + // The start and end indices are used to track the positions of current // term. int term_start_index_; diff --git a/icing/tokenization/icu/icu-language-segmenter_test.cc b/icing/tokenization/icu/icu-language-segmenter_test.cc index c0d6d43..01eb7d8 100644 --- a/icing/tokenization/icu/icu-language-segmenter_test.cc +++ b/icing/tokenization/icu/icu-language-segmenter_test.cc @@ -12,24 +12,39 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include <memory> +#include <string_view> + +#include "icing/jni/jni-cache.h" +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include "icing/absl_ports/str_cat.h" #include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/testing/common-matchers.h" #include "icing/testing/icu-i18n-test-utils.h" +#include "icing/testing/jni-test-helpers.h" #include "icing/testing/test-data.h" #include "icing/tokenization/language-segmenter-factory.h" #include "icing/tokenization/language-segmenter.h" +#include "icing/util/character-iterator.h" #include "unicode/uloc.h" namespace icing { namespace lib { -namespace { + using ::testing::ElementsAre; using ::testing::Eq; using ::testing::IsEmpty; +namespace { + +language_segmenter_factory::SegmenterOptions GetSegmenterOptions( + const std::string& locale, const JniCache* jni_cache) { + return language_segmenter_factory::SegmenterOptions(locale, jni_cache); +} + // Returns a vector containing all terms retrieved by Advancing on the iterator. std::vector<std::string_view> GetAllTermsAdvance( LanguageSegmenter::Iterator* itr) { @@ -40,70 +55,61 @@ std::vector<std::string_view> GetAllTermsAdvance( return terms; } -// Returns a vector containing all terms retrieved by calling -// ResetToStart/ResetAfter with the current position to simulate Advancing on -// the iterator. -std::vector<std::string_view> GetAllTermsResetAfter( +// Returns a vector containing all terms retrieved by calling ResetAfter with +// the UTF-32 position of the current term start to simulate Advancing on the +// iterator. 
+std::vector<std::string_view> GetAllTermsResetAfterUtf32( LanguageSegmenter::Iterator* itr) { std::vector<std::string_view> terms; - if (!itr->ResetToStart().ok()) { - return terms; - } - terms.push_back(itr->GetTerm()); - const char* text_begin = itr->GetTerm().data(); - // Calling ResetToTermStartingAfter with the current position should get the - // very next term in the sequence. - for (int current_pos = 0; itr->ResetToTermStartingAfter(current_pos).ok(); - current_pos = itr->GetTerm().data() - text_begin) { + // Calling ResetToTermStartingAfterUtf32 with -1 should get the first term in + // the sequence. + bool is_ok = itr->ResetToTermStartingAfterUtf32(-1).ok(); + while (is_ok) { terms.push_back(itr->GetTerm()); + // Calling ResetToTermStartingAfterUtf32 with the current position should + // get the very next term in the sequence. + CharacterIterator char_itr = itr->CalculateTermStart().ValueOrDie(); + is_ok = itr->ResetToTermStartingAfterUtf32(char_itr.utf32_index()).ok(); } return terms; } // Returns a vector containing all terms retrieved by alternating calls to -// Advance and calls to ResetAfter with the current position to simulate -// Advancing. -std::vector<std::string_view> GetAllTermsAdvanceAndResetAfter( +// Advance and calls to ResetAfter with the UTF-32 position of the current term +// start to simulate Advancing. +std::vector<std::string_view> GetAllTermsAdvanceAndResetAfterUtf32( LanguageSegmenter::Iterator* itr) { - const char* text_begin = itr->GetTerm().data(); std::vector<std::string_view> terms; - - bool is_ok = true; - int current_pos = 0; + bool is_ok = itr->Advance(); while (is_ok) { + terms.push_back(itr->GetTerm()); // Alternate between using Advance and ResetToTermAfter. if (terms.size() % 2 == 0) { is_ok = itr->Advance(); } else { - // Calling ResetToTermStartingAfter with the current position should get - // the very next term in the sequence. 
- current_pos = itr->GetTerm().data() - text_begin; - is_ok = itr->ResetToTermStartingAfter(current_pos).ok(); - } - if (is_ok) { - terms.push_back(itr->GetTerm()); + // Calling ResetToTermStartingAfterUtf32 with the current position should + // get the very next term in the sequence. + CharacterIterator char_itr = itr->CalculateTermStart().ValueOrDie(); + is_ok = itr->ResetToTermStartingAfterUtf32(char_itr.utf32_index()).ok(); } } return terms; } // Returns a vector containing all terms retrieved by calling ResetBefore with -// the current position, starting at the end of the text. This vector should be -// in reverse order of GetAllTerms and missing the last term. -std::vector<std::string_view> GetAllTermsResetBefore( +// the UTF-32 position of the current term start, starting at the end of the +// text. This vector should be in reverse order of GetAllTerms and missing the +// last term. +std::vector<std::string_view> GetAllTermsResetBeforeUtf32( LanguageSegmenter::Iterator* itr) { - const char* text_begin = itr->GetTerm().data(); - int last_pos = 0; - while (itr->Advance()) { - last_pos = itr->GetTerm().data() - text_begin; - } std::vector<std::string_view> terms; - // Calling ResetToTermEndingBefore with the current position should get the - // previous term in the sequence. - for (int current_pos = last_pos; - itr->ResetToTermEndingBefore(current_pos).ok(); - current_pos = itr->GetTerm().data() - text_begin) { + bool is_ok = itr->ResetToTermEndingBeforeUtf32(1000).ok(); + while (is_ok) { terms.push_back(itr->GetTerm()); + // Calling ResetToTermEndingBeforeUtf32 with the current position should get + // the previous term in the sequence. 
+ CharacterIterator char_itr = itr->CalculateTermStart().ValueOrDie(); + is_ok = itr->ResetToTermEndingBeforeUtf32(char_itr.utf32_index()).ok(); } return terms; } @@ -119,27 +125,34 @@ class IcuLanguageSegmenterAllLocalesTest } static std::string GetLocale() { return GetParam(); } - static language_segmenter_factory::SegmenterOptions GetOptions() { - return language_segmenter_factory::SegmenterOptions(GetLocale()); - } + + std::unique_ptr<const JniCache> jni_cache_ = GetTestJniCache(); }; +} // namespace + TEST_P(IcuLanguageSegmenterAllLocalesTest, EmptyText) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); EXPECT_THAT(language_segmenter->GetAllTerms(""), IsOkAndHolds(IsEmpty())); } TEST_P(IcuLanguageSegmenterAllLocalesTest, SimpleText) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); EXPECT_THAT(language_segmenter->GetAllTerms("Hello World"), IsOkAndHolds(ElementsAre("Hello", " ", "World"))); } TEST_P(IcuLanguageSegmenterAllLocalesTest, ASCII_Punctuation) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // ASCII punctuation marks are kept EXPECT_THAT( language_segmenter->GetAllTerms("Hello, World!!!"), @@ -153,8 +166,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ASCII_Punctuation) { } TEST_P(IcuLanguageSegmenterAllLocalesTest, ASCII_SpecialCharacter) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - 
language_segmenter_factory::Create(GetOptions())); + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // ASCII special characters are kept EXPECT_THAT(language_segmenter->GetAllTerms("Pay $1000"), IsOkAndHolds(ElementsAre("Pay", " ", "$", "1000"))); @@ -169,8 +184,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ASCII_SpecialCharacter) { } TEST_P(IcuLanguageSegmenterAllLocalesTest, Non_ASCII_Non_Alphabetic) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Full-width (non-ASCII) punctuation marks and special characters are left // out. EXPECT_THAT(language_segmenter->GetAllTerms("。?·Hello!×"), @@ -178,10 +195,12 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, Non_ASCII_Non_Alphabetic) { } TEST_P(IcuLanguageSegmenterAllLocalesTest, Acronym) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); - EXPECT_THAT(language_segmenter->GetAllTerms("U.S. 
Bank"), - IsOkAndHolds(ElementsAre("U.S", ".", " ", "Bank"))); + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); + EXPECT_THAT(language_segmenter->GetAllTerms("U.S.𡔖 Bank"), + IsOkAndHolds(ElementsAre("U.S", ".", "𡔖", " ", "Bank"))); EXPECT_THAT(language_segmenter->GetAllTerms("I.B.M."), IsOkAndHolds(ElementsAre("I.B.M", "."))); EXPECT_THAT(language_segmenter->GetAllTerms("I,B,M"), @@ -191,8 +210,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, Acronym) { } TEST_P(IcuLanguageSegmenterAllLocalesTest, WordConnector) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // According to unicode word break rules // WB6(https://unicode.org/reports/tr29/#WB6), // WB7(https://unicode.org/reports/tr29/#WB7), and a few others, some @@ -274,8 +295,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, WordConnector) { } TEST_P(IcuLanguageSegmenterAllLocalesTest, Apostrophes) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); EXPECT_THAT(language_segmenter->GetAllTerms("It's ok."), IsOkAndHolds(ElementsAre("It's", " ", "ok", "."))); EXPECT_THAT(language_segmenter->GetAllTerms("He'll be back."), @@ -295,8 +318,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, Apostrophes) { } TEST_P(IcuLanguageSegmenterAllLocalesTest, Parentheses) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), 
jni_cache_.get()))); EXPECT_THAT(language_segmenter->GetAllTerms("(Hello)"), IsOkAndHolds(ElementsAre("(", "Hello", ")"))); @@ -306,8 +331,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, Parentheses) { } TEST_P(IcuLanguageSegmenterAllLocalesTest, Quotes) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); EXPECT_THAT(language_segmenter->GetAllTerms("\"Hello\""), IsOkAndHolds(ElementsAre("\"", "Hello", "\""))); @@ -317,8 +344,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, Quotes) { } TEST_P(IcuLanguageSegmenterAllLocalesTest, Alphanumeric) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Alphanumeric terms are allowed EXPECT_THAT(language_segmenter->GetAllTerms("Se7en A4 3a"), @@ -326,8 +355,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, Alphanumeric) { } TEST_P(IcuLanguageSegmenterAllLocalesTest, Number) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Alphanumeric terms are allowed EXPECT_THAT( @@ -342,8 +373,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, Number) { } TEST_P(IcuLanguageSegmenterAllLocalesTest, ContinuousWhitespaces) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Multiple continuous whitespaces are treated 
as one. const int kNumSeparators = 256; std::string text_with_spaces = @@ -367,8 +400,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ContinuousWhitespaces) { } TEST_P(IcuLanguageSegmenterAllLocalesTest, CJKT) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // CJKT (Chinese, Japanese, Khmer, Thai) are the 4 main languages that don't // have whitespaces as word delimiter. @@ -389,15 +424,19 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, CJKT) { } TEST_P(IcuLanguageSegmenterAllLocalesTest, LatinLettersWithAccents) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); EXPECT_THAT(language_segmenter->GetAllTerms("āăąḃḅḇčćç"), IsOkAndHolds(ElementsAre("āăąḃḅḇčćç"))); } TEST_P(IcuLanguageSegmenterAllLocalesTest, WhitespaceSplitLanguages) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Turkish EXPECT_THAT(language_segmenter->GetAllTerms("merhaba dünya"), IsOkAndHolds(ElementsAre("merhaba", " ", "dünya"))); @@ -408,8 +447,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, WhitespaceSplitLanguages) { } TEST_P(IcuLanguageSegmenterAllLocalesTest, MixedLanguages) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); 
EXPECT_THAT(language_segmenter->GetAllTerms("How are you你好吗お元気ですか"), IsOkAndHolds(ElementsAre("How", " ", "are", " ", "you", "你好", "吗", "お", "元気", "です", "か"))); @@ -420,8 +461,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, MixedLanguages) { } TEST_P(IcuLanguageSegmenterAllLocalesTest, NotCopyStrings) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Validates that the input strings are not copied const std::string text = "Hello World"; const char* word1_address = text.c_str(); @@ -437,127 +480,141 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, NotCopyStrings) { EXPECT_THAT(word2_address, Eq(word2_result_address)); } -TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToStartWordConnector) { - ICING_ASSERT_OK_AND_ASSIGN(auto segmenter, - language_segmenter_factory::Create(GetOptions())); +TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToStartUtf32WordConnector) { + ICING_ASSERT_OK_AND_ASSIGN( + auto segmenter, language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "com:google:android is package"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, segmenter->Segment(kText)); - // String: "com:google:android is package" - // ^ ^^ ^^ - // Bytes: 0 18 19 21 22 - auto position_or = itr->ResetToStart(); + // String: "com:google:android is package" + // ^ ^^ ^^ + // UTF-8 idx: 0 18 19 21 22 + // UTF-32 idx: 0 18 19 21 22 + auto position_or = itr->ResetToStartUtf32(); EXPECT_THAT(position_or, IsOk()); ASSERT_THAT(itr->GetTerm(), Eq("com:google:android")); } -TEST_P(IcuLanguageSegmenterAllLocalesTest, NewIteratorResetToStart) { - ICING_ASSERT_OK_AND_ASSIGN(auto segmenter, - language_segmenter_factory::Create(GetOptions())); +TEST_P(IcuLanguageSegmenterAllLocalesTest, 
NewIteratorResetToStartUtf32) { + ICING_ASSERT_OK_AND_ASSIGN( + auto segmenter, language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are you你好吗お元気ですか"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, segmenter->Segment(kText)); - // String: "How are you你好吗お元気ですか" - // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ - // Bytes: 0 3 4 7 8 11 172023 29 35 - EXPECT_THAT(itr->ResetToStart(), IsOkAndHolds(Eq(0))); + // String: "How are you你好吗お元気ですか" + // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 3 4 7 8 11 172023 29 35 + // UTF-32 idx: 0 3 4 7 8 11 131415 17 19 + EXPECT_THAT(itr->ResetToStartUtf32(), IsOkAndHolds(Eq(0))); EXPECT_THAT(itr->GetTerm(), Eq("How")); } -TEST_P(IcuLanguageSegmenterAllLocalesTest, IteratorOneAdvanceResetToStart) { - ICING_ASSERT_OK_AND_ASSIGN(auto segmenter, - language_segmenter_factory::Create(GetOptions())); +TEST_P(IcuLanguageSegmenterAllLocalesTest, + IteratorOneAdvanceResetToStartUtf32) { + ICING_ASSERT_OK_AND_ASSIGN( + auto segmenter, language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are you你好吗お元気ですか"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, segmenter->Segment(kText)); - // String: "How are you你好吗お元気ですか" - // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ - // Bytes: 0 3 4 7 8 11 172023 29 35 + // String: "How are you你好吗お元気ですか" + // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 3 4 7 8 11 172023 29 35 + // UTF-32 idx: 0 3 4 7 8 11 131415 17 19 ASSERT_TRUE(itr->Advance()); // itr points to 'How' - EXPECT_THAT(itr->ResetToStart(), IsOkAndHolds(Eq(0))); + EXPECT_THAT(itr->ResetToStartUtf32(), IsOkAndHolds(Eq(0))); EXPECT_THAT(itr->GetTerm(), Eq("How")); } TEST_P(IcuLanguageSegmenterAllLocalesTest, - IteratorMultipleAdvancesResetToStart) { - ICING_ASSERT_OK_AND_ASSIGN(auto segmenter, - language_segmenter_factory::Create(GetOptions())); + IteratorMultipleAdvancesResetToStartUtf32) { + 
ICING_ASSERT_OK_AND_ASSIGN( + auto segmenter, language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are you你好吗お元気ですか"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, segmenter->Segment(kText)); - // String: "How are you你好吗お元気ですか" - // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ - // Bytes: 0 3 4 7 8 11 172023 29 35 + // String: "How are you你好吗お元気ですか" + // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 3 4 7 8 11 172023 29 35 + // UTF-32 idx: 0 3 4 7 8 11 131415 17 19 ASSERT_TRUE(itr->Advance()); ASSERT_TRUE(itr->Advance()); ASSERT_TRUE(itr->Advance()); ASSERT_TRUE(itr->Advance()); // itr points to ' ' - EXPECT_THAT(itr->ResetToStart(), IsOkAndHolds(Eq(0))); + EXPECT_THAT(itr->ResetToStartUtf32(), IsOkAndHolds(Eq(0))); EXPECT_THAT(itr->GetTerm(), Eq("How")); } -TEST_P(IcuLanguageSegmenterAllLocalesTest, IteratorDoneResetToStart) { - ICING_ASSERT_OK_AND_ASSIGN(auto segmenter, - language_segmenter_factory::Create(GetOptions())); +TEST_P(IcuLanguageSegmenterAllLocalesTest, IteratorDoneResetToStartUtf32) { + ICING_ASSERT_OK_AND_ASSIGN( + auto segmenter, language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are you你好吗お元気ですか"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, segmenter->Segment(kText)); - // String: "How are you你好吗お元気ですか" - // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ - // Bytes: 0 3 4 7 8 11 172023 29 35 + // String: "How are you你好吗お元気ですか" + // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 3 4 7 8 11 172023 29 35 + // UTF-32 idx: 0 3 4 7 8 11 131415 17 19 while (itr->Advance()) { // Do nothing. 
} - EXPECT_THAT(itr->ResetToStart(), IsOkAndHolds(Eq(0))); + EXPECT_THAT(itr->ResetToStartUtf32(), IsOkAndHolds(Eq(0))); EXPECT_THAT(itr->GetTerm(), Eq("How")); } -TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermAfterWordConnector) { - ICING_ASSERT_OK_AND_ASSIGN(auto segmenter, - language_segmenter_factory::Create(GetOptions())); +TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermAfterUtf32WordConnector) { + ICING_ASSERT_OK_AND_ASSIGN( + auto segmenter, language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "package com:google:android name"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, segmenter->Segment(kText)); - // String: "package com:google:android name" - // ^ ^^ ^^ - // Bytes: 0 7 8 26 27 - auto position_or = itr->ResetToTermStartingAfter(8); + // String: "package com:google:android name" + // ^ ^^ ^^ + // UTF-8 idx: 0 7 8 26 27 + // UTF-32 idx: 0 7 8 26 27 + auto position_or = itr->ResetToTermStartingAfterUtf32(8); EXPECT_THAT(position_or, IsOk()); EXPECT_THAT(position_or.ValueOrDie(), Eq(26)); ASSERT_THAT(itr->GetTerm(), Eq(" ")); - position_or = itr->ResetToTermStartingAfter(7); + position_or = itr->ResetToTermStartingAfterUtf32(7); EXPECT_THAT(position_or, IsOk()); EXPECT_THAT(position_or.ValueOrDie(), Eq(8)); ASSERT_THAT(itr->GetTerm(), Eq("com:google:android")); } -TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermAfterOutOfBounds) { - ICING_ASSERT_OK_AND_ASSIGN(auto segmenter, - language_segmenter_factory::Create(GetOptions())); +TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermAfterUtf32OutOfBounds) { + ICING_ASSERT_OK_AND_ASSIGN( + auto segmenter, language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are you你好吗お元気ですか"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, segmenter->Segment(kText)); - // String: "How are 
you你好吗お元気ですか" - // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ - // Bytes: 0 3 4 7 8 11 172023 29 35 - ASSERT_THAT(itr->ResetToTermStartingAfter(7), IsOkAndHolds(Eq(8))); + // String: "How are you你好吗お元気ですか" + // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 3 4 7 8 11 172023 29 35 + // UTF-32 idx: 0 3 4 7 8 11 131415 17 19 + ASSERT_THAT(itr->ResetToTermStartingAfterUtf32(7), IsOkAndHolds(Eq(8))); ASSERT_THAT(itr->GetTerm(), Eq("you")); - EXPECT_THAT(itr->ResetToTermStartingAfter(-1), - StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); - EXPECT_THAT(itr->GetTerm(), Eq("you")); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(-1), IsOk()); + EXPECT_THAT(itr->GetTerm(), Eq("How")); - EXPECT_THAT(itr->ResetToTermStartingAfter(kText.length()), + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(21), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); - EXPECT_THAT(itr->GetTerm(), Eq("you")); + EXPECT_THAT(itr->GetTerm(), Eq("How")); } // Tests that ResetToTermAfter and Advance produce the same output. With the @@ -566,9 +623,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermAfterOutOfBounds) { // terms produced by ResetToTermAfter calls with the current position // provided as the argument. 
TEST_P(IcuLanguageSegmenterAllLocalesTest, - MixedLanguagesResetToTermAfterEquivalentToAdvance) { - ICING_ASSERT_OK_AND_ASSIGN(auto segmenter, - language_segmenter_factory::Create(GetOptions())); + MixedLanguagesResetToTermAfterUtf32EquivalentToAdvance) { + ICING_ASSERT_OK_AND_ASSIGN( + auto segmenter, language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are𡔖 you你好吗お元気ですか"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, @@ -580,16 +638,17 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, segmenter->Segment(kText)); std::vector<std::string_view> reset_terms = - GetAllTermsResetAfter(reset_to_term_itr.get()); + GetAllTermsResetAfterUtf32(reset_to_term_itr.get()); EXPECT_THAT(reset_terms, testing::ElementsAreArray(advance_terms)); EXPECT_THAT(reset_to_term_itr->GetTerm(), Eq(advance_itr->GetTerm())); } TEST_P(IcuLanguageSegmenterAllLocalesTest, - ThaiResetToTermAfterEquivalentToAdvance) { - ICING_ASSERT_OK_AND_ASSIGN(auto segmenter, - language_segmenter_factory::Create(GetOptions())); + ThaiResetToTermAfterUtf32EquivalentToAdvance) { + ICING_ASSERT_OK_AND_ASSIGN( + auto segmenter, language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, @@ -601,16 +660,17 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, segmenter->Segment(kThai)); std::vector<std::string_view> reset_terms = - GetAllTermsResetAfter(reset_to_term_itr.get()); + GetAllTermsResetAfterUtf32(reset_to_term_itr.get()); EXPECT_THAT(reset_terms, testing::ElementsAreArray(advance_terms)); EXPECT_THAT(reset_to_term_itr->GetTerm(), Eq(advance_itr->GetTerm())); } TEST_P(IcuLanguageSegmenterAllLocalesTest, 
- KoreanResetToTermAfterEquivalentToAdvance) { - ICING_ASSERT_OK_AND_ASSIGN(auto segmenter, - language_segmenter_factory::Create(GetOptions())); + KoreanResetToTermAfterUtf32EquivalentToAdvance) { + ICING_ASSERT_OK_AND_ASSIGN( + auto segmenter, language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kKorean = "나는 매일 출근합니다."; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, @@ -622,7 +682,7 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, segmenter->Segment(kKorean)); std::vector<std::string_view> reset_terms = - GetAllTermsResetAfter(reset_to_term_itr.get()); + GetAllTermsResetAfterUtf32(reset_to_term_itr.get()); EXPECT_THAT(reset_terms, testing::ElementsAreArray(advance_terms)); EXPECT_THAT(reset_to_term_itr->GetTerm(), Eq(advance_itr->GetTerm())); @@ -633,9 +693,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, // should be able to mix ResetToTermAfter(current_position) calls and Advance // calls to mimic calling Advance. 
TEST_P(IcuLanguageSegmenterAllLocalesTest, - MixedLanguagesResetToTermAfterInteroperableWithAdvance) { - ICING_ASSERT_OK_AND_ASSIGN(auto segmenter, - language_segmenter_factory::Create(GetOptions())); + MixedLanguagesResetToTermAfterUtf32InteroperableWithAdvance) { + ICING_ASSERT_OK_AND_ASSIGN( + auto segmenter, language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are𡔖 you你好吗お元気ですか"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, @@ -647,7 +708,7 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr, segmenter->Segment(kText)); std::vector<std::string_view> advance_and_reset_terms = - GetAllTermsAdvanceAndResetAfter(advance_and_reset_itr.get()); + GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get()); EXPECT_THAT(advance_and_reset_terms, testing::ElementsAreArray(advance_terms)); @@ -655,9 +716,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, } TEST_P(IcuLanguageSegmenterAllLocalesTest, - ThaiResetToTermAfterInteroperableWithAdvance) { - ICING_ASSERT_OK_AND_ASSIGN(auto segmenter, - language_segmenter_factory::Create(GetOptions())); + ThaiResetToTermAfterUtf32InteroperableWithAdvance) { + ICING_ASSERT_OK_AND_ASSIGN( + auto segmenter, language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, @@ -669,7 +731,7 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr, segmenter->Segment(kThai)); std::vector<std::string_view> advance_and_reset_terms = - GetAllTermsAdvanceAndResetAfter(advance_and_reset_itr.get()); + GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get()); EXPECT_THAT(advance_and_reset_terms, 
testing::ElementsAreArray(advance_terms)); @@ -677,9 +739,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, } TEST_P(IcuLanguageSegmenterAllLocalesTest, - KoreanResetToTermAfterInteroperableWithAdvance) { - ICING_ASSERT_OK_AND_ASSIGN(auto segmenter, - language_segmenter_factory::Create(GetOptions())); + KoreanResetToTermAfterUtf32InteroperableWithAdvance) { + ICING_ASSERT_OK_AND_ASSIGN( + auto segmenter, language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kKorean = "나는 매일 출근합니다."; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, @@ -691,211 +754,234 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr, segmenter->Segment(kKorean)); std::vector<std::string_view> advance_and_reset_terms = - GetAllTermsAdvanceAndResetAfter(advance_and_reset_itr.get()); + GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get()); EXPECT_THAT(advance_and_reset_terms, testing::ElementsAreArray(advance_terms)); EXPECT_THAT(advance_and_reset_itr->GetTerm(), Eq(advance_itr->GetTerm())); } -TEST_P(IcuLanguageSegmenterAllLocalesTest, MixedLanguagesResetToTermAfter) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); +TEST_P(IcuLanguageSegmenterAllLocalesTest, + MixedLanguagesResetToTermAfterUtf32) { + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment("How are you你好吗お元気ですか")); - // String: "How are you你好吗お元気ですか" - // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ - // Bytes: 0 3 4 7 8 11 172023 29 35 - EXPECT_THAT(itr->ResetToTermStartingAfter(2), IsOkAndHolds(Eq(3))); + // String: "How are you你好吗お元気ですか" + // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 3 4 7 8 11 172023 29 35 + // UTF-32 idx: 0 3 4 7 
8 11 131415 17 19 + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(2), IsOkAndHolds(Eq(3))); EXPECT_THAT(itr->GetTerm(), Eq(" ")); - EXPECT_THAT(itr->ResetToTermStartingAfter(10), IsOkAndHolds(Eq(11))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(10), IsOkAndHolds(Eq(11))); EXPECT_THAT(itr->GetTerm(), Eq("你好")); - EXPECT_THAT(itr->ResetToTermStartingAfter(7), IsOkAndHolds(Eq(8))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(7), IsOkAndHolds(Eq(8))); EXPECT_THAT(itr->GetTerm(), Eq("you")); - EXPECT_THAT(itr->ResetToTermStartingAfter(32), IsOkAndHolds(Eq(35))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(18), IsOkAndHolds(Eq(19))); EXPECT_THAT(itr->GetTerm(), Eq("か")); - EXPECT_THAT(itr->ResetToTermStartingAfter(14), IsOkAndHolds(Eq(17))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(12), IsOkAndHolds(Eq(13))); EXPECT_THAT(itr->GetTerm(), Eq("吗")); - EXPECT_THAT(itr->ResetToTermStartingAfter(0), IsOkAndHolds(Eq(3))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(3))); EXPECT_THAT(itr->GetTerm(), Eq(" ")); - EXPECT_THAT(itr->ResetToTermStartingAfter(35), + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(19), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); } TEST_P(IcuLanguageSegmenterAllLocalesTest, - ContinuousWhitespacesResetToTermAfter) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); + ContinuousWhitespacesResetToTermAfterUtf32) { + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Multiple continuous whitespaces are treated as one. 
constexpr std::string_view kTextWithSpace = "Hello World"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kTextWithSpace)); - // String: "Hello World" - // ^ ^ ^ - // Bytes: 0 5 15 - EXPECT_THAT(itr->ResetToTermStartingAfter(0), IsOkAndHolds(Eq(5))); + // String: "Hello World" + // ^ ^ ^ + // UTF-8 idx: 0 5 15 + // UTF-32 idx: 0 5 15 + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(5))); EXPECT_THAT(itr->GetTerm(), Eq(" ")); - EXPECT_THAT(itr->ResetToTermStartingAfter(2), IsOkAndHolds(Eq(5))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(2), IsOkAndHolds(Eq(5))); EXPECT_THAT(itr->GetTerm(), Eq(" ")); - EXPECT_THAT(itr->ResetToTermStartingAfter(10), IsOkAndHolds(Eq(15))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(10), IsOkAndHolds(Eq(15))); EXPECT_THAT(itr->GetTerm(), Eq("World")); - EXPECT_THAT(itr->ResetToTermStartingAfter(5), IsOkAndHolds(Eq(15))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(5), IsOkAndHolds(Eq(15))); EXPECT_THAT(itr->GetTerm(), Eq("World")); - EXPECT_THAT(itr->ResetToTermStartingAfter(15), + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(15), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermStartingAfter(17), + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(17), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermStartingAfter(19), + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(19), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); } -TEST_P(IcuLanguageSegmenterAllLocalesTest, ChineseResetToTermAfter) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); +TEST_P(IcuLanguageSegmenterAllLocalesTest, ChineseResetToTermAfterUtf32) { + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + 
language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // CJKT (Chinese, Japanese, Khmer, Thai) are the 4 main languages that // don't have whitespaces as word delimiter. Chinese constexpr std::string_view kChinese = "我每天走路去上班。"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kChinese)); - // String: "我每天走路去上班。" - // ^ ^ ^ ^^ - // Bytes: 0 3 9 15 18 - EXPECT_THAT(itr->ResetToTermStartingAfter(0), IsOkAndHolds(Eq(3))); + // String: "我每天走路去上班。" + // ^ ^ ^ ^^ + // UTF-8 idx: 0 3 9 15 18 + // UTF-832 idx: 0 1 3 5 6 + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(1))); EXPECT_THAT(itr->GetTerm(), Eq("每天")); - EXPECT_THAT(itr->ResetToTermStartingAfter(7), IsOkAndHolds(Eq(9))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(2), IsOkAndHolds(Eq(3))); EXPECT_THAT(itr->GetTerm(), Eq("走路")); - EXPECT_THAT(itr->ResetToTermStartingAfter(19), + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(7), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); } -TEST_P(IcuLanguageSegmenterAllLocalesTest, JapaneseResetToTermAfter) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); +TEST_P(IcuLanguageSegmenterAllLocalesTest, JapaneseResetToTermAfterUtf32) { + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Japanese constexpr std::string_view kJapanese = "私は毎日仕事に歩いています。"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kJapanese)); - // String: "私は毎日仕事に歩いています。" - // ^ ^ ^ ^ ^ ^ ^ ^ ^ - // Bytes: 0 3 6 12 18212427 33 - EXPECT_THAT(itr->ResetToTermStartingAfter(0), IsOkAndHolds(Eq(3))); + // String: "私は毎日仕事に歩いています。" + // ^ ^ ^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 3 6 12 18212427 33 + // UTF-32 idx: 0 1 2 4 6 7 8 9 11 + 
EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(1))); EXPECT_THAT(itr->GetTerm(), Eq("は")); - EXPECT_THAT(itr->ResetToTermStartingAfter(33), + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(11), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermStartingAfter(7), IsOkAndHolds(Eq(12))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(3), IsOkAndHolds(Eq(4))); EXPECT_THAT(itr->GetTerm(), Eq("仕事")); } -TEST_P(IcuLanguageSegmenterAllLocalesTest, KhmerResetToTermAfter) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); +TEST_P(IcuLanguageSegmenterAllLocalesTest, KhmerResetToTermAfterUtf32) { + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kKhmer = "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kKhmer)); - // String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។" - // ^ ^ ^ ^ - // Bytes: 0 9 24 45 - EXPECT_THAT(itr->ResetToTermStartingAfter(0), IsOkAndHolds(Eq(9))); + // String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។" + // ^ ^ ^ ^ + // UTF-8 idx: 0 9 24 45 + // UTF-32 idx: 0 3 8 15 + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(3))); EXPECT_THAT(itr->GetTerm(), Eq("ដើរទៅ")); - EXPECT_THAT(itr->ResetToTermStartingAfter(47), + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(15), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermStartingAfter(14), IsOkAndHolds(Eq(24))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(6), IsOkAndHolds(Eq(8))); EXPECT_THAT(itr->GetTerm(), Eq("ធ្វើការ")); } -TEST_P(IcuLanguageSegmenterAllLocalesTest, ThaiResetToTermAfter) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - 
language_segmenter_factory::Create(GetOptions())); +TEST_P(IcuLanguageSegmenterAllLocalesTest, ThaiResetToTermAfterUtf32) { + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Thai constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kThai)); - // String: "ฉันเดินไปทำงานทุกวัน" - // ^ ^ ^ ^ ^ ^ - // Bytes: 0 9 21 27 42 51 - EXPECT_THAT(itr->ResetToTermStartingAfter(0), IsOkAndHolds(Eq(9))); + // String: "ฉันเดินไปทำงานทุกวัน" + // ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 9 21 27 42 51 + // UTF-32 idx: 0 3 7 9 14 17 + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(3))); EXPECT_THAT(itr->GetTerm(), Eq("เดิน")); - EXPECT_THAT(itr->ResetToTermStartingAfter(51), + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(17), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermStartingAfter(13), IsOkAndHolds(Eq(21))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(6), IsOkAndHolds(Eq(7))); EXPECT_THAT(itr->GetTerm(), Eq("ไป")); - EXPECT_THAT(itr->ResetToTermStartingAfter(34), IsOkAndHolds(Eq(42))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(12), IsOkAndHolds(Eq(14))); EXPECT_THAT(itr->GetTerm(), Eq("ทุก")); } -TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermBeforeWordConnector) { - ICING_ASSERT_OK_AND_ASSIGN(auto segmenter, - language_segmenter_factory::Create(GetOptions())); +TEST_P(IcuLanguageSegmenterAllLocalesTest, + ResetToTermBeforeWordConnectorUtf32) { + ICING_ASSERT_OK_AND_ASSIGN( + auto segmenter, language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "package name com:google:android!"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, segmenter->Segment(kText)); - // 
String: "package name com:google:android!" - // ^ ^^ ^^ ^ - // Bytes: 0 7 8 12 13 31 - auto position_or = itr->ResetToTermEndingBefore(31); + // String: "package name com:google:android!" + // ^ ^^ ^^ ^ + // UTF-8 idx: 0 7 8 12 13 31 + // UTF-32 idx: 0 7 8 12 13 31 + auto position_or = itr->ResetToTermEndingBeforeUtf32(31); EXPECT_THAT(position_or, IsOk()); EXPECT_THAT(position_or.ValueOrDie(), Eq(13)); ASSERT_THAT(itr->GetTerm(), Eq("com:google:android")); - position_or = itr->ResetToTermEndingBefore(21); + position_or = itr->ResetToTermEndingBeforeUtf32(21); EXPECT_THAT(position_or, IsOk()); EXPECT_THAT(position_or.ValueOrDie(), Eq(12)); ASSERT_THAT(itr->GetTerm(), Eq(" ")); } -TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermBeforeOutOfBounds) { - ICING_ASSERT_OK_AND_ASSIGN(auto segmenter, - language_segmenter_factory::Create(GetOptions())); +TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermBeforeOutOfBoundsUtf32) { + ICING_ASSERT_OK_AND_ASSIGN( + auto segmenter, language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are you你好吗お元気ですか"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, segmenter->Segment(kText)); - // String: "How are you你好吗お元気ですか" - // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ - // Bytes: 0 3 4 7 8 11 172023 29 35 - ASSERT_THAT(itr->ResetToTermEndingBefore(7), IsOkAndHolds(Eq(4))); + // String: "How are you你好吗お元気ですか" + // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 3 4 7 8 11 172023 29 35 + // UTF-32 idx: 0 3 4 7 8 11 131415 17 19 + ASSERT_THAT(itr->ResetToTermEndingBeforeUtf32(7), IsOkAndHolds(Eq(4))); ASSERT_THAT(itr->GetTerm(), Eq("are")); - EXPECT_THAT(itr->ResetToTermEndingBefore(-1), + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(-1), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); EXPECT_THAT(itr->GetTerm(), Eq("are")); - EXPECT_THAT(itr->ResetToTermEndingBefore(kText.length()), - StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); 
- EXPECT_THAT(itr->GetTerm(), Eq("are")); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(29), IsOk()); + EXPECT_THAT(itr->GetTerm(), Eq("か")); } // Tests that ResetToTermBefore and Advance produce the same output. With the @@ -904,26 +990,22 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermBeforeOutOfBounds) { // terms produced by ResetToTermBefore calls with the current position // provided as the argument (after their order has been reversed). TEST_P(IcuLanguageSegmenterAllLocalesTest, - MixedLanguagesResetToTermBeforeEquivalentToAdvance) { - ICING_ASSERT_OK_AND_ASSIGN(auto segmenter, - language_segmenter_factory::Create(GetOptions())); + MixedLanguagesResetToTermBeforeEquivalentToAdvanceUtf32) { + ICING_ASSERT_OK_AND_ASSIGN( + auto segmenter, language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are𡔖 you你好吗お元気ですか"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, segmenter->Segment(kText)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); - // Can't produce the last term via calls to ResetToTermBefore. So skip - // past that one. 
- auto itr = advance_terms.begin(); - std::advance(itr, advance_terms.size() - 1); - advance_terms.erase(itr); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, segmenter->Segment(kText)); std::vector<std::string_view> reset_terms = - GetAllTermsResetBefore(reset_to_term_itr.get()); + GetAllTermsResetBeforeUtf32(reset_to_term_itr.get()); std::reverse(reset_terms.begin(), reset_terms.end()); EXPECT_THAT(reset_terms, testing::ElementsAreArray(advance_terms)); @@ -932,26 +1014,22 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, } TEST_P(IcuLanguageSegmenterAllLocalesTest, - ThaiResetToTermBeforeEquivalentToAdvance) { - ICING_ASSERT_OK_AND_ASSIGN(auto segmenter, - language_segmenter_factory::Create(GetOptions())); + ThaiResetToTermBeforeEquivalentToAdvanceUtf32) { + ICING_ASSERT_OK_AND_ASSIGN( + auto segmenter, language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, segmenter->Segment(kThai)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); - // Can't produce the last term via calls to ResetToTermBefore. So skip - // past that one. 
- auto itr = advance_terms.begin(); - std::advance(itr, advance_terms.size() - 1); - advance_terms.erase(itr); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, segmenter->Segment(kThai)); std::vector<std::string_view> reset_terms = - GetAllTermsResetBefore(reset_to_term_itr.get()); + GetAllTermsResetBeforeUtf32(reset_to_term_itr.get()); std::reverse(reset_terms.begin(), reset_terms.end()); EXPECT_THAT(reset_terms, testing::ElementsAreArray(advance_terms)); @@ -959,192 +1037,209 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, } TEST_P(IcuLanguageSegmenterAllLocalesTest, - KoreanResetToTermBeforeEquivalentToAdvance) { - ICING_ASSERT_OK_AND_ASSIGN(auto segmenter, - language_segmenter_factory::Create(GetOptions())); + KoreanResetToTermBeforeEquivalentToAdvanceUtf32) { + ICING_ASSERT_OK_AND_ASSIGN( + auto segmenter, language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kKorean = "나는 매일 출근합니다."; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, segmenter->Segment(kKorean)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); - // Can't produce the last term via calls to ResetToTermBefore. So skip - // past that one. 
- auto itr = advance_terms.begin(); - std::advance(itr, advance_terms.size() - 1); - advance_terms.erase(itr); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, segmenter->Segment(kKorean)); std::vector<std::string_view> reset_terms = - GetAllTermsResetBefore(reset_to_term_itr.get()); + GetAllTermsResetBeforeUtf32(reset_to_term_itr.get()); std::reverse(reset_terms.begin(), reset_terms.end()); EXPECT_THAT(reset_terms, testing::ElementsAreArray(advance_terms)); EXPECT_THAT(reset_to_term_itr->GetTerm(), Eq(advance_itr->GetTerm())); } -TEST_P(IcuLanguageSegmenterAllLocalesTest, MixedLanguagesResetToTermBefore) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); +TEST_P(IcuLanguageSegmenterAllLocalesTest, + MixedLanguagesResetToTermBeforeUtf32) { + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment("How are you你好吗お元気ですか")); - // String: "How are you你好吗お元気ですか" - // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ - // Bytes: 0 3 4 7 8 11 172023 29 35 - EXPECT_THAT(itr->ResetToTermEndingBefore(2), + // String: "How are you你好吗お元気ですか" + // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 3 4 7 8 11 172023 29 35 + // UTF-32 idx: 0 3 4 7 8 11 131415 17 19 + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(2), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermEndingBefore(10), IsOkAndHolds(Eq(7))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(10), IsOkAndHolds(Eq(7))); EXPECT_THAT(itr->GetTerm(), Eq(" ")); - EXPECT_THAT(itr->ResetToTermEndingBefore(7), IsOkAndHolds(Eq(4))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(7), IsOkAndHolds(Eq(4))); EXPECT_THAT(itr->GetTerm(), Eq("are")); - EXPECT_THAT(itr->ResetToTermEndingBefore(32), 
IsOkAndHolds(Eq(23))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(18), IsOkAndHolds(Eq(15))); EXPECT_THAT(itr->GetTerm(), Eq("元気")); - EXPECT_THAT(itr->ResetToTermEndingBefore(14), IsOkAndHolds(Eq(8))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(12), IsOkAndHolds(Eq(8))); EXPECT_THAT(itr->GetTerm(), Eq("you")); - EXPECT_THAT(itr->ResetToTermEndingBefore(0), + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(0), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermEndingBefore(35), IsOkAndHolds(Eq(29))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(19), IsOkAndHolds(Eq(17))); EXPECT_THAT(itr->GetTerm(), Eq("です")); } TEST_P(IcuLanguageSegmenterAllLocalesTest, - ContinuousWhitespacesResetToTermBefore) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); + ContinuousWhitespacesResetToTermBeforeUtf32) { + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Multiple continuous whitespaces are treated as one. 
constexpr std::string_view kTextWithSpace = "Hello World"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kTextWithSpace)); - // String: "Hello World" - // ^ ^ ^ - // Bytes: 0 5 15 - EXPECT_THAT(itr->ResetToTermEndingBefore(0), + // String: "Hello World" + // ^ ^ ^ + // UTF-8 idx: 0 5 15 + // UTF-32 idx: 0 5 15 + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(0), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermEndingBefore(2), + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(2), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermEndingBefore(10), IsOkAndHolds(Eq(0))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(10), IsOkAndHolds(Eq(0))); EXPECT_THAT(itr->GetTerm(), Eq("Hello")); - EXPECT_THAT(itr->ResetToTermEndingBefore(5), IsOkAndHolds(Eq(0))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(5), IsOkAndHolds(Eq(0))); EXPECT_THAT(itr->GetTerm(), Eq("Hello")); - EXPECT_THAT(itr->ResetToTermEndingBefore(15), IsOkAndHolds(Eq(5))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(15), IsOkAndHolds(Eq(5))); EXPECT_THAT(itr->GetTerm(), Eq(" ")); - EXPECT_THAT(itr->ResetToTermEndingBefore(17), IsOkAndHolds(Eq(5))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(17), IsOkAndHolds(Eq(5))); EXPECT_THAT(itr->GetTerm(), Eq(" ")); - EXPECT_THAT(itr->ResetToTermEndingBefore(19), IsOkAndHolds(Eq(5))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(19), IsOkAndHolds(Eq(5))); EXPECT_THAT(itr->GetTerm(), Eq(" ")); } -TEST_P(IcuLanguageSegmenterAllLocalesTest, ChineseResetToTermBefore) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); +TEST_P(IcuLanguageSegmenterAllLocalesTest, ChineseResetToTermBeforeUtf32) { + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + 
GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // CJKT (Chinese, Japanese, Khmer, Thai) are the 4 main languages that // don't have whitespaces as word delimiter. Chinese constexpr std::string_view kChinese = "我每天走路去上班。"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kChinese)); - // String: "我每天走路去上班。" - // ^ ^ ^ ^^ - // Bytes: 0 3 9 15 18 - EXPECT_THAT(itr->ResetToTermEndingBefore(0), + // String: "我每天走路去上班。" + // ^ ^ ^ ^^ + // UTF-8 idx: 0 3 9 15 18 + // UTF-32 idx: 0 1 3 5 6 + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(0), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermEndingBefore(7), IsOkAndHolds(Eq(0))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(2), IsOkAndHolds(Eq(0))); EXPECT_THAT(itr->GetTerm(), Eq("我")); - EXPECT_THAT(itr->ResetToTermEndingBefore(19), IsOkAndHolds(Eq(15))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(7), IsOkAndHolds(Eq(5))); EXPECT_THAT(itr->GetTerm(), Eq("去")); } -TEST_P(IcuLanguageSegmenterAllLocalesTest, JapaneseResetToTermBefore) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); +TEST_P(IcuLanguageSegmenterAllLocalesTest, JapaneseResetToTermBeforeUtf32) { + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Japanese constexpr std::string_view kJapanese = "私は毎日仕事に歩いています。"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kJapanese)); - // String: "私は毎日仕事に歩いています。" - // ^ ^ ^ ^ ^ ^ ^ ^ ^ - // Bytes: 0 3 6 12 18212427 33 - EXPECT_THAT(itr->ResetToTermEndingBefore(0), + // String: "私は毎日仕事に歩いています。" + // ^ ^ ^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 3 6 12 18212427 33 + // UTF-32 idx: 0 1 2 4 6 7 8 9 11 + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(0), 
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermEndingBefore(33), IsOkAndHolds(Eq(27))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(11), IsOkAndHolds(Eq(9))); EXPECT_THAT(itr->GetTerm(), Eq("てい")); - EXPECT_THAT(itr->ResetToTermEndingBefore(7), IsOkAndHolds(Eq(3))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(3), IsOkAndHolds(Eq(1))); EXPECT_THAT(itr->GetTerm(), Eq("は")); } -TEST_P(IcuLanguageSegmenterAllLocalesTest, KhmerResetToTermBefore) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); +TEST_P(IcuLanguageSegmenterAllLocalesTest, KhmerResetToTermBeforeUtf32) { + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kKhmer = "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kKhmer)); - // String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។" - // ^ ^ ^ ^ - // Bytes: 0 9 24 45 - EXPECT_THAT(itr->ResetToTermEndingBefore(0), + // String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។" + // ^ ^ ^ ^ + // UTF-8 idx: 0 9 24 45 + // UTF-32 idx: 0 3 8 15 + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(0), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermEndingBefore(47), IsOkAndHolds(Eq(24))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(16), IsOkAndHolds(Eq(8))); EXPECT_THAT(itr->GetTerm(), Eq("ធ្វើការ")); - EXPECT_THAT(itr->ResetToTermEndingBefore(14), IsOkAndHolds(Eq(0))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(5), IsOkAndHolds(Eq(0))); EXPECT_THAT(itr->GetTerm(), Eq("ញុំ")); } -TEST_P(IcuLanguageSegmenterAllLocalesTest, ThaiResetToTermBefore) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); 
+TEST_P(IcuLanguageSegmenterAllLocalesTest, ThaiResetToTermBeforeUtf32) { + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Thai constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kThai)); - // String: "ฉันเดินไปทำงานทุกวัน" - // ^ ^ ^ ^ ^ ^ - // Bytes: 0 9 21 27 42 51 - EXPECT_THAT(itr->ResetToTermEndingBefore(0), + // String: "ฉันเดินไปทำงานทุกวัน" + // ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 9 21 27 42 51 + // UTF-32 idx: 0 3 7 9 14 17 + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(0), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermEndingBefore(51), IsOkAndHolds(Eq(42))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(17), IsOkAndHolds(Eq(14))); EXPECT_THAT(itr->GetTerm(), Eq("ทุก")); - EXPECT_THAT(itr->ResetToTermEndingBefore(13), IsOkAndHolds(Eq(0))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(4), IsOkAndHolds(Eq(0))); EXPECT_THAT(itr->GetTerm(), Eq("ฉัน")); - EXPECT_THAT(itr->ResetToTermEndingBefore(34), IsOkAndHolds(Eq(21))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(11), IsOkAndHolds(Eq(7))); EXPECT_THAT(itr->GetTerm(), Eq("ไป")); } TEST_P(IcuLanguageSegmenterAllLocalesTest, QuerySyntax) { - ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter, - language_segmenter_factory::Create(GetOptions())); + ICING_ASSERT_OK_AND_ASSIGN( + auto language_segmenter, + language_segmenter_factory::Create( + GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Validates that the input strings are not copied ICING_ASSERT_OK_AND_ASSIGN( std::vector<std::string_view> terms, @@ -1174,6 +1269,5 @@ INSTANTIATE_TEST_SUITE_P( "" // Will fall back to ICU default locale )); -} // namespace } // namespace lib } // namespace icing diff --git a/icing/tokenization/language-segmenter-factory.h 
b/icing/tokenization/language-segmenter-factory.h index e60c168..cae3eee 100644 --- a/icing/tokenization/language-segmenter-factory.h +++ b/icing/tokenization/language-segmenter-factory.h @@ -18,11 +18,7 @@ #include <memory> #include <string_view> -#ifdef __ANDROID__ #include "icing/jni/jni-cache.h" -#else // __ANDROID__ -class JniCache; // forward declaration to let non-Android builds work. -#endif // __ANDROID__ #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/tokenization/language-segmenter.h" diff --git a/icing/tokenization/language-segmenter-iterator-test-jni-layer.cc b/icing/tokenization/language-segmenter-iterator-test-jni-layer.cc new file mode 100644 index 0000000..3a94af3 --- /dev/null +++ b/icing/tokenization/language-segmenter-iterator-test-jni-layer.cc @@ -0,0 +1,37 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <jni.h> + +#include "gtest/gtest.h" +#include "icing/testing/logging-event-listener.h" + +// Global variable used so that the test implementation can access the JNIEnv. 
+JNIEnv* g_jenv = nullptr; + +extern "C" JNIEXPORT jboolean JNICALL +Java_icing_jni_LanguageSegmenterIteratorJniTest_testsMain(JNIEnv* env, + jclass ignored) { + g_jenv = env; + + std::vector<char*> my_argv; + char arg[] = "jni-test-lib"; + my_argv.push_back(arg); + int argc = 1; + char** argv = &(my_argv[0]); + testing::InitGoogleTest(&argc, argv); + testing::UnitTest::GetInstance()->listeners().Append( + new icing::lib::LoggingEventListener()); + return RUN_ALL_TESTS() == 0; +} diff --git a/icing/tokenization/language-segmenter-iterator_test.cc b/icing/tokenization/language-segmenter-iterator_test.cc index 2b1911e..d293581 100644 --- a/icing/tokenization/language-segmenter-iterator_test.cc +++ b/icing/tokenization/language-segmenter-iterator_test.cc @@ -16,8 +16,9 @@ #include "gtest/gtest.h" #include "icing/absl_ports/str_cat.h" #include "icing/helpers/icu/icu-data-file-helper.h" +#include "icing/portable/platform.h" #include "icing/testing/common-matchers.h" -#include "icing/testing/platform.h" +#include "icing/testing/jni-test-helpers.h" #include "icing/testing/test-data.h" #include "icing/tokenization/language-segmenter-factory.h" #include "icing/tokenization/language-segmenter.h" @@ -43,10 +44,13 @@ class LanguageSegmenterIteratorTest : public testing::Test { GetTestFilePath("icing/icu.dat"))); } } + + std::unique_ptr<const JniCache> jni_cache_ = GetTestJniCache(); }; TEST_F(LanguageSegmenterIteratorTest, AdvanceAndGetTerm) { - language_segmenter_factory::SegmenterOptions options(ULOC_US); + language_segmenter_factory::SegmenterOptions options(ULOC_US, + jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); @@ -66,86 +70,91 @@ TEST_F(LanguageSegmenterIteratorTest, AdvanceAndGetTerm) { } TEST_F(LanguageSegmenterIteratorTest, - ResetToTermStartingAfterWithOffsetInText) { - language_segmenter_factory::SegmenterOptions options(ULOC_US); + ResetToTermStartingAfterUtf32WithOffsetInText) { + 
language_segmenter_factory::SegmenterOptions options(ULOC_US, + jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); ICING_ASSERT_OK_AND_ASSIGN(auto iterator, language_segmenter->Segment("foo bar")); - EXPECT_THAT(iterator->ResetToTermStartingAfter(/*offset=*/0), + EXPECT_THAT(iterator->ResetToTermStartingAfterUtf32(/*offset=*/0), IsOkAndHolds(3)); // The term " " - EXPECT_THAT(iterator->ResetToTermStartingAfter(/*offset=*/3), + EXPECT_THAT(iterator->ResetToTermStartingAfterUtf32(/*offset=*/3), IsOkAndHolds(4)); // The term "bar" - EXPECT_THAT(iterator->ResetToTermStartingAfter(/*offset=*/4), + EXPECT_THAT(iterator->ResetToTermStartingAfterUtf32(/*offset=*/4), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } TEST_F(LanguageSegmenterIteratorTest, - ResetToTermStartingAfterWithNegativeOffsetNotOk) { - language_segmenter_factory::SegmenterOptions options(ULOC_US); + ResetToTermStartingAfterUtf32WithNegativeOffsetNotOk) { + language_segmenter_factory::SegmenterOptions options(ULOC_US, + jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); ICING_ASSERT_OK_AND_ASSIGN(auto iterator, language_segmenter->Segment("foo bar")); - EXPECT_THAT(iterator->ResetToTermStartingAfter(/*offset=*/-1), - StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); + EXPECT_THAT(iterator->ResetToTermStartingAfterUtf32(/*offset=*/-1), IsOk()); - EXPECT_THAT(iterator->ResetToTermStartingAfter(/*offset=*/-100), - StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); + EXPECT_THAT(iterator->ResetToTermStartingAfterUtf32(/*offset=*/-100), IsOk()); - EXPECT_THAT(iterator->ResetToStart(), IsOkAndHolds(0)); + EXPECT_THAT(iterator->ResetToStartUtf32(), IsOkAndHolds(0)); EXPECT_THAT(iterator->GetTerm(), Eq("foo")); } TEST_F(LanguageSegmenterIteratorTest, - ResetToTermStartingAfterWithTextLengthOffsetInvalidArgument) { + 
ResetToTermStartingAfterUtf32WithTextLengthOffsetInvalidArgument) { std::string text = "foo bar"; - language_segmenter_factory::SegmenterOptions options(ULOC_US); + language_segmenter_factory::SegmenterOptions options(ULOC_US, + jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); ICING_ASSERT_OK_AND_ASSIGN(auto iterator, language_segmenter->Segment(text)); - EXPECT_THAT(iterator->ResetToTermStartingAfter(/*offset=*/text.size()), + EXPECT_THAT(iterator->ResetToTermStartingAfterUtf32(/*offset=*/text.length()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } TEST_F(LanguageSegmenterIteratorTest, - ResetToTermStartingAfterWithOffsetPastTextLengthInvalidArgument) { + ResetToTermStartingAfterUtf32WithOffsetPastTextLengthInvalidArgument) { std::string text = "foo bar"; - language_segmenter_factory::SegmenterOptions options(ULOC_US); + language_segmenter_factory::SegmenterOptions options(ULOC_US, + jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); ICING_ASSERT_OK_AND_ASSIGN(auto iterator, language_segmenter->Segment(text)); - EXPECT_THAT(iterator->ResetToTermStartingAfter(/*offset=*/100), + EXPECT_THAT(iterator->ResetToTermStartingAfterUtf32(/*offset=*/100), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST_F(LanguageSegmenterIteratorTest, ResetToTermEndingBeforeWithOffsetInText) { - language_segmenter_factory::SegmenterOptions options(ULOC_US); +TEST_F(LanguageSegmenterIteratorTest, + ResetToTermEndingBeforeUtf32WithOffsetInText) { + language_segmenter_factory::SegmenterOptions options(ULOC_US, + jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); ICING_ASSERT_OK_AND_ASSIGN(auto iterator, language_segmenter->Segment("foo bar")); - EXPECT_THAT(iterator->ResetToTermEndingBefore(/*offset=*/6), + 
EXPECT_THAT(iterator->ResetToTermEndingBeforeUtf32(/*offset=*/6), IsOkAndHolds(3)); // The term " " - EXPECT_THAT(iterator->ResetToTermEndingBefore(/*offset=*/3), + EXPECT_THAT(iterator->ResetToTermEndingBeforeUtf32(/*offset=*/3), IsOkAndHolds(0)); // The term "foo" - EXPECT_THAT(iterator->ResetToTermEndingBefore(/*offset=*/2), + EXPECT_THAT(iterator->ResetToTermEndingBeforeUtf32(/*offset=*/2), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } TEST_F(LanguageSegmenterIteratorTest, - ResetToTermEndingBeforeWithZeroNotFound) { - language_segmenter_factory::SegmenterOptions options(ULOC_US); + ResetToTermEndingBeforeUtf32WithZeroNotFound) { + language_segmenter_factory::SegmenterOptions options(ULOC_US, + jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); @@ -153,40 +162,43 @@ TEST_F(LanguageSegmenterIteratorTest, language_segmenter->Segment("foo bar")); // Zero is a valid argument, but there aren't any terms that end before it. 
- EXPECT_THAT(iterator->ResetToTermEndingBefore(/*offset=*/0), + EXPECT_THAT(iterator->ResetToTermEndingBeforeUtf32(/*offset=*/0), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } TEST_F(LanguageSegmenterIteratorTest, - ResetToTermEndingBeforeWithNegativeOffsetInvalidArgument) { - language_segmenter_factory::SegmenterOptions options(ULOC_US); + ResetToTermEndingBeforeUtf32WithNegativeOffsetInvalidArgument) { + language_segmenter_factory::SegmenterOptions options(ULOC_US, + jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); ICING_ASSERT_OK_AND_ASSIGN(auto iterator, language_segmenter->Segment("foo bar")); - EXPECT_THAT(iterator->ResetToTermEndingBefore(/*offset=*/-1), + EXPECT_THAT(iterator->ResetToTermEndingBeforeUtf32(/*offset=*/-1), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); - EXPECT_THAT(iterator->ResetToTermEndingBefore(/*offset=*/-100), + EXPECT_THAT(iterator->ResetToTermEndingBeforeUtf32(/*offset=*/-100), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } TEST_F(LanguageSegmenterIteratorTest, - ResetToTermEndingBeforeWithOffsetPastTextEndInvalidArgument) { + ResetToTermEndingBeforeUtf32WithOffsetPastTextEndInvalidArgument) { std::string text = "foo bar"; - language_segmenter_factory::SegmenterOptions options(ULOC_US); + language_segmenter_factory::SegmenterOptions options(ULOC_US, + jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); ICING_ASSERT_OK_AND_ASSIGN(auto iterator, language_segmenter->Segment(text)); - EXPECT_THAT(iterator->ResetToTermEndingBefore(/*offset=*/text.length()), - StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); + EXPECT_THAT(iterator->ResetToTermEndingBeforeUtf32(/*offset=*/text.length()), + IsOk()); - EXPECT_THAT(iterator->ResetToTermEndingBefore(/*offset=*/text.length() + 1), - 
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); + EXPECT_THAT( + iterator->ResetToTermEndingBeforeUtf32(/*offset=*/text.length() + 1), + IsOk()); } } // namespace diff --git a/icing/tokenization/language-segmenter.h b/icing/tokenization/language-segmenter.h index 7ca31d1..913386a 100644 --- a/icing/tokenization/language-segmenter.h +++ b/icing/tokenization/language-segmenter.h @@ -21,6 +21,8 @@ #include <vector> #include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/absl_ports/canonical_errors.h" +#include "icing/util/character-iterator.h" namespace icing { namespace lib { @@ -56,51 +58,81 @@ class LanguageSegmenter { // true. virtual std::string_view GetTerm() const = 0; - // Resets the iterator to point to the first term that starts after offset. + // RETURNS: + // On success, a CharacterIterator pointing to the beginning of the + // current term. + // ABORTED if an invalid unicode character is encountered while + // calculating the term start. + virtual libtextclassifier3::StatusOr<CharacterIterator> + CalculateTermStart() { + return absl_ports::UnimplementedError(""); + } + + // RETURNS: + // On success, a CharacterIterator pointing just past the end of the + // current term. + // ABORTED if an invalid unicode character is encountered while + // calculating the term end. + virtual libtextclassifier3::StatusOr<CharacterIterator> + CalculateTermEndExclusive() { + return absl_ports::UnimplementedError(""); + } + + // Resets the iterator to point to the first term that starts after UTF-32 + // offset. // GetTerm will now return that term. For example: // // language_segmenter = language_segmenter_factory::Create(type); // iterator = language_segmenter->Segment("foo bar baz"); - // iterator.ResetToTermStartingAfter(4); + // iterator.ResetToTermStartingAfterUtf32(4); // iterator.GetTerm() // returns "baz"; // // Return types of OK and NOT_FOUND indicate that the function call was // valid and the state of the iterator has changed. 
Return type of - // INVALID_ARGUMENT will leave the iterator unchanged. + // INVALID_ARGUMENT will leave the iterator unchanged. Lastly, a return type + // of ABORTED means that the iterator may be left in an undefined state and + // no longer be usable. // // Returns: - // On success, the starting position of the first term that starts after + // On success, the UTF-32 offset of the first term that starts after // offset. // NOT_FOUND if an error occurred or there are no terms that start after // offset. - // INVALID_ARGUMENT if offset is out of bounds for the provided text. + // INVALID_ARGUMENT if offset is beyond the end of the text. // ABORTED if an invalid unicode character is encountered while // traversing the text. - virtual libtextclassifier3::StatusOr<int32_t> ResetToTermStartingAfter( - int32_t offset) = 0; + virtual libtextclassifier3::StatusOr<int32_t> ResetToTermStartingAfterUtf32( + int32_t offset) { + return absl_ports::UnimplementedError(""); + } - // Resets the iterator to point to the first term that ends before offset. + // Resets the iterator to point to the first term that ends before UTF-32 + // offset. // GetTerm will now return that term. For example: // // language_segmenter = language_segmenter_factory::Create(type); // iterator = language_segmenter->Segment("foo bar baz"); - // iterator.ResetToTermEndingBefore(7); + // iterator.ResetToTermEndingBeforeUtf32(7); // iterator.GetTerm() // returns "bar"; // // Return types of OK and NOT_FOUND indicate that the function call was // valid and the state of the iterator has changed. Return type of - // INVALID_ARGUMENT will leave the iterator unchanged. + // INVALID_ARGUMENT will leave the iterator unchanged. Lastly, a return type + // of ABORTED means that the iterator may be left in an undefined state and + // no longer be usable. 
// // Returns: - // On success, the starting position of the first term that ends before + // On success, the UTF-32 offset of the first term that ends before // offset. // NOT_FOUND if an error occurred or there are no terms that ends before // offset. - // INVALID_ARGUMENT if offset is out of bounds for the provided text. + // INVALID_ARGUMENT if offset is negative // ABORTED if an invalid unicode character is encountered while // traversing the text. - virtual libtextclassifier3::StatusOr<int32_t> ResetToTermEndingBefore( - int32_t offset) = 0; + virtual libtextclassifier3::StatusOr<int32_t> ResetToTermEndingBeforeUtf32( + int32_t offset) { + return absl_ports::UnimplementedError(""); + } // Resets the iterator to point to the first term. // GetTerm will now return that term. For example: @@ -108,7 +140,7 @@ class LanguageSegmenter { // language_segmenter = language_segmenter_factory::Create(type); // iterator = language_segmenter->Segment("foo bar baz"); // iterator.Advance(); - // iterator.ResetToStart(); + // iterator.ResetToStartUtf32(); // iterator.GetTerm() // returns "foo"; // // Return types of OK and NOT_FOUND indicate that the function call was @@ -119,7 +151,7 @@ class LanguageSegmenter { // NOT_FOUND if an error occurred or there are no valid terms in the text. // ABORTED if an invalid unicode character is encountered while // traversing the text. - virtual libtextclassifier3::StatusOr<int32_t> ResetToStart() = 0; + virtual libtextclassifier3::StatusOr<int32_t> ResetToStartUtf32() = 0; }; // Segments the input text into terms. diff --git a/icing/tokenization/plain-tokenizer-test-jni-layer.cc b/icing/tokenization/plain-tokenizer-test-jni-layer.cc new file mode 100644 index 0000000..efa6427 --- /dev/null +++ b/icing/tokenization/plain-tokenizer-test-jni-layer.cc @@ -0,0 +1,36 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <jni.h> + +#include "gtest/gtest.h" +#include "icing/testing/logging-event-listener.h" + +// Global variable used so that the test implementation can access the JNIEnv. +JNIEnv* g_jenv = nullptr; + +extern "C" JNIEXPORT jboolean JNICALL +Java_icing_jni_PlainTokenizerJniTest_testsMain(JNIEnv* env, jclass ignored) { + g_jenv = env; + + std::vector<char*> my_argv; + char arg[] = "jni-test-lib"; + my_argv.push_back(arg); + int argc = 1; + char** argv = &(my_argv[0]); + testing::InitGoogleTest(&argc, argv); + testing::UnitTest::GetInstance()->listeners().Append( + new icing::lib::LoggingEventListener()); + return RUN_ALL_TESTS() == 0; +} diff --git a/icing/tokenization/plain-tokenizer.cc b/icing/tokenization/plain-tokenizer.cc index 6e54af9..13fe550 100644 --- a/icing/tokenization/plain-tokenizer.cc +++ b/icing/tokenization/plain-tokenizer.cc @@ -18,6 +18,7 @@ #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/tokenization/language-segmenter.h" +#include "icing/util/character-iterator.h" #include "icing/util/i18n-utils.h" #include "icing/util/status-macros.h" @@ -70,8 +71,18 @@ class PlainTokenIterator : public Tokenizer::Iterator { return Token(Token::REGULAR, current_term_); } + libtextclassifier3::StatusOr<CharacterIterator> CalculateTokenStart() + override { + return base_iterator_->CalculateTermStart(); + } + + libtextclassifier3::StatusOr<CharacterIterator> CalculateTokenEndExclusive() + override { + return base_iterator_->CalculateTermEndExclusive(); + } + bool ResetToTokenAfter(int32_t offset) override { - if 
(!base_iterator_->ResetToTermStartingAfter(offset).ok()) { + if (!base_iterator_->ResetToTermStartingAfterUtf32(offset).ok()) { return false; } current_term_ = base_iterator_->GetTerm(); @@ -84,20 +95,20 @@ class PlainTokenIterator : public Tokenizer::Iterator { bool ResetToTokenBefore(int32_t offset) override { ICING_ASSIGN_OR_RETURN( - offset, base_iterator_->ResetToTermEndingBefore(offset), false); + offset, base_iterator_->ResetToTermEndingBeforeUtf32(offset), false); current_term_ = base_iterator_->GetTerm(); while (!IsValidTerm(current_term_)) { // Haven't found a valid term yet. Retrieve the term prior to this one // from the segmenter. ICING_ASSIGN_OR_RETURN( - offset, base_iterator_->ResetToTermEndingBefore(offset), false); + offset, base_iterator_->ResetToTermEndingBeforeUtf32(offset), false); current_term_ = base_iterator_->GetTerm(); } return true; } bool ResetToStart() override { - if (!base_iterator_->ResetToStart().ok()) { + if (!base_iterator_->ResetToStartUtf32().ok()) { return false; } current_term_ = base_iterator_->GetTerm(); diff --git a/icing/tokenization/plain-tokenizer_test.cc b/icing/tokenization/plain-tokenizer_test.cc index f578567..7490bfa 100644 --- a/icing/tokenization/plain-tokenizer_test.cc +++ b/icing/tokenization/plain-tokenizer_test.cc @@ -19,9 +19,10 @@ #include "gmock/gmock.h" #include "icing/absl_ports/str_cat.h" #include "icing/helpers/icu/icu-data-file-helper.h" +#include "icing/portable/platform.h" #include "icing/testing/common-matchers.h" #include "icing/testing/icu-i18n-test-utils.h" -#include "icing/testing/platform.h" +#include "icing/testing/jni-test-helpers.h" #include "icing/testing/test-data.h" #include "icing/tokenization/language-segmenter-factory.h" #include "icing/tokenization/tokenizer-factory.h" @@ -43,6 +44,8 @@ class PlainTokenizerTest : public ::testing::Test { GetTestFilePath("icing/icu.dat"))); } } + + std::unique_ptr<const JniCache> jni_cache_ = GetTestJniCache(); }; TEST_F(PlainTokenizerTest, 
CreationWithNullPointerShouldFail) { @@ -53,7 +56,8 @@ TEST_F(PlainTokenizerTest, CreationWithNullPointerShouldFail) { } TEST_F(PlainTokenizerTest, Simple) { - language_segmenter_factory::SegmenterOptions options(ULOC_US); + language_segmenter_factory::SegmenterOptions options(ULOC_US, + jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); @@ -87,7 +91,8 @@ TEST_F(PlainTokenizerTest, Simple) { } TEST_F(PlainTokenizerTest, Whitespace) { - language_segmenter_factory::SegmenterOptions options(ULOC_US); + language_segmenter_factory::SegmenterOptions options(ULOC_US, + jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); @@ -115,7 +120,8 @@ TEST_F(PlainTokenizerTest, Whitespace) { } TEST_F(PlainTokenizerTest, Punctuation) { - language_segmenter_factory::SegmenterOptions options(ULOC_US); + language_segmenter_factory::SegmenterOptions options(ULOC_US, + jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); @@ -161,7 +167,8 @@ TEST_F(PlainTokenizerTest, Punctuation) { } TEST_F(PlainTokenizerTest, SpecialCharacters) { - language_segmenter_factory::SegmenterOptions options(ULOC_US); + language_segmenter_factory::SegmenterOptions options(ULOC_US, + jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); @@ -187,7 +194,8 @@ TEST_F(PlainTokenizerTest, CJKT) { // In plain tokenizer, CJKT characters are handled the same way as non-CJKT // characters, just add these tests as sanity checks. 
// Chinese - language_segmenter_factory::SegmenterOptions options(ULOC_SIMPLIFIED_CHINESE); + language_segmenter_factory::SegmenterOptions options(ULOC_SIMPLIFIED_CHINESE, + jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); @@ -202,7 +210,8 @@ TEST_F(PlainTokenizerTest, CJKT) { EqualsToken(Token::REGULAR, "去"), EqualsToken(Token::REGULAR, "上班")))); // Japanese - options = language_segmenter_factory::SegmenterOptions(ULOC_JAPANESE); + options = language_segmenter_factory::SegmenterOptions(ULOC_JAPANESE, + jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( language_segmenter, language_segmenter_factory::Create(std::move(options))); @@ -272,7 +281,8 @@ TEST_F(PlainTokenizerTest, CJKT) { } TEST_F(PlainTokenizerTest, ResetToTokenAfterSimple) { - language_segmenter_factory::SegmenterOptions options(ULOC_US); + language_segmenter_factory::SegmenterOptions options(ULOC_US, + jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); @@ -291,7 +301,8 @@ TEST_F(PlainTokenizerTest, ResetToTokenAfterSimple) { } TEST_F(PlainTokenizerTest, ResetToTokenBeforeSimple) { - language_segmenter_factory::SegmenterOptions options(ULOC_US); + language_segmenter_factory::SegmenterOptions options(ULOC_US, + jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); @@ -310,7 +321,8 @@ TEST_F(PlainTokenizerTest, ResetToTokenBeforeSimple) { } TEST_F(PlainTokenizerTest, ResetToTokenAfter) { - language_segmenter_factory::SegmenterOptions options(ULOC_US); + language_segmenter_factory::SegmenterOptions options(ULOC_US, + jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); @@ -360,7 +372,8 @@ TEST_F(PlainTokenizerTest, ResetToTokenAfter) { } TEST_F(PlainTokenizerTest, ResetToTokenBefore) { - 
language_segmenter_factory::SegmenterOptions options(ULOC_US); + language_segmenter_factory::SegmenterOptions options(ULOC_US, + jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); diff --git a/icing/tokenization/raw-query-tokenizer_test.cc b/icing/tokenization/raw-query-tokenizer_test.cc index e1a666b..500efa0 100644 --- a/icing/tokenization/raw-query-tokenizer_test.cc +++ b/icing/tokenization/raw-query-tokenizer_test.cc @@ -17,8 +17,8 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "icing/helpers/icu/icu-data-file-helper.h" +#include "icing/portable/platform.h" #include "icing/testing/common-matchers.h" -#include "icing/testing/platform.h" #include "icing/testing/test-data.h" #include "icing/tokenization/language-segmenter-factory.h" #include "icing/tokenization/tokenizer-factory.h" diff --git a/icing/jni/reverse-jni-break-iterator.cc b/icing/tokenization/reverse_jni/reverse-jni-break-iterator.cc index 1a8a799..6b1cb3a 100644 --- a/icing/jni/reverse-jni-break-iterator.cc +++ b/icing/tokenization/reverse_jni/reverse-jni-break-iterator.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "icing/jni/reverse-jni-break-iterator.h" +#include "icing/tokenization/reverse_jni/reverse-jni-break-iterator.h" #include <jni.h> #include <math.h> diff --git a/icing/jni/reverse-jni-break-iterator.h b/icing/tokenization/reverse_jni/reverse-jni-break-iterator.h index c1f05f4..41b470c 100644 --- a/icing/jni/reverse-jni-break-iterator.h +++ b/icing/tokenization/reverse_jni/reverse-jni-break-iterator.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef ICING_JNI_REVERSE_JNI_BREAK_ITERATOR_H_ -#define ICING_JNI_REVERSE_JNI_BREAK_ITERATOR_H_ +#ifndef ICING_TOKENIZATION_REVERSE_JNI_REVERSE_JNI_BREAK_ITERATOR_H_ +#define ICING_TOKENIZATION_REVERSE_JNI_REVERSE_JNI_BREAK_ITERATOR_H_ #include <jni.h> @@ -121,4 +121,4 @@ class ReverseJniBreakIterator { } // namespace lib } // namespace icing -#endif // ICING_JNI_REVERSE_JNI_BREAK_ITERATOR_H_ +#endif // ICING_TOKENIZATION_REVERSE_JNI_REVERSE_JNI_BREAK_ITERATOR_H_ diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test-jni-layer.cc b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test-jni-layer.cc new file mode 100644 index 0000000..5f5202c --- /dev/null +++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test-jni-layer.cc @@ -0,0 +1,37 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <jni.h> + +#include "gtest/gtest.h" +#include "icing/testing/logging-event-listener.h" + +// Global variable used so that the test implementation can access the JNIEnv. 
+JNIEnv* g_jenv = nullptr; + +extern "C" JNIEXPORT jboolean JNICALL +Java_icing_jni_ReverseJniLanguageSegmenterJniTest_testsMain(JNIEnv* env, + jclass ignored) { + g_jenv = env; + + std::vector<char*> my_argv; + char arg[] = "jni-test-lib"; + my_argv.push_back(arg); + int argc = 1; + char** argv = &(my_argv[0]); + testing::InitGoogleTest(&argc, argv); + testing::UnitTest::GetInstance()->listeners().Append( + new icing::lib::LoggingEventListener()); + return RUN_ALL_TESTS() == 0; +} diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.h b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.h deleted file mode 100644 index 64b68ec..0000000 --- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (C) 2019 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef ICING_TOKENIZATION_REVERSE_JNI_REVERSE_JNI_LANGUAGE_SEGMENTER_TEST_H_ -#define ICING_TOKENIZATION_REVERSE_JNI_REVERSE_JNI_LANGUAGE_SEGMENTER_TEST_H_ - -#include <jni.h> - -#include "icing/jni/jni-cache.h" -#include "gtest/gtest.h" - -extern JNIEnv* g_jenv; - -namespace icing { -namespace lib { - -namespace test_internal { - -class ReverseJniLanguageSegmenterTest - : public testing::TestWithParam<const char*> { - protected: - ReverseJniLanguageSegmenterTest() - : jni_cache_(std::move(JniCache::Create(g_jenv)).ValueOrDie()) {} - - static std::string GetLocale() { return GetParam(); } - - std::unique_ptr<JniCache> jni_cache_; -}; - -} // namespace test_internal - -} // namespace lib -} // namespace icing - -#endif // ICING_TOKENIZATION_REVERSE_JNI_REVERSE_JNI_LANGUAGE_SEGMENTER_TEST_H_ diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc index bb26364..76219b5 100644 --- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc +++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc @@ -19,11 +19,11 @@ #include <string> #include <string_view> -#include "icing/jni/reverse-jni-break-iterator.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/absl_ports/canonical_errors.h" #include "icing/legacy/core/icing-string-util.h" #include "icing/tokenization/language-segmenter.h" +#include "icing/tokenization/reverse_jni/reverse-jni-break-iterator.h" #include "icing/util/character-iterator.h" #include "icing/util/i18n-utils.h" #include "icing/util/status-macros.h" @@ -44,13 +44,13 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator { // Advances to the next term. Returns false if it has reached the end. 
bool Advance() override { // Prerequisite check - if (term_end_exclusive_.utf16_index() == ReverseJniBreakIterator::kDone) { + if (IsDone()) { return false; } if (term_end_exclusive_.utf16_index() == 0) { int first = break_iterator_->First(); - if (!term_start_.AdvanceToUtf16(first)) { + if (!term_start_.MoveToUtf16(first)) { // First is guaranteed to succeed and return a position within bonds. So // the only possible failure could be an invalid sequence. Mark as DONE // and return. @@ -67,7 +67,7 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator { MarkAsDone(); return false; } - if (!term_end_exclusive_.AdvanceToUtf16(next_utf16_index_exclusive)) { + if (!term_end_exclusive_.MoveToUtf16(next_utf16_index_exclusive)) { // next_utf16_index_exclusive is guaranteed to be within bonds thanks to // the check for kDone above. So the only possible failure could be an // invalid sequence. Mark as DONE and return. @@ -87,6 +87,9 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator { // Returns the current term. It can be called only when Advance() returns // true. std::string_view GetTerm() const override { + if (IsDone()) { + return text_.substr(0, 0); + } int term_length = term_end_exclusive_.utf8_index() - term_start_.utf8_index(); if (term_length > 0 && std::isspace(text_[term_start_.utf8_index()])) { @@ -96,6 +99,16 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator { return text_.substr(term_start_.utf8_index(), term_length); } + libtextclassifier3::StatusOr<CharacterIterator> CalculateTermStart() + override { + return term_start_; + } + + libtextclassifier3::StatusOr<CharacterIterator> CalculateTermEndExclusive() + override { + return term_end_exclusive_; + } + // Resets the iterator to point to the first term that starts after offset. // GetTerm will now return that term. 
// @@ -107,15 +120,14 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator { // INVALID_ARGUMENT if offset is out of bounds for the provided text. // ABORTED if an invalid unicode character is encountered while // traversing the text. - libtextclassifier3::StatusOr<int32_t> ResetToTermStartingAfter( + libtextclassifier3::StatusOr<int32_t> ResetToTermStartingAfterUtf32( int32_t offset) override { - if (offset < 0 || offset >= text_.length()) { - return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( - "Illegal offset provided! Offset %d is not within bounds of string " - "of length %zu", - offset, text_.length())); + if (offset < 0) { + // Very simple. The first term start after a negative offset is the first + // term. So just reset to start. + return ResetToStartUtf32(); } - if (term_end_exclusive_.utf16_index() == ReverseJniBreakIterator::kDone) { + if (IsDone()) { // We're done. Need to start from the beginning if we're going to reset // properly. term_start_ = CharacterIterator(text_); @@ -123,43 +135,48 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator { } // 1. Find the unicode character that contains the byte at offset. - CharacterIterator offset_iterator = term_end_exclusive_; - bool success = (offset > offset_iterator.utf8_index()) - ? offset_iterator.AdvanceToUtf8(offset) - : offset_iterator.RewindToUtf8(offset); - if (!success) { - // Offset is guaranteed to be within bounds thanks to the check above. So - // the only possible failure could be an invalid sequence. Mark as DONE - // and return. - MarkAsDone(); - return absl_ports::AbortedError("Encountered invalid UTF sequence!"); + CharacterIterator offset_iterator = (offset < term_start_.utf32_index()) + ? term_start_ + : term_end_exclusive_; + if (!offset_iterator.MoveToUtf32(offset)) { + if (offset_iterator.utf8_index() != text_.length()) { + // We returned false for some reason other than hitting the end. 
This is + // a real error. Just return. + MarkAsDone(); + return absl_ports::AbortedError( + "Could not retrieve valid utf8 character!"); + } + } + // Check to see if offset is past the end of the text. If it is, then + // there's no term starting after it. Return an invalid argument. + if (offset_iterator.utf8_index() == text_.length()) { + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "Illegal offset provided! Offset utf-32:%d, utf-8:%d is not within " + "bounds of string of length %zu", + offset_iterator.utf32_index(), offset_iterator.utf8_index(), + text_.length())); } // 2. We've got the unicode character containing byte offset. Now, we need // to point to the segment that starts after this character. int following_utf16_index = break_iterator_->Following(offset_iterator.utf16_index()); - if (following_utf16_index == ReverseJniBreakIterator::kDone) { + if (following_utf16_index == ReverseJniBreakIterator::kDone || + !offset_iterator.MoveToUtf16(following_utf16_index)) { MarkAsDone(); return absl_ports::NotFoundError(IcingStringUtil::StringPrintf( "No segments begin after provided offset %d.", offset)); } - if (!offset_iterator.AdvanceToUtf16(following_utf16_index)) { - // following_utf16_index is guaranteed to be within bonds thanks to the - // check for kDone above. So the only possible failure could be an invalid - // sequence. Mark as DONE and return. - MarkAsDone(); - return absl_ports::AbortedError("Encountered invalid UTF sequence!"); - } term_end_exclusive_ = offset_iterator; - // 3. The term_end_exclusive_ points to the term that we want to return. We - // need to Advance so that term_start_ will now point to this term. + // 3. The term_end_exclusive_ points to the start of the term that we want + // to return. We need to Advance so that term_start_ will now point to this + // term. 
if (!Advance()) { return absl_ports::NotFoundError(IcingStringUtil::StringPrintf( "No segments begin after provided offset %d.", offset)); } - return term_start_.utf8_index(); + return term_start_.utf32_index(); } // Resets the iterator to point to the first term that ends before offset. @@ -173,52 +190,48 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator { // INVALID_ARGUMENT if offset is out of bounds for the provided text. // ABORTED if an invalid unicode character is encountered while // traversing the text. - libtextclassifier3::StatusOr<int32_t> ResetToTermEndingBefore( + libtextclassifier3::StatusOr<int32_t> ResetToTermEndingBeforeUtf32( int32_t offset) override { - if (offset < 0 || offset >= text_.length()) { + if (offset < 0) { return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( "Illegal offset provided! Offset %d is not within bounds of string " "of length %zu", offset, text_.length())); } - if (term_end_exclusive_.utf16_index() == ReverseJniBreakIterator::kDone) { + if (IsDone()) { // We're done. Need to start from the beginning if we're going to reset // properly. term_start_ = CharacterIterator(text_); term_end_exclusive_ = CharacterIterator(text_); } - // 1. Find the unicode character that contains the byte at offset. - CharacterIterator offset_iterator = term_end_exclusive_; - bool success = (offset > offset_iterator.utf8_index()) - ? offset_iterator.AdvanceToUtf8(offset) - : offset_iterator.RewindToUtf8(offset); - if (!success) { - // Offset is guaranteed to be within bounds thanks to the check above. So - // the only possible failure could be an invalid sequence. Mark as DONE - // and return. - MarkAsDone(); - return absl_ports::AbortedError( - "Could not retrieve valid utf8 character!"); + CharacterIterator offset_iterator = (offset < term_start_.utf32_index()) + ? term_start_ + : term_end_exclusive_; + if (!offset_iterator.MoveToUtf32(offset)) { + // An error occurred. 
Mark as DONE + if (offset_iterator.utf8_index() != text_.length()) { + // We returned false for some reason other than hitting the end. This is + // a real error. Just return. + MarkAsDone(); + return absl_ports::AbortedError( + "Could not retrieve valid utf8 character!"); + } + // If it returned false because we hit the end. Then that's fine. We'll + // just treat it as if the request was for the end. } // 2. We've got the unicode character containing byte offset. Now, we need - // to point to the segment that starts before this character. + // to point to the segment that ends before this character. int starting_utf16_index = break_iterator_->Preceding(offset_iterator.utf16_index()); - if (starting_utf16_index == ReverseJniBreakIterator::kDone) { + if (starting_utf16_index == ReverseJniBreakIterator::kDone || + !offset_iterator.MoveToUtf16(starting_utf16_index)) { // Rewind the end indices. MarkAsDone(); return absl_ports::NotFoundError(IcingStringUtil::StringPrintf( "No segments end before provided offset %d.", offset)); } - if (!offset_iterator.RewindToUtf16(starting_utf16_index)) { - // starting_utf16_index is guaranteed to be within bonds thanks to the - // check for kDone above. So the only possible failure could be an invalid - // sequence. Mark as DONE and return. - MarkAsDone(); - return absl_ports::AbortedError("Encountered invalid UTF sequence!"); - } term_start_ = offset_iterator; // 3. We've correctly set the start index and the iterator currently points @@ -226,24 +239,25 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator { // advance the iterator to that position. int end_utf16_index = break_iterator_->Next(); term_end_exclusive_ = term_start_; - term_end_exclusive_.AdvanceToUtf16(end_utf16_index); + term_end_exclusive_.MoveToUtf16(end_utf16_index); // 4. The start and end indices point to a segment, but we need to ensure // that this segment is 1) valid and 2) ends before offset. 
Otherwise, we'll // need a segment prior to this one. - if (term_end_exclusive_.utf8_index() > offset || !IsValidTerm()) { - return ResetToTermEndingBefore(term_start_.utf8_index()); + if (term_end_exclusive_.utf32_index() > offset || !IsValidTerm()) { + return ResetToTermEndingBeforeUtf32(term_start_.utf32_index()); } - return term_start_.utf8_index(); + return term_start_.utf32_index(); } - libtextclassifier3::StatusOr<int32_t> ResetToStart() override { + libtextclassifier3::StatusOr<int32_t> ResetToStartUtf32() override { term_start_ = CharacterIterator(text_); term_end_exclusive_ = CharacterIterator(text_); if (!Advance()) { - return absl_ports::NotFoundError(""); + return absl_ports::NotFoundError( + "Unable to find any valid terms in text."); } - return term_start_.utf8_index(); + return term_start_.utf32_index(); } private: @@ -255,11 +269,19 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator { // break_iterator_ may be in any state. void MarkAsDone() { term_start_ = - CharacterIterator(text_, /*utf8_index=*/0, - /*utf16_index=*/ReverseJniBreakIterator::kDone); + CharacterIterator(text_, /*utf8_index=*/ReverseJniBreakIterator::kDone, + /*utf16_index=*/ReverseJniBreakIterator::kDone, + /*utf32_index=*/ReverseJniBreakIterator::kDone); term_end_exclusive_ = - CharacterIterator(text_, /*utf8_index=*/0, - /*utf16_index=*/ReverseJniBreakIterator::kDone); + CharacterIterator(text_, /*utf8_index=*/ReverseJniBreakIterator::kDone, + /*utf16_index=*/ReverseJniBreakIterator::kDone, + /*utf32_index=*/ReverseJniBreakIterator::kDone); + } + bool IsDone() const { + // We could just as easily check the other utf indices or the values in + // term_start_ to check for done. There's no particular reason to choose any + // one since they should all hold kDone. 
+ return term_end_exclusive_.utf16_index() == ReverseJniBreakIterator::kDone; } bool IsValidTerm() const { diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.cc b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc index 2c268ff..b1a8f72 100644 --- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.cc +++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc @@ -12,19 +12,22 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.h" +#include <jni.h> #include <memory> #include <string_view> +#include "icing/jni/jni-cache.h" #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "gmock/gmock.h" #include "icing/absl_ports/str_cat.h" #include "icing/testing/common-matchers.h" #include "icing/testing/icu-i18n-test-utils.h" +#include "icing/testing/jni-test-helpers.h" #include "icing/tokenization/language-segmenter-factory.h" #include "icing/tokenization/language-segmenter.h" +#include "icing/util/character-iterator.h" #include "unicode/uloc.h" namespace icing { @@ -54,72 +57,72 @@ std::vector<std::string_view> GetAllTermsAdvance( } // Returns a vector containing all terms retrieved by calling ResetAfter with -// the current position to simulate Advancing on the iterator. -std::vector<std::string_view> GetAllTermsResetAfter( +// the UTF-32 position of the current term start to simulate Advancing on the +// iterator. 
+std::vector<std::string_view> GetAllTermsResetAfterUtf32( LanguageSegmenter::Iterator* itr) { std::vector<std::string_view> terms; - if (!itr->ResetToStart().ok()) { - return terms; - } - terms.push_back(itr->GetTerm()); - const char* text_begin = itr->GetTerm().data(); - // Calling ResetToTermStartingAfter with the current position should get the - // very next term in the sequence. - for (int current_pos = 0; itr->ResetToTermStartingAfter(current_pos).ok(); - current_pos = itr->GetTerm().data() - text_begin) { + // Calling ResetToTermStartingAfterUtf32 with -1 should get the first term in + // the sequence. + bool is_ok = itr->ResetToTermStartingAfterUtf32(-1).ok(); + while (is_ok) { terms.push_back(itr->GetTerm()); + // Calling ResetToTermStartingAfterUtf32 with the current position should + // get the very next term in the sequence. + CharacterIterator char_itr = itr->CalculateTermStart().ValueOrDie(); + is_ok = itr->ResetToTermStartingAfterUtf32(char_itr.utf32_index()).ok(); } return terms; } // Returns a vector containing all terms retrieved by alternating calls to -// Advance and calls to ResetAfter with the current position to simulate -// Advancing. -std::vector<std::string_view> GetAllTermsAdvanceAndResetAfter( +// Advance and calls to ResetAfter with the UTF-32 position of the current term +// start to simulate Advancing. +std::vector<std::string_view> GetAllTermsAdvanceAndResetAfterUtf32( LanguageSegmenter::Iterator* itr) { - const char* text_begin = itr->GetTerm().data(); std::vector<std::string_view> terms; - - bool is_ok = true; - int current_pos = 0; + bool is_ok = itr->Advance(); while (is_ok) { + terms.push_back(itr->GetTerm()); // Alternate between using Advance and ResetToTermAfter. if (terms.size() % 2 == 0) { is_ok = itr->Advance(); } else { - // Calling ResetToTermStartingAfter with the current position should get - // the very next term in the sequence. 
- current_pos = itr->GetTerm().data() - text_begin; - is_ok = itr->ResetToTermStartingAfter(current_pos).ok(); - } - if (is_ok) { - terms.push_back(itr->GetTerm()); + // Calling ResetToTermStartingAfterUtf32 with the current position should + // get the very next term in the sequence. + CharacterIterator char_itr = itr->CalculateTermStart().ValueOrDie(); + is_ok = itr->ResetToTermStartingAfterUtf32(char_itr.utf32_index()).ok(); } } return terms; } // Returns a vector containing all terms retrieved by calling ResetBefore with -// the current position, starting at the end of the text. This vector should be -// in reverse order of GetAllTerms and missing the last term. -std::vector<std::string_view> GetAllTermsResetBefore( +// the UTF-32 position of the current term start, starting at the end of the +// text. This vector should be in reverse order of GetAllTerms and missing the +// last term. +std::vector<std::string_view> GetAllTermsResetBeforeUtf32( LanguageSegmenter::Iterator* itr) { - const char* text_begin = itr->GetTerm().data(); - int last_pos = 0; - while (itr->Advance()) { - last_pos = itr->GetTerm().data() - text_begin; - } std::vector<std::string_view> terms; - // Calling ResetToTermEndingBefore with the current position should get the - // previous term in the sequence. - for (int current_pos = last_pos; - itr->ResetToTermEndingBefore(current_pos).ok(); - current_pos = itr->GetTerm().data() - text_begin) { + bool is_ok = itr->ResetToTermEndingBeforeUtf32(1000).ok(); + while (is_ok) { terms.push_back(itr->GetTerm()); + // Calling ResetToTermEndingBeforeUtf32 with the current position should get + // the previous term in the sequence. 
+ CharacterIterator char_itr = itr->CalculateTermStart().ValueOrDie(); + is_ok = itr->ResetToTermEndingBeforeUtf32(char_itr.utf32_index()).ok(); } return terms; } +class ReverseJniLanguageSegmenterTest + : public testing::TestWithParam<const char*> { + protected: + static std::string GetLocale() { return GetParam(); } + + std::unique_ptr<const JniCache> jni_cache_ = GetTestJniCache(); +}; + } // namespace TEST_P(ReverseJniLanguageSegmenterTest, EmptyText) { @@ -471,7 +474,7 @@ TEST_P(ReverseJniLanguageSegmenterTest, NotCopyStrings) { EXPECT_THAT(word2_address, Eq(word2_result_address)); } -TEST_P(ReverseJniLanguageSegmenterTest, ResetToStartWordConnector) { +TEST_P(ReverseJniLanguageSegmenterTest, ResetToStartUtf32WordConnector) { ICING_ASSERT_OK_AND_ASSIGN( auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); @@ -479,15 +482,16 @@ TEST_P(ReverseJniLanguageSegmenterTest, ResetToStartWordConnector) { ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, segmenter->Segment(kText)); - // String: "com:google:android is package" - // ^ ^^ ^^ - // Bytes: 0 18 19 21 22 - auto position_or = itr->ResetToStart(); + // String: "com:google:android is package" + // ^ ^^ ^^ + // UTF-8 idx: 0 18 19 21 22 + // UTF-32 idx: 0 18 19 21 22 + auto position_or = itr->ResetToStartUtf32(); EXPECT_THAT(position_or, IsOk()); ASSERT_THAT(itr->GetTerm(), Eq("com:google:android")); } -TEST_P(ReverseJniLanguageSegmenterTest, NewIteratorResetToStart) { +TEST_P(ReverseJniLanguageSegmenterTest, NewIteratorResetToStartUtf32) { ICING_ASSERT_OK_AND_ASSIGN( auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); @@ -495,14 +499,15 @@ TEST_P(ReverseJniLanguageSegmenterTest, NewIteratorResetToStart) { ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, segmenter->Segment(kText)); - // String: "How are you你好吗お元気ですか" - // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ - // Bytes: 0 3 
4 7 8 11 172023 29 35 - EXPECT_THAT(itr->ResetToStart(), IsOkAndHolds(Eq(0))); + // String: "How are you你好吗お元気ですか" + // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 3 4 7 8 11 172023 29 35 + // UTF-32 idx: 0 3 4 7 8 11 131415 17 19 + EXPECT_THAT(itr->ResetToStartUtf32(), IsOkAndHolds(Eq(0))); EXPECT_THAT(itr->GetTerm(), Eq("How")); } -TEST_P(ReverseJniLanguageSegmenterTest, IteratorOneAdvanceResetToStart) { +TEST_P(ReverseJniLanguageSegmenterTest, IteratorOneAdvanceResetToStartUtf32) { ICING_ASSERT_OK_AND_ASSIGN( auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); @@ -510,15 +515,17 @@ TEST_P(ReverseJniLanguageSegmenterTest, IteratorOneAdvanceResetToStart) { ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, segmenter->Segment(kText)); - // String: "How are you你好吗お元気ですか" - // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ - // Bytes: 0 3 4 7 8 11 172023 29 35 + // String: "How are you你好吗お元気ですか" + // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 3 4 7 8 11 172023 29 35 + // UTF-32 idx: 0 3 4 7 8 11 131415 17 19 ASSERT_TRUE(itr->Advance()); // itr points to 'How' - EXPECT_THAT(itr->ResetToStart(), IsOkAndHolds(Eq(0))); + EXPECT_THAT(itr->ResetToStartUtf32(), IsOkAndHolds(Eq(0))); EXPECT_THAT(itr->GetTerm(), Eq("How")); } -TEST_P(ReverseJniLanguageSegmenterTest, IteratorMultipleAdvancesResetToStart) { +TEST_P(ReverseJniLanguageSegmenterTest, + IteratorMultipleAdvancesResetToStartUtf32) { ICING_ASSERT_OK_AND_ASSIGN( auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); @@ -526,18 +533,19 @@ TEST_P(ReverseJniLanguageSegmenterTest, IteratorMultipleAdvancesResetToStart) { ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, segmenter->Segment(kText)); - // String: "How are you你好吗お元気ですか" - // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ - // Bytes: 0 3 4 7 8 11 172023 29 35 + // String: "How are you你好吗お元気ですか" + // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 3 4 7 8 11 172023 29 35 + // 
UTF-32 idx: 0 3 4 7 8 11 131415 17 19 ASSERT_TRUE(itr->Advance()); ASSERT_TRUE(itr->Advance()); ASSERT_TRUE(itr->Advance()); ASSERT_TRUE(itr->Advance()); // itr points to ' ' - EXPECT_THAT(itr->ResetToStart(), IsOkAndHolds(Eq(0))); + EXPECT_THAT(itr->ResetToStartUtf32(), IsOkAndHolds(Eq(0))); EXPECT_THAT(itr->GetTerm(), Eq("How")); } -TEST_P(ReverseJniLanguageSegmenterTest, IteratorDoneResetToStart) { +TEST_P(ReverseJniLanguageSegmenterTest, IteratorDoneResetToStartUtf32) { ICING_ASSERT_OK_AND_ASSIGN( auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); @@ -545,17 +553,18 @@ TEST_P(ReverseJniLanguageSegmenterTest, IteratorDoneResetToStart) { ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, segmenter->Segment(kText)); - // String: "How are you你好吗お元気ですか" - // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ - // Bytes: 0 3 4 7 8 11 172023 29 35 + // String: "How are you你好吗お元気ですか" + // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 3 4 7 8 11 172023 29 35 + // UTF-32 idx: 0 3 4 7 8 11 131415 17 19 while (itr->Advance()) { // Do nothing. 
} - EXPECT_THAT(itr->ResetToStart(), IsOkAndHolds(Eq(0))); + EXPECT_THAT(itr->ResetToStartUtf32(), IsOkAndHolds(Eq(0))); EXPECT_THAT(itr->GetTerm(), Eq("How")); } -TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermAfterWordConnector) { +TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermAfterUtf32WordConnector) { ICING_ASSERT_OK_AND_ASSIGN( auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); @@ -563,21 +572,22 @@ TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermAfterWordConnector) { ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, segmenter->Segment(kText)); - // String: "package com:google:android name" - // ^ ^^ ^^ - // Bytes: 0 7 8 26 27 - auto position_or = itr->ResetToTermStartingAfter(8); + // String: "package com:google:android name" + // ^ ^^ ^^ + // UTF-8 idx: 0 7 8 26 27 + // UTF-32 idx: 0 7 8 26 27 + auto position_or = itr->ResetToTermStartingAfterUtf32(8); EXPECT_THAT(position_or, IsOk()); EXPECT_THAT(position_or.ValueOrDie(), Eq(26)); ASSERT_THAT(itr->GetTerm(), Eq(" ")); - position_or = itr->ResetToTermStartingAfter(7); + position_or = itr->ResetToTermStartingAfterUtf32(7); EXPECT_THAT(position_or, IsOk()); EXPECT_THAT(position_or.ValueOrDie(), Eq(8)); ASSERT_THAT(itr->GetTerm(), Eq("com:google:android")); } -TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermAfterOutOfBounds) { +TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermAfterUtf32OutOfBounds) { ICING_ASSERT_OK_AND_ASSIGN( auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); @@ -585,19 +595,19 @@ TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermAfterOutOfBounds) { ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, segmenter->Segment(kText)); - // String: "How are you你好吗お元気ですか" - // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ - // Bytes: 0 3 4 7 8 11 172023 29 35 - ASSERT_THAT(itr->ResetToTermStartingAfter(7), IsOkAndHolds(Eq(8))); + // String: "How are 
you你好吗お元気ですか" + // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 3 4 7 8 11 172023 29 35 + // UTF-32 idx: 0 3 4 7 8 11 131415 17 19 + ASSERT_THAT(itr->ResetToTermStartingAfterUtf32(7), IsOkAndHolds(Eq(8))); ASSERT_THAT(itr->GetTerm(), Eq("you")); - EXPECT_THAT(itr->ResetToTermStartingAfter(-1), - StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); - EXPECT_THAT(itr->GetTerm(), Eq("you")); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(-1), IsOk()); + EXPECT_THAT(itr->GetTerm(), Eq("How")); - EXPECT_THAT(itr->ResetToTermStartingAfter(kText.length()), + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(21), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); - EXPECT_THAT(itr->GetTerm(), Eq("you")); + EXPECT_THAT(itr->GetTerm(), Eq("How")); } // Tests that ResetToTermAfter and Advance produce the same output. With the @@ -606,7 +616,7 @@ TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermAfterOutOfBounds) { // terms produced by ResetToTermAfter calls with the current position // provided as the argument. 
TEST_P(ReverseJniLanguageSegmenterTest, - MixedLanguagesResetToTermAfterEquivalentToAdvance) { + MixedLanguagesResetToTermAfterUtf32EquivalentToAdvance) { ICING_ASSERT_OK_AND_ASSIGN( auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); @@ -621,14 +631,14 @@ TEST_P(ReverseJniLanguageSegmenterTest, std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, segmenter->Segment(kText)); std::vector<std::string_view> reset_terms = - GetAllTermsResetAfter(reset_to_term_itr.get()); + GetAllTermsResetAfterUtf32(reset_to_term_itr.get()); EXPECT_THAT(reset_terms, testing::ElementsAreArray(advance_terms)); EXPECT_THAT(reset_to_term_itr->GetTerm(), Eq(advance_itr->GetTerm())); } TEST_P(ReverseJniLanguageSegmenterTest, - ThaiResetToTermAfterEquivalentToAdvance) { + ThaiResetToTermAfterUtf32EquivalentToAdvance) { ICING_ASSERT_OK_AND_ASSIGN( auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); @@ -643,14 +653,14 @@ TEST_P(ReverseJniLanguageSegmenterTest, std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, segmenter->Segment(kThai)); std::vector<std::string_view> reset_terms = - GetAllTermsResetAfter(reset_to_term_itr.get()); + GetAllTermsResetAfterUtf32(reset_to_term_itr.get()); EXPECT_THAT(reset_terms, testing::ElementsAreArray(advance_terms)); EXPECT_THAT(reset_to_term_itr->GetTerm(), Eq(advance_itr->GetTerm())); } TEST_P(ReverseJniLanguageSegmenterTest, - KoreanResetToTermAfterEquivalentToAdvance) { + KoreanResetToTermAfterUtf32EquivalentToAdvance) { ICING_ASSERT_OK_AND_ASSIGN( auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); @@ -665,7 +675,7 @@ TEST_P(ReverseJniLanguageSegmenterTest, std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, segmenter->Segment(kKorean)); std::vector<std::string_view> reset_terms = - GetAllTermsResetAfter(reset_to_term_itr.get()); + 
GetAllTermsResetAfterUtf32(reset_to_term_itr.get()); EXPECT_THAT(reset_terms, testing::ElementsAreArray(advance_terms)); EXPECT_THAT(reset_to_term_itr->GetTerm(), Eq(advance_itr->GetTerm())); @@ -676,7 +686,7 @@ TEST_P(ReverseJniLanguageSegmenterTest, // should be able to mix ResetToTermAfter(current_position) calls and Advance // calls to mimic calling Advance. TEST_P(ReverseJniLanguageSegmenterTest, - MixedLanguagesResetToTermAfterInteroperableWithAdvance) { + MixedLanguagesResetToTermAfterUtf32InteroperableWithAdvance) { ICING_ASSERT_OK_AND_ASSIGN( auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); @@ -691,7 +701,7 @@ TEST_P(ReverseJniLanguageSegmenterTest, std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr, segmenter->Segment(kText)); std::vector<std::string_view> advance_and_reset_terms = - GetAllTermsAdvanceAndResetAfter(advance_and_reset_itr.get()); + GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get()); EXPECT_THAT(advance_and_reset_terms, testing::ElementsAreArray(advance_terms)); @@ -699,7 +709,7 @@ TEST_P(ReverseJniLanguageSegmenterTest, } TEST_P(ReverseJniLanguageSegmenterTest, - ThaiResetToTermAfterInteroperableWithAdvance) { + ThaiResetToTermAfterUtf32InteroperableWithAdvance) { ICING_ASSERT_OK_AND_ASSIGN( auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); @@ -714,7 +724,7 @@ TEST_P(ReverseJniLanguageSegmenterTest, std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr, segmenter->Segment(kThai)); std::vector<std::string_view> advance_and_reset_terms = - GetAllTermsAdvanceAndResetAfter(advance_and_reset_itr.get()); + GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get()); EXPECT_THAT(advance_and_reset_terms, testing::ElementsAreArray(advance_terms)); @@ -722,7 +732,7 @@ TEST_P(ReverseJniLanguageSegmenterTest, } TEST_P(ReverseJniLanguageSegmenterTest, - 
KoreanResetToTermAfterInteroperableWithAdvance) { + KoreanResetToTermAfterUtf32InteroperableWithAdvance) { ICING_ASSERT_OK_AND_ASSIGN( auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); @@ -737,14 +747,14 @@ TEST_P(ReverseJniLanguageSegmenterTest, std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr, segmenter->Segment(kKorean)); std::vector<std::string_view> advance_and_reset_terms = - GetAllTermsAdvanceAndResetAfter(advance_and_reset_itr.get()); + GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get()); EXPECT_THAT(advance_and_reset_terms, testing::ElementsAreArray(advance_terms)); EXPECT_THAT(advance_and_reset_itr->GetTerm(), Eq(advance_itr->GetTerm())); } -TEST_P(ReverseJniLanguageSegmenterTest, MixedLanguagesResetToTermAfter) { +TEST_P(ReverseJniLanguageSegmenterTest, MixedLanguagesResetToTermAfterUtf32) { ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create( @@ -753,33 +763,35 @@ TEST_P(ReverseJniLanguageSegmenterTest, MixedLanguagesResetToTermAfter) { std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment("How are you你好吗お元気ですか")); - // String: "How are you你好吗お元気ですか" - // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ - // Bytes: 0 3 4 7 8 11 172023 29 35 - EXPECT_THAT(itr->ResetToTermStartingAfter(2), IsOkAndHolds(Eq(3))); + // String: "How are you你好吗お元気ですか" + // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 3 4 7 8 11 172023 29 35 + // UTF-32 idx: 0 3 4 7 8 11 131415 17 19 + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(2), IsOkAndHolds(Eq(3))); EXPECT_THAT(itr->GetTerm(), Eq(" ")); - EXPECT_THAT(itr->ResetToTermStartingAfter(10), IsOkAndHolds(Eq(11))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(10), IsOkAndHolds(Eq(11))); EXPECT_THAT(itr->GetTerm(), Eq("你好")); - EXPECT_THAT(itr->ResetToTermStartingAfter(7), IsOkAndHolds(Eq(8))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(7), IsOkAndHolds(Eq(8))); EXPECT_THAT(itr->GetTerm(), Eq("you")); - 
EXPECT_THAT(itr->ResetToTermStartingAfter(32), IsOkAndHolds(Eq(35))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(18), IsOkAndHolds(Eq(19))); EXPECT_THAT(itr->GetTerm(), Eq("か")); - EXPECT_THAT(itr->ResetToTermStartingAfter(14), IsOkAndHolds(Eq(17))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(12), IsOkAndHolds(Eq(13))); EXPECT_THAT(itr->GetTerm(), Eq("吗")); - EXPECT_THAT(itr->ResetToTermStartingAfter(0), IsOkAndHolds(Eq(3))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(3))); EXPECT_THAT(itr->GetTerm(), Eq(" ")); - EXPECT_THAT(itr->ResetToTermStartingAfter(35), + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(19), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); } -TEST_P(ReverseJniLanguageSegmenterTest, ContinuousWhitespacesResetToTermAfter) { +TEST_P(ReverseJniLanguageSegmenterTest, + ContinuousWhitespacesResetToTermAfterUtf32) { ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create( @@ -789,35 +801,36 @@ TEST_P(ReverseJniLanguageSegmenterTest, ContinuousWhitespacesResetToTermAfter) { ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kTextWithSpace)); - // String: "Hello World" - // ^ ^ ^ - // Bytes: 0 5 15 - EXPECT_THAT(itr->ResetToTermStartingAfter(0), IsOkAndHolds(Eq(5))); + // String: "Hello World" + // ^ ^ ^ + // UTF-8 idx: 0 5 15 + // UTF-32 idx: 0 5 15 + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(5))); EXPECT_THAT(itr->GetTerm(), Eq(" ")); - EXPECT_THAT(itr->ResetToTermStartingAfter(2), IsOkAndHolds(Eq(5))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(2), IsOkAndHolds(Eq(5))); EXPECT_THAT(itr->GetTerm(), Eq(" ")); - EXPECT_THAT(itr->ResetToTermStartingAfter(10), IsOkAndHolds(Eq(15))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(10), IsOkAndHolds(Eq(15))); EXPECT_THAT(itr->GetTerm(), Eq("World")); - EXPECT_THAT(itr->ResetToTermStartingAfter(5), 
IsOkAndHolds(Eq(15))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(5), IsOkAndHolds(Eq(15))); EXPECT_THAT(itr->GetTerm(), Eq("World")); - EXPECT_THAT(itr->ResetToTermStartingAfter(15), + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(15), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermStartingAfter(17), + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(17), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermStartingAfter(19), + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(19), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); } -TEST_P(ReverseJniLanguageSegmenterTest, ChineseResetToTermAfter) { +TEST_P(ReverseJniLanguageSegmenterTest, ChineseResetToTermAfterUtf32) { ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create( @@ -827,21 +840,22 @@ TEST_P(ReverseJniLanguageSegmenterTest, ChineseResetToTermAfter) { constexpr std::string_view kChinese = "我每天走路去上班。"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kChinese)); - // String: "我每天走路去上班。" - // ^ ^ ^ ^^ - // Bytes: 0 3 9 15 18 - EXPECT_THAT(itr->ResetToTermStartingAfter(0), IsOkAndHolds(Eq(3))); + // String: "我每天走路去上班。" + // ^ ^ ^ ^^ + // UTF-8 idx: 0 3 9 15 18 + // UTF-832 idx: 0 1 3 5 6 + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(1))); EXPECT_THAT(itr->GetTerm(), Eq("每天")); - EXPECT_THAT(itr->ResetToTermStartingAfter(7), IsOkAndHolds(Eq(9))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(2), IsOkAndHolds(Eq(3))); EXPECT_THAT(itr->GetTerm(), Eq("走路")); - EXPECT_THAT(itr->ResetToTermStartingAfter(19), + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(7), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); } -TEST_P(ReverseJniLanguageSegmenterTest, 
JapaneseResetToTermAfter) { +TEST_P(ReverseJniLanguageSegmenterTest, JapaneseResetToTermAfterUtf32) { ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create( @@ -850,21 +864,22 @@ TEST_P(ReverseJniLanguageSegmenterTest, JapaneseResetToTermAfter) { constexpr std::string_view kJapanese = "私は毎日仕事に歩いています。"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kJapanese)); - // String: "私は毎日仕事に歩いています。" - // ^ ^ ^ ^ ^ ^ ^ ^ ^ - // Bytes: 0 3 6 12 18212427 33 - EXPECT_THAT(itr->ResetToTermStartingAfter(0), IsOkAndHolds(Eq(3))); + // String: "私は毎日仕事に歩いています。" + // ^ ^ ^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 3 6 12 18212427 33 + // UTF-32 idx: 0 1 2 4 6 7 8 9 11 + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(1))); EXPECT_THAT(itr->GetTerm(), Eq("は")); - EXPECT_THAT(itr->ResetToTermStartingAfter(33), + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(11), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermStartingAfter(7), IsOkAndHolds(Eq(12))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(3), IsOkAndHolds(Eq(4))); EXPECT_THAT(itr->GetTerm(), Eq("仕事")); } -TEST_P(ReverseJniLanguageSegmenterTest, KhmerResetToTermAfter) { +TEST_P(ReverseJniLanguageSegmenterTest, KhmerResetToTermAfterUtf32) { ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create( @@ -872,21 +887,22 @@ TEST_P(ReverseJniLanguageSegmenterTest, KhmerResetToTermAfter) { constexpr std::string_view kKhmer = "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kKhmer)); - // String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។" - // ^ ^ ^ ^ - // Bytes: 0 9 24 45 - EXPECT_THAT(itr->ResetToTermStartingAfter(0), IsOkAndHolds(Eq(9))); + // String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។" + // ^ ^ ^ ^ + // UTF-8 idx: 0 9 24 45 + // UTF-32 idx: 0 3 8 15 + 
EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(3))); EXPECT_THAT(itr->GetTerm(), Eq("ដើរទៅ")); - EXPECT_THAT(itr->ResetToTermStartingAfter(47), + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(15), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermStartingAfter(14), IsOkAndHolds(Eq(24))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(6), IsOkAndHolds(Eq(8))); EXPECT_THAT(itr->GetTerm(), Eq("ធ្វើការ")); } -TEST_P(ReverseJniLanguageSegmenterTest, ThaiResetToTermAfter) { +TEST_P(ReverseJniLanguageSegmenterTest, ThaiResetToTermAfterUtf32) { ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create( @@ -895,24 +911,25 @@ TEST_P(ReverseJniLanguageSegmenterTest, ThaiResetToTermAfter) { constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kThai)); - // String: "ฉันเดินไปทำงานทุกวัน" - // ^ ^ ^ ^ ^ ^ - // Bytes: 0 9 21 27 42 51 - EXPECT_THAT(itr->ResetToTermStartingAfter(0), IsOkAndHolds(Eq(9))); + // String: "ฉันเดินไปทำงานทุกวัน" + // ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 9 21 27 42 51 + // UTF-32 idx: 0 3 7 9 14 17 + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(3))); EXPECT_THAT(itr->GetTerm(), Eq("เดิน")); - EXPECT_THAT(itr->ResetToTermStartingAfter(51), + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(17), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermStartingAfter(13), IsOkAndHolds(Eq(21))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(6), IsOkAndHolds(Eq(7))); EXPECT_THAT(itr->GetTerm(), Eq("ไป")); - EXPECT_THAT(itr->ResetToTermStartingAfter(34), IsOkAndHolds(Eq(42))); + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(12), IsOkAndHolds(Eq(14))); EXPECT_THAT(itr->GetTerm(), Eq("ทุก")); } -TEST_P(ReverseJniLanguageSegmenterTest, 
ResetToTermBeforeWordConnector) { +TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermBeforeWordConnectorUtf32) { ICING_ASSERT_OK_AND_ASSIGN( auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); @@ -920,21 +937,22 @@ TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermBeforeWordConnector) { ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, segmenter->Segment(kText)); - // String: "package name com:google:android!" - // ^ ^^ ^^ ^ - // Bytes: 0 7 8 12 13 31 - auto position_or = itr->ResetToTermEndingBefore(31); + // String: "package name com:google:android!" + // ^ ^^ ^^ ^ + // UTF-8 idx: 0 7 8 12 13 31 + // UTF-32 idx: 0 7 8 12 13 31 + auto position_or = itr->ResetToTermEndingBeforeUtf32(31); EXPECT_THAT(position_or, IsOk()); EXPECT_THAT(position_or.ValueOrDie(), Eq(13)); ASSERT_THAT(itr->GetTerm(), Eq("com:google:android")); - position_or = itr->ResetToTermEndingBefore(21); + position_or = itr->ResetToTermEndingBeforeUtf32(21); EXPECT_THAT(position_or, IsOk()); EXPECT_THAT(position_or.ValueOrDie(), Eq(12)); ASSERT_THAT(itr->GetTerm(), Eq(" ")); } -TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermBeforeOutOfBounds) { +TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermBeforeOutOfBoundsUtf32) { ICING_ASSERT_OK_AND_ASSIGN( auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); @@ -942,19 +960,19 @@ TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermBeforeOutOfBounds) { ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, segmenter->Segment(kText)); - // String: "How are you你好吗お元気ですか" - // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ - // Bytes: 0 3 4 7 8 11 172023 29 35 - ASSERT_THAT(itr->ResetToTermEndingBefore(7), IsOkAndHolds(Eq(4))); + // String: "How are you你好吗お元気ですか" + // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 3 4 7 8 11 172023 29 35 + // UTF-32 idx: 0 3 4 7 8 11 131415 17 19 + ASSERT_THAT(itr->ResetToTermEndingBeforeUtf32(7), 
IsOkAndHolds(Eq(4))); ASSERT_THAT(itr->GetTerm(), Eq("are")); - EXPECT_THAT(itr->ResetToTermEndingBefore(-1), + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(-1), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); EXPECT_THAT(itr->GetTerm(), Eq("are")); - EXPECT_THAT(itr->ResetToTermEndingBefore(kText.length()), - StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); - EXPECT_THAT(itr->GetTerm(), Eq("are")); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(29), IsOk()); + EXPECT_THAT(itr->GetTerm(), Eq("か")); } // Tests that ResetToTermBefore and Advance produce the same output. With the @@ -963,7 +981,7 @@ TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermBeforeOutOfBounds) { // terms produced by ResetToTermBefore calls with the current position // provided as the argument (after their order has been reversed). TEST_P(ReverseJniLanguageSegmenterTest, - MixedLanguagesResetToTermBeforeEquivalentToAdvance) { + MixedLanguagesResetToTermBeforeEquivalentToAdvanceUtf32) { ICING_ASSERT_OK_AND_ASSIGN( auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); @@ -973,17 +991,12 @@ TEST_P(ReverseJniLanguageSegmenterTest, segmenter->Segment(kText)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); - // Can't produce the last term via calls to ResetToTermBefore. So skip - // past that one. 
- auto itr = advance_terms.begin(); - std::advance(itr, advance_terms.size() - 1); - advance_terms.erase(itr); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, segmenter->Segment(kText)); std::vector<std::string_view> reset_terms = - GetAllTermsResetBefore(reset_to_term_itr.get()); + GetAllTermsResetBeforeUtf32(reset_to_term_itr.get()); std::reverse(reset_terms.begin(), reset_terms.end()); EXPECT_THAT(reset_terms, testing::ElementsAreArray(advance_terms)); @@ -992,7 +1005,7 @@ TEST_P(ReverseJniLanguageSegmenterTest, } TEST_P(ReverseJniLanguageSegmenterTest, - ThaiResetToTermBeforeEquivalentToAdvance) { + ThaiResetToTermBeforeEquivalentToAdvanceUtf32) { ICING_ASSERT_OK_AND_ASSIGN( auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); @@ -1002,17 +1015,12 @@ TEST_P(ReverseJniLanguageSegmenterTest, segmenter->Segment(kThai)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); - // Can't produce the last term via calls to ResetToTermBefore. So skip - // past that one. 
- auto itr = advance_terms.begin(); - std::advance(itr, advance_terms.size() - 1); - advance_terms.erase(itr); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, segmenter->Segment(kThai)); std::vector<std::string_view> reset_terms = - GetAllTermsResetBefore(reset_to_term_itr.get()); + GetAllTermsResetBeforeUtf32(reset_to_term_itr.get()); std::reverse(reset_terms.begin(), reset_terms.end()); EXPECT_THAT(reset_terms, testing::ElementsAreArray(advance_terms)); @@ -1020,7 +1028,7 @@ TEST_P(ReverseJniLanguageSegmenterTest, } TEST_P(ReverseJniLanguageSegmenterTest, - KoreanResetToTermBeforeEquivalentToAdvance) { + KoreanResetToTermBeforeEquivalentToAdvanceUtf32) { ICING_ASSERT_OK_AND_ASSIGN( auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); @@ -1030,24 +1038,19 @@ TEST_P(ReverseJniLanguageSegmenterTest, segmenter->Segment(kKorean)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); - // Can't produce the last term via calls to ResetToTermBefore. So skip - // past that one. 
- auto itr = advance_terms.begin(); - std::advance(itr, advance_terms.size() - 1); - advance_terms.erase(itr); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, segmenter->Segment(kKorean)); std::vector<std::string_view> reset_terms = - GetAllTermsResetBefore(reset_to_term_itr.get()); + GetAllTermsResetBeforeUtf32(reset_to_term_itr.get()); std::reverse(reset_terms.begin(), reset_terms.end()); EXPECT_THAT(reset_terms, testing::ElementsAreArray(advance_terms)); EXPECT_THAT(reset_to_term_itr->GetTerm(), Eq(advance_itr->GetTerm())); } -TEST_P(ReverseJniLanguageSegmenterTest, MixedLanguagesResetToTermBefore) { +TEST_P(ReverseJniLanguageSegmenterTest, MixedLanguagesResetToTermBeforeUtf32) { ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create( @@ -1056,35 +1059,36 @@ TEST_P(ReverseJniLanguageSegmenterTest, MixedLanguagesResetToTermBefore) { std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment("How are you你好吗お元気ですか")); - // String: "How are you你好吗お元気ですか" - // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ - // Bytes: 0 3 4 7 8 11 172023 29 35 - EXPECT_THAT(itr->ResetToTermEndingBefore(2), + // String: "How are you你好吗お元気ですか" + // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 3 4 7 8 11 172023 29 35 + // UTF-32 idx: 0 3 4 7 8 11 131415 17 19 + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(2), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermEndingBefore(10), IsOkAndHolds(Eq(7))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(10), IsOkAndHolds(Eq(7))); EXPECT_THAT(itr->GetTerm(), Eq(" ")); - EXPECT_THAT(itr->ResetToTermEndingBefore(7), IsOkAndHolds(Eq(4))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(7), IsOkAndHolds(Eq(4))); EXPECT_THAT(itr->GetTerm(), Eq("are")); - EXPECT_THAT(itr->ResetToTermEndingBefore(32), IsOkAndHolds(Eq(23))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(18), IsOkAndHolds(Eq(15))); 
EXPECT_THAT(itr->GetTerm(), Eq("元気")); - EXPECT_THAT(itr->ResetToTermEndingBefore(14), IsOkAndHolds(Eq(8))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(12), IsOkAndHolds(Eq(8))); EXPECT_THAT(itr->GetTerm(), Eq("you")); - EXPECT_THAT(itr->ResetToTermEndingBefore(0), + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(0), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermEndingBefore(35), IsOkAndHolds(Eq(29))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(19), IsOkAndHolds(Eq(17))); EXPECT_THAT(itr->GetTerm(), Eq("です")); } TEST_P(ReverseJniLanguageSegmenterTest, - ContinuousWhitespacesResetToTermBefore) { + ContinuousWhitespacesResetToTermBeforeUtf32) { ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create( @@ -1094,34 +1098,35 @@ TEST_P(ReverseJniLanguageSegmenterTest, ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kTextWithSpace)); - // String: "Hello World" - // ^ ^ ^ - // Bytes: 0 5 15 - EXPECT_THAT(itr->ResetToTermEndingBefore(0), + // String: "Hello World" + // ^ ^ ^ + // UTF-8 idx: 0 5 15 + // UTF-32 idx: 0 5 15 + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(0), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermEndingBefore(2), + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(2), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermEndingBefore(10), IsOkAndHolds(Eq(0))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(10), IsOkAndHolds(Eq(0))); EXPECT_THAT(itr->GetTerm(), Eq("Hello")); - EXPECT_THAT(itr->ResetToTermEndingBefore(5), IsOkAndHolds(Eq(0))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(5), IsOkAndHolds(Eq(0))); EXPECT_THAT(itr->GetTerm(), Eq("Hello")); - EXPECT_THAT(itr->ResetToTermEndingBefore(15), IsOkAndHolds(Eq(5))); + 
EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(15), IsOkAndHolds(Eq(5))); EXPECT_THAT(itr->GetTerm(), Eq(" ")); - EXPECT_THAT(itr->ResetToTermEndingBefore(17), IsOkAndHolds(Eq(5))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(17), IsOkAndHolds(Eq(5))); EXPECT_THAT(itr->GetTerm(), Eq(" ")); - EXPECT_THAT(itr->ResetToTermEndingBefore(19), IsOkAndHolds(Eq(5))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(19), IsOkAndHolds(Eq(5))); EXPECT_THAT(itr->GetTerm(), Eq(" ")); } -TEST_P(ReverseJniLanguageSegmenterTest, ChineseResetToTermBefore) { +TEST_P(ReverseJniLanguageSegmenterTest, ChineseResetToTermBeforeUtf32) { ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create( @@ -1131,21 +1136,22 @@ TEST_P(ReverseJniLanguageSegmenterTest, ChineseResetToTermBefore) { constexpr std::string_view kChinese = "我每天走路去上班。"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kChinese)); - // String: "我每天走路去上班。" - // ^ ^ ^ ^^ - // Bytes: 0 3 9 15 18 - EXPECT_THAT(itr->ResetToTermEndingBefore(0), + // String: "我每天走路去上班。" + // ^ ^ ^ ^^ + // UTF-8 idx: 0 3 9 15 18 + // UTF-32 idx: 0 1 3 5 6 + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(0), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermEndingBefore(7), IsOkAndHolds(Eq(0))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(2), IsOkAndHolds(Eq(0))); EXPECT_THAT(itr->GetTerm(), Eq("我")); - EXPECT_THAT(itr->ResetToTermEndingBefore(19), IsOkAndHolds(Eq(15))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(7), IsOkAndHolds(Eq(5))); EXPECT_THAT(itr->GetTerm(), Eq("去")); } -TEST_P(ReverseJniLanguageSegmenterTest, JapaneseResetToTermBefore) { +TEST_P(ReverseJniLanguageSegmenterTest, JapaneseResetToTermBeforeUtf32) { ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create( @@ -1154,21 +1160,22 @@ TEST_P(ReverseJniLanguageSegmenterTest, 
JapaneseResetToTermBefore) { constexpr std::string_view kJapanese = "私は毎日仕事に歩いています。"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kJapanese)); - // String: "私は毎日仕事に歩いています。" - // ^ ^ ^ ^ ^ ^ ^ ^ ^ - // Bytes: 0 3 6 12 18212427 33 - EXPECT_THAT(itr->ResetToTermEndingBefore(0), + // String: "私は毎日仕事に歩いています。" + // ^ ^ ^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 3 6 12 18212427 33 + // UTF-32 idx: 0 1 2 4 6 7 8 9 11 + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(0), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermEndingBefore(33), IsOkAndHolds(Eq(27))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(11), IsOkAndHolds(Eq(9))); EXPECT_THAT(itr->GetTerm(), Eq("てい")); - EXPECT_THAT(itr->ResetToTermEndingBefore(7), IsOkAndHolds(Eq(3))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(3), IsOkAndHolds(Eq(1))); EXPECT_THAT(itr->GetTerm(), Eq("は")); } -TEST_P(ReverseJniLanguageSegmenterTest, KhmerResetToTermBefore) { +TEST_P(ReverseJniLanguageSegmenterTest, KhmerResetToTermBeforeUtf32) { ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create( @@ -1176,21 +1183,22 @@ TEST_P(ReverseJniLanguageSegmenterTest, KhmerResetToTermBefore) { constexpr std::string_view kKhmer = "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kKhmer)); - // String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។" - // ^ ^ ^ ^ - // Bytes: 0 9 24 45 - EXPECT_THAT(itr->ResetToTermEndingBefore(0), + // String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។" + // ^ ^ ^ ^ + // UTF-8 idx: 0 9 24 45 + // UTF-32 idx: 0 3 8 15 + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(0), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermEndingBefore(47), IsOkAndHolds(Eq(24))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(16), IsOkAndHolds(Eq(8))); 
EXPECT_THAT(itr->GetTerm(), Eq("ធ្វើការ")); - EXPECT_THAT(itr->ResetToTermEndingBefore(14), IsOkAndHolds(Eq(0))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(5), IsOkAndHolds(Eq(0))); EXPECT_THAT(itr->GetTerm(), Eq("ញុំ")); } -TEST_P(ReverseJniLanguageSegmenterTest, ThaiResetToTermBefore) { +TEST_P(ReverseJniLanguageSegmenterTest, ThaiResetToTermBeforeUtf32) { ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create( @@ -1199,20 +1207,21 @@ TEST_P(ReverseJniLanguageSegmenterTest, ThaiResetToTermBefore) { constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kThai)); - // String: "ฉันเดินไปทำงานทุกวัน" - // ^ ^ ^ ^ ^ ^ - // Bytes: 0 9 21 27 42 51 - EXPECT_THAT(itr->ResetToTermEndingBefore(0), + // String: "ฉันเดินไปทำงานทุกวัน" + // ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 9 21 27 42 51 + // UTF-32 idx: 0 3 7 9 14 17 + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(0), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); - EXPECT_THAT(itr->ResetToTermEndingBefore(51), IsOkAndHolds(Eq(42))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(17), IsOkAndHolds(Eq(14))); EXPECT_THAT(itr->GetTerm(), Eq("ทุก")); - EXPECT_THAT(itr->ResetToTermEndingBefore(13), IsOkAndHolds(Eq(0))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(4), IsOkAndHolds(Eq(0))); EXPECT_THAT(itr->GetTerm(), Eq("ฉัน")); - EXPECT_THAT(itr->ResetToTermEndingBefore(34), IsOkAndHolds(Eq(21))); + EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(11), IsOkAndHolds(Eq(7))); EXPECT_THAT(itr->GetTerm(), Eq("ไป")); } diff --git a/icing/tokenization/simple/space-language-segmenter-factory.cc b/icing/tokenization/simple/space-language-segmenter-factory.cc deleted file mode 100644 index 856ba0a..0000000 --- a/icing/tokenization/simple/space-language-segmenter-factory.cc +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (C) 2019 Google LLC -// -// 
Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "icing/tokenization/language-segmenter-factory.h" -#include "icing/tokenization/simple/space-language-segmenter.h" -#include "icing/util/logging.h" - -namespace icing { -namespace lib { - -namespace language_segmenter_factory { - -// Creates a language segmenter with the given locale. -// -// Returns: -// A LanguageSegmenter on success -// INVALID_ARGUMENT if locale string is invalid -// -// TODO(b/156383798): Figure out if we want to verify locale strings and notify -// users. Right now illegal locale strings will be ignored by ICU. ICU -// components will be created with its default locale. -libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter>> Create( - SegmenterOptions) { - return std::make_unique<SpaceLanguageSegmenter>(); -} - -} // namespace language_segmenter_factory - -} // namespace lib -} // namespace icing diff --git a/icing/tokenization/simple/space-language-segmenter.cc b/icing/tokenization/simple/space-language-segmenter.cc deleted file mode 100644 index 7e301ec..0000000 --- a/icing/tokenization/simple/space-language-segmenter.cc +++ /dev/null @@ -1,205 +0,0 @@ -// Copyright (C) 2019 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "icing/tokenization/simple/space-language-segmenter.h" - -#include <cstdint> -#include <memory> -#include <string> -#include <string_view> -#include <utility> -#include <vector> - -#include "icing/text_classifier/lib3/utils/base/status.h" -#include "icing/text_classifier/lib3/utils/base/statusor.h" -#include "icing/absl_ports/canonical_errors.h" -#include "icing/legacy/core/icing-string-util.h" -#include "icing/util/status-macros.h" - -namespace icing { -namespace lib { - -namespace { -constexpr char kASCIISpace = ' '; -} // namespace - -class SpaceLanguageSegmenterIterator : public LanguageSegmenter::Iterator { - public: - SpaceLanguageSegmenterIterator(std::string_view text) - : text_(text), term_start_index_(0), term_end_index_exclusive_(0) {} - - // Advances to the next term. Returns false if it has reached the end. - bool Advance() override { - if (term_end_index_exclusive_ >= text_.size() || - term_start_index_ >= text_.size()) { - // Reached the end - return false; - } - - // Next term starts where we left off. - term_start_index_ = term_end_index_exclusive_; - - // We know a term is at least one length, so we can +1 first. - term_end_index_exclusive_++; - - // We alternate terms between space and non-space. Figure out what type of - // term we're currently on so we know how to stop. 
- bool is_space = text_[term_start_index_] == kASCIISpace; - - while (term_end_index_exclusive_ < text_.size()) { - bool end_is_space = text_[term_end_index_exclusive_] == kASCIISpace; - if (is_space != end_is_space) { - // We finally see a different type of character, reached the end. - break; - } - // We're still seeing the same types of characters (saw a space and - // still seeing spaces, or saw a non-space and still seeing non-spaces). - // Haven't reached the next term yet, keep advancing. - term_end_index_exclusive_++; - } - - return true; - } - - // Returns the current term. It can be called only when Advance() returns - // true. - std::string_view GetTerm() const override { - if (text_[term_start_index_] == kASCIISpace) { - // Rule: multiple continuous whitespaces are treated as one. - return std::string_view(&text_[term_start_index_], 1); - } - return text_.substr(term_start_index_, - term_end_index_exclusive_ - term_start_index_); - } - - libtextclassifier3::StatusOr<int32_t> ResetToTermStartingAfter( - int32_t offset) override { - if (offset < 0) { - // Start over from the beginning to find the first term. - term_start_index_ = 0; - term_end_index_exclusive_ = 0; - } else { - // Offset points to a term right now. Advance to get past the current - // term. - term_end_index_exclusive_ = offset; - if (!Advance()) { - return absl_ports::NotFoundError(IcingStringUtil::StringPrintf( - "No term found in '%s' that starts after offset %d", - std::string(text_).c_str(), offset)); - } - } - - // Advance again so we can point to the next term. 
- if (!Advance()) { - return absl_ports::NotFoundError(IcingStringUtil::StringPrintf( - "No term found in '%s' that starts after offset %d", - std::string(text_).c_str(), offset)); - } - - return term_start_index_; - } - - libtextclassifier3::StatusOr<int32_t> ResetToTermEndingBefore( - int32_t offset) override { - if (offset <= 0 || offset > text_.size()) { - return absl_ports::NotFoundError(IcingStringUtil::StringPrintf( - "No term found in '%s' that ends before offset %d", - std::string(text_).c_str(), offset)); - } - - if (offset == text_.size()) { - // Special-case if the offset is the text length, this is the last term in - // the text, which is also considered to be "ending before" the offset. - term_end_index_exclusive_ = offset; - ICING_ASSIGN_OR_RETURN(term_start_index_, GetTermStartingBefore(offset)); - return term_start_index_; - } - - // Otherwise, this is just the end of the previous term and we still need to - // find the start of the previous term. - ICING_ASSIGN_OR_RETURN(term_end_index_exclusive_, - GetTermStartingBefore(offset)); - - if (term_end_index_exclusive_ == 0) { - // The current term starts at the beginning of the underlying text_. - // There is no term before this. - return absl_ports::NotFoundError(IcingStringUtil::StringPrintf( - "No term found in '%s' that ends before offset %d", - std::string(text_).c_str(), offset)); - } - - // Reset ourselves to find the term before the end. - ICING_ASSIGN_OR_RETURN( - term_start_index_, - GetTermStartingBefore(term_end_index_exclusive_ - 1)); - return term_start_index_; - } - - libtextclassifier3::StatusOr<int32_t> ResetToStart() override { - term_start_index_ = 0; - term_end_index_exclusive_ = 0; - if (!Advance()) { - return absl_ports::NotFoundError(""); - } - return term_start_index_; - } - - private: - // Return the start offset of the term starting right before the given offset. 
- libtextclassifier3::StatusOr<int32_t> GetTermStartingBefore(int32_t offset) { - bool is_space = text_[offset] == kASCIISpace; - - // Special-case that if offset was the text length, then we're already at - // the "end" of our current term. - if (offset == text_.size()) { - is_space = text_[--offset] == kASCIISpace; - } - - // While it's the same type of character (space vs non-space), we're in the - // same term. So keep iterating backwards until we see a change. - while (offset >= 0 && (text_[offset] == kASCIISpace) == is_space) { - --offset; - } - - // +1 is because offset was off-by-one to exit the while-loop. - return ++offset; - } - - // Text to be segmented - std::string_view text_; - - // The start and end indices are used to track the positions of current - // term. - int term_start_index_; - int term_end_index_exclusive_; -}; - -libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter::Iterator>> -SpaceLanguageSegmenter::Segment(const std::string_view text) const { - return std::make_unique<SpaceLanguageSegmenterIterator>(text); -} - -libtextclassifier3::StatusOr<std::vector<std::string_view>> -SpaceLanguageSegmenter::GetAllTerms(const std::string_view text) const { - ICING_ASSIGN_OR_RETURN(std::unique_ptr<LanguageSegmenter::Iterator> iterator, - Segment(text)); - std::vector<std::string_view> terms; - while (iterator->Advance()) { - terms.push_back(iterator->GetTerm()); - } - return terms; -} - -} // namespace lib -} // namespace icing diff --git a/icing/tokenization/simple/space-language-segmenter.h b/icing/tokenization/simple/space-language-segmenter.h deleted file mode 100644 index de0a6d3..0000000 --- a/icing/tokenization/simple/space-language-segmenter.h +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (C) 2019 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef ICING_TOKENIZATION_SIMPLE_SPACE_LANGUAGE_SEGMENTER_H_ -#define ICING_TOKENIZATION_SIMPLE_SPACE_LANGUAGE_SEGMENTER_H_ - -#include <cstdint> -#include <memory> -#include <string> -#include <string_view> -#include <vector> - -#include "icing/text_classifier/lib3/utils/base/statusor.h" -#include "icing/tokenization/language-segmenter.h" - -namespace icing { -namespace lib { - -// Simple segmenter that splits on spaces, regardless of language. Continuous -// whitespaces will be returned as a single whitespace character. -class SpaceLanguageSegmenter : public LanguageSegmenter { - public: - SpaceLanguageSegmenter() = default; - SpaceLanguageSegmenter(const SpaceLanguageSegmenter&) = delete; - SpaceLanguageSegmenter& operator=(const SpaceLanguageSegmenter&) = delete; - - // Segmentation is based purely on whitespace; does not take into account the - // language of the text. - // - // Returns: - // An iterator of terms on success - libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter::Iterator>> - Segment(std::string_view text) const override; - - // Does not take into account the language of the text. 
- // - // Returns: - // A list of terms on success - // INTERNAL_ERROR if any error occurs - libtextclassifier3::StatusOr<std::vector<std::string_view>> GetAllTerms( - std::string_view text) const override; -}; - -} // namespace lib -} // namespace icing - -#endif // ICING_TOKENIZATION_SIMPLE_SPACE_LANGUAGE_SEGMENTER_H_ diff --git a/icing/tokenization/simple/space-language-segmenter_test.cc b/icing/tokenization/simple/space-language-segmenter_test.cc deleted file mode 100644 index 6c5e3f6..0000000 --- a/icing/tokenization/simple/space-language-segmenter_test.cc +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright (C) 2019 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "gmock/gmock.h" -#include "gtest/gtest.h" -#include "icing/absl_ports/str_cat.h" -#include "icing/testing/common-matchers.h" -#include "icing/tokenization/language-segmenter-factory.h" -#include "icing/tokenization/language-segmenter.h" -#include "unicode/uloc.h" - -namespace icing { -namespace lib { -namespace { - -using ::testing::ElementsAre; -using ::testing::Eq; -using ::testing::IsEmpty; - -TEST(SpaceLanguageSegmenterTest, EmptyText) { - language_segmenter_factory::SegmenterOptions options(ULOC_US); - ICING_ASSERT_OK_AND_ASSIGN( - auto language_segmenter, - language_segmenter_factory::Create(std::move(options))); - EXPECT_THAT(language_segmenter->GetAllTerms(""), IsOkAndHolds(IsEmpty())); -} - -TEST(SpaceLanguageSegmenterTest, SimpleText) { - language_segmenter_factory::SegmenterOptions options(ULOC_US); - ICING_ASSERT_OK_AND_ASSIGN( - auto language_segmenter, - language_segmenter_factory::Create(std::move(options))); - EXPECT_THAT(language_segmenter->GetAllTerms("Hello World"), - IsOkAndHolds(ElementsAre("Hello", " ", "World"))); -} - -TEST(SpaceLanguageSegmenterTest, Punctuation) { - language_segmenter_factory::SegmenterOptions options(ULOC_US); - ICING_ASSERT_OK_AND_ASSIGN( - auto language_segmenter, - language_segmenter_factory::Create(std::move(options))); - - EXPECT_THAT(language_segmenter->GetAllTerms("Hello, World!!!"), - IsOkAndHolds(ElementsAre("Hello,", " ", "World!!!"))); - EXPECT_THAT(language_segmenter->GetAllTerms("Open-source project"), - IsOkAndHolds(ElementsAre("Open-source", " ", "project"))); - EXPECT_THAT(language_segmenter->GetAllTerms("100%"), - IsOkAndHolds(ElementsAre("100%"))); - EXPECT_THAT(language_segmenter->GetAllTerms("(A&B)"), - IsOkAndHolds(ElementsAre("(A&B)"))); -} - -TEST(SpaceLanguageSegmenterTest, Alphanumeric) { - language_segmenter_factory::SegmenterOptions options(ULOC_US); - ICING_ASSERT_OK_AND_ASSIGN( - auto language_segmenter, - language_segmenter_factory::Create(std::move(options))); - - // Alphanumeric 
terms are allowed - EXPECT_THAT(language_segmenter->GetAllTerms("Se7en A4 3a"), - IsOkAndHolds(ElementsAre("Se7en", " ", "A4", " ", "3a"))); -} - -TEST(SpaceLanguageSegmenterTest, Number) { - language_segmenter_factory::SegmenterOptions options(ULOC_US); - ICING_ASSERT_OK_AND_ASSIGN( - auto language_segmenter, - language_segmenter_factory::Create(std::move(options))); - - // Alphanumeric terms are allowed - EXPECT_THAT( - language_segmenter->GetAllTerms("3.141592653589793238462643383279"), - IsOkAndHolds(ElementsAre("3.141592653589793238462643383279"))); - - EXPECT_THAT(language_segmenter->GetAllTerms("3,456.789"), - IsOkAndHolds(ElementsAre("3,456.789"))); - - EXPECT_THAT(language_segmenter->GetAllTerms("-123"), - IsOkAndHolds(ElementsAre("-123"))); -} - -TEST(SpaceLanguageSegmenterTest, ContinuousWhitespaces) { - language_segmenter_factory::SegmenterOptions options(ULOC_US); - ICING_ASSERT_OK_AND_ASSIGN( - auto language_segmenter, - language_segmenter_factory::Create(std::move(options))); - - // Multiple continuous whitespaces are treated as one. 
- const int kNumSeparators = 256; - const std::string text_with_spaces = - absl_ports::StrCat("Hello", std::string(kNumSeparators, ' '), "World"); - EXPECT_THAT(language_segmenter->GetAllTerms(text_with_spaces), - IsOkAndHolds(ElementsAre("Hello", " ", "World"))); -} - -TEST(SpaceLanguageSegmenterTest, NotCopyStrings) { - language_segmenter_factory::SegmenterOptions options(ULOC_US); - ICING_ASSERT_OK_AND_ASSIGN( - auto language_segmenter, - language_segmenter_factory::Create(std::move(options))); - // Validates that the input strings are not copied - const std::string text = "Hello World"; - const char* word1_address = text.c_str(); - const char* word2_address = text.c_str() + 6; - ICING_ASSERT_OK_AND_ASSIGN(std::vector<std::string_view> terms, - language_segmenter->GetAllTerms(text)); - ASSERT_THAT(terms, ElementsAre("Hello", " ", "World")); - const char* word1_result_address = terms.at(0).data(); - const char* word2_result_address = terms.at(2).data(); - - // The underlying char* should be the same - EXPECT_THAT(word1_address, Eq(word1_result_address)); - EXPECT_THAT(word2_address, Eq(word2_result_address)); -} - -} // namespace -} // namespace lib -} // namespace icing diff --git a/icing/tokenization/tokenizer.h b/icing/tokenization/tokenizer.h index 38c4745..b4f0c6e 100644 --- a/icing/tokenization/tokenizer.h +++ b/icing/tokenization/tokenizer.h @@ -20,7 +20,9 @@ #include <string_view> #include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/absl_ports/canonical_errors.h" #include "icing/tokenization/token.h" +#include "icing/util/character-iterator.h" namespace icing { namespace lib { @@ -64,6 +66,18 @@ class Tokenizer { // true, otherwise an invalid token could be returned. 
virtual Token GetToken() const = 0; + virtual libtextclassifier3::StatusOr<CharacterIterator> + CalculateTokenStart() { + return absl_ports::UnimplementedError( + "CalculateTokenStart is not implemented!"); + } + + virtual libtextclassifier3::StatusOr<CharacterIterator> + CalculateTokenEndExclusive() { + return absl_ports::UnimplementedError( + "CalculateTokenEndExclusive is not implemented!"); + } + // Sets the tokenizer to point at the first token that *starts* *after* // offset. Returns false if there are no valid tokens starting after // offset. diff --git a/icing/tools/document-store-dump.cc b/icing/tools/document-store-dump.cc deleted file mode 100644 index 45c9bf5..0000000 --- a/icing/tools/document-store-dump.cc +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright (C) 2019 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "icing/tools/document-store-dump.h" - -#include <cinttypes> - -#include "icing/absl_ports/str_cat.h" -#include "icing/legacy/core/icing-string-util.h" -#include "icing/util/logging.h" - -namespace icing { -namespace lib { -namespace { - -void AppendDocumentProto(DocId document_id, const Document& doc, - std::string* output) { - absl_ports::StrAppend( - output, IcingStringUtil::StringPrintf( - "Document {\n document_id: %d\n corpus_id: %d\n uri: " - "'%s'\n score: %d\n created_timestamp_ms: %" PRIu64 "\n", - static_cast<int>(document_id), doc.corpus_id(), - doc.uri().c_str(), static_cast<int>(doc.score()), - static_cast<int64_t>(doc.created_timestamp_ms()))); - for (const auto& section : doc.sections()) { - absl_ports::StrAppend( - output, IcingStringUtil::StringPrintf( - " section {\n id: %d\n indexed_length: " - "%d\n content: '%s'\n snippet: '%s'\n", - static_cast<int>(section.id()), - static_cast<int>(section.indexed_length()), - section.content().c_str(), section.snippet().c_str())); - for (int64_t extracted_number : section.extracted_numbers()) { - absl_ports::StrAppend(output, IcingStringUtil::StringPrintf( - " extracted_numbers: %" PRId64 "\n", - extracted_number)); - } - for (const std::string& annotation_token : section.annotation_tokens()) { - absl_ports::StrAppend( - output, IcingStringUtil::StringPrintf(" annotation_tokens: '%s'\n", - annotation_token.c_str())); - } - std::string indexed = (section.config().indexed()) ? "true" : "false"; - std::string index_prefixes = - (section.config().index_prefixes()) ? 
"true" : "false"; - absl_ports::StrAppend( - output, - IcingStringUtil::StringPrintf( - " config {\n name: '%s'\n indexed: %s\n " - "tokenizer: %d\n weight: %d\n index_prefixes: %s\n " - "subsection_separator: '%s'\n", - section.config().name().c_str(), indexed.c_str(), - section.config().tokenizer(), - static_cast<int>(section.config().weight()), index_prefixes.c_str(), - section.config().subsection_separator().c_str())); - for (const auto& variant_generator : - section.config().variant_generators()) { - absl_ports::StrAppend( - output, IcingStringUtil::StringPrintf( - " variant_generators: %d\n", variant_generator)); - } - absl_ports::StrAppend( - output, - IcingStringUtil::StringPrintf( - " common_term_legacy_hit_score: %d\n " - "rfc822_host_name_term_legacy_hit_score: %d\n " - "semantic_property: '%s'\n universal_section_id: %d\n " - "omnibox_section_type: %d\n st_section_type: %d\n }\n }\n", - section.config().common_term_legacy_hit_score(), - section.config().rfc822_host_name_term_legacy_hit_score(), - section.config().semantic_property().c_str(), - section.config().universal_section_id(), - section.config().omnibox_section_type(), - section.config().st_section_type())); - } - for (const auto& language : doc.languages()) { - std::string used_classifier = - (language.used_classifier()) ? 
"true" : "false"; - absl_ports::StrAppend( - output, IcingStringUtil::StringPrintf( - " languages {\n language: %d\n score: %d\n " - "used_classifier: %s\n }\n", - language.language(), static_cast<int>(language.score()), - used_classifier.c_str())); - } - absl_ports::StrAppend( - output, IcingStringUtil::StringPrintf( - " ANNOTATIONS PRINTING NOT IMPLEMENTED YET IN ICING-TOOL\n")); -} - -} // namespace - -std::string GetDocumentStoreDump(const DocumentStore& document_store) { - std::string output; - for (DocId document_id = 0; document_id < document_store.num_documents(); - document_id++) { - Document doc; - if (!document_store.ReadDocument(document_id, &doc)) { - ICING_LOG(FATAL) << "Failed to read document"; - } - - AppendDocumentProto(document_id, doc, &output); - } - return output; -} - -} // namespace lib -} // namespace icing diff --git a/icing/tools/document-store-dump.h b/icing/tools/document-store-dump.h deleted file mode 100644 index 023b301..0000000 --- a/icing/tools/document-store-dump.h +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (C) 2019 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef ICING_TOOLS_DOCUMENT_STORE_DUMP_H_ -#define ICING_TOOLS_DOCUMENT_STORE_DUMP_H_ - -#include <string> - -#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/document-store.h" - -namespace icing { -namespace lib { - -// Utility function for dumping the complete document store content. 
-// This provides a human-readable representation of the document store, mainly -// provided for easier understandability for developers. -// The output of this class should only be available on cmdline-tool-level -// (with root access), or unit tests. In other words it should not be possible -// to trigger this on a release key device, for data protection reasons. -std::string GetDocumentStoreDump(const DocumentStore& document_store); - -} // namespace lib -} // namespace icing -#endif // ICING_TOOLS_DOCUMENT_STORE_DUMP_H_ diff --git a/icing/tools/icing-tool.cc b/icing/tools/icing-tool.cc deleted file mode 100644 index 72a11e9..0000000 --- a/icing/tools/icing-tool.cc +++ /dev/null @@ -1,306 +0,0 @@ -// Copyright (C) 2019 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Copyright 2012 Google Inc. All Rights Reserved. -// Author: ulas@google.com (Ulas Kirazci) -// -// A tool to debug the native index. 
- -#include <getopt.h> -#include <unistd.h> - -#include <string> - -#include "java/com/google/android/gmscore/integ/modules/icing/jni/core/string-util.h" -#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/doc-property-filter.h" -#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/document-store.h" -#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/dynamic-trie.h" -#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/filesystem.h" -#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/mobstore.h" -#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/native-index-impl.h" -#include "icing/absl_ports/str_cat.h" -#include "icing/legacy/core/icing-string-util.h" -#include "icing/tools/document-store-dump.h" -#include "icing/util/logging.h" - -using std::vector; -using ::wireless_android_play_playlog::icing::IndexRestorationStats; - -namespace icing { -namespace lib { - -// 256KB for debugging. -const size_t kMaxDocumentSizeForDebugging = 1u << 18; -// Dump dynamic trie stats and contents. 
-void ProcessDynamicTrie(const char* filename) { - Filesystem filesystem; - DynamicTrie trie(filename, DynamicTrie::RuntimeOptions(), &filesystem); - if (!trie.Init()) { - ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Opening trie %s failed", - filename); - return; - } - - std::string out; - trie.GetDebugInfo(true, &out); - printf("Stats:\n%s", out.c_str()); - - std::ostringstream contents; - vector<std::string> keys; - trie.DumpTrie(&contents, &keys); - printf("Contents:\n%s", contents.str().c_str()); -} - -NativeIndexImpl* MakeIndex(const char* root_dir) { - NativeConfig native_config; - native_config.set_max_document_size(kMaxDocumentSizeForDebugging); - FlashIndexOptions flash_index_options( - NativeIndexImpl::GetNativeIndexDir(root_dir)); - NativeIndexImpl* ni = - new NativeIndexImpl(root_dir, native_config, flash_index_options); - InitStatus init_status; - if (!ni->Init(&init_status)) { - ICING_LOG(FATAL) << "Failed to initialize legacy native index impl"; - } - - IndexRestorationStats unused; - ni->RestoreIndex(IndexRequestSpec::default_instance(), &unused); - return ni; -} - -void RunQuery(NativeIndexImpl* ni, const std::string& query, int start, - int num_results) { - // Pull out corpusids and uris. 
- QueryRequestSpec spec; - spec.set_no_corpus_filter(true); - spec.set_want_uris(true); - spec.set_scoring_verbosity_level(1); - spec.set_prefix_match(true); - - QueryResponse response; - ni->ExecuteQuery(query, spec, 10000, start, num_results, &response); - - ICING_VLOG(1) << IcingStringUtil::StringPrintf( - "Query [%s] num results %u", query.c_str(), response.num_results()); - - for (int i = 0, uri_offset = 0; i < response.num_results(); i++) { - ICING_VLOG(1) << IcingStringUtil::StringPrintf( - "%d: (cid=%u) uri %.*s", i, response.corpus_ids(i), - response.uri_lengths(i), response.uri_buffer().data() + uri_offset); - uri_offset += response.uri_lengths(i); - } -} - -void RunSuggest(NativeIndexImpl* ni, const std::string& prefix, - int num_results) { - SuggestionResponse results; - ni->Suggest(prefix, num_results, vector<CorpusId>(), &results); - - ICING_VLOG(1) << IcingStringUtil::StringPrintf( - "Query [%s] num results %zu", prefix.c_str(), - static_cast<size_t>(results.suggestions_size())); - - for (size_t i = 0; i < results.suggestions_size(); i++) { - ICING_VLOG(1) << IcingStringUtil::StringPrintf( - "Sugg: [%s] display text [%s]", results.suggestions(i).query().c_str(), - results.suggestions(i).display_text().c_str()); - } -} - -int IcingTool(int argc, char** argv) { - auto file_storage = CreatePosixFileStorage(); - enum Options { - OPT_FILENAME, - OPT_OP, - OPT_QUERY, - NUM_OPT, - }; - static const option kOptions[NUM_OPT + 1] = { - {"filename", 1, nullptr, 0}, - {"op", 1, nullptr, 0}, - {"query", 1, nullptr, 0}, - {nullptr, 0, nullptr, 0}, - }; - const char* opt_values[NUM_OPT]; - memset(opt_values, 0, sizeof(opt_values)); - - while (true) { - int opt_idx = -1; - int ret = getopt_long(argc, argv, "", kOptions, &opt_idx); - if (ret != 0) break; - - if (opt_idx >= 0 && opt_idx < NUM_OPT) { - opt_values[opt_idx] = optarg; - } - } - - if (!opt_values[OPT_OP]) { - ICING_LOG(ERROR) << "No op specified"; - return -1; - } - - if (!opt_values[OPT_FILENAME]) { - 
ICING_LOG(ERROR) << "No filename specified"; - return -1; - } - if (!strncmp( - opt_values[OPT_FILENAME], - "/data/data/com.google.android.gms/files/AppDataSearch", - strlen("/data/data/com.google.android.gms/files/AppDataSearch"))) { - ICING_LOG(ERROR) - << "Should not read directly from the file in gmscore - " - "icing-tool also commits writes as side-effects which corrupts " - "the index on concurrent modification"; - return -1; - } - - const char* op = opt_values[OPT_OP]; - DocumentStore::Options options(file_storage.get(), - kMaxDocumentSizeForDebugging); - if (!strcmp(op, "dyntrie")) { - std::string full_file_path = - absl_ports::StrCat(opt_values[OPT_FILENAME], "/idx.lexicon"); - ProcessDynamicTrie(full_file_path.c_str()); - } else if (!strcmp(op, "verify")) { - std::unique_ptr<NativeIndexImpl> ni(MakeIndex(opt_values[OPT_FILENAME])); - ni->CheckVerify(); - } else if (!strcmp(op, "query")) { - if (opt_values[OPT_QUERY] == nullptr) { - ICING_LOG(FATAL) << "Opt value is null"; - } - - std::unique_ptr<NativeIndexImpl> ni(MakeIndex(opt_values[OPT_FILENAME])); - RunQuery(ni.get(), opt_values[OPT_QUERY], 0, 100); - } else if (!strcmp(op, "suggest")) { - if (opt_values[OPT_QUERY] == nullptr) { - ICING_LOG(FATAL) << "Opt value is null"; - } - - std::unique_ptr<NativeIndexImpl> ni(MakeIndex(opt_values[OPT_FILENAME])); - RunSuggest(ni.get(), opt_values[OPT_QUERY], 100); - } else if (!strcmp(op, "dump-all-docs")) { - DocumentStore ds(opt_values[OPT_FILENAME], options); - if (!ds.Init()) { - ICING_LOG(FATAL) << "Legacy document store failed to initialize"; - } - - printf( - "------ Document Store Dump Start ------\n" - "%s\n" - "------ Document Store Dump End ------\n", - GetDocumentStoreDump(ds).c_str()); - } else if (!strcmp(op, "dump-uris")) { - CorpusId corpus_id = kInvalidCorpusId; - if (opt_values[OPT_QUERY]) { - // Query is corpus id. 
- corpus_id = atoi(opt_values[OPT_QUERY]); // NOLINT - } - DocumentStore ds(opt_values[OPT_FILENAME], options); - if (!ds.Init()) { - ICING_LOG(FATAL) << "Legacy document store failed to initialize"; - } - - DocPropertyFilter dpf; - ds.AddDeletedTagFilter(&dpf); - - // Dump with format "<corpusid> <uri> <tagname>*". - int filtered = 0; - vector<std::string> tagnames; - for (DocId document_id = 0; document_id < ds.num_documents(); - document_id++) { - Document doc; - if (!ds.ReadDocument(document_id, &doc)) { - ICING_LOG(FATAL) << "Failed to read document."; - } - - if (corpus_id != kInvalidCorpusId && corpus_id != doc.corpus_id()) { - filtered++; - continue; - } - if (dpf.Match(0, document_id)) { - filtered++; - continue; - } - - tagnames.clear(); - ds.GetAllSetUserTagNames(document_id, &tagnames); - - printf("%d %s %s\n", doc.corpus_id(), doc.uri().c_str(), - StringUtil::JoinStrings("/", tagnames).c_str()); - } - ICING_VLOG(1) << IcingStringUtil::StringPrintf( - "Processed %u filtered %d", ds.num_documents(), filtered); - } else if (!strcmp(op, "dump-docs")) { - std::string out_filename = opt_values[OPT_FILENAME]; - out_filename.append("/docs-dump"); - CorpusId corpus_id = kInvalidCorpusId; - if (opt_values[OPT_QUERY]) { - // Query is corpus id. - corpus_id = atoi(opt_values[OPT_QUERY]); // NOLINT - out_filename.push_back('.'); - out_filename.append(opt_values[OPT_QUERY]); - } - DocumentStore ds(opt_values[OPT_FILENAME], options); - if (!ds.Init()) { - ICING_LOG(FATAL) << "Legacy document store failed to initialize"; - } - - DocPropertyFilter dpf; - ds.AddDeletedTagFilter(&dpf); - - // Dump with format (<32-bit length><serialized content>)*. 
- FILE* fp = fopen(out_filename.c_str(), "w"); - int filtered = 0; - for (DocId document_id = 0; document_id < ds.num_documents(); - document_id++) { - Document doc; - if (!ds.ReadDocument(document_id, &doc)) { - ICING_LOG(FATAL) << "Failed to read document."; - } - - if (corpus_id != kInvalidCorpusId && corpus_id != doc.corpus_id()) { - filtered++; - continue; - } - if (dpf.Match(0, document_id)) { - filtered++; - continue; - } - - std::string serialized = doc.SerializeAsString(); - uint32_t length = serialized.size(); - if (fwrite(&length, 1, sizeof(length), fp) != sizeof(length)) { - ICING_LOG(FATAL) << "Failed to write length information to file"; - } - - if (fwrite(serialized.data(), 1, serialized.size(), fp) != - serialized.size()) { - ICING_LOG(FATAL) << "Failed to write document to file"; - } - } - ICING_VLOG(1) << IcingStringUtil::StringPrintf( - "Processed %u filtered %d", ds.num_documents(), filtered); - fclose(fp); - } else { - ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unknown op %s", op); - return -1; - } - - return 0; -} - -} // namespace lib -} // namespace icing - -int main(int argc, char** argv) { return icing::lib::IcingTool(argc, argv); } diff --git a/icing/transform/icu/icu-normalizer-factory.cc b/icing/transform/icu/icu-normalizer-factory.cc index 493aeb5..9951325 100644 --- a/icing/transform/icu/icu-normalizer-factory.cc +++ b/icing/transform/icu/icu-normalizer-factory.cc @@ -44,6 +44,8 @@ libtextclassifier3::StatusOr<std::unique_ptr<Normalizer>> Create( return IcuNormalizer::Create(max_term_byte_size); } +std::string_view GetNormalizerName() { return IcuNormalizer::kName; } + } // namespace normalizer_factory } // namespace lib diff --git a/icing/transform/icu/icu-normalizer.h b/icing/transform/icu/icu-normalizer.h index f20a9fb..4442f3b 100644 --- a/icing/transform/icu/icu-normalizer.h +++ b/icing/transform/icu/icu-normalizer.h @@ -39,6 +39,8 @@ namespace lib { // details. 
class IcuNormalizer : public Normalizer { public: + static constexpr std::string_view kName = "IcuNormalizer"; + // Creates a normalizer with the subcomponents it needs. max_term_byte_size // enforces the max size of text after normalization, text will be truncated // if exceeds the max size. diff --git a/icing/transform/map/map-normalizer-factory.cc b/icing/transform/map/map-normalizer-factory.cc index 3bf84b3..286b8f6 100644 --- a/icing/transform/map/map-normalizer-factory.cc +++ b/icing/transform/map/map-normalizer-factory.cc @@ -42,6 +42,8 @@ libtextclassifier3::StatusOr<std::unique_ptr<Normalizer>> Create( return std::make_unique<MapNormalizer>(max_term_byte_size); } +std::string_view GetNormalizerName() { return MapNormalizer::kName; } + } // namespace normalizer_factory } // namespace lib diff --git a/icing/transform/map/map-normalizer.cc b/icing/transform/map/map-normalizer.cc index c888551..4ad5dec 100644 --- a/icing/transform/map/map-normalizer.cc +++ b/icing/transform/map/map-normalizer.cc @@ -23,6 +23,7 @@ #include "icing/absl_ports/str_cat.h" #include "icing/transform/map/normalization-map.h" +#include "icing/util/character-iterator.h" #include "icing/util/i18n-utils.h" #include "icing/util/logging.h" #include "unicode/utypes.h" @@ -30,48 +31,64 @@ namespace icing { namespace lib { +namespace { + +UChar32 NormalizeChar(UChar32 c) { + if (i18n_utils::GetUtf16Length(c) > 1) { + // All the characters we need to normalize can be encoded into a + // single char16_t. If this character needs more than 1 char16_t code + // unit, we can skip normalization and append it directly. + return c; + } + + // The original character can be encoded into a single char16_t. + const std::unordered_map<char16_t, char16_t>& normalization_map = + GetNormalizationMap(); + auto iterator = normalization_map.find(static_cast<char16_t>(c)); + if (iterator == normalization_map.end()) { + // Normalization mapping not found, append the original character. 
+ return c; + } + + // Found a normalization mapping. The normalized character (stored in a + // char16_t) can have 1 or 2 bytes. + if (i18n_utils::IsAscii(iterator->second)) { + // The normalized character has 1 byte. It may be an upper-case char. + // Lower-case it before returning it. + return std::tolower(static_cast<char>(iterator->second)); + } else { + return iterator->second; + } +} + +} // namespace + std::string MapNormalizer::NormalizeTerm(std::string_view term) const { std::string normalized_text; normalized_text.reserve(term.length()); - for (int i = 0; i < term.length(); ++i) { - if (i18n_utils::IsAscii(term[i])) { - // The original character has 1 byte. - normalized_text.push_back(std::tolower(term[i])); - } else if (i18n_utils::IsLeadUtf8Byte(term[i])) { - UChar32 uchar32 = i18n_utils::GetUChar32At(term.data(), term.length(), i); + int current_pos = 0; + while (current_pos < term.length()) { + if (i18n_utils::IsAscii(term[current_pos])) { + normalized_text.push_back(std::tolower(term[current_pos])); + ++current_pos; + } else { + UChar32 uchar32 = + i18n_utils::GetUChar32At(term.data(), term.length(), current_pos); if (uchar32 == i18n_utils::kInvalidUChar32) { ICING_LOG(WARNING) << "Unable to get uchar32 from " << term - << " at position" << i; - continue; - } - int utf8_length = i18n_utils::GetUtf8Length(uchar32); - if (i18n_utils::GetUtf16Length(uchar32) > 1) { - // All the characters we need to normalize can be encoded into a - // single char16_t. If this character needs more than 1 char16_t code - // unit, we can skip normalization and append it directly. - absl_ports::StrAppend(&normalized_text, term.substr(i, utf8_length)); + << " at position" << current_pos; + ++current_pos; continue; } - // The original character can be encoded into a single char16_t. 
- const std::unordered_map<char16_t, char16_t>& normalization_map = - GetNormalizationMap(); - auto iterator = normalization_map.find(static_cast<char16_t>(uchar32)); - if (iterator != normalization_map.end()) { - // Found a normalization mapping. The normalized character (stored in a - // char16_t) can have 1 or 2 bytes. - if (i18n_utils::IsAscii(iterator->second)) { - // The normalized character has 1 byte. - normalized_text.push_back( - std::tolower(static_cast<char>(iterator->second))); - } else { - // The normalized character has 2 bytes. - i18n_utils::AppendUchar32ToUtf8(&normalized_text, iterator->second); - } + UChar32 normalized_char32 = NormalizeChar(uchar32); + if (i18n_utils::IsAscii(normalized_char32)) { + normalized_text.push_back(normalized_char32); } else { - // Normalization mapping not found, append the original character. - absl_ports::StrAppend(&normalized_text, term.substr(i, utf8_length)); + // The normalized character has 2 bytes. + i18n_utils::AppendUchar32ToUtf8(&normalized_text, normalized_char32); } + current_pos += i18n_utils::GetUtf8Length(uchar32); } } @@ -82,5 +99,27 @@ std::string MapNormalizer::NormalizeTerm(std::string_view term) const { return normalized_text; } +CharacterIterator MapNormalizer::CalculateNormalizedMatchLength( + std::string_view term, std::string_view normalized_term) const { + CharacterIterator char_itr(term); + CharacterIterator normalized_char_itr(normalized_term); + while (char_itr.utf8_index() < term.length() && + normalized_char_itr.utf8_index() < normalized_term.length()) { + UChar32 c = char_itr.GetCurrentChar(); + if (i18n_utils::IsAscii(c)) { + c = std::tolower(c); + } else { + c = NormalizeChar(c); + } + UChar32 normalized_c = normalized_char_itr.GetCurrentChar(); + if (c != normalized_c) { + return char_itr; + } + char_itr.AdvanceToUtf32(char_itr.utf32_index() + 1); + normalized_char_itr.AdvanceToUtf32(normalized_char_itr.utf32_index() + 1); + } + return char_itr; +} + } // namespace lib } // namespace 
icing diff --git a/icing/transform/map/map-normalizer.h b/icing/transform/map/map-normalizer.h index f9c0e42..8fbe83b 100644 --- a/icing/transform/map/map-normalizer.h +++ b/icing/transform/map/map-normalizer.h @@ -19,12 +19,15 @@ #include <string_view> #include "icing/transform/normalizer.h" +#include "icing/util/character-iterator.h" namespace icing { namespace lib { class MapNormalizer : public Normalizer { public: + static constexpr std::string_view kName = "MapNormalizer"; + explicit MapNormalizer(int max_term_byte_size) : max_term_byte_size_(max_term_byte_size){}; @@ -39,6 +42,17 @@ class MapNormalizer : public Normalizer { // Read more mapping details in normalization-map.cc std::string NormalizeTerm(std::string_view term) const override; + // Returns a CharacterIterator pointing to one past the end of the segment of + // term that (once normalized) matches with normalized_term. + // + // Ex. CalculateNormalizedMatchLength("YELLOW", "yell") will return + // CharacterIterator(u8:4, u16:4, u32:4). + // + // Ex. CalculateNormalizedMatchLength("YELLOW", "red") will return + // CharacterIterator(u8:0, u16:0, u32:0). + CharacterIterator CalculateNormalizedMatchLength( + std::string_view term, std::string_view normalized_term) const override; + private: // The maximum term length allowed after normalization. 
int max_term_byte_size_; diff --git a/icing/transform/map/map-normalizer_test.cc b/icing/transform/map/map-normalizer_test.cc index b62ae0e..26fdd4a 100644 --- a/icing/transform/map/map-normalizer_test.cc +++ b/icing/transform/map/map-normalizer_test.cc @@ -23,6 +23,7 @@ #include "icing/testing/icu-i18n-test-utils.h" #include "icing/transform/normalizer-factory.h" #include "icing/transform/normalizer.h" +#include "icing/util/character-iterator.h" namespace icing { namespace lib { @@ -199,6 +200,52 @@ TEST(MapNormalizerTest, Truncate) { } } +TEST(MapNormalizerTest, PrefixMatchLength) { + ICING_ASSERT_OK_AND_ASSIGN(auto normalizer, normalizer_factory::Create( + /*max_term_byte_size=*/1000)); + + // Upper to lower + std::string term = "MDI"; + CharacterIterator match_end = + normalizer->CalculateNormalizedMatchLength(term, "md"); + EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("MD")); + + term = "Icing"; + match_end = normalizer->CalculateNormalizedMatchLength(term, "icin"); + EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Icin")); + + // Full-width + term = "525600"; + match_end = normalizer->CalculateNormalizedMatchLength(term, "525"); + EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("525")); + + term = "FULLWIDTH"; + match_end = normalizer->CalculateNormalizedMatchLength(term, "full"); + EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("FULL")); + + // Hiragana to Katakana + term = "あいうえお"; + match_end = normalizer->CalculateNormalizedMatchLength(term, "アイ"); + EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("あい")); + + term = "かきくけこ"; + match_end = normalizer->CalculateNormalizedMatchLength(term, "カ"); + EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("か")); + + // Latin accents + term = "Zürich"; + match_end = normalizer->CalculateNormalizedMatchLength(term, "zur"); + EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Zür")); + + term = "après-midi"; + match_end = normalizer->CalculateNormalizedMatchLength(term, 
"apre"); + EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("aprè")); + + term = "Buenos días"; + match_end = normalizer->CalculateNormalizedMatchLength(term, "buenos di"); + EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Buenos dí")); +} + } // namespace } // namespace lib diff --git a/icing/transform/normalizer-factory.h b/icing/transform/normalizer-factory.h index f1f3f62..1db9915 100644 --- a/icing/transform/normalizer-factory.h +++ b/icing/transform/normalizer-factory.h @@ -36,6 +36,9 @@ namespace normalizer_factory { libtextclassifier3::StatusOr<std::unique_ptr<Normalizer>> Create( int max_term_byte_size); +// Returns the name of the normalizer being used. +std::string_view GetNormalizerName(); + } // namespace normalizer_factory } // namespace lib diff --git a/icing/transform/normalizer.h b/icing/transform/normalizer.h index 4cbfa63..7305c46 100644 --- a/icing/transform/normalizer.h +++ b/icing/transform/normalizer.h @@ -20,6 +20,7 @@ #include <string_view> #include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/util/character-iterator.h" namespace icing { namespace lib { @@ -39,6 +40,21 @@ class Normalizer { // Normalizes the input term based on rules. See implementation classes for // specific transformation rules. virtual std::string NormalizeTerm(std::string_view term) const = 0; + + // Returns a CharacterIterator pointing to one past the end of the segment of + // term that (once normalized) matches with normalized_term. + // + // Ex. CalculateNormalizedMatchLength("YELLOW", "yell") will return + // CharacterIterator(u8:4, u16:4, u32:4). + // + // Ex. CalculateNormalizedMatchLength("YELLOW", "red") will return + // CharacterIterator(u8:0, u16:0, u32:0). + virtual CharacterIterator CalculateNormalizedMatchLength( + std::string_view term, std::string_view normalized_term) const { + // TODO(b/195720764) Remove this default impl and implement in all + // subclasses. 
+ return CharacterIterator(term, 0, 0, 0); + } }; } // namespace lib diff --git a/icing/transform/simple/none-normalizer-factory.cc b/icing/transform/simple/none-normalizer-factory.cc deleted file mode 100644 index 6b35270..0000000 --- a/icing/transform/simple/none-normalizer-factory.cc +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (C) 2019 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef ICING_TRANSFORM_SIMPLE_NONE_NORMALIZER_FACTORY_H_ -#define ICING_TRANSFORM_SIMPLE_NONE_NORMALIZER_FACTORY_H_ - -#include <memory> -#include <string_view> - -#include "icing/text_classifier/lib3/utils/base/statusor.h" -#include "icing/absl_ports/canonical_errors.h" -#include "icing/transform/normalizer.h" -#include "icing/transform/simple/none-normalizer.h" - -namespace icing { -namespace lib { - -namespace normalizer_factory { - -// Creates a dummy normalizer. The term is not normalized, but -// the text will be truncated to max_term_byte_size if it exceeds the max size. 
-// -// Returns: -// A normalizer on success -// INVALID_ARGUMENT if max_term_byte_size <= 0 -// INTERNAL_ERROR on errors -libtextclassifier3::StatusOr<std::unique_ptr<Normalizer>> Create( - int max_term_byte_size) { - if (max_term_byte_size <= 0) { - return absl_ports::InvalidArgumentError( - "max_term_byte_size must be greater than zero."); - } - - return std::make_unique<NoneNormalizer>(max_term_byte_size); -} - -} // namespace normalizer_factory - -} // namespace lib -} // namespace icing - -#endif // ICING_TRANSFORM_SIMPLE_NONE_NORMALIZER_FACTORY_H_ diff --git a/icing/transform/simple/none-normalizer.h b/icing/transform/simple/none-normalizer.h deleted file mode 100644 index 47085e1..0000000 --- a/icing/transform/simple/none-normalizer.h +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (C) 2019 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef ICING_TRANSFORM_SIMPLE_NONE_NORMALIZER_H_ -#define ICING_TRANSFORM_SIMPLE_NONE_NORMALIZER_H_ - -#include <string> -#include <string_view> - -#include "icing/transform/normalizer.h" - -namespace icing { -namespace lib { - -// This normalizer is not meant for production use. Currently only used to get -// the Icing library to compile in Jetpack. -// -// No normalization is done, but the term is truncated if it exceeds -// max_term_byte_size. 
-class NoneNormalizer : public Normalizer { - public: - explicit NoneNormalizer(int max_term_byte_size) - : max_term_byte_size_(max_term_byte_size){}; - - std::string NormalizeTerm(std::string_view term) const override { - if (term.length() > max_term_byte_size_) { - return std::string(term.substr(0, max_term_byte_size_)); - } - return std::string(term); - } - - private: - // The maximum term length allowed after normalization. - int max_term_byte_size_; -}; - -} // namespace lib -} // namespace icing - -#endif // ICING_TRANSFORM_SIMPLE_NONE_NORMALIZER_H_ diff --git a/icing/transform/simple/none-normalizer_test.cc b/icing/transform/simple/none-normalizer_test.cc deleted file mode 100644 index e074828..0000000 --- a/icing/transform/simple/none-normalizer_test.cc +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright (C) 2019 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include <memory> - -#include "gmock/gmock.h" -#include "gtest/gtest.h" -#include "icing/testing/common-matchers.h" -#include "icing/transform/normalizer-factory.h" -#include "icing/transform/normalizer.h" - -namespace icing { -namespace lib { -namespace { - -using ::testing::Eq; - -TEST(NoneNormalizerTest, Creation) { - EXPECT_THAT(normalizer_factory::Create( - /*max_term_byte_size=*/5), - IsOk()); - EXPECT_THAT(normalizer_factory::Create( - /*max_term_byte_size=*/0), - StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); - EXPECT_THAT(normalizer_factory::Create( - /*max_term_byte_size=*/-1), - StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); -} - -TEST(IcuNormalizerTest, NoNormalizationDone) { - ICING_ASSERT_OK_AND_ASSIGN(auto normalizer, normalizer_factory::Create( - /*max_term_byte_size=*/1000)); - EXPECT_THAT(normalizer->NormalizeTerm(""), Eq("")); - EXPECT_THAT(normalizer->NormalizeTerm("hello world"), Eq("hello world")); - - // Capitalization - EXPECT_THAT(normalizer->NormalizeTerm("MDI"), Eq("MDI")); - - // Accents - EXPECT_THAT(normalizer->NormalizeTerm("Zürich"), Eq("Zürich")); - - // Full-width punctuation to ASCII punctuation - EXPECT_THAT(normalizer->NormalizeTerm("。,!?:”"), Eq("。,!?:”")); - - // Half-width katakana - EXPECT_THAT(normalizer->NormalizeTerm("カ"), Eq("カ")); -} - -TEST(NoneNormalizerTest, Truncate) { - ICING_ASSERT_OK_AND_ASSIGN(auto normalizer, normalizer_factory::Create( - /*max_term_byte_size=*/5)); - - // Won't be truncated - EXPECT_THAT(normalizer->NormalizeTerm("hi"), Eq("hi")); - EXPECT_THAT(normalizer->NormalizeTerm("hello"), Eq("hello")); - - // Truncated to length 5. - EXPECT_THAT(normalizer->NormalizeTerm("hello!"), Eq("hello")); -} - -} // namespace -} // namespace lib -} // namespace icing diff --git a/icing/util/bit-util.h b/icing/util/bit-util.h index e2bb817..7ca20b4 100644 --- a/icing/util/bit-util.h +++ b/icing/util/bit-util.h @@ -24,19 +24,18 @@ namespace bit_util { // Manipulating bit fields. 
// -// x value containing the bit field(s) -// offset offset of bit field in x -// len len of bit field in x +// value value containing the bit field(s) +// lsb_offset offset of bit field in value, starting from the least significant +// bit. for example, the '1' in '0100' has a lsb_offset of 2 +// len len of bit field in value // // REQUIREMENTS // -// - x an unsigned integer <= 64 bits -// - offset + len <= sizeof(x) * 8 +// - value is an unsigned integer <= 64 bits +// - lsb_offset + len <= sizeof(value) * 8 // // There is no error checking so you will get garbage if you don't // ensure the above. -// -// To set a value, use BITFIELD_CLEAR then BITFIELD_OR. // Shifting by more than the word length is undefined (on ARM it has the // intended effect, but on Intel it shifts by % word length), so check the @@ -44,20 +43,65 @@ namespace bit_util { inline uint64_t BitfieldMask(uint32_t len) { return ((len == 0) ? 0U : ((~uint64_t{0}) >> (64 - (len)))); } -inline uint64_t BitfieldGet(uint64_t mask, uint32_t lsb_offset, uint32_t len) { - return ((mask) >> (lsb_offset)) & BitfieldMask(len); + +inline void BitfieldClear(uint32_t lsb_offset, uint32_t len, + uint8_t* value_out) { + *value_out &= ~(BitfieldMask(len) << lsb_offset); +} + +inline void BitfieldClear(uint32_t lsb_offset, uint32_t len, + uint16_t* value_out) { + *value_out &= ~(BitfieldMask(len) << lsb_offset); +} + +inline void BitfieldClear(uint32_t lsb_offset, uint32_t len, + uint32_t* value_out) { + *value_out &= ~(BitfieldMask(len) << lsb_offset); +} + +inline void BitfieldClear(uint32_t lsb_offset, uint32_t len, + uint64_t* value_out) { + *value_out &= ~(BitfieldMask(len) << lsb_offset); +} + +inline uint64_t BitfieldGet(uint64_t value, uint32_t lsb_offset, uint32_t len) { + return ((value) >> (lsb_offset)) & BitfieldMask(len); +} + +inline void BitfieldSet(uint8_t new_value, uint32_t lsb_offset, uint32_t len, + uint8_t* value_out) { + BitfieldClear(lsb_offset, len, value_out); + + // We conservatively mask 
new_value at len so value won't be corrupted if + // new_value >= (1 << len). + *value_out |= (new_value & BitfieldMask(len)) << (lsb_offset); +} + +inline void BitfieldSet(uint16_t new_value, uint32_t lsb_offset, uint32_t len, + uint16_t* value_out) { + BitfieldClear(lsb_offset, len, value_out); + + // We conservatively mask new_value at len so value won't be corrupted if + // new_value >= (1 << len). + *value_out |= (new_value & BitfieldMask(len)) << (lsb_offset); } -inline void BitfieldSet(uint32_t value, uint32_t lsb_offset, uint32_t len, - uint32_t* mask) { - // We conservatively mask val at len so x won't be corrupted if val >= - // 1 << len. - *mask |= (uint64_t{value} & BitfieldMask(len)) << (lsb_offset); + +inline void BitfieldSet(uint32_t new_value, uint32_t lsb_offset, uint32_t len, + uint32_t* value_out) { + BitfieldClear(lsb_offset, len, value_out); + + // We conservatively mask new_value at len so value won't be corrupted if + // new_value >= (1 << len). + *value_out |= (new_value & BitfieldMask(len)) << (lsb_offset); } -inline void BitfieldSet(uint64_t value, uint32_t lsb_offset, uint32_t len, - uint64_t* mask) { - // We conservatively mask val at len so x won't be corrupted if val >= - // 1 << len. - *mask |= (value & BitfieldMask(len)) << (lsb_offset); + +inline void BitfieldSet(uint64_t new_value, uint32_t lsb_offset, uint32_t len, + uint64_t* value_out) { + BitfieldClear(lsb_offset, len, value_out); + + // We conservatively mask new_value at len so value won't be corrupted if + // new_value >= (1 << len). + *value_out |= (new_value & BitfieldMask(len)) << (lsb_offset); } } // namespace bit_util diff --git a/icing/util/bit-util_test.cc b/icing/util/bit-util_test.cc new file mode 100644 index 0000000..3b86a21 --- /dev/null +++ b/icing/util/bit-util_test.cc @@ -0,0 +1,145 @@ +// Copyright (C) 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/util/bit-util.h" + +#include <memory> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace icing { +namespace lib { +namespace { + +using ::testing::Eq; + +TEST(BitUtilTest, BitfieldMask) { + // Check that we can handle up to uint8_t's + EXPECT_THAT(bit_util::BitfieldMask(/*len=*/0), Eq(0b0)); + EXPECT_THAT(bit_util::BitfieldMask(/*len=*/1), Eq(0b01)); + + // Check that we can handle up to uint32_t's + EXPECT_THAT(bit_util::BitfieldMask(/*len=*/16), Eq(0b01111111111111111)); + + // Check that we can handle up to uint64_t's + EXPECT_THAT( + bit_util::BitfieldMask(/*len=*/63), + Eq(0b0111111111111111111111111111111111111111111111111111111111111111)); +} + +TEST(BitUtilTest, BitfieldClear) { + // Check that we can handle up to uint8_t's + uint8_t value_8 = 0b0; + bit_util::BitfieldClear(/*lsb_offset=*/0, /*len=*/1, &value_8); + EXPECT_THAT(value_8, Eq(0b0)); + + value_8 = 0b01; + bit_util::BitfieldClear(/*lsb_offset=*/0, /*len=*/1, &value_8); + EXPECT_THAT(value_8, Eq(0b00)); + + value_8 = 0b011; + bit_util::BitfieldClear(/*lsb_offset=*/1, /*len=*/1, &value_8); + EXPECT_THAT(value_8, Eq(0b001)); + + value_8 = 0b011; + bit_util::BitfieldClear(/*lsb_offset=*/0, /*len=*/2, &value_8); + EXPECT_THAT(value_8, Eq(0b000)); + + value_8 = 0b0110; + bit_util::BitfieldClear(/*lsb_offset=*/1, /*len=*/2, &value_8); + EXPECT_THAT(value_8, Eq(0b0000)); + + // Check that we can handle up to uint32_t's + uint32_t value_32 = 0b010000000000000000000000; + bit_util::BitfieldClear(/*lsb_offset=*/22, /*len=*/1, &value_32); + 
EXPECT_THAT(value_32, Eq(0b0)); + + // Check that we can handle up to uint64_t's + uint64_t value_64 = 0b0100000000000000000000000000000000000; + bit_util::BitfieldClear(/*lsb_offset=*/35, /*len=*/1, &value_64); + EXPECT_THAT(value_64, Eq(0b0)); +} + +TEST(BitUtilTest, BitfieldGet) { + // Get something in the uint8_t range + EXPECT_THAT(bit_util::BitfieldGet(0b0, /*lsb_offset=*/0, /*len=*/1), Eq(0b0)); + EXPECT_THAT(bit_util::BitfieldGet(0b01, /*lsb_offset=*/0, /*len=*/1), + Eq(0b01)); + EXPECT_THAT(bit_util::BitfieldGet(0b010, /*lsb_offset=*/1, /*len=*/1), + Eq(0b01)); + EXPECT_THAT(bit_util::BitfieldGet(0b001, /*lsb_offset=*/1, /*len=*/1), + Eq(0b0)); + EXPECT_THAT(bit_util::BitfieldGet(0b011, /*lsb_offset=*/0, /*len=*/2), + Eq(0b011)); + EXPECT_THAT(bit_util::BitfieldGet(0b0110, /*lsb_offset=*/1, /*len=*/2), + Eq(0b011)); + EXPECT_THAT(bit_util::BitfieldGet(0b0101, /*lsb_offset=*/0, /*len=*/3), + Eq(0b0101)); + + // Get something in the uint32_t range + EXPECT_THAT( + bit_util::BitfieldGet(0b01000000000000, /*lsb_offset=*/12, /*len=*/1), + Eq(0b01)); + + // Get something in the uint64_t range + EXPECT_THAT(bit_util::BitfieldGet(0b010000000000000000000000000000000000, + /*lsb_offset=*/34, /*len=*/1), + Eq(0b01)); +} + +TEST(BitUtilTest, BitfieldSet) { + // Set something in the uint8_t range + uint8_t value_8 = 0b0; + bit_util::BitfieldSet(0b0, /*lsb_offset=*/0, /*len=*/1, &value_8); + EXPECT_THAT(value_8, Eq(0b0)); + + value_8 = 0b01; + bit_util::BitfieldSet(0b01, /*lsb_offset=*/0, /*len=*/1, &value_8); + EXPECT_THAT(value_8, Eq(0b01)); + + value_8 = 0b00; + bit_util::BitfieldSet(0b01, /*lsb_offset=*/0, /*len=*/1, &value_8); + EXPECT_THAT(value_8, Eq(0b01)); + + value_8 = 0b00; + bit_util::BitfieldSet(0b011, /*lsb_offset=*/0, /*len=*/2, &value_8); + EXPECT_THAT(value_8, Eq(0b011)); + + value_8 = 0b01; + bit_util::BitfieldSet(0b011, /*lsb_offset=*/0, /*len=*/2, &value_8); + EXPECT_THAT(value_8, Eq(0b011)); + + value_8 = 0b01; + bit_util::BitfieldSet(0b01, 
/*lsb_offset=*/1, /*len=*/1, &value_8); + EXPECT_THAT(value_8, Eq(0b011)); + + value_8 = 0b0001; + bit_util::BitfieldSet(0b011, /*lsb_offset=*/1, /*len=*/2, &value_8); + EXPECT_THAT(value_8, Eq(0b0111)); + + // Set something in the uint32_t range + uint32_t value_32 = 0b0; + bit_util::BitfieldSet(0b01, /*lsb_offset=*/16, /*len=*/1, &value_32); + EXPECT_THAT(value_32, Eq(0b010000000000000000)); + + // Set something in the uint64_t range + uint64_t value_64 = 0b0; + bit_util::BitfieldSet(0b01, /*lsb_offset=*/34, /*len=*/1, &value_64); + EXPECT_THAT(value_64, Eq(0b010000000000000000000000000000000000)); +} + +} // namespace +} // namespace lib +} // namespace icing diff --git a/icing/util/character-iterator.cc b/icing/util/character-iterator.cc index 3707f95..d483031 100644 --- a/icing/util/character-iterator.cc +++ b/icing/util/character-iterator.cc @@ -14,6 +14,8 @@ #include "icing/util/character-iterator.h" +#include "icing/util/i18n-utils.h" + namespace icing { namespace lib { @@ -30,17 +32,35 @@ int GetUTF8StartPosition(std::string_view text, int current_byte_index) { } // namespace +UChar32 CharacterIterator::GetCurrentChar() { + if (cached_current_char_ == i18n_utils::kInvalidUChar32) { + // Our indices point to the right character, we just need to read that + // character. No need to worry about an error. If GetUChar32At fails, then + // current_char will be i18n_utils::kInvalidUChar32. + cached_current_char_ = + i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_); + } + return cached_current_char_; +} + +bool CharacterIterator::MoveToUtf8(int desired_utf8_index) { + return (desired_utf8_index > utf8_index_) ? AdvanceToUtf8(desired_utf8_index) + : RewindToUtf8(desired_utf8_index); +} + bool CharacterIterator::AdvanceToUtf8(int desired_utf8_index) { if (desired_utf8_index > text_.length()) { // Enforce the requirement. return false; } // Need to work forwards. 
+ UChar32 uchar32 = cached_current_char_; while (utf8_index_ < desired_utf8_index) { - UChar32 uchar32 = + uchar32 = i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_); if (uchar32 == i18n_utils::kInvalidUChar32) { // Unable to retrieve a valid UTF-32 character at the previous position. + cached_current_char_ = i18n_utils::kInvalidUChar32; return false; } int utf8_length = i18n_utils::GetUtf8Length(uchar32); @@ -50,7 +70,10 @@ bool CharacterIterator::AdvanceToUtf8(int desired_utf8_index) { } utf8_index_ += utf8_length; utf16_index_ += i18n_utils::GetUtf16Length(uchar32); + ++utf32_index_; } + cached_current_char_ = + i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_); return true; } @@ -60,32 +83,50 @@ bool CharacterIterator::RewindToUtf8(int desired_utf8_index) { return false; } // Need to work backwards. + UChar32 uchar32 = cached_current_char_; while (utf8_index_ > desired_utf8_index) { - --utf8_index_; - utf8_index_ = GetUTF8StartPosition(text_, utf8_index_); - if (utf8_index_ < 0) { + int utf8_index = utf8_index_ - 1; + utf8_index = GetUTF8StartPosition(text_, utf8_index); + if (utf8_index < 0) { // Somehow, there wasn't a single UTF-8 lead byte at // requested_byte_index or an earlier byte. + cached_current_char_ = i18n_utils::kInvalidUChar32; return false; } // We've found the start of a unicode char! - UChar32 uchar32 = - i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_); - if (uchar32 == i18n_utils::kInvalidUChar32) { - // Unable to retrieve a valid UTF-32 character at the previous position. + uchar32 = + i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index); + int expected_length = utf8_index_ - utf8_index; + if (uchar32 == i18n_utils::kInvalidUChar32 || + expected_length != i18n_utils::GetUtf8Length(uchar32)) { + // Either unable to retrieve a valid UTF-32 character at the previous + // position or we skipped past an invalid sequence while seeking the + // previous start position. 
+ cached_current_char_ = i18n_utils::kInvalidUChar32; return false; } + cached_current_char_ = uchar32; + utf8_index_ = utf8_index; utf16_index_ -= i18n_utils::GetUtf16Length(uchar32); + --utf32_index_; } return true; } +bool CharacterIterator::MoveToUtf16(int desired_utf16_index) { + return (desired_utf16_index > utf16_index_) + ? AdvanceToUtf16(desired_utf16_index) + : RewindToUtf16(desired_utf16_index); +} + bool CharacterIterator::AdvanceToUtf16(int desired_utf16_index) { + UChar32 uchar32 = cached_current_char_; while (utf16_index_ < desired_utf16_index) { - UChar32 uchar32 = + uchar32 = i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_); if (uchar32 == i18n_utils::kInvalidUChar32) { // Unable to retrieve a valid UTF-32 character at the previous position. + cached_current_char_ = i18n_utils::kInvalidUChar32; return false; } int utf16_length = i18n_utils::GetUtf16Length(uchar32); @@ -96,11 +137,15 @@ bool CharacterIterator::AdvanceToUtf16(int desired_utf16_index) { int utf8_length = i18n_utils::GetUtf8Length(uchar32); if (utf8_index_ + utf8_length > text_.length()) { // Enforce the requirement. + cached_current_char_ = i18n_utils::kInvalidUChar32; return false; } utf8_index_ += utf8_length; utf16_index_ += utf16_length; + ++utf32_index_; } + cached_current_char_ = + i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_); return true; } @@ -108,17 +153,98 @@ bool CharacterIterator::RewindToUtf16(int desired_utf16_index) { if (desired_utf16_index < 0) { return false; } + UChar32 uchar32 = cached_current_char_; while (utf16_index_ > desired_utf16_index) { - --utf8_index_; - utf8_index_ = GetUTF8StartPosition(text_, utf8_index_); + int utf8_index = utf8_index_ - 1; + utf8_index = GetUTF8StartPosition(text_, utf8_index); + if (utf8_index < 0) { + // Somehow, there wasn't a single UTF-8 lead byte at + // requested_byte_index or an earlier byte. 
+ cached_current_char_ = i18n_utils::kInvalidUChar32; + return false; + } // We've found the start of a unicode char! - UChar32 uchar32 = + uchar32 = + i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index); + int expected_length = utf8_index_ - utf8_index; + if (uchar32 == i18n_utils::kInvalidUChar32 || + expected_length != i18n_utils::GetUtf8Length(uchar32)) { + // Either unable to retrieve a valid UTF-32 character at the previous + // position or we skipped past an invalid sequence while seeking the + // previous start position. + cached_current_char_ = i18n_utils::kInvalidUChar32; + return false; + } + cached_current_char_ = uchar32; + utf8_index_ = utf8_index; + utf16_index_ -= i18n_utils::GetUtf16Length(uchar32); + --utf32_index_; + } + return true; +} + +bool CharacterIterator::MoveToUtf32(int desired_utf32_index) { + return (desired_utf32_index > utf32_index_) + ? AdvanceToUtf32(desired_utf32_index) + : RewindToUtf32(desired_utf32_index); +} + +bool CharacterIterator::AdvanceToUtf32(int desired_utf32_index) { + UChar32 uchar32 = cached_current_char_; + while (utf32_index_ < desired_utf32_index) { + uchar32 = i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_); if (uchar32 == i18n_utils::kInvalidUChar32) { // Unable to retrieve a valid UTF-32 character at the previous position. + cached_current_char_ = i18n_utils::kInvalidUChar32; + return false; + } + int utf16_length = i18n_utils::GetUtf16Length(uchar32); + int utf8_length = i18n_utils::GetUtf8Length(uchar32); + if (utf8_index_ + utf8_length > text_.length()) { + // Enforce the requirement. 
+ cached_current_char_ = i18n_utils::kInvalidUChar32; + return false; + } + utf8_index_ += utf8_length; + utf16_index_ += utf16_length; + ++utf32_index_; + } + cached_current_char_ = + i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_); + return true; +} + +bool CharacterIterator::RewindToUtf32(int desired_utf32_index) { + if (desired_utf32_index < 0) { + return false; + } + UChar32 uchar32 = cached_current_char_; + while (utf32_index_ > desired_utf32_index) { + int utf8_index = utf8_index_ - 1; + utf8_index = GetUTF8StartPosition(text_, utf8_index); + if (utf8_index < 0) { + // Somehow, there wasn't a single UTF-8 lead byte at + // requested_byte_index or an earlier byte. + cached_current_char_ = i18n_utils::kInvalidUChar32; + return false; + } + // We've found the start of a unicode char! + uchar32 = + i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index); + int expected_length = utf8_index_ - utf8_index; + if (uchar32 == i18n_utils::kInvalidUChar32 || + expected_length != i18n_utils::GetUtf8Length(uchar32)) { + // Either unable to retrieve a valid UTF-32 character at the previous + // position or we skipped past an invalid sequence while seeking the + // previous start position. 
+ cached_current_char_ = i18n_utils::kInvalidUChar32; return false; } + cached_current_char_ = uchar32; + utf8_index_ = utf8_index; utf16_index_ -= i18n_utils::GetUtf16Length(uchar32); + --utf32_index_; } return true; } diff --git a/icing/util/character-iterator.h b/icing/util/character-iterator.h index 22de6c5..c7569a7 100644 --- a/icing/util/character-iterator.h +++ b/icing/util/character-iterator.h @@ -15,6 +15,7 @@ #ifndef ICING_UTIL_CHARACTER_ITERATOR_H_ #define ICING_UTIL_CHARACTER_ITERATOR_H_ +#include "icing/legacy/core/icing-string-util.h" #include "icing/util/i18n-utils.h" namespace icing { @@ -23,23 +24,40 @@ namespace lib { class CharacterIterator { public: explicit CharacterIterator(std::string_view text) - : CharacterIterator(text, 0, 0) {} + : CharacterIterator(text, 0, 0, 0) {} - CharacterIterator(std::string_view text, int utf8_index, int utf16_index) - : text_(text), utf8_index_(utf8_index), utf16_index_(utf16_index) {} + CharacterIterator(std::string_view text, int utf8_index, int utf16_index, + int utf32_index) + : text_(text), + cached_current_char_(i18n_utils::kInvalidUChar32), + utf8_index_(utf8_index), + utf16_index_(utf16_index), + utf32_index_(utf32_index) {} - // Moves from current position to the character that includes the specified + // Returns the character that the iterator currently points to. + // i18n_utils::kInvalidUChar32 if unable to read that character. + UChar32 GetCurrentChar(); + + // Moves current position to desired_utf8_index. + // REQUIRES: 0 <= desired_utf8_index <= text_.length() + bool MoveToUtf8(int desired_utf8_index); + + // Advances from current position to the character that includes the specified // UTF-8 index. // REQUIRES: desired_utf8_index <= text_.length() // desired_utf8_index is allowed to point one index past the end, but no // further. 
bool AdvanceToUtf8(int desired_utf8_index); - // Moves from current position to the character that includes the specified + // Rewinds from current position to the character that includes the specified // UTF-8 index. // REQUIRES: 0 <= desired_utf8_index bool RewindToUtf8(int desired_utf8_index); + // Moves current position to desired_utf16_index. + // REQUIRES: 0 <= desired_utf16_index <= text_.utf16_length() + bool MoveToUtf16(int desired_utf16_index); + // Advances current position to desired_utf16_index. // REQUIRES: desired_utf16_index <= text_.utf16_length() // desired_utf16_index is allowed to point one index past the end, but no @@ -50,18 +68,42 @@ class CharacterIterator { // REQUIRES: 0 <= desired_utf16_index bool RewindToUtf16(int desired_utf16_index); + // Moves current position to desired_utf32_index. + // REQUIRES: 0 <= desired_utf32_index <= text_.utf32_length() + bool MoveToUtf32(int desired_utf32_index); + + // Advances current position to desired_utf32_index. + // REQUIRES: desired_utf32_index <= text_.utf32_length() + // desired_utf32_index is allowed to point one index past the end, but no + // further. + bool AdvanceToUtf32(int desired_utf32_index); + + // Rewinds current position to desired_utf32_index. + // REQUIRES: 0 <= desired_utf32_index + bool RewindToUtf32(int desired_utf32_index); + int utf8_index() const { return utf8_index_; } int utf16_index() const { return utf16_index_; } + int utf32_index() const { return utf32_index_; } bool operator==(const CharacterIterator& rhs) const { + // cached_current_char_ is just that: a cached value. As such, it's not + // considered for equality. 
return text_ == rhs.text_ && utf8_index_ == rhs.utf8_index_ && - utf16_index_ == rhs.utf16_index_; + utf16_index_ == rhs.utf16_index_ && utf32_index_ == rhs.utf32_index_; + } + + std::string DebugString() const { + return IcingStringUtil::StringPrintf("(u8:%d,u16:%d,u32:%d)", utf8_index_, + utf16_index_, utf32_index_); } private: std::string_view text_; + UChar32 cached_current_char_; int utf8_index_; int utf16_index_; + int utf32_index_; }; } // namespace lib diff --git a/icing/util/character-iterator_test.cc b/icing/util/character-iterator_test.cc new file mode 100644 index 0000000..445f837 --- /dev/null +++ b/icing/util/character-iterator_test.cc @@ -0,0 +1,235 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "icing/util/character-iterator.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/testing/icu-i18n-test-utils.h" + +namespace icing { +namespace lib { + +using ::testing::Eq; +using ::testing::IsFalse; +using ::testing::IsTrue; + +TEST(CharacterIteratorTest, BasicUtf8) { + constexpr std::string_view kText = "¿Dónde está la biblioteca?"; + CharacterIterator iterator(kText); + EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿")); + + EXPECT_THAT(iterator.AdvanceToUtf8(4), IsTrue()); + EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("ó")); + EXPECT_THAT(iterator, + Eq(CharacterIterator(kText, /*utf8_index=*/3, /*utf16_index=*/2, + /*utf32_index=*/2))); + + EXPECT_THAT(iterator.AdvanceToUtf8(18), IsTrue()); + EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b")); + EXPECT_THAT(iterator, + Eq(CharacterIterator(kText, /*utf8_index=*/18, /*utf16_index=*/15, + /*utf32_index=*/15))); + + EXPECT_THAT(iterator.AdvanceToUtf8(28), IsTrue()); + EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?")); + EXPECT_THAT(iterator, + Eq(CharacterIterator(kText, /*utf8_index=*/28, /*utf16_index=*/25, + /*utf32_index=*/25))); + + EXPECT_THAT(iterator.AdvanceToUtf8(29), IsTrue()); + EXPECT_THAT(iterator.GetCurrentChar(), Eq(0)); + EXPECT_THAT(iterator, + Eq(CharacterIterator(kText, /*utf8_index=*/29, /*utf16_index=*/26, + /*utf32_index=*/26))); + + EXPECT_THAT(iterator.RewindToUtf8(28), IsTrue()); + EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?")); + EXPECT_THAT(iterator, + Eq(CharacterIterator(kText, /*utf8_index=*/28, /*utf16_index=*/25, + /*utf32_index=*/25))); + + EXPECT_THAT(iterator.RewindToUtf8(18), IsTrue()); + EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b")); + EXPECT_THAT(iterator, + Eq(CharacterIterator(kText, /*utf8_index=*/18, /*utf16_index=*/15, + /*utf32_index=*/15))); + + EXPECT_THAT(iterator.RewindToUtf8(4), IsTrue()); + EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), 
Eq("ó")); + EXPECT_THAT(iterator, + Eq(CharacterIterator(kText, /*utf8_index=*/3, /*utf16_index=*/2, + /*utf32_index=*/2))); + + EXPECT_THAT(iterator.RewindToUtf8(0), IsTrue()); + EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿")); + EXPECT_THAT(iterator, + Eq(CharacterIterator(kText, /*utf8_index=*/0, /*utf16_index=*/0, + /*utf32_index=*/0))); +} + +TEST(CharacterIteratorTest, BasicUtf16) { + constexpr std::string_view kText = "¿Dónde está la biblioteca?"; + CharacterIterator iterator(kText); + EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿")); + + EXPECT_THAT(iterator.AdvanceToUtf16(2), IsTrue()); + EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("ó")); + EXPECT_THAT(iterator, + Eq(CharacterIterator(kText, /*utf8_index=*/3, /*utf16_index=*/2, + /*utf32_index=*/2))); + + EXPECT_THAT(iterator.AdvanceToUtf16(15), IsTrue()); + EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b")); + EXPECT_THAT(iterator, + Eq(CharacterIterator(kText, /*utf8_index=*/18, /*utf16_index=*/15, + /*utf32_index=*/15))); + + EXPECT_THAT(iterator.AdvanceToUtf16(25), IsTrue()); + EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?")); + EXPECT_THAT(iterator, + Eq(CharacterIterator(kText, /*utf8_index=*/28, /*utf16_index=*/25, + /*utf32_index=*/25))); + + EXPECT_THAT(iterator.AdvanceToUtf16(26), IsTrue()); + EXPECT_THAT(iterator.GetCurrentChar(), Eq(0)); + EXPECT_THAT(iterator, + Eq(CharacterIterator(kText, /*utf8_index=*/29, /*utf16_index=*/26, + /*utf32_index=*/26))); + + EXPECT_THAT(iterator.RewindToUtf16(25), IsTrue()); + EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?")); + EXPECT_THAT(iterator, + Eq(CharacterIterator(kText, /*utf8_index=*/28, /*utf16_index=*/25, + /*utf32_index=*/25))); + + EXPECT_THAT(iterator.RewindToUtf16(15), IsTrue()); + EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b")); + EXPECT_THAT(iterator, + Eq(CharacterIterator(kText, /*utf8_index=*/18, /*utf16_index=*/15, + /*utf32_index=*/15))); + + 
EXPECT_THAT(iterator.RewindToUtf16(2), IsTrue()); + EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("ó")); + EXPECT_THAT(iterator, + Eq(CharacterIterator(kText, /*utf8_index=*/3, /*utf16_index=*/2, + /*utf32_index=*/2))); + + EXPECT_THAT(iterator.RewindToUtf8(0), IsTrue()); + EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿")); + EXPECT_THAT(iterator, + Eq(CharacterIterator(kText, /*utf8_index=*/0, /*utf16_index=*/0, + /*utf32_index=*/0))); +} + +TEST(CharacterIteratorTest, BasicUtf32) { + constexpr std::string_view kText = "¿Dónde está la biblioteca?"; + CharacterIterator iterator(kText); + EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿")); + + EXPECT_THAT(iterator.AdvanceToUtf32(2), IsTrue()); + EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("ó")); + EXPECT_THAT(iterator, + Eq(CharacterIterator(kText, /*utf8_index=*/3, /*utf16_index=*/2, + /*utf32_index=*/2))); + + EXPECT_THAT(iterator.AdvanceToUtf32(15), IsTrue()); + EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b")); + EXPECT_THAT(iterator, + Eq(CharacterIterator(kText, /*utf8_index=*/18, /*utf16_index=*/15, + /*utf32_index=*/15))); + + EXPECT_THAT(iterator.AdvanceToUtf32(25), IsTrue()); + EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?")); + EXPECT_THAT(iterator, + Eq(CharacterIterator(kText, /*utf8_index=*/28, /*utf16_index=*/25, + /*utf32_index=*/25))); + + EXPECT_THAT(iterator.AdvanceToUtf32(26), IsTrue()); + EXPECT_THAT(iterator.GetCurrentChar(), Eq(0)); + EXPECT_THAT(iterator, + Eq(CharacterIterator(kText, /*utf8_index=*/29, /*utf16_index=*/26, + /*utf32_index=*/26))); + + EXPECT_THAT(iterator.RewindToUtf32(25), IsTrue()); + EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?")); + EXPECT_THAT(iterator, + Eq(CharacterIterator(kText, /*utf8_index=*/28, /*utf16_index=*/25, + /*utf32_index=*/25))); + + EXPECT_THAT(iterator.RewindToUtf32(15), IsTrue()); + EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b")); + 
EXPECT_THAT(iterator, + Eq(CharacterIterator(kText, /*utf8_index=*/18, /*utf16_index=*/15, + /*utf32_index=*/15))); + + EXPECT_THAT(iterator.RewindToUtf32(2), IsTrue()); + EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("ó")); + EXPECT_THAT(iterator, + Eq(CharacterIterator(kText, /*utf8_index=*/3, /*utf16_index=*/2, + /*utf32_index=*/2))); + + EXPECT_THAT(iterator.RewindToUtf32(0), IsTrue()); + EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿")); + EXPECT_THAT(iterator, + Eq(CharacterIterator(kText, /*utf8_index=*/0, /*utf16_index=*/0, + /*utf32_index=*/0))); +} + +TEST(CharacterIteratorTest, InvalidUtf) { + // "\255" is an invalid sequence. + constexpr std::string_view kText = "foo \255 bar"; + CharacterIterator iterator(kText); + + // Try to advance to the 'b' in 'bar'. This will fail and leave us pointed at + // the invalid sequence '\255'. Get CurrentChar() should return an invalid + // character. + EXPECT_THAT(iterator.AdvanceToUtf8(6), IsFalse()); + EXPECT_THAT(iterator.GetCurrentChar(), Eq(i18n_utils::kInvalidUChar32)); + CharacterIterator exp_iterator(kText, /*utf8_index=*/4, /*utf16_index=*/4, + /*utf32_index=*/4); + EXPECT_THAT(iterator, Eq(exp_iterator)); + + EXPECT_THAT(iterator.AdvanceToUtf16(6), IsFalse()); + EXPECT_THAT(iterator.GetCurrentChar(), Eq(i18n_utils::kInvalidUChar32)); + EXPECT_THAT(iterator, Eq(exp_iterator)); + + EXPECT_THAT(iterator.AdvanceToUtf32(6), IsFalse()); + EXPECT_THAT(iterator.GetCurrentChar(), Eq(i18n_utils::kInvalidUChar32)); + EXPECT_THAT(iterator, Eq(exp_iterator)); + + // Create the iterator with it pointing at the 'b' in 'bar'. + iterator = CharacterIterator(kText, /*utf8_index=*/6, /*utf16_index=*/6, + /*utf32_index=*/6); + EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b")); + + // Try to advance to the last 'o' in 'foo'. This will fail and leave us + // pointed at the ' ' before the invalid sequence '\255'. 
+ exp_iterator = CharacterIterator(kText, /*utf8_index=*/5, /*utf16_index=*/5, + /*utf32_index=*/5); + EXPECT_THAT(iterator.RewindToUtf8(2), IsFalse()); + EXPECT_THAT(iterator.GetCurrentChar(), Eq(' ')); + EXPECT_THAT(iterator, Eq(exp_iterator)); + + EXPECT_THAT(iterator.RewindToUtf16(2), IsFalse()); + EXPECT_THAT(iterator.GetCurrentChar(), Eq(' ')); + EXPECT_THAT(iterator, Eq(exp_iterator)); + + EXPECT_THAT(iterator.RewindToUtf32(2), IsFalse()); + EXPECT_THAT(iterator.GetCurrentChar(), Eq(' ')); + EXPECT_THAT(iterator, Eq(exp_iterator)); +} + +} // namespace lib +} // namespace icing diff --git a/icing/util/document-validator_test.cc b/icing/util/document-validator_test.cc index f05e8a6..cb013d7 100644 --- a/icing/util/document-validator_test.cc +++ b/icing/util/document-validator_test.cc @@ -21,6 +21,7 @@ #include "icing/document-builder.h" #include "icing/file/filesystem.h" #include "icing/proto/schema.pb.h" +#include "icing/schema-builder.h" #include "icing/schema/schema-store.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" @@ -45,17 +46,52 @@ constexpr char kPropertyEmails[] = "emails"; constexpr char kDefaultNamespace[] = "icing"; constexpr char kDefaultString[] = "This is a string."; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = + PropertyConfigProto_Cardinality_Code_OPTIONAL; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED = + PropertyConfigProto_Cardinality_Code_REQUIRED; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED = + PropertyConfigProto_Cardinality_Code_REPEATED; + +constexpr PropertyConfigProto_DataType_Code TYPE_STRING = + PropertyConfigProto_DataType_Code_STRING; + class DocumentValidatorTest : public ::testing::Test { protected: DocumentValidatorTest() {} void SetUp() override { - SchemaProto schema; - auto type_config = schema.add_types(); - CreateEmailTypeConfig(type_config); - - type_config = schema.add_types(); - 
CreateConversationTypeConfig(type_config); + SchemaProto schema = + SchemaBuilder() + .AddType( + SchemaTypeConfigBuilder() + .SetType(kTypeEmail) + .AddProperty(PropertyConfigBuilder() + .SetName(kPropertySubject) + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty(PropertyConfigBuilder() + .SetName(kPropertyText) + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName(kPropertyRecipients) + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_REPEATED))) + .AddType( + SchemaTypeConfigBuilder() + .SetType(kTypeConversation) + .AddProperty(PropertyConfigBuilder() + .SetName(kPropertyName) + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty( + PropertyConfigBuilder() + .SetName(kPropertyEmails) + .SetDataTypeDocument( + kTypeEmail, /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, @@ -66,25 +102,6 @@ class DocumentValidatorTest : public ::testing::Test { std::make_unique<DocumentValidator>(schema_store_.get()); } - static void CreateEmailTypeConfig(SchemaTypeConfigProto* type_config) { - type_config->set_schema_type(kTypeEmail); - - auto subject = type_config->add_properties(); - subject->set_property_name(kPropertySubject); - subject->set_data_type(PropertyConfigProto::DataType::STRING); - subject->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - - auto text = type_config->add_properties(); - text->set_property_name(kPropertyText); - text->set_data_type(PropertyConfigProto::DataType::STRING); - text->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - auto recipients = type_config->add_properties(); - recipients->set_property_name(kPropertyRecipients); - recipients->set_data_type(PropertyConfigProto::DataType::STRING); - recipients->set_cardinality(PropertyConfigProto::Cardinality::REPEATED); - } - static DocumentBuilder 
SimpleEmailBuilder() { return DocumentBuilder() .SetKey(kDefaultNamespace, "email/1") @@ -95,21 +112,6 @@ class DocumentValidatorTest : public ::testing::Test { kDefaultString); } - static void CreateConversationTypeConfig(SchemaTypeConfigProto* type_config) { - type_config->set_schema_type(kTypeConversation); - - auto name = type_config->add_properties(); - name->set_property_name(kPropertyName); - name->set_data_type(PropertyConfigProto::DataType::STRING); - name->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - - auto emails = type_config->add_properties(); - emails->set_property_name(kPropertyEmails); - emails->set_data_type(PropertyConfigProto::DataType::DOCUMENT); - emails->set_cardinality(PropertyConfigProto::Cardinality::REPEATED); - emails->set_schema_type(kTypeEmail); - } - static DocumentBuilder SimpleConversationBuilder() { return DocumentBuilder() .SetKey(kDefaultNamespace, "conversation/1") @@ -326,12 +328,26 @@ TEST_F(DocumentValidatorTest, ValidateNestedPropertyInvalid) { } TEST_F(DocumentValidatorTest, HandleTypeConfigMapChangesOk) { - SchemaProto email_schema; - auto type_config = email_schema.add_types(); - CreateEmailTypeConfig(type_config); + SchemaProto email_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kTypeEmail) + .AddProperty(PropertyConfigBuilder() + .SetName(kPropertySubject) + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty(PropertyConfigBuilder() + .SetName(kPropertyText) + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName(kPropertyRecipients) + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); - // Create a custom directory so we don't collide with the test's preset schema - // in SetUp + // Create a custom directory so we don't collide + // with the test's preset schema in SetUp const std::string custom_schema_dir = GetTestTempDir() + "/custom_schema"; 
filesystem_.DeleteDirectoryRecursively(custom_schema_dir.c_str()); filesystem_.CreateDirectoryRecursively(custom_schema_dir.c_str()); @@ -352,9 +368,21 @@ TEST_F(DocumentValidatorTest, HandleTypeConfigMapChangesOk) { HasSubstr("'Conversation' not found"))); // Add the 'Conversation' type - SchemaProto email_and_conversation_schema = email_schema; - type_config = email_and_conversation_schema.add_types(); - CreateConversationTypeConfig(type_config); + SchemaProto email_and_conversation_schema = + SchemaBuilder(email_schema) + .AddType(SchemaTypeConfigBuilder() + .SetType(kTypeConversation) + .AddProperty(PropertyConfigBuilder() + .SetName(kPropertyName) + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty( + PropertyConfigBuilder() + .SetName(kPropertyEmails) + .SetDataTypeDocument( + kTypeEmail, /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); // DocumentValidator should be able to handle the SchemaStore getting updated // separately diff --git a/icing/util/math-util.h b/icing/util/math-util.h index fc11a09..3f2a69d 100644 --- a/icing/util/math-util.h +++ b/icing/util/math-util.h @@ -37,7 +37,7 @@ inline double SafeDivide(double first, double second) { template <typename IntType> static IntType RoundDownTo(IntType input_value, IntType rounding_value) { static_assert(std::numeric_limits<IntType>::is_integer, - "RoundUpTo() operation type is not integer"); + "RoundDownTo() operation type is not integer"); if (input_value <= 0) { return 0; diff --git a/java/src/com/google/android/icing/BreakIteratorBatcher.java b/java/src/com/google/android/icing/BreakIteratorBatcher.java index 58efbfc..2b87327 100644 --- a/java/src/com/google/android/icing/BreakIteratorBatcher.java +++ b/java/src/com/google/android/icing/BreakIteratorBatcher.java @@ -14,9 +14,6 @@ package com.google.android.icing; -import androidx.annotation.NonNull; -import androidx.annotation.RestrictTo; - import java.text.BreakIterator; import 
java.util.ArrayList; import java.util.List; @@ -38,20 +35,17 @@ import java.util.Locale; * utf16Boundaries = brkItrBatcher.next(5); * assertThat(utf16Boundaries).asList().containsExactly(9); * }</pre> - * - * @hide */ -@RestrictTo(RestrictTo.Scope.LIBRARY_GROUP) public class BreakIteratorBatcher { private final BreakIterator iterator; - public BreakIteratorBatcher(@NonNull Locale locale) { + public BreakIteratorBatcher(Locale locale) { this.iterator = BreakIterator.getWordInstance(locale); } /* Direct calls to BreakIterator */ - public void setText(@NonNull String text) { + public void setText(String text) { iterator.setText(text); } @@ -73,7 +67,6 @@ public class BreakIteratorBatcher { * the end of the text (returns BreakIterator#DONE), then only the results of the previous calls * in that batch will be returned. */ - @NonNull public int[] next(int batchSize) { List<Integer> breakIndices = new ArrayList<>(batchSize); for (int i = 0; i < batchSize; ++i) { diff --git a/java/src/com/google/android/icing/IcingSearchEngine.java b/java/src/com/google/android/icing/IcingSearchEngine.java index 88d0578..1f5fb51 100644 --- a/java/src/com/google/android/icing/IcingSearchEngine.java +++ b/java/src/com/google/android/icing/IcingSearchEngine.java @@ -31,6 +31,7 @@ import com.google.android.icing.proto.IcingSearchEngineOptions; import com.google.android.icing.proto.InitializeResultProto; import com.google.android.icing.proto.OptimizeResultProto; import com.google.android.icing.proto.PersistToDiskResultProto; +import com.google.android.icing.proto.PersistType; import com.google.android.icing.proto.PutResultProto; import com.google.android.icing.proto.ReportUsageResultProto; import com.google.android.icing.proto.ResetResultProto; @@ -41,6 +42,7 @@ import com.google.android.icing.proto.SearchResultProto; import com.google.android.icing.proto.SearchSpecProto; import com.google.android.icing.proto.SetSchemaResultProto; import com.google.android.icing.proto.StatusProto; +import 
com.google.android.icing.proto.StorageInfoResultProto; import com.google.android.icing.proto.UsageReport; import com.google.protobuf.ExtensionRegistryLite; import com.google.protobuf.InvalidProtocolBufferException; @@ -51,9 +53,11 @@ import java.io.Closeable; * * <p>If this instance has been closed, the instance is no longer usable. * + * <p>Keep this class to be non-Final so that it can be mocked in AppSearch. + * * <p>NOTE: This class is NOT thread-safe. */ -public final class IcingSearchEngine implements Closeable { +public class IcingSearchEngine implements Closeable { private static final String TAG = "IcingSearchEngine"; private static final ExtensionRegistryLite EXTENSION_REGISTRY_LITE = @@ -434,10 +438,10 @@ public final class IcingSearchEngine implements Closeable { } @NonNull - public PersistToDiskResultProto persistToDisk() { + public PersistToDiskResultProto persistToDisk(@NonNull PersistType.Code persistTypeCode) { throwIfClosed(); - byte[] persistToDiskResultBytes = nativePersistToDisk(this); + byte[] persistToDiskResultBytes = nativePersistToDisk(this, persistTypeCode.getNumber()); if (persistToDiskResultBytes == null) { Log.e(TAG, "Received null PersistToDiskResultProto from native."); return PersistToDiskResultProto.newBuilder() @@ -501,6 +505,29 @@ public final class IcingSearchEngine implements Closeable { } @NonNull + public StorageInfoResultProto getStorageInfo() { + throwIfClosed(); + + byte[] storageInfoResultProtoBytes = nativeGetStorageInfo(this); + if (storageInfoResultProtoBytes == null) { + Log.e(TAG, "Received null StorageInfoResultProto from native."); + return StorageInfoResultProto.newBuilder() + .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL)) + .build(); + } + + try { + return StorageInfoResultProto.parseFrom( + storageInfoResultProtoBytes, EXTENSION_REGISTRY_LITE); + } catch (InvalidProtocolBufferException e) { + Log.e(TAG, "Error parsing GetOptimizeInfoResultProto.", e); + return 
StorageInfoResultProto.newBuilder() + .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL)) + .build(); + } + } + + @NonNull public ResetResultProto reset() { throwIfClosed(); @@ -568,11 +595,13 @@ public final class IcingSearchEngine implements Closeable { private static native byte[] nativeDeleteByQuery( IcingSearchEngine instance, byte[] searchSpecBytes); - private static native byte[] nativePersistToDisk(IcingSearchEngine instance); + private static native byte[] nativePersistToDisk(IcingSearchEngine instance, int persistType); private static native byte[] nativeOptimize(IcingSearchEngine instance); private static native byte[] nativeGetOptimizeInfo(IcingSearchEngine instance); + private static native byte[] nativeGetStorageInfo(IcingSearchEngine instance); + private static native byte[] nativeReset(IcingSearchEngine instance); } diff --git a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java index 56edaf1..0cee80c 100644 --- a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java +++ b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java @@ -32,6 +32,7 @@ import com.google.android.icing.proto.IcingSearchEngineOptions; import com.google.android.icing.proto.InitializeResultProto; import com.google.android.icing.proto.OptimizeResultProto; import com.google.android.icing.proto.PersistToDiskResultProto; +import com.google.android.icing.proto.PersistType; import com.google.android.icing.proto.PropertyConfigProto; import com.google.android.icing.proto.PropertyProto; import com.google.android.icing.proto.PutResultProto; @@ -44,7 +45,10 @@ import com.google.android.icing.proto.ScoringSpecProto; import com.google.android.icing.proto.SearchResultProto; import com.google.android.icing.proto.SearchSpecProto; import com.google.android.icing.proto.SetSchemaResultProto; +import 
com.google.android.icing.proto.SnippetMatchProto; +import com.google.android.icing.proto.SnippetProto; import com.google.android.icing.proto.StatusProto; +import com.google.android.icing.proto.StorageInfoResultProto; import com.google.android.icing.proto.StringIndexingConfig; import com.google.android.icing.proto.StringIndexingConfig.TokenizerType; import com.google.android.icing.proto.TermMatchType; @@ -394,7 +398,8 @@ public final class IcingSearchEngineTest { public void testPersistToDisk() throws Exception { assertStatusOk(icingSearchEngine.initialize().getStatus()); - PersistToDiskResultProto persistToDiskResultProto = icingSearchEngine.persistToDisk(); + PersistToDiskResultProto persistToDiskResultProto = + icingSearchEngine.persistToDisk(PersistType.Code.LITE); assertStatusOk(persistToDiskResultProto.getStatus()); } @@ -417,6 +422,14 @@ public final class IcingSearchEngineTest { } @Test + public void testGetStorageInfo() throws Exception { + assertStatusOk(icingSearchEngine.initialize().getStatus()); + + StorageInfoResultProto storageInfoResultProto = icingSearchEngine.getStorageInfo(); + assertStatusOk(storageInfoResultProto.getStatus()); + } + + @Test public void testGetAllNamespaces() throws Exception { assertStatusOk(icingSearchEngine.initialize().getStatus()); @@ -475,6 +488,141 @@ public final class IcingSearchEngineTest { assertStatusOk(reportUsageResultProto.getStatus()); } + @Test + public void testCJKTSnippets() throws Exception { + assertStatusOk(icingSearchEngine.initialize().getStatus()); + + SchemaProto schema = SchemaProto.newBuilder().addTypes(createEmailTypeConfig()).build(); + assertStatusOk( + icingSearchEngine.setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false).getStatus()); + + // String: "天是蓝的" + // ^ ^^ ^ + // UTF16 idx: 0 1 2 3 + // Breaks into segments: "天", "是", "蓝", "的" + // "The sky is blue" + String chinese = "天是蓝的"; + assertThat(chinese.length()).isEqualTo(4); + DocumentProto emailDocument1 = + 
createEmailDocument("namespace", "uri1").toBuilder() + .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues(chinese)) + .build(); + assertStatusOk(icingSearchEngine.put(emailDocument1).getStatus()); + + // Search and request snippet matching but no windowing. + SearchSpecProto searchSpec = + SearchSpecProto.newBuilder() + .setQuery("是") + .setTermMatchType(TermMatchType.Code.PREFIX) + .build(); + ResultSpecProto resultSpecProto = + ResultSpecProto.newBuilder() + .setSnippetSpec( + ResultSpecProto.SnippetSpecProto.newBuilder() + .setNumToSnippet(Integer.MAX_VALUE) + .setNumMatchesPerProperty(Integer.MAX_VALUE)) + .build(); + + // Search and make sure that we got a single successful results + SearchResultProto searchResultProto = + icingSearchEngine.search( + searchSpec, ScoringSpecProto.getDefaultInstance(), resultSpecProto); + assertStatusOk(searchResultProto.getStatus()); + assertThat(searchResultProto.getResultsCount()).isEqualTo(1); + + // Ensure that one and only one property was matched and it was "subject" + SnippetProto snippetProto = searchResultProto.getResults(0).getSnippet(); + assertThat(snippetProto.getEntriesList()).hasSize(1); + SnippetProto.EntryProto entryProto = snippetProto.getEntries(0); + assertThat(entryProto.getPropertyName()).isEqualTo("subject"); + + // Get the content for "subject" and see what the match is. 
+ DocumentProto resultDocument = searchResultProto.getResults(0).getDocument(); + assertThat(resultDocument.getPropertiesList()).hasSize(1); + PropertyProto subjectProperty = resultDocument.getProperties(0); + assertThat(subjectProperty.getName()).isEqualTo("subject"); + assertThat(subjectProperty.getStringValuesList()).hasSize(1); + String content = subjectProperty.getStringValues(0); + + // Ensure that there is one and only one match within "subject" + assertThat(entryProto.getSnippetMatchesList()).hasSize(1); + SnippetMatchProto matchProto = entryProto.getSnippetMatches(0); + + int matchStart = matchProto.getExactMatchUtf16Position(); + int matchEnd = matchStart + matchProto.getExactMatchUtf16Length(); + assertThat(matchStart).isEqualTo(1); + assertThat(matchEnd).isEqualTo(2); + String match = content.substring(matchStart, matchEnd); + assertThat(match).isEqualTo("是"); + } + + @Test + public void testUtf16MultiByteSnippets() throws Exception { + assertStatusOk(icingSearchEngine.initialize().getStatus()); + + SchemaProto schema = SchemaProto.newBuilder().addTypes(createEmailTypeConfig()).build(); + assertStatusOk( + icingSearchEngine.setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false).getStatus()); + + // String: "𐀀𐀁 𐀂𐀃 𐀄" + // ^ ^ ^ + // UTF16 idx: 0 5 10 + // Breaks into segments: "𐀀𐀁", "𐀂𐀃", "𐀄" + String text = "𐀀𐀁 𐀂𐀃 𐀄"; + assertThat(text.length()).isEqualTo(12); + DocumentProto emailDocument1 = + createEmailDocument("namespace", "uri1").toBuilder() + .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues(text)) + .build(); + assertStatusOk(icingSearchEngine.put(emailDocument1).getStatus()); + + // Search and request snippet matching but no windowing. 
+ SearchSpecProto searchSpec = + SearchSpecProto.newBuilder() + .setQuery("𐀂") + .setTermMatchType(TermMatchType.Code.PREFIX) + .build(); + ResultSpecProto resultSpecProto = + ResultSpecProto.newBuilder() + .setSnippetSpec( + ResultSpecProto.SnippetSpecProto.newBuilder() + .setNumToSnippet(Integer.MAX_VALUE) + .setNumMatchesPerProperty(Integer.MAX_VALUE)) + .build(); + + // Search and make sure that we got a single successful results + SearchResultProto searchResultProto = + icingSearchEngine.search( + searchSpec, ScoringSpecProto.getDefaultInstance(), resultSpecProto); + assertStatusOk(searchResultProto.getStatus()); + assertThat(searchResultProto.getResultsCount()).isEqualTo(1); + + // Ensure that one and only one property was matched and it was "subject" + SnippetProto snippetProto = searchResultProto.getResults(0).getSnippet(); + assertThat(snippetProto.getEntriesList()).hasSize(1); + SnippetProto.EntryProto entryProto = snippetProto.getEntries(0); + assertThat(entryProto.getPropertyName()).isEqualTo("subject"); + + // Get the content for "subject" and see what the match is. 
+ DocumentProto resultDocument = searchResultProto.getResults(0).getDocument(); + assertThat(resultDocument.getPropertiesList()).hasSize(1); + PropertyProto subjectProperty = resultDocument.getProperties(0); + assertThat(subjectProperty.getName()).isEqualTo("subject"); + assertThat(subjectProperty.getStringValuesList()).hasSize(1); + String content = subjectProperty.getStringValues(0); + + // Ensure that there is one and only one match within "subject" + assertThat(entryProto.getSnippetMatchesList()).hasSize(1); + SnippetMatchProto matchProto = entryProto.getSnippetMatches(0); + + int matchStart = matchProto.getExactMatchUtf16Position(); + int matchEnd = matchStart + matchProto.getExactMatchUtf16Length(); + assertThat(matchStart).isEqualTo(5); + assertThat(matchEnd).isEqualTo(9); + String match = content.substring(matchStart, matchEnd); + assertThat(match).isEqualTo("𐀂𐀃"); + } + private static void assertStatusOk(StatusProto status) { assertWithMessage(status.getMessage()).that(status.getCode()).isEqualTo(StatusProto.Code.OK); } diff --git a/proto/icing/proto/document.proto b/proto/icing/proto/document.proto index d55b7e2..2e8321b 100644 --- a/proto/icing/proto/document.proto +++ b/proto/icing/proto/document.proto @@ -110,11 +110,11 @@ message PutResultProto { // go/icing-library-apis. optional StatusProto status = 1; - // Stats of the function call. Inside NativePutDocumentStats, the function + // Stats of the function call. Inside PutDocumentStatsProto, the function // call latency 'latency_ms' will always be populated. The other fields will // be accurate only when the status above is OK. See logging.proto for // details. - optional NativePutDocumentStats native_put_document_stats = 2; + optional PutDocumentStatsProto put_document_stats = 2; } // Result of a call to IcingSearchEngine.Get @@ -167,7 +167,7 @@ message DeleteResultProto { optional StatusProto status = 1; // Stats for delete execution performance. 
- optional NativeDeleteStats delete_stats = 2; + optional DeleteStatsProto delete_stats = 2; } // Result of a call to IcingSearchEngine.DeleteByNamespace @@ -186,7 +186,7 @@ message DeleteByNamespaceResultProto { optional StatusProto status = 1; // Stats for delete execution performance. - optional NativeDeleteStats delete_stats = 2; + optional DeleteStatsProto delete_stats = 2; } // Result of a call to IcingSearchEngine.DeleteBySchemaType @@ -205,11 +205,11 @@ message DeleteBySchemaTypeResultProto { optional StatusProto status = 1; // Stats for delete execution performance. - optional NativeDeleteStats delete_stats = 2; + optional DeleteStatsProto delete_stats = 2; } // Result of a call to IcingSearchEngine.DeleteByQuery -// Next tag: 3 +// Next tag: 4 message DeleteByQueryResultProto { // Status code can be one of: // OK @@ -224,5 +224,7 @@ message DeleteByQueryResultProto { optional StatusProto status = 1; // Stats for delete execution performance. - optional NativeDeleteStats delete_stats = 2; + optional DeleteByQueryStatsProto delete_by_query_stats = 3; + + reserved 2; } diff --git a/proto/icing/proto/document_wrapper.proto b/proto/icing/proto/document_wrapper.proto index e8eb992..929ee33 100644 --- a/proto/icing/proto/document_wrapper.proto +++ b/proto/icing/proto/document_wrapper.proto @@ -20,7 +20,6 @@ import "icing/proto/document.proto"; option java_package = "com.google.android.icing.proto"; option java_multiple_files = true; - option objc_class_prefix = "ICNG"; // DocumentWrapper as a wrapper of the user-facing DocumentProto is meant to @@ -30,6 +29,5 @@ option objc_class_prefix = "ICNG"; message DocumentWrapper { optional DocumentProto document = 1; - // Indicates if the document is marked as deleted - optional bool deleted = 2; + reserved 2; } diff --git a/proto/icing/proto/initialize.proto b/proto/icing/proto/initialize.proto index ae2944c..ab2556d 100644 --- a/proto/icing/proto/initialize.proto +++ b/proto/icing/proto/initialize.proto @@ -16,12 
+16,11 @@ syntax = "proto2"; package icing.lib; -import "icing/proto/status.proto"; import "icing/proto/logging.proto"; +import "icing/proto/status.proto"; option java_package = "com.google.android.icing.proto"; option java_multiple_files = true; - option objc_class_prefix = "ICNG"; // Next tag: 5 @@ -89,11 +88,11 @@ message InitializeResultProto { // go/icing-library-apis. optional StatusProto status = 1; - // Stats of the function call. Inside NativeInitializeStats, the function call + // Stats of the function call. Inside InitializeStatsProto, the function call // latency 'latency_ms' will always be populated. The other fields will be // accurate only when the status above is OK or WARNING_DATA_LOSS. See // logging.proto for details. - optional NativeInitializeStats native_initialize_stats = 2; + optional InitializeStatsProto initialize_stats = 2; // TODO(b/147699081): Add a field to indicate lost_schema and lost_documents. // go/icing-library-apis. diff --git a/proto/icing/proto/internal/optimize.proto b/proto/icing/proto/internal/optimize.proto new file mode 100644 index 0000000..4ed3d73 --- /dev/null +++ b/proto/icing/proto/internal/optimize.proto @@ -0,0 +1,29 @@ +// Copyright 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +syntax = "proto2"; + +package icing.lib; + +option java_package = "com.google.android.icing.internal.proto"; +option java_multiple_files = true; +option objc_class_prefix = "ICNG"; + +// A status that is saved internally in Icing to track information about how +// often Optimize runs. +// Next tag: 2 +message OptimizeStatusProto { + // The Epoch time at which the last successfuly optimize ran. + optional int64 last_successful_optimize_run_time_ms = 1; +} diff --git a/proto/icing/proto/logging.proto b/proto/icing/proto/logging.proto index 09ec756..7abbf4a 100644 --- a/proto/icing/proto/logging.proto +++ b/proto/icing/proto/logging.proto @@ -23,8 +23,8 @@ option java_multiple_files = true; option objc_class_prefix = "ICNG"; // Stats of the top-level function IcingSearchEngine::Initialize(). -// Next tag: 11 -message NativeInitializeStats { +// Next tag: 12 +message InitializeStatsProto { // Overall time used for the function call. optional int32 latency_ms = 1; @@ -40,8 +40,9 @@ message NativeInitializeStats { // Data in index is inconsistent with ground truth. INCONSISTENT_WITH_GROUND_TRUTH = 2; - // Total checksum of all the components does not match. - TOTAL_CHECKSUM_MISMATCH = 3; + // Changes were made to the schema, but possibly not fully applied to the + // document store and the index - requiring a recovery. + SCHEMA_CHANGES_OUT_OF_SYNC = 3; // Random I/O errors. IO_ERROR = 4; @@ -49,13 +50,13 @@ message NativeInitializeStats { // Possible recovery causes for document store: // - DATA_LOSS - // - TOTAL_CHECKSUM_MISMATCH + // - SCHEMA_CHANGES_OUT_OF_SYNC // - IO_ERROR optional RecoveryCause document_store_recovery_cause = 2; // Possible recovery causes for index: // - INCONSISTENT_WITH_GROUND_TRUTH - // - TOTAL_CHECKSUM_MISMATCH + // - SCHEMA_CHANGES_OUT_OF_SYNC // - IO_ERROR optional RecoveryCause index_restoration_cause = 3; @@ -91,11 +92,15 @@ message NativeInitializeStats { // Number of schema types currently in schema store. 
optional int32 num_schema_types = 10; + + // Number of consecutive initialization failures that immediately preceded + // this initialization. + optional int32 num_previous_init_failures = 11; } // Stats of the top-level function IcingSearchEngine::Put(). // Next tag: 7 -message NativePutDocumentStats { +message PutDocumentStatsProto { // Overall time used for the function call. optional int32 latency_ms = 1; @@ -125,8 +130,11 @@ message NativePutDocumentStats { // Stats of the top-level function IcingSearchEngine::Search() and // IcingSearchEngine::GetNextPage(). -// Next tag: 15 -message NativeQueryStats { +// Next tag: 17 +message QueryStatsProto { + // The UTF-8 length of the query string + optional int32 query_length = 16; + // Number of terms in the query string. optional int32 num_terms = 1; @@ -154,7 +162,7 @@ message NativeQueryStats { optional int32 num_documents_scored = 8; // How many of the results in the page returned were snippeted. - optional bool num_results_snippeted = 9; + optional int32 num_results_with_snippets = 15; // Overall time used for the function call. optional int32 latency_ms = 10; @@ -172,13 +180,14 @@ message NativeQueryStats { // Time used to fetch the document protos. Note that it includes the // time to snippet if ‘has_snippets’ is true. optional int32 document_retrieval_latency_ms = 14; + + reserved 9; } // Stats of the top-level functions IcingSearchEngine::Delete, -// IcingSearchEngine::DeleteByNamespace, IcingSearchEngine::DeleteBySchemaType, -// IcingSearchEngine::DeleteByQuery. +// IcingSearchEngine::DeleteByNamespace, IcingSearchEngine::DeleteBySchemaType. // Next tag: 4 -message NativeDeleteStats { +message DeleteStatsProto { // Overall time used for the function call. optional int32 latency_ms = 1; @@ -190,8 +199,10 @@ message NativeDeleteStats { // Delete one document. SINGLE = 1; - // Delete by query. - QUERY = 2; + // Delete by query. This value is deprecated. 
+ // IcingSearchEngine::DeleteByQuery will return a DeleteByQueryStatsProto + // rather than a DeleteStatsProto. + DEPRECATED_QUERY = 2 [deprecated = true]; // Delete by namespace. NAMESPACE = 3; @@ -204,4 +215,33 @@ message NativeDeleteStats { // Number of documents deleted by this call. optional int32 num_documents_deleted = 3; -}
\ No newline at end of file +} + +// Stats of the top-level functions IcingSearchEngine::DeleteByQuery. +// Next tag: 9 +message DeleteByQueryStatsProto { + // Overall time used for the function call. + optional int32 latency_ms = 1; + + // Number of documents deleted by this call. + optional int32 num_documents_deleted = 2; + + // The UTF-8 length of the query string + optional int32 query_length = 3; + + // Number of terms in the query string. + optional int32 num_terms = 4; + + // Number of namespaces filtered. + optional int32 num_namespaces_filtered = 5; + + // Number of schema types filtered. + optional int32 num_schema_types_filtered = 6; + + // Time used to parse the query, including 2 parts: tokenizing and + // transforming tokens into an iterator tree. + optional int32 parse_query_latency_ms = 7; + + // Time used to delete each document. + optional int32 document_removal_latency_ms = 8; +} diff --git a/proto/icing/proto/optimize.proto b/proto/icing/proto/optimize.proto index 1baa64c..42290f3 100644 --- a/proto/icing/proto/optimize.proto +++ b/proto/icing/proto/optimize.proto @@ -23,7 +23,7 @@ option java_multiple_files = true; option objc_class_prefix = "ICNG"; // Result of a call to IcingSearchEngine.Optimize -// Next tag: 2 +// Next tag: 3 message OptimizeResultProto { // Status code can be one of: // OK @@ -35,12 +35,13 @@ message OptimizeResultProto { // See status.proto for more details. optional StatusProto status = 1; + optional OptimizeStatsProto optimize_stats = 2; // TODO(b/147699081): Add a field to indicate lost_schema and lost_documents. // go/icing-library-apis. } // Result of a call to IcingSearchEngine.GetOptimizeInfo -// Next tag: 4 +// Next tag: 5 message GetOptimizeInfoResultProto { // Status code can be one of: // OK @@ -57,4 +58,37 @@ message GetOptimizeInfoResultProto { // Estimated bytes that could be recovered. The exact size per document isn't // tracked, so this is based off an average document size. 
optional int64 estimated_optimizable_bytes = 3; + + // The amount of time since the last optimize ran. + optional int64 time_since_last_optimize_ms = 4; +} + +// Next tag: 10 +message OptimizeStatsProto { + // Overall time used for the function call. + optional int32 latency_ms = 1; + + // Time used to optimize the document store. + optional int32 document_store_optimize_latency_ms = 2; + + // Time used to restore the index. + optional int32 index_restoration_latency_ms = 3; + + // Number of documents before the optimization. + optional int32 num_original_documents = 4; + + // Number of documents deleted. + optional int32 num_deleted_documents = 5; + + // Number of documents expired. + optional int32 num_expired_documents = 6; + + // Size of storage before the optimize. + optional int64 storage_size_before = 7; + + // Size of storage after the optimize. + optional int64 storage_size_after = 8; + + // The amount of time since the last optimize ran. + optional int64 time_since_last_optimize_ms = 9; } diff --git a/proto/icing/proto/persist.proto b/proto/icing/proto/persist.proto index 77cf987..8d6b372 100644 --- a/proto/icing/proto/persist.proto +++ b/proto/icing/proto/persist.proto @@ -22,6 +22,28 @@ option java_package = "com.google.android.icing.proto"; option java_multiple_files = true; option objc_class_prefix = "ICNG"; +// The type of persistence guarantee that PersistToDisk should provide. +// Next tag: 3 +message PersistType { + enum Code { + // Default. Should never be used. + UNKNOWN = 0; + + // Only persist the ground truth. A successful PersistToDisk(LITE) should + // ensure that no data is lost the next time Icing initializes. This + // should be called after each batch of mutations. + LITE = 1; + + // Persists all data in internal Icing components. A successful + // PersistToDisk(FULL) should not only ensure no data loss like + // PersistToDisk(LITE), but also prevent the need to recover internal data + // structures the next time Icing initializes. 
This should be called at + // some point before the app terminates. + FULL = 2; + } + optional Code code = 1; +} + // Result of a call to IcingSearchEngine.Persist // Next tag: 2 message PersistToDiskResultProto { diff --git a/proto/icing/proto/schema.proto b/proto/icing/proto/schema.proto index 4188a8c..c611cbf 100644 --- a/proto/icing/proto/schema.proto +++ b/proto/icing/proto/schema.proto @@ -197,7 +197,7 @@ message SchemaProto { } // Result of a call to IcingSearchEngine.SetSchema -// Next tag: 4 +// Next tag: 8 message SetSchemaResultProto { // Status code can be one of: // OK @@ -221,6 +221,21 @@ message SetSchemaResultProto { // documents that fail validation against the new schema types would also be // deleted. repeated string incompatible_schema_types = 3; + + // Schema types that did not exist in the previous schema and were added with + // the new schema type. + repeated string new_schema_types = 4; + + // Schema types that were changed in a way that was backwards compatible and + // didn't invalidate the index. + repeated string fully_compatible_changed_schema_types = 5; + + // Schema types that were changed in a way that was backwards compatible, but + // invalidated the index. + repeated string index_incompatible_changed_schema_types = 6; + + // Overall time used for the function call. + optional int32 latency_ms = 7; } // Result of a call to IcingSearchEngine.GetSchema diff --git a/proto/icing/proto/search.proto b/proto/icing/proto/search.proto index 6c4e3c9..544995e 100644 --- a/proto/icing/proto/search.proto +++ b/proto/icing/proto/search.proto @@ -65,7 +65,7 @@ message SearchSpecProto { // Client-supplied specifications on what to include/how to format the search // results. -// Next tag: 5 +// Next tag: 6 message ResultSpecProto { // The results will be returned in pages, and num_per_page specifies the // number of documents in one page. 
@@ -102,34 +102,95 @@ message ResultSpecProto { // has been specified for a schema type, then *all* properties of that schema // type will be retrieved. repeated TypePropertyMask type_property_masks = 4; + + // Groupings of namespaces whose total returned results should be + // limited together. + // Next tag: 3 + message ResultGrouping { + // The namespaces in this grouping. + repeated string namespaces = 1; + + // The maximum number of results in this grouping that should be returned. + optional int32 max_results = 2; + } + + // How to limit the number of results returned per set of namespaces. If + // results match for a namespace that is not present in any result groupings, + // then those results will be returned without limit. + // + // Non-existent namespaces will be ignored. + // + // Example : Suppose that there are four namespaces each with three results + // matching the query for "foo". Without any result groupings, Icing would + // return the following results: + // ["ns0doc0", "ns0doc1", "ns1doc0", "ns3doc0", "ns0doc2", "ns3doc1", + // "ns2doc1", "ns3doc2", "ns2doc0", "ns1doc1", "ns2doc2", "ns1doc1"]. + // + // and the following result groupings: + // [ { ["namespace0"], 2 }, { ["namespace1", "namespace2"], 2} ] + // + // The following results will be returned: + // ["ns0doc0", "ns0doc1", "ns1doc0", "ns3doc0", "ns3doc1", "ns2doc1", + // "ns3doc2"]. + repeated ResultGrouping result_groupings = 5; } // The representation of a single match within a DocumentProto property. -// Next tag: 6 +// +// Example : A document whose content is "Necesito comprar comida mañana." and a +// query for "mana" with window=15 +// Next tag: 12 message SnippetMatchProto { - // Properties may have multiple values. values_index indicates which of these - // multiple string values the match occurred in. For properties with only one - // value, the values_index will always be 0. - // Ex. 
"Recipients" [ - // { { "Name" : "Daffy Duck" } - // { "EmailAddress" : "daffduck@gmail.com" } }, - // { { "Name" : "Donald Duck" } - // { "EmailAddress" : "donduck@gmail.com" } } - // "Daffy Duck" is the string value with a value_index of 0 for property - // "Recipients.Name". "Donald Duck" is the string value with a value_index of - // 1 for property "Recipients.Name". - optional int32 values_index = 1; - - // The position and length within the matched string at which the exact - // match begins. - optional int32 exact_match_position = 2; - - optional int32 exact_match_bytes = 3; - - // The position and length of the suggested snippet window. - optional int32 window_position = 4; - - optional int32 window_bytes = 5; + // The index of the byte in the string at which the match begins and the + // length in bytes of the match. + // + // For the example above, the values of these fields would be + // exact_match_byte_position=24, exact_match_byte_length=7 "mañana" + optional int32 exact_match_byte_position = 2; + optional int32 exact_match_byte_length = 3; + + // The length in bytes of the subterm that matches the query. The beginning of + // the submatch is the same as exact_match_byte_position. + // + // For the example above, the value of this field would be 5. With + // exact_match_byte_position=24 above, it would produce the substring "maña" + optional int32 submatch_byte_length = 10; + + // The index of the UTF-16 code unit in the string at which the match begins + // and the length in UTF-16 code units of the match. This is for use with + // UTF-16 encoded strings like Java.lang.String. + // + // For the example above, the values of these fields would be + // exact_match_utf16_position=24, exact_match_utf16_length=6 "mañana" + optional int32 exact_match_utf16_position = 6; + optional int32 exact_match_utf16_length = 7; + + // The length in UTF-16 code units of the subterm that matches the query. 
The + // beginning of the submatch is the same as exact_match_utf16_position. This + // is for use with UTF-16 encoded strings like Java.lang.String. + // + // For the example above, the value of this field would be 4. With + // exact_match_utf16_position=24 above, it would produce the substring "maña" + optional int32 submatch_utf16_length = 11; + + // The index of the byte in the string at which the suggested snippet window + // begins and the length in bytes of the window. + // + // For the example above, the values of these fields would be + // window_byte_position=17, window_byte_length=15 "comida mañana." + optional int32 window_byte_position = 4; + optional int32 window_byte_length = 5; + + // The index of the UTF-16 code unit in the string at which the suggested + // snippet window begins and the length in UTF-16 code units of the window. + // This is for use with UTF-16 encoded strings like Java.lang.String. + // + // For the example above, the values of these fields would be + // window_utf16_position=17, window_utf16_length=14 "comida mañana." + optional int32 window_utf16_position = 8; + optional int32 window_utf16_length = 9; + + reserved 1; } // A Proto representing all snippets for a single DocumentProto. @@ -139,9 +200,29 @@ message SnippetProto { // property values in the corresponding DocumentProto. // Next tag: 3 message EntryProto { - // A '.'-delimited sequence of property names indicating which property in - // the DocumentProto these snippets correspond to. - // Example properties: 'body', 'sender.name', 'sender.emailaddress', etc. + // A property path indicating which property in the DocumentProto these + // snippets correspond to. Property paths will contain 1) property names, + // 2) the property separator character '.' used to represent nested property + // and 3) indices surrounded by brackets to represent a specific value in + // that property. 
+ // + // Example properties: + // - 'body' : the first and only string value of a top-level + // property called 'body'. + // - 'sender.name' : the first and only string value of a property + // called 'name' that is a subproperty of a + // property called 'sender'. + // - 'bcc[1].emailaddress': the first and only string value of a property + // called 'emailaddress' that is a subproperty of + // the second document value of a property called + // 'bcc'. + // - 'attachments[0]' : the first (of more than one) string value of a + // property called 'attachments'. + // NOTE: If there is only a single value for a property (like + // 'sender.name'), then no value index will be added to the property path. + // An index of [0] is implied. If there is more than one value for a + // property, then the value index will be added to the property path (like + // 'attachements[0]'). optional string property_name = 1; repeated SnippetMatchProto snippet_matches = 2; @@ -167,7 +248,7 @@ message SearchResultProto { optional StatusProto status = 1; // The Results that matched the query. Empty if there was an error. - // Next tag: 3 + // Next tag: 4 message ResultProto { // Document that matches the SearchSpecProto. optional DocumentProto document = 1; @@ -175,6 +256,10 @@ message SearchResultProto { // Snippeting information for the document if requested in the // ResultSpecProto. A default instance, if not requested. optional SnippetProto snippet = 2; + + // The score that the document was ranked by. The meaning of this score is + // determined by ScoringSpecProto.rank_by. + optional double score = 3; } repeated ResultProto results = 2; @@ -198,7 +283,7 @@ message SearchResultProto { // LINT.ThenChange(//depot/google3/icing/result/result-state-manager.h:kInvalidNextPageToken) // Stats for query execution performance. 
- optional NativeQueryStats query_stats = 5; + optional QueryStatsProto query_stats = 5; } // Next tag: 3 diff --git a/proto/icing/proto/storage.proto b/proto/icing/proto/storage.proto new file mode 100644 index 0000000..39dab6b --- /dev/null +++ b/proto/icing/proto/storage.proto @@ -0,0 +1,187 @@ +// Copyright 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package icing.lib; + +import "icing/proto/status.proto"; + +option java_package = "com.google.android.icing.proto"; +option java_multiple_files = true; +option objc_class_prefix = "ICNG"; + +// Next tag: 10 +message NamespaceStorageInfoProto { + // Name of the namespace + optional string namespace = 1; + + // Number of alive documents in this namespace. + optional int32 num_alive_documents = 2; + + // NOTE: We don't have stats on number of deleted documents in a namespace + // since we completely erase all data on a document when it's deleted. And we + // can't figure out which namespace it belonged to. + + // Number of expired documents in this namespace. 
+ optional int32 num_expired_documents = 3; + + // LINT.IfChange(namespace_storage_info_usage_types) + // Number of alive documents that have a UsageReport.usage_type reported + optional int32 num_alive_documents_usage_type1 = 4; + optional int32 num_alive_documents_usage_type2 = 5; + optional int32 num_alive_documents_usage_type3 = 6; + + // Number of expired documents that have a UsageReport.usage_type reported + optional int32 num_expired_documents_usage_type1 = 7; + optional int32 num_expired_documents_usage_type2 = 8; + optional int32 num_expired_documents_usage_type3 = 9; + // LINT.ThenChange() +} + +// Next tag: 15 +message DocumentStorageInfoProto { + // Total number of alive documents. + optional int32 num_alive_documents = 1; + + // Total number of deleted documents. + optional int32 num_deleted_documents = 2; + + // Total number of expired documents. + optional int32 num_expired_documents = 3; + + // Total size of the document store in bytes. Will be set to -1 if an IO error + // is encountered while calculating this field. + optional int64 document_store_size = 4; + + // Total size of the ground truth in bytes. The ground truth may + // include deleted or expired documents. Will be set to -1 if an IO error is + // encountered while calculating this field. + optional int64 document_log_size = 5; + + // Size of the key mapper in bytes. Will be set to -1 if an IO error is + // encountered while calculating this field. + optional int64 key_mapper_size = 6; + + // Size of the document id mapper in bytes. Will be set to -1 if an IO error + // is encountered while calculating this field. + optional int64 document_id_mapper_size = 7; + + // Size of the score cache in bytes. Will be set to -1 if an IO error is + // encountered while calculating this field. + optional int64 score_cache_size = 8; + + // Size of the filter cache in bytes. Will be set to -1 if an IO error is + // encountered while calculating this field. 
+ optional int64 filter_cache_size = 9; + + // Size of the corpus mapper in bytes. Will be set to -1 if an IO error is + // encountered while calculating this field. + optional int64 corpus_mapper_size = 10; + + // Size of the corpus score cache in bytes. Will be set to -1 if an IO error + // is encountered while calculating this field. + optional int64 corpus_score_cache_size = 11; + + // Size of the namespace id mapper in bytes. Will be set to -1 if an IO error + // is encountered while calculating this field. + optional int64 namespace_id_mapper_size = 12; + + // Number of namespaces seen from the current documents. + // + // TODO(cassiewang): This isn't technically needed anymore since clients can + // get this number from namespace_storage_info. Consider removing this. + optional int32 num_namespaces = 13; + + // Storage information of each namespace. + repeated NamespaceStorageInfoProto namespace_storage_info = 14; +} + +// Next tag: 5 +message SchemaStoreStorageInfoProto { + // Size of the schema store in bytes. Will be set to -1 if an IO error is + // encountered while calculating this field. + optional int64 schema_store_size = 1; + + // Total number of schema types. + optional int32 num_schema_types = 2; + + // Total number of all sections across all types + optional int32 num_total_sections = 3; + + // Total number of types at the current section limit. + optional int32 num_schema_types_sections_exhausted = 4; +} + +// Next tag: 9 +message IndexStorageInfoProto { + // Total size of the index in bytes. Will be set to -1 if an IO error is + // encountered while calculating this field. + optional int64 index_size = 1; + + // Size of the lite index lexicon in bytes. Will be set to -1 if an IO error + // is encountered while calculating this field. + optional int64 lite_index_lexicon_size = 2; + + // Size of the lite index hit buffer in bytes. Will be set to -1 if an IO + // error is encountered while calculating this field. 
+ optional int64 lite_index_hit_buffer_size = 3; + + // Size of the main index lexicon in bytes. Will be set to -1 if an IO error + // is encountered while calculating this field. + optional int64 main_index_lexicon_size = 4; + + // Size of the main index storage in bytes. Will be set to -1 if an IO error + // is encountered while calculating this field. + optional int64 main_index_storage_size = 5; + + // Size of one main index block in bytes. + optional int64 main_index_block_size = 6; + + // Number of main index blocks. + optional int32 num_blocks = 7; + + // Percentage of the main index blocks that are free, assuming + // allocated blocks are fully used. + optional float min_free_fraction = 8; +} + +// Next tag: 5 +message StorageInfoProto { + // Total size of Icing’s storage in bytes. Will be set to -1 if an IO error is + // encountered while calculating this field. + optional int64 total_storage_size = 1; + + // Storage information of the document store. + optional DocumentStorageInfoProto document_storage_info = 2; + + // Storage information of the schema store. + optional SchemaStoreStorageInfoProto schema_store_storage_info = 3; + + // Storage information of the index. + optional IndexStorageInfoProto index_storage_info = 4; +} + +// Next tag: 3 +message StorageInfoResultProto { + // Status code can be one of: + // OK + // FAILED_PRECONDITION + // + // See status.proto for more details. + optional StatusProto status = 1; + + // Storage information of Icing. + optional StorageInfoProto storage_info = 2; +} diff --git a/proto/icing/proto/usage.proto b/proto/icing/proto/usage.proto index 7f31a2b..eaa2671 100644 --- a/proto/icing/proto/usage.proto +++ b/proto/icing/proto/usage.proto @@ -20,13 +20,11 @@ import "icing/proto/status.proto"; option java_package = "com.google.android.icing.proto"; option java_multiple_files = true; - option objc_class_prefix = "ICNG"; // Representation of a usage report that is generated from the client and sent // to Icing. 
// Next tag: 5 -// LINT.IfChange message UsageReport { // Namespace of the document. optional string document_namespace = 1; @@ -37,6 +35,7 @@ message UsageReport { // Timestamp in milliseconds of when the usage happens. optional int64 usage_timestamp_ms = 3; + // LINT.IfChange // Next tag: 3 enum UsageType { // A custom usage type that clients can assign a meaning to. UsageReports of @@ -50,9 +49,12 @@ message UsageReport { // Same as above. USAGE_TYPE3 = 2; } + // LINT.ThenChange( + // //depot/google3/icing/store/usage-store.h:UsageScores, + // //depot/google3/icing/proto/\ + // storage.proto:namespace_storage_info_usage_types) optional UsageType usage_type = 4; } -// LINT.ThenChange(//depot/google3/icing/store/usage-store.h:UsageScores) // Result of a call to IcingSearchEngine.ReportUsage // Next tag: 2 @@ -64,4 +66,4 @@ message ReportUsageResultProto { // // See status.proto for more details. optional StatusProto status = 1; -}
\ No newline at end of file +} diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt index af8248d..f0c066f 100644 --- a/synced_AOSP_CL_number.txt +++ b/synced_AOSP_CL_number.txt @@ -1 +1 @@ -set(synced_AOSP_CL_number=351841227) +set(synced_AOSP_CL_number=390638574) |