diff options
author | Tim Barron <tjbarron@google.com> | 2021-07-13 21:29:46 +0000 |
---|---|---|
committer | Tim Barron <tjbarron@google.com> | 2021-07-21 18:09:48 +0000 |
commit | d462d0aedaec65748ad2a8cb5b3f5cb352ab2c20 (patch) | |
tree | c0a00b9b4d52ff3dfeb50f5d894bad2d71389b00 | |
parent | 294421ca6ac449cd9279aaabc63cb4bb080acca3 (diff) | |
download | icing-android12-qpr1-d-s3-release.tar.gz |
Switch to PWrite to grow underlying file in FileBackedVector.android-12.0.0_r32android-12.0.0_r29android-12.0.0_r28android-12.0.0_r27android-12.0.0_r26android-12.0.0_r21android-12.0.0_r20android-12.0.0_r19android-12.0.0_r18android-12.0.0_r16android12-qpr1-releaseandroid12-qpr1-d-s3-releaseandroid12-qpr1-d-s2-releaseandroid12-qpr1-d-s1-releaseandroid12-qpr1-d-releaseandroid12-dev
Modify file-backed-vector to 1) include a check that the size of the
file is greater than or equal to the number of elements *
sizeof(element) and 2) use PWrite to force the system to allocate disk
blocks so that we avoid the risk of trying to mmap a region of the file
that the system hasn't allocated a block for (and is unable to do so).
Bug: 191444782
Test: framework presubmit
Test: manually copied change to google3 and ran all icing lib c++ tests
Test: Unit tests added to FileBackedVectorTest
Change-Id: I1134df4becf246e0a9e170b80ee62c37383f8d7b
-rw-r--r-- | icing/file/file-backed-vector.h | 43 | ||||
-rw-r--r-- | icing/file/file-backed-vector_test.cc | 197 | ||||
-rw-r--r-- | icing/store/document-store.cc | 13 |
3 files changed, 228 insertions, 25 deletions
diff --git a/icing/file/file-backed-vector.h b/icing/file/file-backed-vector.h index 2443cb2..0989935 100644 --- a/icing/file/file-backed-vector.h +++ b/icing/file/file-backed-vector.h @@ -56,6 +56,7 @@ #ifndef ICING_FILE_FILE_BACKED_VECTOR_H_ #define ICING_FILE_FILE_BACKED_VECTOR_H_ +#include <inttypes.h> #include <stdint.h> #include <sys/mman.h> @@ -423,13 +424,6 @@ FileBackedVector<T>::InitializeExistingFile( absl_ports::StrCat("Invalid header kMagic for ", file_path)); } - // Mmap the content of the vector, excluding the header so its easier to - // access elements from the mmapped region - auto mmapped_file = - std::make_unique<MemoryMappedFile>(filesystem, file_path, mmap_strategy); - ICING_RETURN_IF_ERROR( - mmapped_file->Remap(sizeof(Header), file_size - sizeof(Header))); - // Check header if (header->header_checksum != header->CalculateHeaderChecksum()) { return absl_ports::FailedPreconditionError( @@ -442,6 +436,20 @@ FileBackedVector<T>::InitializeExistingFile( header->element_size)); } + int64_t min_file_size = header->num_elements * sizeof(T) + sizeof(Header); + if (min_file_size > file_size) { + return absl_ports::InternalError(IcingStringUtil::StringPrintf( + "Inconsistent file size, expected %" PRId64 ", actual %" PRId64, + min_file_size, file_size)); + } + + // Mmap the content of the vector, excluding the header so its easier to + // access elements from the mmapped region + auto mmapped_file = + std::make_unique<MemoryMappedFile>(filesystem, file_path, mmap_strategy); + ICING_RETURN_IF_ERROR( + mmapped_file->Remap(sizeof(Header), file_size - sizeof(Header))); + // Check vector contents Crc32 vector_checksum; std::string_view vector_contents( @@ -591,9 +599,24 @@ libtextclassifier3::Status FileBackedVector<T>::GrowIfNecessary( least_file_size_needed = math_util::RoundUpTo( least_file_size_needed, int64_t{FileBackedVector<T>::kGrowElements * sizeof(T)}); - if (!filesystem_->Grow(file_path_.c_str(), least_file_size_needed)) { - return absl_ports::InternalError( - absl_ports::StrCat("Couldn't grow file ", file_path_)); + + // We use PWrite here rather than Grow because Grow doesn't actually allocate + // an underlying disk block. This can lead to problems with mmap because mmap + // has no effective way to signal that it was impossible to allocate the disk + // block and ends up crashing instead. PWrite will force the allocation of + // these blocks, which will ensure that any failure to grow will surface here. + int64_t page_size = getpagesize(); + auto buf = std::make_unique<uint8_t[]>(page_size); + int64_t size_to_write = page_size - (current_file_size % page_size); + ScopedFd sfd(filesystem_->OpenForWrite(file_path_.c_str())); + while (current_file_size < least_file_size_needed) { + if (!filesystem_->PWrite(sfd.get(), current_file_size, buf.get(), + size_to_write)) { + return absl_ports::InternalError( + absl_ports::StrCat("Couldn't grow file ", file_path_)); + } + current_file_size += size_to_write; + size_to_write = page_size - (current_file_size % page_size); } ICING_RETURN_IF_ERROR(mmapped_file_->Remap( diff --git a/icing/file/file-backed-vector_test.cc b/icing/file/file-backed-vector_test.cc index bc2fef6..b05ce2d 100644 --- a/icing/file/file-backed-vector_test.cc +++ b/icing/file/file-backed-vector_test.cc @@ -32,6 +32,7 @@ #include "icing/util/logging.h" using ::testing::Eq; +using ::testing::IsTrue; using ::testing::Pointee; namespace icing { @@ -278,7 +279,6 @@ TEST_F(FileBackedVectorTest, Grow) { filesystem_, file_path_, MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0))); - EXPECT_THAT(vector->Set(kMaxNumElts + 11, 'a'), StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); EXPECT_THAT(vector->Set(-1, 'a'), @@ -318,25 +318,32 @@ TEST_F(FileBackedVectorTest, GrowsInChunks) { filesystem_, file_path_, MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); - // Our initial file size should just be the size of the header - EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), - Eq(sizeof(FileBackedVector<char>::Header))); + // Our initial file size should just be the size of the header. Disk usage + // will indicate that one block has been allocated, which contains the header. + int header_size = sizeof(FileBackedVector<char>::Header); + int page_size = getpagesize(); + EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(header_size)); + EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(page_size)); - // Once we add something though, we'll grow to kGrowElements big + // Once we add something though, we'll grow to be kGrowElements big. From this + // point on, file size and disk usage should be the same because Growing will + // explicitly allocate the number of blocks needed to accomodate the file. Insert(vector.get(), 0, "a"); - EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), - Eq(kGrowElements * sizeof(int))); + int file_size = kGrowElements * sizeof(int); + EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(file_size)); + EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(file_size)); // Should still be the same size, don't need to grow underlying file Insert(vector.get(), 1, "b"); - EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), - Eq(kGrowElements * sizeof(int))); + EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(file_size)); + EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(file_size)); // Now we grow by a kGrowElements chunk, so the underlying file is 2 // kGrowElements big + file_size *= 2; Insert(vector.get(), 2, std::string(kGrowElements, 'c')); - EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), - Eq(kGrowElements * 2 * sizeof(int))); + EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(file_size)); + EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(file_size)); // Destroy/persist the contents. vector.reset(); @@ -463,6 +470,174 @@ TEST_F(FileBackedVectorTest, TruncateAndReReadFile) { } } +TEST_F(FileBackedVectorTest, InitFileTooSmallForHeaderFails) { + { + // 1. Create a vector with a few elements. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<FileBackedVector<char>> vector, + FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + Insert(vector.get(), 0, "A"); + Insert(vector.get(), 1, "Z"); + ASSERT_THAT(vector->PersistToDisk(), IsOk()); + } + + // 2. Shrink the file to be smaller than the header. + filesystem_.Truncate(fd_, sizeof(FileBackedVector<char>::Header) - 1); + + { + // 3. Attempt to create the file and confirm that it fails. + EXPECT_THAT(FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC), + StatusIs(libtextclassifier3::StatusCode::INTERNAL)); + } +} + +TEST_F(FileBackedVectorTest, InitWrongDataSizeFails) { + { + // 1. Create a vector with a few elements. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<FileBackedVector<char>> vector, + FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + Insert(vector.get(), 0, "A"); + Insert(vector.get(), 1, "Z"); + ASSERT_THAT(vector->PersistToDisk(), IsOk()); + } + + { + // 2. Attempt to create the file with a different element size and confirm + // that it fails. + EXPECT_THAT(FileBackedVector<int>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC), + StatusIs(libtextclassifier3::StatusCode::INTERNAL)); + } +} + +TEST_F(FileBackedVectorTest, InitCorruptHeaderFails) { + { + // 1. Create a vector with a few elements. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<FileBackedVector<char>> vector, + FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + Insert(vector.get(), 0, "A"); + Insert(vector.get(), 1, "Z"); + ASSERT_THAT(vector->PersistToDisk(), IsOk()); + } + + // 2. Modify the header, but don't update the checksum. This would be similar + // to corruption of the header. + FileBackedVector<char>::Header header; + ASSERT_THAT(filesystem_.PRead(fd_, &header, sizeof(header), /*offset=*/0), + IsTrue()); + header.num_elements = 1; + ASSERT_THAT(filesystem_.PWrite(fd_, /*offset=*/0, &header, sizeof(header)), + IsTrue()); + + { + // 3. Attempt to create the file with a header that doesn't match its + // checksum and confirm that it fails. + EXPECT_THAT(FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); + } +} + +TEST_F(FileBackedVectorTest, InitHeaderElementSizeTooBigFails) { + { + // 1. Create a vector with a few elements. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<FileBackedVector<char>> vector, + FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + Insert(vector.get(), 0, "A"); + Insert(vector.get(), 1, "Z"); + ASSERT_THAT(vector->PersistToDisk(), IsOk()); + } + + // 2. Modify the header so that the number of elements exceeds the actual size + // of the underlying file. + FileBackedVector<char>::Header header; + ASSERT_THAT(filesystem_.PRead(fd_, &header, sizeof(header), /*offset=*/0), + IsTrue()); + int64_t file_size = filesystem_.GetFileSize(fd_); + int64_t allocated_elements_size = file_size - sizeof(header); + header.num_elements = (allocated_elements_size / sizeof(char)) + 1; + header.header_checksum = header.CalculateHeaderChecksum(); + ASSERT_THAT(filesystem_.PWrite(fd_, /*offset=*/0, &header, sizeof(header)), + IsTrue()); + + { + // 3. Attempt to create the file with num_elements that is larger than the + // underlying file and confirm that it fails. + EXPECT_THAT(FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC), + StatusIs(libtextclassifier3::StatusCode::INTERNAL)); + } +} + +TEST_F(FileBackedVectorTest, InitCorruptElementsFails) { + { + // 1. Create a vector with a few elements. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<FileBackedVector<char>> vector, + FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + Insert(vector.get(), 0, "A"); + Insert(vector.get(), 1, "Z"); + ASSERT_THAT(vector->PersistToDisk(), IsOk()); + } + + // 2. Overwrite the values of the first two elements. + std::string corrupted_content = "BY"; + ASSERT_THAT( + filesystem_.PWrite(fd_, /*offset=*/sizeof(FileBackedVector<char>::Header), + corrupted_content.c_str(), corrupted_content.length()), + IsTrue()); + + { + // 3. Attempt to create the file with elements that don't match their + // checksum and confirm that it fails. + EXPECT_THAT(FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); + } +} + +TEST_F(FileBackedVectorTest, InitNormalSucceeds) { + { + // 1. Create a vector with a few elements. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<FileBackedVector<char>> vector, + FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + Insert(vector.get(), 0, "A"); + Insert(vector.get(), 1, "Z"); + ASSERT_THAT(vector->PersistToDisk(), IsOk()); + } + + { + // 2. Attempt to create the file with a completely valid header and elements + // region. This should succeed. + EXPECT_THAT(FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC), + IsOk()); + } +} + } // namespace } // namespace lib diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc index 81edce1..226a96b 100644 --- a/icing/store/document-store.cc +++ b/icing/store/document-store.cc @@ -1577,6 +1577,7 @@ libtextclassifier3::Status DocumentStore::OptimizeInto( int size = document_id_mapper_->num_elements(); int num_deleted = 0; int num_expired = 0; + UsageStore::UsageScores default_usage; for (DocumentId document_id = 0; document_id < size; document_id++) { auto document_or = Get(document_id, /*clear_internal_fields=*/false); if (absl_ports::IsNotFound(document_or.status())) { @@ -1625,10 +1626,14 @@ libtextclassifier3::Status DocumentStore::OptimizeInto( // Copy over usage scores. ICING_ASSIGN_OR_RETURN(UsageStore::UsageScores usage_scores, usage_store_->GetUsageScores(document_id)); - - DocumentId new_document_id = new_document_id_or.ValueOrDie(); - ICING_RETURN_IF_ERROR( - new_doc_store->SetUsageScores(new_document_id, usage_scores)); + if (!(usage_scores == default_usage)) { + // If the usage scores for this document are the default (no usage), then + // don't bother setting it. No need to possibly allocate storage if + // there's nothing interesting to store. + DocumentId new_document_id = new_document_id_or.ValueOrDie(); + ICING_RETURN_IF_ERROR( + new_doc_store->SetUsageScores(new_document_id, usage_scores)); + } } if (stats != nullptr) { stats->set_num_original_documents(size); |