diff options
-rw-r--r-- | CHANGELOG | 2 | ||||
-rw-r--r-- | doc/decompressor_errata.md | 21 | ||||
-rw-r--r-- | lib/compress/zstd_compress.c | 127 | ||||
-rw-r--r-- | lib/compress/zstdmt_compress.c | 4 | ||||
-rw-r--r-- | lib/decompress/zstd_decompress.c | 8 | ||||
-rw-r--r-- | lib/zstd.h | 33 | ||||
-rw-r--r-- | tests/fuzz/Makefile | 10 | ||||
-rw-r--r-- | tests/fuzz/decompress_cross_format.c | 130 | ||||
-rwxr-xr-x | tests/fuzz/fuzz.py | 9 | ||||
-rw-r--r-- | tests/fuzz/fuzz_data_producer.c | 10 | ||||
-rw-r--r-- | tests/fuzz/generate_sequences.c | 88 | ||||
-rw-r--r-- | tests/fuzz/regression_driver.c | 3 | ||||
-rw-r--r-- | tests/fuzz/simple_decompress.c | 24 | ||||
-rw-r--r-- | tests/fuzz/stream_round_trip.c | 2 | ||||
-rw-r--r-- | tests/fuzzer.c | 25 |
15 files changed, 420 insertions, 76 deletions
@@ -9,6 +9,7 @@ lib: accept dictionaries with partial literal tables, by @terrelln lib: fix CCtx size estimation with external sequence producer, by @embg lib: fix corner case decoder behaviors, by @Cyan4973 and @aimuz lib: fix zdict prototype mismatch in static_only mode, by @ldv-alt +lib: fix several bugs in magicless-format decoding, by @embg cli: add common compressed file types to `--exclude-compressed`` by @daniellerozenblit cli: fix mixing `-c` and `-o` commands with `--rm`, by @Cyan4973 cli: fix erroneous exclusion of hidden files with `--output-dir-mirror` by @felixhandte @@ -32,6 +33,7 @@ port: sparc64 support validation in CI, by @Cyan4973 port: AIX compatibility, by @likema port: HP-UX compatibility, by @likema doc: Improved specification accuracy, by @elasota +bug: Fix and deprecate ZSTD_generateSequences (#3981) v1.5.5 (Apr 2023) fix: fix rare corruption bug affecting the high compression mode, reported by @danlark1 (#3517, @terrelln) diff --git a/doc/decompressor_errata.md b/doc/decompressor_errata.md index 83d4071c..b570f731 100644 --- a/doc/decompressor_errata.md +++ b/doc/decompressor_errata.md @@ -125,3 +125,24 @@ The total `Block_Content` is `5` bytes, and `Last_Table_Offset` is `2`. See the compressor workaround code: https://github.com/facebook/zstd/blob/8814aa5bfa74f05a86e55e9d508da177a893ceeb/lib/compress/zstd_compress.c#L2667-L2682 + +Magicless format +---------------------- + +**Last affected version**: v1.5.5 + +**Affected decompressor component(s)**: Library + +**Produced by the reference compressor**: Yes (example: https://gist.github.com/embg/9940726094f4cf2cef162cffe9319232) + +**Example Frame**: `27 b5 2f fd 00 03 19 00 00 66 6f 6f 3f ba c4 59` + +v1.5.6 fixes several bugs in which the magicless-format decoder rejects valid frames. +These include but are not limited to: +* Valid frames that happen to begin with a legacy magic number (little-endian) +* Valid frames that happen to begin with a skippable magic number (little-endian) + +If you are affected by this issue and cannot update to v1.5.6 or later, there is a +workaround to recover affected data. Simply prepend the ZSTD magic number +`0xFD2FB528` (little-endian) to your data and decompress using the standard-format +decoder. diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 451f2f91..f6a84e6b 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3361,29 +3361,38 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) return ZSTDbss_compress; } -static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) +static size_t ZSTD_copyBlockSequences(SeqCollector* seqCollector, const seqStore_t* seqStore, const U32 prevRepcodes[ZSTD_REP_NUM]) { - const seqStore_t* seqStore = ZSTD_getSeqStore(zc); - const seqDef* seqStoreSeqs = seqStore->sequencesStart; - size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs; - size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart); - size_t literalsRead = 0; - size_t lastLLSize; + const seqDef* inSeqs = seqStore->sequencesStart; + const size_t nbInSequences = seqStore->sequences - inSeqs; + const size_t nbInLiterals = (size_t)(seqStore->lit - seqStore->litStart); - ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; + ZSTD_Sequence* outSeqs = seqCollector->seqIndex == 0 ? seqCollector->seqStart : seqCollector->seqStart + seqCollector->seqIndex; + const size_t nbOutSequences = nbInSequences + 1; + size_t nbOutLiterals = 0; + repcodes_t repcodes; size_t i; - repcodes_t updatedRepcodes; - assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); - /* Ensure we have enough space for last literals "sequence" */ - assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1); - ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); - for (i = 0; i < seqStoreSeqSize; ++i) { - U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM; - outSeqs[i].litLength = seqStoreSeqs[i].litLength; - outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH; + /* Bounds check that we have enough space for every input sequence + * and the block delimiter + */ + assert(seqCollector->seqIndex <= seqCollector->maxSequences); + RETURN_ERROR_IF( + nbOutSequences > (size_t)(seqCollector->maxSequences - seqCollector->seqIndex), + dstSize_tooSmall, + "Not enough space to copy sequences"); + + ZSTD_memcpy(&repcodes, prevRepcodes, sizeof(repcodes)); + for (i = 0; i < nbInSequences; ++i) { + U32 rawOffset; + outSeqs[i].litLength = inSeqs[i].litLength; + outSeqs[i].matchLength = inSeqs[i].mlBase + MINMATCH; outSeqs[i].rep = 0; + /* Handle the possible single length >= 64K + * There can only be one because we add MINMATCH to every match length, + * and blocks are at most 128K. + */ if (i == seqStore->longLengthPos) { if (seqStore->longLengthType == ZSTD_llt_literalLength) { outSeqs[i].litLength += 0x10000; @@ -3392,41 +3401,55 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) } } - if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) { - /* Derive the correct offset corresponding to a repcode */ - outSeqs[i].rep = seqStoreSeqs[i].offBase; + /* Determine the raw offset given the offBase, which may be a repcode. */ + if (OFFBASE_IS_REPCODE(inSeqs[i].offBase)) { + const U32 repcode = OFFBASE_TO_REPCODE(inSeqs[i].offBase); + assert(repcode > 0); + outSeqs[i].rep = repcode; if (outSeqs[i].litLength != 0) { - rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1]; + rawOffset = repcodes.rep[repcode - 1]; } else { - if (outSeqs[i].rep == 3) { - rawOffset = updatedRepcodes.rep[0] - 1; + if (repcode == 3) { + assert(repcodes.rep[0] > 1); + rawOffset = repcodes.rep[0] - 1; } else { - rawOffset = updatedRepcodes.rep[outSeqs[i].rep]; + rawOffset = repcodes.rep[repcode]; } } + } else { + rawOffset = OFFBASE_TO_OFFSET(inSeqs[i].offBase); } outSeqs[i].offset = rawOffset; - /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode - so we provide seqStoreSeqs[i].offset - 1 */ - ZSTD_updateRep(updatedRepcodes.rep, - seqStoreSeqs[i].offBase, - seqStoreSeqs[i].litLength == 0); - literalsRead += outSeqs[i].litLength; + + /* Update repcode history for the sequence */ + ZSTD_updateRep(repcodes.rep, + inSeqs[i].offBase, + inSeqs[i].litLength == 0); + + nbOutLiterals += outSeqs[i].litLength; } /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0. * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker * for the block boundary, according to the API. */ - assert(seqStoreLiteralsSize >= literalsRead); - lastLLSize = seqStoreLiteralsSize - literalsRead; - outSeqs[i].litLength = (U32)lastLLSize; - outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0; - seqStoreSeqSize++; - zc->seqCollector.seqIndex += seqStoreSeqSize; + assert(nbInLiterals >= nbOutLiterals); + { + const size_t lastLLSize = nbInLiterals - nbOutLiterals; + outSeqs[nbInSequences].litLength = (U32)lastLLSize; + outSeqs[nbInSequences].matchLength = 0; + outSeqs[nbInSequences].offset = 0; + assert(nbOutSequences == nbInSequences + 1); + } + seqCollector->seqIndex += nbOutSequences; + assert(seqCollector->seqIndex <= seqCollector->maxSequences); + + return 0; } size_t ZSTD_sequenceBound(size_t srcSize) { - return (srcSize / ZSTD_MINMATCH_MIN) + 1; + const size_t maxNbSeq = (srcSize / ZSTD_MINMATCH_MIN) + 1; + const size_t maxNbDelims = (srcSize / ZSTD_BLOCKSIZE_MAX_MIN) + 1; + return maxNbSeq + maxNbDelims; } size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, @@ -3435,6 +3458,16 @@ size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, const size_t dstCapacity = ZSTD_compressBound(srcSize); void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem); SeqCollector seqCollector; + { + int targetCBlockSize; + FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_targetCBlockSize, &targetCBlockSize), ""); + RETURN_ERROR_IF(targetCBlockSize != 0, parameter_unsupported, "targetCBlockSize != 0"); + } + { + int nbWorkers; + FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_nbWorkers, &nbWorkers), ""); + RETURN_ERROR_IF(nbWorkers != 0, parameter_unsupported, "nbWorkers != 0"); + } RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!"); @@ -3444,8 +3477,12 @@ size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, seqCollector.maxSequences = outSeqsSize; zc->seqCollector = seqCollector; - ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); - ZSTD_customFree(dst, ZSTD_defaultCMem); + { + const size_t ret = ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); + ZSTD_customFree(dst, ZSTD_defaultCMem); + FORWARD_IF_ERROR(ret, "ZSTD_compress2 failed"); + } + assert(zc->seqCollector.seqIndex <= ZSTD_sequenceBound(srcSize)); return zc->seqCollector.seqIndex; } @@ -4038,8 +4075,9 @@ ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, cSeqsSize = 1; } + /* Sequence collection not supported when block splitting */ if (zc->seqCollector.collectSequences) { - ZSTD_copyBlockSequences(zc); + FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, seqStore, dRepOriginal.rep), "copyBlockSequences failed"); ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); return 0; } @@ -4261,6 +4299,7 @@ ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, if (bss == ZSTDbss_noCompress) { if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block"); cSize = ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock); FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block"); @@ -4293,11 +4332,15 @@ ZSTD_compressBlock_internal(ZSTD_CCtx* zc, { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); - if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } + if (bss == ZSTDbss_noCompress) { + RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block"); + cSize = 0; + goto out; + } } if (zc->seqCollector.collectSequences) { - ZSTD_copyBlockSequences(zc); + FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, ZSTD_getSeqStore(zc), zc->blockState.prevCBlock->rep), "copyBlockSequences failed"); ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); return 0; } diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index e86fdb2b..86ccce31 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -121,7 +121,7 @@ static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool) static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_customMem cMem) { - ZSTDMT_bufferPool* const bufPool = + ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_customCalloc(sizeof(ZSTDMT_bufferPool), cMem); if (bufPool==NULL) return NULL; if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) { @@ -380,7 +380,7 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool) static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers, ZSTD_customMem cMem) { - ZSTDMT_CCtxPool* const cctxPool = + ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_customCalloc(sizeof(ZSTDMT_CCtxPool), cMem); assert(nbWorkers > 0); if (!cctxPool) return NULL; diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index ee2cda3b..2f03cf7b 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1085,7 +1085,7 @@ size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, while (srcSize >= ZSTD_startingInputLength(dctx->format)) { #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) - if (ZSTD_isLegacy(src, srcSize)) { + if (dctx->format == ZSTD_f_zstd1 && ZSTD_isLegacy(src, srcSize)) { size_t decodedSize; size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize); if (ZSTD_isError(frameSize)) return frameSize; @@ -1115,7 +1115,7 @@ size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, } #endif - if (srcSize >= 4) { + if (dctx->format == ZSTD_f_zstd1 && srcSize >= 4) { U32 const magicNumber = MEM_readLE32(src); DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber); if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { @@ -1412,6 +1412,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c case ZSTDds_decodeSkippableHeader: assert(src != NULL); assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE); + assert(dctx->format != ZSTD_f_zstd1_magicless); ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */ dctx->stage = ZSTDds_skipFrame; @@ -2209,7 +2210,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB DEBUGLOG(4, "Consume header"); FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), ""); - if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + if (zds->format == ZSTD_f_zstd1 + && (MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); zds->stage = ZSTDds_skipFrame; } else { @@ -1538,25 +1538,38 @@ typedef enum { ZSTDLIB_STATIC_API size_t ZSTD_sequenceBound(size_t srcSize); /*! ZSTD_generateSequences() : + * WARNING: This function is meant for debugging and informational purposes ONLY! + * Its implementation is flawed, and it will be deleted in a future version. + * It is not guaranteed to succeed, as there are several cases where it will give + * up and fail. You should NOT use this function in production code. + * + * This function is deprecated, and will be removed in a future version. + * * Generate sequences using ZSTD_compress2(), given a source buffer. * + * @param zc The compression context to be used for ZSTD_compress2(). Set any + * compression parameters you need on this context. + * @param outSeqs The output sequences buffer of size @p outSeqsSize + * @param outSeqsSize The size of the output sequences buffer. + * ZSTD_sequenceBound(srcSize) is an upper bound on the number + * of sequences that can be generated. + * @param src The source buffer to generate sequences from of size @p srcSize. + * @param srcSize The size of the source buffer. + * * Each block will end with a dummy sequence * with offset == 0, matchLength == 0, and litLength == length of last literals. * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0) * simply acts as a block delimiter. * - * @zc can be used to insert custom compression params. - * This function invokes ZSTD_compress2(). - * - * The output of this function can be fed into ZSTD_compressSequences() with CCtx - * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters - * @return : number of sequences generated + * @returns The number of sequences generated, necessarily less than + * ZSTD_sequenceBound(srcSize), or an error code that can be checked + * with ZSTD_isError(). */ - +ZSTD_DEPRECATED("For debugging only, will be replaced by ZSTD_extractSequences()") ZSTDLIB_STATIC_API size_t -ZSTD_generateSequences( ZSTD_CCtx* zc, - ZSTD_Sequence* outSeqs, size_t outSeqsSize, - const void* src, size_t srcSize); +ZSTD_generateSequences(ZSTD_CCtx* zc, + ZSTD_Sequence* outSeqs, size_t outSeqsSize, + const void* src, size_t srcSize); /*! ZSTD_mergeBlockDelimiters() : * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile index f96adcfa..430f6df1 100644 --- a/tests/fuzz/Makefile +++ b/tests/fuzz/Makefile @@ -124,7 +124,9 @@ FUZZ_TARGETS := \ sequence_compression_api \ seekable_roundtrip \ huf_round_trip \ - huf_decompress + huf_decompress \ + decompress_cross_format \ + generate_sequences all: libregression.a $(FUZZ_TARGETS) @@ -239,6 +241,12 @@ huf_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_huf_round_trip.o huf_decompress: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_huf_decompress.o $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_huf_decompress.o $(LIB_FUZZING_ENGINE) -o $@ +decompress_cross_format: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_decompress_cross_format.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_decompress_cross_format.o $(LIB_FUZZING_ENGINE) -o $@ + +generate_sequences: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_generate_sequences.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_generate_sequences.o $(LIB_FUZZING_ENGINE) -o $@ + libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c d_fuzz_regression_driver.o $(AR) $(FUZZ_ARFLAGS) $@ d_fuzz_regression_driver.o diff --git a/tests/fuzz/decompress_cross_format.c b/tests/fuzz/decompress_cross_format.c new file mode 100644 index 00000000..78461e69 --- /dev/null +++ b/tests/fuzz/decompress_cross_format.c @@ -0,0 +1,130 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +// This fuzz target validates decompression of magicless-format compressed data. + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "fuzz_helpers.h" +#define ZSTD_STATIC_LINKING_ONLY +#include "zstd.h" +#include "fuzz_data_producer.h" + +static ZSTD_DCtx *dctx = NULL; + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + // Give a random portion of src data to the producer, to use for parameter generation. + // The rest will be interpreted as magicless compressed data. + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size_t magiclessSize = FUZZ_dataProducer_reserveDataPrefix(producer); + const void* const magiclessSrc = src; + size_t const dstSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size); + void* const standardDst = FUZZ_malloc(dstSize); + void* const magiclessDst = FUZZ_malloc(dstSize); + + // Create standard-format src from magicless-format src + const uint32_t zstd_magic = ZSTD_MAGICNUMBER; + size_t standardSize = sizeof(zstd_magic) + magiclessSize; + void* const standardSrc = FUZZ_malloc(standardSize); + memcpy(standardSrc, &zstd_magic, sizeof(zstd_magic)); // assume fuzzing on little-endian machine + memcpy(standardSrc + sizeof(zstd_magic), magiclessSrc, magiclessSize); + + // Truncate to a single frame + { + const size_t standardFrameCompressedSize = ZSTD_findFrameCompressedSize(standardSrc, standardSize); + if (ZSTD_isError(standardFrameCompressedSize)) { + goto cleanup_and_return; + } + standardSize = standardFrameCompressedSize; + magiclessSize = standardFrameCompressedSize - sizeof(zstd_magic); + } + + // Create DCtx if needed + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + + // Test one-shot decompression + { + FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters)); + FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1)); + const size_t standardRet = ZSTD_decompressDCtx( + dctx, standardDst, dstSize, standardSrc, standardSize); + + FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters)); + FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless)); + const size_t magiclessRet = ZSTD_decompressDCtx( + dctx, magiclessDst, dstSize, magiclessSrc, magiclessSize); + + // Standard accepts => magicless should accept + if (!ZSTD_isError(standardRet)) FUZZ_ZASSERT(magiclessRet); + + // Magicless accepts => standard should accept + // NOTE: this is nice-to-have, please disable this check if it is difficult to satisfy. + if (!ZSTD_isError(magiclessRet)) FUZZ_ZASSERT(standardRet); + + // If both accept, decompressed size and data should match + if (!ZSTD_isError(standardRet) && !ZSTD_isError(magiclessRet)) { + FUZZ_ASSERT(standardRet == magiclessRet); + if (standardRet > 0) { + FUZZ_ASSERT( + memcmp(standardDst, magiclessDst, standardRet) == 0 + ); + } + } + } + + // Test streaming decompression + { + ZSTD_inBuffer standardIn = { standardSrc, standardSize, 0 }; + ZSTD_inBuffer magiclessIn = { magiclessSrc, magiclessSize, 0 }; + ZSTD_outBuffer standardOut = { standardDst, dstSize, 0 }; + ZSTD_outBuffer magiclessOut = { magiclessDst, dstSize, 0 }; + + FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters)); + FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1)); + const size_t standardRet = ZSTD_decompressStream(dctx, &standardOut, &standardIn); + + FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters)); + FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless)); + const size_t magiclessRet = ZSTD_decompressStream(dctx, &magiclessOut, &magiclessIn); + + // Standard accepts => magicless should accept + if (standardRet == 0) FUZZ_ASSERT(magiclessRet == 0); + + // Magicless accepts => standard should accept + // NOTE: this is nice-to-have, please disable this check if it is difficult to satisfy. + if (magiclessRet == 0) FUZZ_ASSERT(standardRet == 0); + + // If both accept, decompressed size and data should match + if (standardRet == 0 && magiclessRet == 0) { + FUZZ_ASSERT(standardOut.pos == magiclessOut.pos); + if (standardOut.pos > 0) { + FUZZ_ASSERT( + memcmp(standardOut.dst, magiclessOut.dst, standardOut.pos) == 0 + ); + } + } + } + +cleanup_and_return: +#ifndef STATEFUL_FUZZING + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + free(standardSrc); + free(standardDst); + free(magiclessDst); + FUZZ_dataProducer_free(producer); + return 0; +} diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py index c489b8fa..d59df926 100755 --- a/tests/fuzz/fuzz.py +++ b/tests/fuzz/fuzz.py @@ -65,6 +65,8 @@ TARGET_INFO = { 'seekable_roundtrip': TargetInfo(InputType.RAW_DATA), 'huf_round_trip': TargetInfo(InputType.RAW_DATA), 'huf_decompress': TargetInfo(InputType.RAW_DATA), + 'decompress_cross_format': TargetInfo(InputType.RAW_DATA), + 'generate_sequences': TargetInfo(InputType.RAW_DATA), } TARGETS = list(TARGET_INFO.keys()) ALL_TARGETS = TARGETS + ['all'] @@ -405,7 +407,12 @@ def build(args): cxxflags = shlex.split(args.cxxflags) mflags = shlex.split(args.mflags) # Flags to be added to both cflags and cxxflags - common_flags = [] + common_flags = [ + '-Werror', + '-Wno-error=declaration-after-statement', + '-Wno-error=c++-compat', + '-Wno-error=deprecated' # C files are sometimes compiled with CXX + ] cppflags += [ '-DDEBUGLEVEL={}'.format(args.debug), diff --git a/tests/fuzz/fuzz_data_producer.c b/tests/fuzz/fuzz_data_producer.c index bf846b68..056de3ee 100644 --- a/tests/fuzz/fuzz_data_producer.c +++ b/tests/fuzz/fuzz_data_producer.c @@ -28,12 +28,12 @@ void FUZZ_dataProducer_free(FUZZ_dataProducer_t *producer) { free(producer); } uint32_t FUZZ_dataProducer_uint32Range(FUZZ_dataProducer_t *producer, uint32_t min, uint32_t max) { - FUZZ_ASSERT(min <= max); - uint32_t range = max - min; uint32_t rolling = range; uint32_t result = 0; + FUZZ_ASSERT(min <= max); + while (rolling > 0 && producer->size > 0) { uint8_t next = *(producer->data + producer->size - 1); producer->size -= 1; @@ -79,11 +79,11 @@ int FUZZ_dataProducer_empty(FUZZ_dataProducer_t *producer) { size_t FUZZ_dataProducer_contract(FUZZ_dataProducer_t *producer, size_t newSize) { - newSize = newSize > producer->size ? producer->size : newSize; + const size_t effectiveNewSize = newSize > producer->size ? producer->size : newSize; - size_t remaining = producer->size - newSize; + size_t remaining = producer->size - effectiveNewSize; producer->data = producer->data + remaining; - producer->size = newSize; + producer->size = effectiveNewSize; return remaining; } diff --git a/tests/fuzz/generate_sequences.c b/tests/fuzz/generate_sequences.c new file mode 100644 index 00000000..1cc57e84 --- /dev/null +++ b/tests/fuzz/generate_sequences.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#define ZSTD_STATIC_LINKING_ONLY + +#include <stddef.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> + +#include "fuzz_data_producer.h" +#include "fuzz_helpers.h" +#include "zstd_helpers.h" + +/** + * This fuzz target ensures that ZSTD_generateSequences() does not crash and + * if it succeeds that ZSTD_compressSequences() round trips. + */ + +static void testRoundTrip(ZSTD_CCtx* cctx, ZSTD_Sequence const* seqs, size_t nbSeqs, const void* src, size_t srcSize) { + /* Compress the sequences with block delimiters */ + const size_t compressBound = ZSTD_compressBound(srcSize); + void* dst = FUZZ_malloc(compressBound); + FUZZ_ASSERT(dst); + + size_t compressedSize = ZSTD_compressSequences(cctx, dst, compressBound, seqs, nbSeqs, src, srcSize); + FUZZ_ZASSERT(compressedSize); + + void* decompressed = FUZZ_malloc(srcSize); + FUZZ_ASSERT(srcSize == 0 || decompressed); + size_t decompressedSize = ZSTD_decompress(decompressed, srcSize, dst, compressedSize); + FUZZ_ZASSERT(decompressedSize); + FUZZ_ASSERT(decompressedSize == srcSize); + if (srcSize != 0) { + FUZZ_ASSERT(!memcmp(src, decompressed, srcSize)); + } + + free(decompressed); + free(dst); +} + +int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { + + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(data, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); + + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + FUZZ_ASSERT(cctx); + + const size_t seqsCapacity = FUZZ_dataProducer_uint32Range(producer, 0, 2 * ZSTD_sequenceBound(size)); + ZSTD_Sequence* seqs = (ZSTD_Sequence*)FUZZ_malloc(sizeof(ZSTD_Sequence) * seqsCapacity); + FUZZ_ASSERT(seqsCapacity == 0 || seqs); + + FUZZ_setRandomParameters(cctx, size, producer); + FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 0)); + FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0)); + + const size_t nbSeqs = ZSTD_generateSequences(cctx, seqs, seqsCapacity, data, size); + if (ZSTD_isError(nbSeqs)) { + /* Allowed to error if the destination is too small */ + if (ZSTD_getErrorCode(nbSeqs) == ZSTD_error_dstSize_tooSmall) { + FUZZ_ASSERT(seqsCapacity < ZSTD_sequenceBound(size)); + } + } else { + /* Ensure we round trip with and without block delimiters*/ + + FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters)); + testRoundTrip(cctx, seqs, nbSeqs, data, size); + + const size_t nbMergedSeqs = ZSTD_mergeBlockDelimiters(seqs, nbSeqs); + FUZZ_ASSERT(nbMergedSeqs <= nbSeqs); + FUZZ_ZASSERT(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only)); + FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters)); + testRoundTrip(cctx, seqs, nbMergedSeqs, data, size); + } + + free(seqs); + ZSTD_freeCCtx(cctx); + FUZZ_dataProducer_free(producer); + return 0; +} diff --git a/tests/fuzz/regression_driver.c b/tests/fuzz/regression_driver.c index 550c65d8..26e2b6af 100644 --- a/tests/fuzz/regression_driver.c +++ b/tests/fuzz/regression_driver.c @@ -44,11 +44,12 @@ int main(int argc, char const **argv) { fprintf(stderr, "WARNING: No files passed to %s\n", argv[0]); for (i = 0; i < files->tableSize; ++i) { char const *fileName = files->fileNames[i]; - DEBUGLOG(3, "Running %s", fileName); size_t const fileSize = UTIL_getFileSize(fileName); size_t readSize; FILE *file; + DEBUGLOG(3, "Running %s", fileName); + /* Check that it is a regular file, and that the fileSize is valid. * If it is not a regular file, then it may have been deleted since we * constructed the list, so just skip it, but return an error exit code. diff --git a/tests/fuzz/simple_decompress.c b/tests/fuzz/simple_decompress.c index 0ee61902..0dc9e5b7 100644 --- a/tests/fuzz/simple_decompress.c +++ b/tests/fuzz/simple_decompress.c @@ -16,6 +16,9 @@ #include <stddef.h> #include <stdlib.h> #include <stdio.h> + +#define ZSTD_STATIC_LINKING_ONLY + #include "fuzz_helpers.h" #include "zstd.h" #include "fuzz_data_producer.h" @@ -34,17 +37,18 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) FUZZ_ASSERT(dctx); } - size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size); - void *rBuf = FUZZ_malloc(bufSize); - - size_t const dSize = ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size); - if (!ZSTD_isError(dSize)) { - /* If decompression was successful, the content size from the frame header(s) should be valid. */ - size_t const expectedSize = ZSTD_findDecompressedSize(src, size); - FUZZ_ASSERT(expectedSize != ZSTD_CONTENTSIZE_ERROR); - FUZZ_ASSERT(expectedSize == ZSTD_CONTENTSIZE_UNKNOWN || expectedSize == dSize); + { + size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size); + void *rBuf = FUZZ_malloc(bufSize); + size_t const dSize = ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size); + if (!ZSTD_isError(dSize)) { + /* If decompression was successful, the content size from the frame header(s) should be valid. */ + unsigned long long const expectedSize = ZSTD_findDecompressedSize(src, size); + FUZZ_ASSERT(expectedSize != ZSTD_CONTENTSIZE_ERROR); + FUZZ_ASSERT(expectedSize == ZSTD_CONTENTSIZE_UNKNOWN || expectedSize == dSize); + } + free(rBuf); } - free(rBuf); FUZZ_dataProducer_free(producer); diff --git a/tests/fuzz/stream_round_trip.c b/tests/fuzz/stream_round_trip.c index c2d6707a..6e340c81 100644 --- a/tests/fuzz/stream_round_trip.c +++ b/tests/fuzz/stream_round_trip.c @@ -136,7 +136,7 @@ static size_t compress(uint8_t *dst, size_t capacity, return dstSize; } -size_t decompress(void* dst, size_t dstCapacity, void const* src, size_t srcSize, FUZZ_dataProducer_t* producer) +static size_t decompress(void* dst, size_t dstCapacity, void const* src, size_t srcSize, FUZZ_dataProducer_t* producer) { ZSTD_inBuffer in = {src, srcSize, 0}; ZSTD_outBuffer out = {dst, dstCapacity, 0}; diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 09349218..f7bdae90 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -3701,6 +3701,31 @@ static int basicUnitTests(U32 const seed, double compressibility) } DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : ZSTD_generateSequences too small output buffer : ", testNb++); + { + const size_t seqsCapacity = 10; + const size_t srcSize = 150 KB; + const BYTE* src = (BYTE*)CNBuffer; + + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + ZSTD_Sequence* const seqs = (ZSTD_Sequence*)malloc(seqsCapacity * sizeof(ZSTD_Sequence)); + + if (seqs == NULL) goto _output_error; + if (cctx == NULL) goto _output_error; + /* Populate src with random data */ + RDG_genBuffer(CNBuffer, srcSize, compressibility, 0.5, seed); + + /* Test with block delimiters roundtrip */ + { + size_t const seqsSize = ZSTD_generateSequences(cctx, seqs, seqsCapacity, src, srcSize); + if (!ZSTD_isError(seqsSize)) goto _output_error; + } + + ZSTD_freeCCtx(cctx); + free(seqs); + } + DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences followed by ZSTD_compressSequences : ", testNb++); { const size_t srcSize = 500 KB; |