summaryrefslogtreecommitdiff
path: root/src/frame/header.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/frame/header.rs')
-rw-r--r--src/frame/header.rs412
1 files changed, 412 insertions, 0 deletions
diff --git a/src/frame/header.rs b/src/frame/header.rs
new file mode 100644
index 0000000..1513c11
--- /dev/null
+++ b/src/frame/header.rs
@@ -0,0 +1,412 @@
+use twox_hash::XxHash32;
+
+use super::Error;
+use std::{
+ convert::TryInto,
+ fmt::Debug,
+ hash::Hasher,
+ io,
+ io::{Read, Write},
+};
+
+// FLG byte, listed from the most significant bit down:
+// 7-6 version, 5 block independence, 4 block checksums, 3 content size,
+// 2 content checksum, 1 reserved, 0 dictionary ID.
+const FLG_VERSION_MASK: u8 = 0b1100_0000;
+/// The only accepted value of the version bits (`01`).
+const FLG_SUPPORTED_VERSION_BITS: u8 = 0b0100_0000;
+const FLG_INDEPENDENT_BLOCKS: u8 = 0b0010_0000;
+const FLG_BLOCK_CHECKSUMS: u8 = 0b0001_0000;
+const FLG_CONTENT_SIZE: u8 = 0b0000_1000;
+const FLG_CONTENT_CHECKSUM: u8 = 0b0000_0100;
+const FLG_RESERVED_MASK: u8 = 0b0000_0010;
+const FLG_DICTIONARY_ID: u8 = 0b0000_0001;
+
+// BD byte: bits 6-4 hold the block max-size code, all remaining bits are reserved.
+const BD_BLOCK_SIZE_MASK: u8 = 0b0111_0000;
+const BD_BLOCK_SIZE_MASK_RSHIFT: u8 = 4;
+const BD_RESERVED_MASK: u8 = !BD_BLOCK_SIZE_MASK;
+
+/// Highest bit of the 4-byte block size field; set when the block is stored uncompressed.
+const BLOCK_UNCOMPRESSED_SIZE_BIT: u32 = 0x8000_0000;
+
+// Magic numbers, compared against the first four bytes decoded as little endian.
+const LZ4F_MAGIC_NUMBER: u32 = 0x184D_2204;
+pub(crate) const LZ4F_LEGACY_MAGIC_NUMBER: u32 = 0x184C_2102;
+const LZ4F_SKIPPABLE_MAGIC_RANGE: std::ops::RangeInclusive<u32> = 0x184D_2A50..=0x184D_2A5F;
+
+/// Size of the magic number in bytes.
+pub(crate) const MAGIC_NUMBER_SIZE: usize = 4;
+/// Magic number + FLG + BD + header checksum.
+pub(crate) const MIN_FRAME_INFO_SIZE: usize = MAGIC_NUMBER_SIZE + 3;
+/// Minimum size plus the optional 8-byte content size and 4-byte dictionary ID.
+pub(crate) const MAX_FRAME_INFO_SIZE: usize = MIN_FRAME_INFO_SIZE + 8 + 4;
+/// Size of the block size field that precedes every data block.
+pub(crate) const BLOCK_INFO_SIZE: usize = 4;
+
+/// Predefined block sizes to choose from when compressing data.
+#[derive(Clone, Copy, PartialEq, Debug, Default)]
+pub enum BlockSize {
+    /// Detect the block size from the size of the first write call.
+    #[default]
+    Auto = 0,
+    /// 64KB block size.
+    Max64KB = 4,
+    /// 256KB block size.
+    Max256KB = 5,
+    /// 1MB block size.
+    Max1MB = 6,
+    /// 4MB block size.
+    Max4MB = 7,
+    /// 8MB block size.
+    Max8MB = 8,
+}
+
+impl BlockSize {
+    /// Picks a block size for a buffer of `buf_len` bytes.
+    ///
+    /// Buffers of at most 64KB map to `Max64KB`, buffers of at most 256KB map to `Max256KB`,
+    /// and anything larger maps to `Max4MB`. `Max1MB` and `Max8MB` are never chosen here.
+    pub(crate) fn from_buf_length(buf_len: usize) -> Self {
+        if buf_len > BlockSize::Max256KB.get_size() {
+            BlockSize::Max4MB
+        } else if buf_len > BlockSize::Max64KB.get_size() {
+            BlockSize::Max256KB
+        } else {
+            BlockSize::Max64KB
+        }
+    }
+
+    /// Returns the block size in bytes.
+    ///
+    /// # Panics
+    ///
+    /// Panics when called on `BlockSize::Auto`, which has no fixed size.
+    pub(crate) fn get_size(&self) -> usize {
+        const KB: usize = 1024;
+        const MB: usize = 1024 * KB;
+        match *self {
+            BlockSize::Max64KB => 64 * KB,
+            BlockSize::Max256KB => 256 * KB,
+            BlockSize::Max1MB => MB,
+            BlockSize::Max4MB => 4 * MB,
+            BlockSize::Max8MB => 8 * MB,
+            BlockSize::Auto => unreachable!(),
+        }
+    }
+}
+
+/// The two block modes that can be set on [`FrameInfo`].
+#[derive(Clone, Copy, PartialEq, Debug, Default)]
+pub enum BlockMode {
+    /// Each block is compressed on its own, without referring to other blocks.
+    /// This is the default.
+    #[default]
+    Independent,
+    /// A block may reference data from the blocks that precede it.
+    ///
+    /// Effective when the stream contains small blocks.
+    Linked,
+}
+
+// From: https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md
+//
+// General Structure of LZ4 Frame format
+// -------------------------------------
+//
+// | MagicNb | F. Descriptor | Block | (...) | EndMark | C. Checksum |
+// |:-------:|:-------------:| ----- | ----- | ------- | ----------- |
+// | 4 bytes | 3-15 bytes | | | 4 bytes | 0-4 bytes |
+//
+// Frame Descriptor
+// ----------------
+//
+// | FLG | BD | (Content Size) | (Dictionary ID) | HC |
+// | ------- | ------- |:--------------:|:---------------:| ------- |
+// | 1 byte | 1 byte | 0 - 8 bytes | 0 - 4 bytes | 1 byte |
+//
+// __FLG byte__
+//
+// | BitNb | 7-6 | 5 | 4 | 3 | 2 | 1 | 0 |
+// | ------- |-------|-------|----------|------|----------|----------|------|
+// |FieldName|Version|B.Indep|B.Checksum|C.Size|C.Checksum|*Reserved*|DictID|
+//
+// __BD byte__
+//
+// | BitNb | 7 | 6-5-4 | 3-2-1-0 |
+// | ------- | -------- | ------------- | -------- |
+// |FieldName|*Reserved*| Block MaxSize |*Reserved*|
+//
+// Data Blocks
+// -----------
+//
+// | Block Size | data | (Block Checksum) |
+// |:----------:| ------ |:----------------:|
+// | 4 bytes | | 0 - 4 bytes |
+//
+/// The metadata for de/compressing with lz4 frame format.
+#[derive(Debug, Default, Clone)]
+pub struct FrameInfo {
+    /// Total uncompressed size of the data in the frame; stored in the header when set.
+    pub content_size: Option<u64>,
+    /// Identifier of the dictionary required to decode the data correctly.
+    /// Compressor and decompressor must use exactly the same dictionary.
+    ///
+    /// Note that this is currently unsupported and for this reason it's not pub.
+    pub(crate) dict_id: Option<u32>,
+    /// Upper bound for the uncompressed size of each data block.
+    pub block_size: BlockSize,
+    /// Whether blocks are independent or linked to previous blocks.
+    pub block_mode: BlockMode,
+    /// When `true`, every data block in the frame carries a checksum.
+    pub block_checksums: bool,
+    /// When `true`, the frame carries a content checksum used to verify that the full frame
+    /// contents have been decoded correctly.
+    pub content_checksum: bool,
+    /// When `true`, the legacy frame format is used.
+    pub legacy_frame: bool,
+}
+
+impl FrameInfo {
+    /// Create a new `FrameInfo` with every option at its default value.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Whether to include the total uncompressed size of data in the frame.
+    pub fn content_size(mut self, content_size: Option<u64>) -> Self {
+        self.content_size = content_size;
+        self
+    }
+
+    /// The maximum uncompressed size of each data block.
+    pub fn block_size(mut self, block_size: BlockSize) -> Self {
+        self.block_size = block_size;
+        self
+    }
+
+    /// The block mode.
+    pub fn block_mode(mut self, block_mode: BlockMode) -> Self {
+        self.block_mode = block_mode;
+        self
+    }
+
+    /// If set, includes a checksum for each data block in the frame.
+    pub fn block_checksums(mut self, block_checksums: bool) -> Self {
+        self.block_checksums = block_checksums;
+        self
+    }
+
+    /// If set, includes a content checksum to verify that the full frame contents have been
+    /// decoded correctly.
+    pub fn content_checksum(mut self, content_checksum: bool) -> Self {
+        self.content_checksum = content_checksum;
+        self
+    }
+
+    /// If set, use the legacy frame format.
+    pub fn legacy_frame(mut self, legacy_frame: bool) -> Self {
+        self.legacy_frame = legacy_frame;
+        self
+    }
+
+    /// Returns the number of bytes needed to parse the frame info that starts at `input`.
+    ///
+    /// The answer depends on how much can be decided from the bytes available:
+    /// - fewer than `MAGIC_NUMBER_SIZE` bytes: `MAGIC_NUMBER_SIZE`, because the frame kind
+    ///   cannot be identified yet;
+    /// - legacy magic number: `MAGIC_NUMBER_SIZE`;
+    /// - fewer than `MIN_FRAME_INFO_SIZE` bytes: `MIN_FRAME_INFO_SIZE`;
+    /// - skippable frame magic number: 8 (magic number plus the 4-byte length field);
+    /// - regular frame: `MIN_FRAME_INFO_SIZE`, plus 8 when the content size flag is set and
+    ///   plus 4 when the dictionary ID flag is set.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::WrongMagicNumber` when at least `MIN_FRAME_INFO_SIZE` bytes are
+    /// available and the magic number matches none of the known frame kinds.
+    pub(crate) fn read_size(input: &[u8]) -> Result<usize, Error> {
+        // A slice index here would panic on short input; report the bytes needed instead,
+        // like the `MIN_FRAME_INFO_SIZE` check below does.
+        let magic_bytes: [u8; MAGIC_NUMBER_SIZE] = match input.get(..MAGIC_NUMBER_SIZE) {
+            Some(bytes) => bytes
+                .try_into()
+                .expect("slice was taken with exactly MAGIC_NUMBER_SIZE bytes"),
+            None => return Ok(MAGIC_NUMBER_SIZE),
+        };
+        let magic_num = u32::from_le_bytes(magic_bytes);
+        if magic_num == LZ4F_LEGACY_MAGIC_NUMBER {
+            return Ok(MAGIC_NUMBER_SIZE);
+        }
+
+        let mut required = MIN_FRAME_INFO_SIZE;
+        if input.len() < required {
+            return Ok(required);
+        }
+
+        if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) {
+            return Ok(8);
+        }
+        if magic_num != LZ4F_MAGIC_NUMBER {
+            return Err(Error::WrongMagicNumber);
+        }
+
+        // `input[4]` is the FLG byte; the length check above guarantees it exists.
+        if input[4] & FLG_CONTENT_SIZE != 0 {
+            required += 8;
+        }
+        if input[4] & FLG_DICTIONARY_ID != 0 {
+            required += 4
+        }
+        Ok(required)
+    }
+
+    /// Returns the number of bytes `write` produces for this frame info.
+    pub(crate) fn write_size(&self) -> usize {
+        let mut required = MIN_FRAME_INFO_SIZE;
+        if self.content_size.is_some() {
+            required += 8;
+        }
+        if self.dict_id.is_some() {
+            required += 4;
+        }
+        required
+    }
+
+    /// Serializes the frame header (magic number, FLG, BD, optional content size and
+    /// dictionary ID, header checksum) into the start of `output`.
+    ///
+    /// Returns the number of bytes written, which equals `write_size()`.
+    ///
+    /// NOTE(review): `legacy_frame` is not consulted here and `block_size` is encoded as-is,
+    /// so `BlockSize::Auto` and `BlockSize::Max8MB` yield a BD byte that `read` rejects;
+    /// presumably callers resolve those cases before calling this.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::IoError` with `UnexpectedEof` when `output` is shorter than
+    /// `write_size()`.
+    pub(crate) fn write(&self, output: &mut [u8]) -> Result<usize, Error> {
+        let write_size = self.write_size();
+        if output.len() < write_size {
+            return Err(Error::IoError(io::ErrorKind::UnexpectedEof.into()));
+        }
+        let mut buffer = [0u8; MAX_FRAME_INFO_SIZE];
+        assert!(write_size <= buffer.len());
+        buffer[0..4].copy_from_slice(&LZ4F_MAGIC_NUMBER.to_le_bytes());
+
+        // FLG byte
+        buffer[4] = FLG_SUPPORTED_VERSION_BITS;
+        if self.block_checksums {
+            buffer[4] |= FLG_BLOCK_CHECKSUMS;
+        }
+        if self.content_checksum {
+            buffer[4] |= FLG_CONTENT_CHECKSUM;
+        }
+        if self.block_mode == BlockMode::Independent {
+            buffer[4] |= FLG_INDEPENDENT_BLOCKS;
+        }
+
+        // BD byte: the enum discriminant is the block max-size code.
+        buffer[5] = (self.block_size as u8) << BD_BLOCK_SIZE_MASK_RSHIFT;
+
+        // Optional section
+        let mut offset = 6;
+        if let Some(size) = self.content_size {
+            buffer[4] |= FLG_CONTENT_SIZE;
+            buffer[offset..offset + 8].copy_from_slice(&size.to_le_bytes());
+            offset += 8;
+        }
+        if let Some(dict_id) = self.dict_id {
+            buffer[4] |= FLG_DICTIONARY_ID;
+            buffer[offset..offset + 4].copy_from_slice(&dict_id.to_le_bytes());
+            offset += 4;
+        }
+
+        // Header checksum: second byte of the hash over everything after the magic number.
+        let mut hasher = XxHash32::with_seed(0);
+        hasher.write(&buffer[4..offset]);
+        let header_checksum = (hasher.finish() >> 8) as u8;
+        buffer[offset] = header_checksum;
+        offset += 1;
+
+        debug_assert_eq!(offset, write_size);
+        output[..write_size].copy_from_slice(&buffer[..write_size]);
+        Ok(write_size)
+    }
+
+    /// Parses a frame header from the start of `input`.
+    ///
+    /// A legacy magic number yields a `FrameInfo` with `legacy_frame` set and
+    /// `BlockSize::Max8MB`; nothing after the magic number is read in that case.
+    ///
+    /// # Errors
+    ///
+    /// - an I/O error (converted through `?`) when `input` ends before the header does;
+    /// - `Error::SkippableFrame(len)` for a skippable frame, carrying its user data length;
+    /// - `Error::WrongMagicNumber` for an unknown magic number;
+    /// - `Error::UnsupportedVersion` when the version bits are not `01`;
+    /// - `Error::ReservedBitsSet` when a reserved bit of FLG or BD is set;
+    /// - `Error::UnsupportedBlocksize` for block size codes 0 through 3;
+    /// - `Error::HeaderChecksumError` when the stored checksum byte does not match.
+    pub(crate) fn read(mut input: &[u8]) -> Result<FrameInfo, Error> {
+        // Reading advances `input`; the untouched slice is needed for the checksum below.
+        let original_input = input;
+        // 4 byte Magic
+        let magic_num = {
+            let mut buffer = [0u8; 4];
+            input.read_exact(&mut buffer)?;
+            u32::from_le_bytes(buffer)
+        };
+        if magic_num == LZ4F_LEGACY_MAGIC_NUMBER {
+            return Ok(FrameInfo {
+                block_size: BlockSize::Max8MB,
+                legacy_frame: true,
+                ..FrameInfo::default()
+            });
+        }
+        if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) {
+            let mut buffer = [0u8; 4];
+            input.read_exact(&mut buffer)?;
+            let user_data_len = u32::from_le_bytes(buffer);
+            return Err(Error::SkippableFrame(user_data_len));
+        }
+        if magic_num != LZ4F_MAGIC_NUMBER {
+            return Err(Error::WrongMagicNumber);
+        }
+
+        // fixed size section
+        let [flg_byte, bd_byte] = {
+            let mut buffer = [0u8, 0];
+            input.read_exact(&mut buffer)?;
+            buffer
+        };
+
+        if flg_byte & FLG_VERSION_MASK != FLG_SUPPORTED_VERSION_BITS {
+            // version is always 01
+            return Err(Error::UnsupportedVersion(flg_byte & FLG_VERSION_MASK));
+        }
+
+        if flg_byte & FLG_RESERVED_MASK != 0 || bd_byte & BD_RESERVED_MASK != 0 {
+            return Err(Error::ReservedBitsSet);
+        }
+
+        let block_mode = if flg_byte & FLG_INDEPENDENT_BLOCKS != 0 {
+            BlockMode::Independent
+        } else {
+            BlockMode::Linked
+        };
+        let content_checksum = flg_byte & FLG_CONTENT_CHECKSUM != 0;
+        let block_checksums = flg_byte & FLG_BLOCK_CHECKSUMS != 0;
+
+        let block_size = match (bd_byte & BD_BLOCK_SIZE_MASK) >> BD_BLOCK_SIZE_MASK_RSHIFT {
+            i @ 0..=3 => return Err(Error::UnsupportedBlocksize(i)),
+            4 => BlockSize::Max64KB,
+            5 => BlockSize::Max256KB,
+            6 => BlockSize::Max1MB,
+            7 => BlockSize::Max4MB,
+            // The mask keeps three bits, so the shifted value cannot exceed 7.
+            _ => unreachable!(),
+        };
+
+        // var len section
+        let mut content_size = None;
+        if flg_byte & FLG_CONTENT_SIZE != 0 {
+            let mut buffer = [0u8; 8];
+            // Truncated input is an error to report, not a reason to panic.
+            input.read_exact(&mut buffer)?;
+            content_size = Some(u64::from_le_bytes(buffer));
+        }
+
+        let mut dict_id = None;
+        if flg_byte & FLG_DICTIONARY_ID != 0 {
+            let mut buffer = [0u8; 4];
+            input.read_exact(&mut buffer)?;
+            dict_id = Some(u32::from_le_bytes(buffer));
+        }
+
+        // 1 byte header checksum
+        let expected_checksum = {
+            let mut buffer = [0u8; 1];
+            input.read_exact(&mut buffer)?;
+            buffer[0]
+        };
+        // Hash everything consumed so far except the magic number and the checksum byte.
+        let mut hasher = XxHash32::with_seed(0);
+        hasher.write(&original_input[4..original_input.len() - input.len() - 1]);
+        let header_hash = (hasher.finish() >> 8) as u8;
+        if header_hash != expected_checksum {
+            return Err(Error::HeaderChecksumError);
+        }
+
+        Ok(FrameInfo {
+            content_size,
+            dict_id,
+            block_size,
+            block_mode,
+            block_checksums,
+            content_checksum,
+            legacy_frame: false,
+        })
+    }
+}
+
+/// Decoded form of the 4-byte size field that precedes every data block.
+#[derive(Debug)]
+pub(crate) enum BlockInfo {
+    /// A compressed block; the payload is its size field value.
+    Compressed(u32),
+    /// An uncompressed block; the payload is its size field value without the marker bit.
+    Uncompressed(u32),
+    /// A size field of zero.
+    EndMark,
+}
+
+impl BlockInfo {
+    /// Decodes a block info from the first four bytes of `input` (little endian).
+    ///
+    /// Zero decodes to `EndMark`. A value with `BLOCK_UNCOMPRESSED_SIZE_BIT` set decodes to
+    /// `Uncompressed` with that bit cleared, and any other value decodes to `Compressed`.
+    /// Fails when `input` holds fewer than four bytes.
+    pub(crate) fn read(mut input: &[u8]) -> Result<Self, Error> {
+        let mut raw = [0u8; 4];
+        input.read_exact(&mut raw)?;
+        let info = match u32::from_le_bytes(raw) {
+            0 => BlockInfo::EndMark,
+            field if field & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 => {
+                BlockInfo::Uncompressed(field & !BLOCK_UNCOMPRESSED_SIZE_BIT)
+            }
+            field => BlockInfo::Compressed(field),
+        };
+        Ok(info)
+    }
+
+    /// Encodes this block info into the first four bytes of `output` (little endian) and
+    /// returns the number of bytes written, which is always 4.
+    ///
+    /// Returns `Error::InvalidBlockInfo` for `Compressed(0)`, which would be
+    /// indistinguishable from `EndMark`, and for any length that already has
+    /// `BLOCK_UNCOMPRESSED_SIZE_BIT` set. Fails as well when `output` is too short.
+    pub(crate) fn write(&self, mut output: &mut [u8]) -> Result<usize, Error> {
+        let encoded = match self {
+            BlockInfo::EndMark => 0,
+            BlockInfo::Compressed(0) => return Err(Error::InvalidBlockInfo),
+            BlockInfo::Compressed(size) | BlockInfo::Uncompressed(size)
+                if *size & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 =>
+            {
+                return Err(Error::InvalidBlockInfo)
+            }
+            BlockInfo::Uncompressed(size) => *size | BLOCK_UNCOMPRESSED_SIZE_BIT,
+            BlockInfo::Compressed(size) => *size,
+        };
+        output.write_all(&encoded.to_le_bytes())?;
+        Ok(4)
+    }
+}