diff options
Diffstat (limited to 'src/gz/read.rs')
-rw-r--r-- | src/gz/read.rs | 146 |
1 files changed, 123 insertions, 23 deletions
diff --git a/src/gz/read.rs b/src/gz/read.rs index dbbe632..9dbadbd 100644 --- a/src/gz/read.rs +++ b/src/gz/read.rs @@ -8,9 +8,8 @@ use crate::Compression; /// A gzip streaming encoder /// -/// This structure exposes a [`Read`] interface that will read uncompressed data -/// from the underlying reader and expose the compressed version as a [`Read`] -/// interface. +/// This structure implements a [`Read`] interface. When read from, it reads +/// uncompressed data from the underlying [`Read`] and provides the compressed data. /// /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html /// @@ -25,11 +24,11 @@ use crate::Compression; /// // Return a vector containing the GZ compressed version of hello world /// /// fn gzencode_hello_world() -> io::Result<Vec<u8>> { -/// let mut ret_vec = [0;100]; +/// let mut ret_vec = Vec::new(); /// let bytestring = b"hello world"; /// let mut gz = GzEncoder::new(&bytestring[..], Compression::fast()); -/// let count = gz.read(&mut ret_vec)?; -/// Ok(ret_vec[0..count].to_vec()) +/// gz.read_to_end(&mut ret_vec)?; +/// Ok(ret_vec) /// } /// ``` #[derive(Debug)] @@ -90,17 +89,26 @@ impl<R: Read + Write> Write for GzEncoder<R> { } } -/// A gzip streaming decoder +/// A decoder for a single member of a [gzip file]. /// -/// This structure exposes a [`Read`] interface that will consume compressed -/// data from the underlying reader and emit uncompressed data. +/// This structure implements a [`Read`] interface. When read from, it reads +/// compressed data from the underlying [`Read`] and provides the uncompressed data. /// -/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html +/// After reading a single member of the gzip data this reader will return +/// Ok(0) even if there are more bytes available in the underlying reader. +/// `GzDecoder` may have read additional bytes past the end of the gzip data. +/// If you need the following bytes, wrap the `Reader` in a `std::io::BufReader` +/// and use `bufread::GzDecoder` instead. +/// +/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`] +/// or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). +/// +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 /// /// # Examples /// /// ``` -/// /// use std::io::prelude::*; /// use std::io; /// # use flate2::Compression; @@ -146,6 +154,9 @@ impl<R> GzDecoder<R> { } /// Acquires a reference to the underlying reader. + /// + /// Note that the decoder may have read past the end of the gzip data. + /// To prevent this use [`bufread::GzDecoder`] instead. pub fn get_ref(&self) -> &R { self.inner.get_ref().get_ref() } @@ -153,12 +164,19 @@ impl<R> GzDecoder<R> { /// Acquires a mutable reference to the underlying stream. /// /// Note that mutation of the stream may result in surprising results if - /// this encoder is continued to be used. + /// this decoder continues to be used. + /// + /// Note that the decoder may have read past the end of the gzip data. + /// To prevent this use [`bufread::GzDecoder`] instead. pub fn get_mut(&mut self) -> &mut R { self.inner.get_mut().get_mut() } /// Consumes this decoder, returning the underlying reader. + /// + /// Note that the decoder may have read past the end of the gzip data. + /// Subsequent reads will skip those bytes. To prevent this use + /// [`bufread::GzDecoder`] instead. pub fn into_inner(self) -> R { self.inner.into_inner().into_inner() } @@ -180,19 +198,20 @@ impl<R: Read + Write> Write for GzDecoder<R> { } } -/// A gzip streaming decoder that decodes all members of a multistream +/// A gzip streaming decoder that decodes a [gzip file] that may have multiple members. /// -/// A gzip member consists of a header, compressed data and a trailer. The [gzip -/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple -/// gzip members to be joined in a single stream. `MultiGzDecoder` will -/// decode all consecutive members while `GzDecoder` will only decompress the -/// first gzip member. The multistream format is commonly used in bioinformatics, -/// for example when using the BGZF compressed data. +/// This structure implements a [`Read`] interface. When read from, it reads +/// compressed data from the underlying [`Read`] and provides the uncompressed +/// data. /// -/// This structure exposes a [`Read`] interface that will consume all gzip members -/// from the underlying reader and emit uncompressed data. +/// A gzip file consists of a series of *members* concatenated one after another. +/// MultiGzDecoder decodes all members of a file and returns Ok(0) once the +/// underlying reader does. /// -/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html +/// To handle members seperately, see [GzDecoder] or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). +/// +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 /// /// # Examples /// @@ -250,7 +269,7 @@ impl<R> MultiGzDecoder<R> { /// Acquires a mutable reference to the underlying stream. /// /// Note that mutation of the stream may result in surprising results if - /// this encoder is continued to be used. + /// this decoder is continued to be used. pub fn get_mut(&mut self) -> &mut R { self.inner.get_mut().get_mut() } @@ -276,3 +295,84 @@ impl<R: Read + Write> Write for MultiGzDecoder<R> { self.get_mut().flush() } } + +#[cfg(test)] +mod tests { + use std::io::{Cursor, ErrorKind, Read, Result, Write}; + + use super::GzDecoder; + + //a cursor turning EOF into blocking errors + #[derive(Debug)] + pub struct BlockingCursor { + pub cursor: Cursor<Vec<u8>>, + } + + impl BlockingCursor { + pub fn new() -> BlockingCursor { + BlockingCursor { + cursor: Cursor::new(Vec::new()), + } + } + + pub fn set_position(&mut self, pos: u64) { + return self.cursor.set_position(pos); + } + } + + impl Write for BlockingCursor { + fn write(&mut self, buf: &[u8]) -> Result<usize> { + return self.cursor.write(buf); + } + fn flush(&mut self) -> Result<()> { + return self.cursor.flush(); + } + } + + impl Read for BlockingCursor { + fn read(&mut self, buf: &mut [u8]) -> Result<usize> { + //use the cursor, except it turns eof into blocking error + let r = self.cursor.read(buf); + match r { + Err(ref err) => { + if err.kind() == ErrorKind::UnexpectedEof { + return Err(ErrorKind::WouldBlock.into()); + } + } + Ok(0) => { + //regular EOF turned into blocking error + return Err(ErrorKind::WouldBlock.into()); + } + Ok(_n) => {} + } + return r; + } + } + + #[test] + fn blocked_partial_header_read() { + // this is a reader which receives data afterwards + let mut r = BlockingCursor::new(); + let data = vec![1, 2, 3]; + + match r.write_all(&data) { + Ok(()) => {} + _ => { + panic!("Unexpected result for write_all"); + } + } + r.set_position(0); + + // this is unused except for the buffering + let mut decoder = GzDecoder::new(r); + let mut out = Vec::with_capacity(7); + match decoder.read(&mut out) { + Err(e) => { + assert_eq!(e.kind(), ErrorKind::WouldBlock); + } + _ => { + panic!("Unexpected result for decoder.read"); + } + } + } +} |