aboutsummaryrefslogtreecommitdiff
path: root/src/gz/read.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/gz/read.rs')
-rw-r--r--src/gz/read.rs146
1 files changed, 123 insertions, 23 deletions
diff --git a/src/gz/read.rs b/src/gz/read.rs
index dbbe632..9dbadbd 100644
--- a/src/gz/read.rs
+++ b/src/gz/read.rs
@@ -8,9 +8,8 @@ use crate::Compression;
/// A gzip streaming encoder
///
-/// This structure exposes a [`Read`] interface that will read uncompressed data
-/// from the underlying reader and expose the compressed version as a [`Read`]
-/// interface.
+/// This structure implements a [`Read`] interface. When read from, it reads
+/// uncompressed data from the underlying [`Read`] and provides the compressed data.
///
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
///
@@ -25,11 +24,11 @@ use crate::Compression;
/// // Return a vector containing the GZ compressed version of hello world
///
/// fn gzencode_hello_world() -> io::Result<Vec<u8>> {
-/// let mut ret_vec = [0;100];
+/// let mut ret_vec = Vec::new();
/// let bytestring = b"hello world";
/// let mut gz = GzEncoder::new(&bytestring[..], Compression::fast());
-/// let count = gz.read(&mut ret_vec)?;
-/// Ok(ret_vec[0..count].to_vec())
+/// gz.read_to_end(&mut ret_vec)?;
+/// Ok(ret_vec)
/// }
/// ```
#[derive(Debug)]
@@ -90,17 +89,26 @@ impl<R: Read + Write> Write for GzEncoder<R> {
}
}
-/// A gzip streaming decoder
+/// A decoder for a single member of a [gzip file].
///
-/// This structure exposes a [`Read`] interface that will consume compressed
-/// data from the underlying reader and emit uncompressed data.
+/// This structure implements a [`Read`] interface. When read from, it reads
+/// compressed data from the underlying [`Read`] and provides the uncompressed data.
///
-/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
+/// After reading a single member of the gzip data this reader will return
+/// Ok(0) even if there are more bytes available in the underlying reader.
+/// `GzDecoder` may have read additional bytes past the end of the gzip data.
+/// If you need the following bytes, wrap the `Reader` in a `std::io::BufReader`
+/// and use `bufread::GzDecoder` instead.
+///
+/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`]
+/// or read more
+/// [in the introduction](../index.html#about-multi-member-gzip-files).
+///
+/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
///
/// # Examples
///
/// ```
-///
/// use std::io::prelude::*;
/// use std::io;
/// # use flate2::Compression;
@@ -146,6 +154,9 @@ impl<R> GzDecoder<R> {
}
/// Acquires a reference to the underlying reader.
+ ///
+ /// Note that the decoder may have read past the end of the gzip data.
+ /// To prevent this use [`bufread::GzDecoder`] instead.
pub fn get_ref(&self) -> &R {
self.inner.get_ref().get_ref()
}
@@ -153,12 +164,19 @@ impl<R> GzDecoder<R> {
/// Acquires a mutable reference to the underlying stream.
///
/// Note that mutation of the stream may result in surprising results if
- /// this encoder is continued to be used.
+ /// this decoder continues to be used.
+ ///
+ /// Note that the decoder may have read past the end of the gzip data.
+ /// To prevent this use [`bufread::GzDecoder`] instead.
pub fn get_mut(&mut self) -> &mut R {
self.inner.get_mut().get_mut()
}
/// Consumes this decoder, returning the underlying reader.
+ ///
+ /// Note that the decoder may have read past the end of the gzip data.
+ /// Subsequent reads will skip those bytes. To prevent this use
+ /// [`bufread::GzDecoder`] instead.
pub fn into_inner(self) -> R {
self.inner.into_inner().into_inner()
}
@@ -180,19 +198,20 @@ impl<R: Read + Write> Write for GzDecoder<R> {
}
}
-/// A gzip streaming decoder that decodes all members of a multistream
+/// A gzip streaming decoder that decodes a [gzip file] that may have multiple members.
///
-/// A gzip member consists of a header, compressed data and a trailer. The [gzip
-/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple
-/// gzip members to be joined in a single stream. `MultiGzDecoder` will
-/// decode all consecutive members while `GzDecoder` will only decompress the
-/// first gzip member. The multistream format is commonly used in bioinformatics,
-/// for example when using the BGZF compressed data.
+/// This structure implements a [`Read`] interface. When read from, it reads
+/// compressed data from the underlying [`Read`] and provides the uncompressed
+/// data.
///
-/// This structure exposes a [`Read`] interface that will consume all gzip members
-/// from the underlying reader and emit uncompressed data.
+/// A gzip file consists of a series of *members* concatenated one after another.
+/// MultiGzDecoder decodes all members of a file and returns Ok(0) once the
+/// underlying reader does.
///
-/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
+/// To handle members seperately, see [GzDecoder] or read more
+/// [in the introduction](../index.html#about-multi-member-gzip-files).
+///
+/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
///
/// # Examples
///
@@ -250,7 +269,7 @@ impl<R> MultiGzDecoder<R> {
/// Acquires a mutable reference to the underlying stream.
///
/// Note that mutation of the stream may result in surprising results if
- /// this encoder is continued to be used.
+ /// this decoder is continued to be used.
pub fn get_mut(&mut self) -> &mut R {
self.inner.get_mut().get_mut()
}
@@ -276,3 +295,84 @@ impl<R: Read + Write> Write for MultiGzDecoder<R> {
self.get_mut().flush()
}
}
+
+#[cfg(test)]
+mod tests {
+ use std::io::{Cursor, ErrorKind, Read, Result, Write};
+
+ use super::GzDecoder;
+
+ //a cursor turning EOF into blocking errors
+ #[derive(Debug)]
+ pub struct BlockingCursor {
+ pub cursor: Cursor<Vec<u8>>,
+ }
+
+ impl BlockingCursor {
+ pub fn new() -> BlockingCursor {
+ BlockingCursor {
+ cursor: Cursor::new(Vec::new()),
+ }
+ }
+
+ pub fn set_position(&mut self, pos: u64) {
+ return self.cursor.set_position(pos);
+ }
+ }
+
+ impl Write for BlockingCursor {
+ fn write(&mut self, buf: &[u8]) -> Result<usize> {
+ return self.cursor.write(buf);
+ }
+ fn flush(&mut self) -> Result<()> {
+ return self.cursor.flush();
+ }
+ }
+
+ impl Read for BlockingCursor {
+ fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
+ //use the cursor, except it turns eof into blocking error
+ let r = self.cursor.read(buf);
+ match r {
+ Err(ref err) => {
+ if err.kind() == ErrorKind::UnexpectedEof {
+ return Err(ErrorKind::WouldBlock.into());
+ }
+ }
+ Ok(0) => {
+ //regular EOF turned into blocking error
+ return Err(ErrorKind::WouldBlock.into());
+ }
+ Ok(_n) => {}
+ }
+ return r;
+ }
+ }
+
+ #[test]
+ fn blocked_partial_header_read() {
+ // this is a reader which receives data afterwards
+ let mut r = BlockingCursor::new();
+ let data = vec![1, 2, 3];
+
+ match r.write_all(&data) {
+ Ok(()) => {}
+ _ => {
+ panic!("Unexpected result for write_all");
+ }
+ }
+ r.set_position(0);
+
+ // this is unused except for the buffering
+ let mut decoder = GzDecoder::new(r);
+ let mut out = Vec::with_capacity(7);
+ match decoder.read(&mut out) {
+ Err(e) => {
+ assert_eq!(e.kind(), ErrorKind::WouldBlock);
+ }
+ _ => {
+ panic!("Unexpected result for decoder.read");
+ }
+ }
+ }
+}