From: Kostya Shishkov Date: Wed, 20 Oct 2021 16:12:58 +0000 (+0200) Subject: core/io: add BoundedFileReader for operating on just a part of the input X-Git-Url: https://git.nihav.org/?p=nihav.git;a=commitdiff_plain;h=a91a83670604f3799b48c240591c9354ed9b3a6b core/io: add BoundedFileReader for operating on just a part of the input --- diff --git a/nihav-core/src/io/byteio.rs b/nihav-core/src/io/byteio.rs index 44b2684..a194e65 100644 --- a/nihav-core/src/io/byteio.rs +++ b/nihav-core/src/io/byteio.rs @@ -92,6 +92,14 @@ pub struct FileReader { eof: bool, } +/// Bytestream reader from anything implementing `std::io::Read` and `std::io::Seek` that operates only on a part of the input. +pub struct BoundedFileReader { + file: Box, + start: u64, + end: Option, + eof: bool, +} + macro_rules! read_int { ($s: ident, $inttype: ty, $size: expr, $which: ident) => ({ unsafe { @@ -633,6 +641,177 @@ impl ByteIO for FileReader { fn flush(&mut self) -> ByteIOResult<()> { Ok(()) } } + +impl BoundedFileReader { + + /// Constructs a new instance of `BoundedFileReader`. The reader pretends that data before `start` and after `end` (if set) does not exist. + pub fn new_read(file: T, start: u64, end: Option) -> ByteIOResult { + let mut file = Box::new(file); + if let Some(epos) = end { + if start > epos { + return Err(ByteIOError::WrongRange); + } + } + if start > 0 && file.seek(SeekFrom::Start(start)).is_err() { + return Err(ByteIOError::SeekError); + } + Ok(Self { file, start, end, eof : false }) + } + /// Constructs a new instance of `BoundedFileReader` using a boxed resource. The reader pretends that data before `start` and after `end` (if set) does not exist. + pub fn new_read_boxed(mut file: Box, start: u64, end: Option) -> ByteIOResult { + if let Some(epos) = end { + if start > epos { + return Err(ByteIOError::WrongRange); + } + } + if start > 0 && file.seek(SeekFrom::Start(start)).is_err() { + return Err(ByteIOError::SeekError); + } + Ok(Self { file, start, end, eof : false }) + } + /// Destroys the reader and releases the reader resource for a further use. + pub fn finish(self) -> Box { self.file } + fn real_tell(&mut self) -> u64 { + self.file.seek(SeekFrom::Current(0)).unwrap() + } + fn max_read_len(&mut self, len: usize) -> usize { + if let Some(epos) = self.end { + (len as u64).min(epos - self.real_tell()) as usize + } else { + len + } + } +} + +impl ByteIO for BoundedFileReader { + fn read_byte(&mut self) -> ByteIOResult { + if let Some(epos) = self.end { + if self.real_tell() >= epos { + self.eof = true; + return Err(ByteIOError::EOF); + } + } + let mut byte : [u8; 1] = [0]; + let ret = self.file.read(&mut byte); + if ret.is_err() { return Err(ByteIOError::ReadError); } + let sz = ret.unwrap(); + if sz == 0 { self.eof = true; return Err(ByteIOError::EOF); } + Ok (byte[0]) + } + + fn peek_byte(&mut self) -> ByteIOResult { + let b = self.read_byte()?; + if self.file.seek(SeekFrom::Current(-1)).is_err() { + return Err(ByteIOError::SeekError); + } + Ok(b) + } + + fn read_buf(&mut self, buf: &mut [u8]) -> ByteIOResult { + let len = self.max_read_len(buf.len()); + match self.file.read_exact(&mut buf[..len]) { + Ok(()) if len == buf.len() => Ok(buf.len()), + Ok(()) => { + self.eof = true; + Err(ByteIOError::EOF) + }, + Err(err) => { + if err.kind() == std::io::ErrorKind::UnexpectedEof { + self.eof = true; + Err(ByteIOError::EOF) + } else { + Err(ByteIOError::ReadError) + } + }, + } + } + + fn read_buf_some(&mut self, buf: &mut [u8]) -> ByteIOResult { + let len = self.max_read_len(buf.len()); + let ret = self.file.read(&mut buf[..len]); + if ret.is_err() { return Err(ByteIOError::ReadError); } + let sz = ret.unwrap(); + if sz < len { + if let Err(_err) = self.file.read(&mut buf[sz..][..1]) { + self.eof = true; + } else { + return Ok(sz + 1); + } + } + Ok(sz) + } + + fn peek_buf(&mut self, buf: &mut [u8]) -> ByteIOResult { + let len = self.max_read_len(buf.len()); + let size = self.read_buf(&mut buf[..len])?; + if self.file.seek(SeekFrom::Current(-(size as i64))).is_err() { + return Err(ByteIOError::SeekError); + } + Ok(size) + } + + #[allow(unused_variables)] + fn write_buf(&mut self, buf: &[u8]) -> ByteIOResult<()> { + Err(ByteIOError::NotImplemented) + } + + fn tell(&mut self) -> u64 { + self.file.seek(SeekFrom::Current(0)).unwrap() - self.start + } + + fn seek(&mut self, pos: SeekFrom) -> ByteIOResult { + let res = match pos { + SeekFrom::Start(off) => { + let dpos = self.start + off; + if let Some(epos) = self.end { + if dpos > epos { + return Err(ByteIOError::WrongRange); + } + } + self.file.seek(SeekFrom::Start(dpos)) + }, + SeekFrom::Current(off) => { + let dpos = (self.real_tell() as i64) + off; + let end = self.end.unwrap_or(dpos as u64); + if dpos < 0 || ((dpos as u64) < self.start) || ((dpos as u64) > end) { + return Err(ByteIOError::WrongRange); + } + self.file.seek(pos) + }, + SeekFrom::End(off) => { + if let Some(epos) = self.end { + let dpos = (epos as i64) + off; + if dpos < (self.start as i64) || ((dpos as u64) > epos) { + return Err(ByteIOError::WrongRange); + } + self.file.seek(SeekFrom::Start(dpos as u64)) + } else { + self.file.seek(pos) + } + }, + }; + match res { + Ok(r) => Ok(r), + Err(_) => Err(ByteIOError::SeekError), + } + } + + fn is_eof(&self) -> bool { + self.eof + } + + fn is_seekable(&mut self) -> bool { + true + } + + fn size(&mut self) -> i64 { + -1 + } + + fn flush(&mut self) -> ByteIOResult<()> { Ok(()) } +} + + /// High-level bytestream writer. /// /// User is supposed to create some writer implementing [`ByteIO`] trait e.g. [`MemoryWriter`] and use it to create `ByteWriter` which can be used for writing e.g. various integer types.