diff --git a/benches/read_entry.rs b/benches/read_entry.rs
index 8fd4ef3a5..c936ed458 100644
--- a/benches/read_entry.rs
+++ b/benches/read_entry.rs
@@ -37,5 +37,38 @@ fn read_entry(bench: &mut Bencher) {
     bench.bytes = size as u64;
 }
 
-benchmark_group!(benches, read_entry);
+/// Benchmarks finding "random.dat" through `IterableZip::files` and reading it to the end.
+fn read_entry_iterable(bench: &mut Bencher) {
+    use zip::read::Config;
+    use zip::read::IterableZip;
+    let size = 1024 * 1024;
+    let bytes = generate_random_archive(size);
+    let mut archive =
+        IterableZip::try_new(Cursor::new(bytes.as_slice()), Config::default()).unwrap();
+
+    bench.iter(|| {
+        let file = archive
+            .files()
+            .unwrap()
+            .find(|f| {
+                let file = f.as_ref().unwrap();
+                let filename = &*file.file_name;
+                filename == "random.dat"
+            })
+            .unwrap()
+            .unwrap();
+        let mut buf = [0u8; 1024];
+        let mut file_reader = archive.by_file_data(&file, Default::default()).unwrap();
+        loop {
+            let n = file_reader.read(&mut buf).unwrap();
+            if n == 0 {
+                break;
+            }
+        }
+    });
+
+    bench.bytes = size as u64;
+}
+
+benchmark_group!(benches, read_entry, read_entry_iterable);
 benchmark_main!(benches);
diff --git a/src/read.rs b/src/read.rs
index 13b088307..56b447dc2 100644
--- a/src/read.rs
+++ b/src/read.rs
@@ -34,6 +34,10 @@ pub(crate) mod stream;
 
 pub(crate) mod magic_finder;
 
+/// Zip reader that parses central directory entries one at a time while iterating.
+pub mod iterable_zip;
+pub use iterable_zip::IterableZip;
+
 /// Immutable metadata about a `ZipArchive`.
 #[derive(Debug)]
 pub struct ZipArchiveMetadata {
diff --git a/src/read/iterable_zip.rs b/src/read/iterable_zip.rs
new file mode 100644
index 000000000..7b1141736
--- /dev/null
+++ b/src/read/iterable_zip.rs
@@ -0,0 +1,205 @@
+//! Iterable zip reader
+//!
+//! [`IterableZip`] finds the central directory once and then parses one central
+//! directory header per iterator step instead of collecting every entry up front.
+
+use std::{
+    borrow::Cow,
+    io::{Read, Seek, SeekFrom},
+};
+
+use crate::{
+    read::{
+        central_header_to_zip_file, find_content, make_crypto_reader, make_reader,
+        unsupported_zip_error, CentralDirectoryInfo, Config, ZipFile,
+    },
+    result::{ZipError, ZipResult},
+    spec,
+    types::ZipFileData,
+    ZipReadOptions,
+};
+
+/// Iterable version of ZipArchive
+///
+/// Entries are listed through [`IterableZip::files`] and their contents are opened
+/// with [`IterableZip::by_file_data`]; both operate on the same underlying reader.
+pub struct IterableZip<R> {
+    // Stored at construction; nothing in this module reads it, hence the allow.
+    #[allow(unused)]
+    pub(crate) config: Config,
+    pub(crate) iterable_files: IterableZipFiles<R>,
+}
+
+impl<R: Read + Seek> IterableZip<R> {
+    /// Try to create a new zip archive
+    ///
+    /// # Errors
+    ///
+    /// Fails when no usable central directory is found, when the declared number
+    /// of files exceeds the central directory offset, or when the archive spans
+    /// several disks.
+    pub fn try_new(reader: R, config: Config) -> ZipResult<IterableZip<R>> {
+        Self::with_config(config, reader)
+    }
+
+    /// Find and validate the central directory, then build the entry iterator.
+    fn with_config(config: Config, mut reader: R) -> ZipResult<IterableZip<R>> {
+        let file_len = reader.seek(SeekFrom::End(0))?;
+        let mut end_exclusive = file_len;
+        let mut last_err = None;
+
+        // Each rejected candidate becomes the new `end_exclusive` bound for the next
+        // search, until one converts into a `CentralDirectoryInfo` or the search fails.
+        let central_directory = loop {
+            let cde = match spec::find_central_directory(
+                &mut reader,
+                config.archive_offset,
+                end_exclusive,
+                file_len,
+            ) {
+                Ok(cde) => cde,
+                // Prefer the latest conversion error over the search failure.
+                Err(e) => return Err(last_err.unwrap_or(e)),
+            };
+
+            match CentralDirectoryInfo::try_from(&cde) {
+                Ok(info) => break info,
+                Err(e) => {
+                    last_err = Some(e);
+                    end_exclusive = cde.eocd.position;
+                }
+            }
+        };
+
+        // If the parsed number of files is greater than the offset then
+        // something fishy is going on and we shouldn't trust number_of_files.
+        // The offset is converted with a checked conversion because an `as usize`
+        // cast would silently truncate it where usize is narrower than u64.
+        let count_exceeds_offset = match usize::try_from(central_directory.directory_start) {
+            Ok(start) => central_directory.number_of_files > start,
+            // An offset that does not fit in usize is larger than any usize count.
+            Err(_) => false,
+        };
+        if count_exceeds_offset {
+            return unsupported_zip_error("Number of files exceeds the central directory offset");
+        }
+
+        if central_directory.disk_number != central_directory.disk_with_central_directory {
+            return unsupported_zip_error("Support for multi-disk files is not implemented");
+        }
+
+        let iterable_shared = IterableZipFiles::try_new(reader, central_directory)?;
+
+        Ok(IterableZip {
+            config,
+            iterable_files: iterable_shared,
+        })
+    }
+
+    /// Rewind to the start of the central directory and return the entry iterator.
+    pub fn files(&mut self) -> ZipResult<&mut IterableZipFiles<R>> {
+        self.iterable_files.reset()?;
+        Ok(&mut self.iterable_files)
+    }
+
+    /// Open the contents of the entry described by `data`, with options.
+    ///
+    /// # Errors
+    ///
+    /// Returns `ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED)` when the
+    /// entry is encrypted, no password is given and `ignore_encryption_flag` is
+    /// unset. Errors from locating the content or building the readers propagate.
+    pub fn by_file_data<'data>(
+        &'data mut self,
+        data: &'data ZipFileData,
+        mut options: ZipReadOptions<'_>,
+    ) -> ZipResult<ZipFile<'data, R>> {
+        if options.ignore_encryption_flag {
+            // Always use no password when we're ignoring the encryption flag.
+            options.password = None;
+        } else {
+            // Require and use the password only if the file is encrypted.
+            match (options.password, data.encrypted) {
+                (None, true) => {
+                    return Err(ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED))
+                }
+                // Password supplied, but none needed! Discard.
+                (Some(_), false) => options.password = None,
+                _ => {}
+            }
+        }
+        let limit_reader = find_content(data, &mut self.iterable_files.reader)?;
+
+        let crypto_reader =
+            make_crypto_reader(data, limit_reader, options.password, data.aes_mode)?;
+
+        Ok(ZipFile {
+            data: Cow::Borrowed(data),
+            reader: make_reader(
+                data.compression_method,
+                data.uncompressed_size,
+                data.crc32,
+                crypto_reader,
+                #[cfg(feature = "legacy-zip")]
+                data.flags,
+            )?,
+        })
+    }
+}
+
+/// Iterator over the entries of the central directory.
+///
+/// Each step parses one central directory header from the reader.
+#[derive(Debug)]
+pub struct IterableZipFiles<R> {
+    reader: R,
+    central_directory: CentralDirectoryInfo,
+    current_file: usize,
+}
+
+impl<R: Read + Seek> IterableZipFiles<R> {
+    /// Try to create an iterable of files
+    ///
+    /// Seeks `reader` to the start of the central directory.
+    pub(crate) fn try_new(
+        mut reader: R,
+        central_directory: CentralDirectoryInfo,
+    ) -> ZipResult<Self> {
+        reader.seek(SeekFrom::Start(central_directory.directory_start))?;
+        Ok(Self {
+            reader,
+            central_directory,
+            current_file: 0,
+        })
+    }
+
+    /// Restart the iteration: zero the entry counter and seek back to the directory start.
+    pub(crate) fn reset(&mut self) -> ZipResult<()> {
+        self.current_file = 0;
+        self.reader
+            .seek(SeekFrom::Start(self.central_directory.directory_start))?;
+        Ok(())
+    }
+}
+
+impl<R: Read + Seek> Iterator for IterableZipFiles<R> {
+    type Item = ZipResult<ZipFileData>;
+
+    /// Parse the next central directory header.
+    ///
+    /// Yields at most `number_of_files` items; the first error is the last item.
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.current_file >= self.central_directory.number_of_files {
+            return None;
+        }
+        let entry = central_header_to_zip_file(&mut self.reader, &self.central_directory);
+        if entry.is_ok() {
+            self.current_file += 1;
+        } else {
+            // After a failed parse the reader is not known to sit on the next
+            // header, so finish the iteration instead of decoding from there.
+            self.current_file = self.central_directory.number_of_files;
+        }
+        Some(entry)
+    }
+}