From 5954f298681a4fddccd6fc9b9792c2e1a2537145 Mon Sep 17 00:00:00 2001 From: Michael McMaster Date: Sat, 21 May 2011 23:23:31 +1000 Subject: [PATCH] Initial release! --- Compressor.cc | 27 +- Decompressor.cc | 4 +- Doxyfile.in | 2 +- Makefile.am | 25 +- NEWS | 2 +- README | 4 +- autodeb.sh | 48 ++ configure.ac | 4 +- debian/changelog | 4 +- debian/copyright | 16 +- debian/libzipper-dev.install | 1 - debian/{libzipper1.dirs => libzipper.dirs} | 0 .../{libzipper1.install => libzipper.install} | 0 deflate.cc | 620 +++--------------- deflate.hh | 19 +- gzip.cc | 328 ++++----- gzip.hh | 5 + libzipper-1.0.pc.in | 4 +- util.hh | 66 ++ zip.cc | 463 +++++++++---- zip.hh | 8 +- zipper.cc | 28 + zipper.hh | 209 +++--- 23 files changed, 879 insertions(+), 1008 deletions(-) create mode 100755 autodeb.sh rename debian/{libzipper1.dirs => libzipper.dirs} (100%) rename debian/{libzipper1.install => libzipper.install} (100%) diff --git a/Compressor.cc b/Compressor.cc index 456385f..8d12ccf 100644 --- a/Compressor.cc +++ b/Compressor.cc @@ -16,13 +16,12 @@ // along with libzipper. If not, see . 
#include "zipper.hh" -#include "Zip.hh" +#include "gzip.hh" +#include "zip.hh" #include "util.hh" #include -#include - using namespace zipper; class Compressor::CompressorImpl @@ -85,6 +84,20 @@ namespace WriterPtr m_writer; std::vector m_records; }; + + class GzipCompressor : public Compressor::CompressorImpl + { + public: + GzipCompressor(const WriterPtr& writer) : m_writer(writer) {} + + virtual void + addFile(const std::string& filename, const Reader& reader) + { + gzip(filename, reader, m_writer); + } + private: + WriterPtr m_writer; + }; } Compressor::Compressor(ContainerFormat format, const WriterPtr& writer) @@ -97,8 +110,8 @@ Compressor::Compressor(ContainerFormat format, const WriterPtr& writer) case Container_zip: m_compressor = new ZipCompressor(writer); break; - //case Container_none: - // m_compressor = new GzipCompressor(writer); break; + case Container_gzip: + m_compressor = new GzipCompressor(writer); break; default: throw UnsupportedException("Unknown format"); @@ -117,8 +130,8 @@ Compressor::Compressor(ContainerFormat format, Writer& writer) : case Container_zip: m_compressor = new ZipCompressor(ptr); break; - //case Container_none: - // m_compressor = new GzipCompressor(ptr); break; + case Container_gzip: + m_compressor = new GzipCompressor(ptr); break; default: throw UnsupportedException("Unknown format"); diff --git a/Decompressor.cc b/Decompressor.cc index e51f9a6..79dbe07 100644 --- a/Decompressor.cc +++ b/Decompressor.cc @@ -18,8 +18,8 @@ #include "zipper.hh" #include "util.hh" -#include "Unzip.hh" -#include "Ungzip.hh" +#include "gzip.hh" +#include "zip.hh" using namespace zipper; diff --git a/Doxyfile.in b/Doxyfile.in index 6fc4093..5d30837 100644 --- a/Doxyfile.in +++ b/Doxyfile.in @@ -236,7 +236,7 @@ EXTENSION_MAPPING = # func(std::string) {}). This also make the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. 
-BUILTIN_STL_SUPPORT = NO +BUILTIN_STL_SUPPORT = YES # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. diff --git a/Makefile.am b/Makefile.am index 287d6f0..fdceb5d 100644 --- a/Makefile.am +++ b/Makefile.am @@ -18,6 +18,8 @@ include doxygen.am dist_noinst_SCRIPTS = autogen.sh +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = libzipper-1.0.pc EXTRA_DIST = \ configure.ac \ @@ -27,37 +29,40 @@ EXTRA_DIST = \ README \ VERSION + lib_LTLIBRARIES = libzipper.la libzipper_la_SOURCES = \ Compressor.cc \ CompressedFile.cc \ Container.cc \ Decompressor.cc \ + deflate.cc \ + deflate.hh \ Exception.cc \ FileReader.cc \ FileWriter.cc \ + gzip.cc \ + gzip.hh \ Reader.cc \ - Ungzip.cc \ - Ungzip.hh \ - Unzip.cc \ - Unzip.hh \ util.hh \ Writer.cc \ - Zip.hh \ - Zip.cc \ - zipper.hh + zip.hh \ + zip.cc + +# Public API headers go here, for installation to /usr/include +include_HEADERS = zipper.hh -libzipper_la_LDFLAGS = ${ZLIB_LIBS} +libzipper_la_LDFLAGS = ${ZLIB_LIBS} -version-info 1:0 libzipper_la_CFLAGS = ${ZLIB_CFLAGS} -bin_PROGRAMS = zipper +EXTRA_PROGRAMS = zipper zipper_SOURCES = \ zipper.cc zipper_LDADD = libzipper.la -CXXFLAGS=-g -O2 -W -Wall -Werror -std=c++0x +CXXFLAGS=-g -O2 -W -Wall -std=c++0x MOSTLYCLEANFILES=$(DX_CLEANFILES) diff --git a/NEWS b/NEWS index 2faeb53..2c73c39 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,3 @@ -2011-01-31 Version 1.0.0 +2011-05-21 Version 1.0.0 - Initial release diff --git a/README b/README index 9810ba6..9f13e9e 100644 --- a/README +++ b/README @@ -6,11 +6,11 @@ multiple formats. Supported Formats - raw (ie. not compressed) - - ZIP + - gzip + - zip Missing Features - zip64 support - - compression Requirements - zlib diff --git a/autodeb.sh b/autodeb.sh new file mode 100755 index 0000000..7d0953a --- /dev/null +++ b/autodeb.sh @@ -0,0 +1,48 @@ +#!/bin/sh +# Copyright (C) 2011 Michael McMaster +# +# This file is part of libzipper. 
+# +# libzipper is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# libzipper is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with libzipper. If not, see . + +SRC=`pwd` +BUILD=`mktemp -d --tmpdir libzipper-debuild.XXXXXXXXXX` +VERSION=`cat VERSION` +cd ${BUILD} +$SRC/configure +make dist +mv libzipper-${VERSION}.tar.gz libzipper_${VERSION}.orig.tar.gz +tar xzvf libzipper_${VERSION}.orig.tar.gz +cp -a ${SRC}/debian libzipper-${VERSION} +dpkg-source -b libzipper-${VERSION}/ + +# Remove intermediate results. +make distclean +rm -rf libzipper-${VERSION}/ + +# Now we have the source package, copy then build it +tar xzvf libzipper_*.orig.tar.gz +cd libzipper-${VERSION} +tar xzvf ../libzipper_*.debian.tar.gz +dpkg-buildpackage -rfakeroot -uc -b + +echo +echo +echo "Debian source package:" +echo ${BUILD}/libzipper_*.orig.tar.gz +echo ${BUILD}/libzipper_*.debian.tar.gz +echo ${BUILD}/libzipper_*.dsc +echo "Debian binary packages:" +echo ${BUILD}/libzipper_*.deb +echo ${BUILD}/libzipper-dev_*.deb diff --git a/configure.ac b/configure.ac index e37dfa4..40d24c2 100644 --- a/configure.ac +++ b/configure.ac @@ -20,7 +20,9 @@ AC_CANONICAL_HOST AC_CANONICAL_TARGET AM_INIT_AUTOMAKE([foreign]) AC_CONFIG_HEADERS([autoconfig.h]) -AC_CONFIG_FILES([Makefile Doxyfile]) +AC_CONFIG_FILES([Makefile Doxyfile libzipper-1.0.pc]) + +AM_MAINTAINER_MODE AC_SUBST([libzipper_version], m4_esyscmd_s([cat VERSION])) diff --git a/debian/changelog b/debian/changelog index be1dcb7..39139b2 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ 
libzipper (1.0.0-1) unstable; urgency=low - * Initial release (Closes: #nnnn) + * Initial release - -- Michael McMaster Sat, 21 May 2011 00:01:11 +1000 + -- Michael McMaster Sat, 21 May 2011 23:23:11 +1000 diff --git a/debian/copyright b/debian/copyright index 5ae1531..e0e2d6e 100644 --- a/debian/copyright +++ b/debian/copyright @@ -1,10 +1,9 @@ Format: http://dep.debian.net/deps/dep5 Upstream-Name: libzipper -Source: +Source: Files: * -Copyright: - +Copyright: 2011 Michael McMaster License: GPL-3.0+ Files: debian/* @@ -27,14 +26,3 @@ License: GPL-3.0+ . On Debian systems, the complete text of the GNU General Public License version 3 can be found in "/usr/share/common-licenses/GPL-3". - -# Please choose a license for your packaging work. If the program you package -# uses a mainstream license, using the same license is the safest choice. -# Please avoid to pick license terms that are more restrictive than the -# packaged work, as it may make Debian's contributions unacceptable upstream. -# If you just want it to be GPL version 3, leave the following line in. - -and is licensed under the GPL version 3, see above. - -# Please also look if there are files or directories which have a -# different copyright/license attached and list them here. diff --git a/debian/libzipper-dev.install b/debian/libzipper-dev.install index 3c996c8..68a175d 100644 --- a/debian/libzipper-dev.install +++ b/debian/libzipper-dev.install @@ -3,4 +3,3 @@ usr/lib/lib*.a usr/lib/lib*.so usr/lib/pkgconfig/* usr/lib/*.la -usr/share/pkgconfig/* diff --git a/debian/libzipper1.dirs b/debian/libzipper.dirs similarity index 100% rename from debian/libzipper1.dirs rename to debian/libzipper.dirs diff --git a/debian/libzipper1.install b/debian/libzipper.install similarity index 100% rename from debian/libzipper1.install rename to debian/libzipper.install diff --git a/deflate.cc b/deflate.cc index 0819b80..3fff160 100644 --- a/deflate.cc +++ b/deflate.cc @@ -16,34 +16,18 @@ // along with libzipper. 
If not, see . #include "zipper.hh" -#include "zip.hh" +#include "deflate.hh" #include "util.hh" #include -#include #include #include -#include - using namespace zipper; namespace { - struct InflateDeleter - { - public: - InflateDeleter(z_stream* stream) : m_stream(stream) {} - ~InflateDeleter() - { - inflateEnd(m_stream); - - } - private: - z_stream* m_stream; - }; - struct DeflateDeleter { public: @@ -57,426 +41,40 @@ namespace z_stream* m_stream; }; - class FileEntry : public CompressedFile + struct InflateDeleter { public: - FileEntry( - const ReaderPtr& reader, - uint16_t versionNeeded, - uint16_t gpFlag, - uint16_t compressionMethod, - uint32_t crc, - zsize_t compressedSize, - zsize_t uncompressedSize, - zsize_t localHeaderOffset, - std::string fileName - ) : - m_reader(reader), - m_versionNeeded(versionNeeded), - m_gpFlag(gpFlag), - m_compressionMethod(compressionMethod), - m_crc(crc), - m_compressedSize(compressedSize), - m_uncompressedSize(uncompressedSize), - m_localHeaderOffset(localHeaderOffset), - m_fileName(fileName) - { - } - - virtual bool isDecompressSupported() const - { - return ((m_versionNeeded & 0xf) <= 20) && - ((m_gpFlag & 0x1) == 0) && // Not encrypted - ((m_compressionMethod == 0) || (m_compressionMethod == 8)); - } - - virtual const std::string& getPath() const - { - return m_fileName; - } - - virtual zsize_t getCompressedSize() const { return m_compressedSize; } - virtual zsize_t getUncompressedSize() const - { - return m_uncompressedSize; - } - - virtual void decompress(Writer& writer) + InflateDeleter(z_stream* stream) : m_stream(stream) {} + ~InflateDeleter() { - enum - { - Signature = 0x04034b50, - MinRecordBytes = 30, - ChunkSize = 64*1024, - WindowBits = 15 - }; - - std::vector localRecord(MinRecordBytes); - m_reader->readData( - m_localHeaderOffset, MinRecordBytes, &localRecord[0] - ); - if (read32_le(localRecord, 0) != Signature) - { - throw FormatException("Invalid local ZIP record"); - } - - // Don't trust the lengths for 
filename and extra content read from - // the central records. At least for extra, these DO differ for - // unknown reasons - zsize_t filenameLength(read16_le(localRecord, 26)); - zsize_t extraLength(read16_le(localRecord, 28)); - - zsize_t startCompressedBytes( - m_localHeaderOffset + - MinRecordBytes + - filenameLength + - extraLength - ); - - zsize_t endCompressedBytes( - startCompressedBytes + m_compressedSize - ); - - if (endCompressedBytes > m_reader->getSize()) - { - throw FormatException("Compressed file size is too long"); - } - - switch (m_compressionMethod) - { - case 0: // No compression - { - for (zsize_t pos(startCompressedBytes); - pos < endCompressedBytes; - pos += ChunkSize - ) - { - uint8_t buf[ChunkSize]; - zsize_t bytes( - std::min(zsize_t(ChunkSize), endCompressedBytes - pos) - ); - m_reader->readData(pos, bytes, &buf[0]); - writer.writeData(pos, bytes, &buf[0]); - } - }; break; - - case 8: // Deflate - { - uint8_t inChunk[ChunkSize]; - uint8_t outChunk[ChunkSize]; - - z_stream stream; - stream.zalloc = NULL; - stream.zfree = NULL; - stream.opaque = NULL; - int zlibErr(inflateInit2(&stream, -WindowBits)); - assert(zlibErr == Z_OK); - InflateDeleter deleter(&stream); - stream.next_in = NULL; - stream.avail_in = 0; - bool finished(false); - - zsize_t pos(startCompressedBytes); - zsize_t outPos(0); - uint32_t crc(crc32(0, NULL, 0)); - while (pos < endCompressedBytes) - { - if (stream.avail_in == 0) - { - stream.avail_in = - std::min( - zsize_t(ChunkSize), - endCompressedBytes - pos - ); - m_reader->readData( - pos, stream.avail_in, &inChunk[0] - ); - stream.next_in = reinterpret_cast(&inChunk); - pos += stream.avail_in; - } - - stream.next_out = reinterpret_cast(&outChunk); - stream.avail_out = sizeof(outChunk); - - zlibErr = inflate(&stream, Z_SYNC_FLUSH); - - finished = false; - if (zlibErr == Z_STREAM_END) - { - finished = true; - - } - else if (zlibErr != Z_OK) - { - throw FormatException("Corrupt Data"); - } - - zsize_t 
bytesToWrite(sizeof(outChunk) - stream.avail_out); - writer.writeData( - outPos, - bytesToWrite, - &outChunk[0] - ); - outPos += bytesToWrite; - crc = crc32(crc, &outChunk[0], bytesToWrite); - - if (finished) break; - } - if (!finished) - { - // Ran out of data to process - throw FormatException("Corrupt Data"); - } - - if (m_gpFlag & 0x4) // CRC is after compressed data - { - uint8_t dataDescriptor[12]; - ::memcpy( - dataDescriptor, - stream.next_in, - std::min(12u, stream.avail_in)); - - if (stream.avail_in < 12) - { - m_reader->readData( - pos, - 12 - stream.avail_in, - &dataDescriptor[stream.avail_in]); - } - m_crc = read32_le(dataDescriptor, 0); - m_compressedSize = read32_le(dataDescriptor, 4); - m_uncompressedSize = read32_le(dataDescriptor, 8); - } - - if (crc != m_crc) - { - throw FormatException("Corrupt Data (CRC failure)"); - } + inflateEnd(m_stream); - }; break; - default: - throw UnsupportedException("Unsupported compression scheme"); - }; } - private: - ReaderPtr m_reader; - uint16_t m_versionNeeded; - uint16_t m_gpFlag; - uint16_t m_compressionMethod; - uint32_t m_crc; - zsize_t m_compressedSize; - zsize_t m_uncompressedSize; - zsize_t m_localHeaderOffset; - std::string m_fileName; + z_stream* m_stream; }; - bool readEndCentralDirectory( - const ReaderPtr& reader, - zsize_t& centralDirectoryBytes, - zsize_t& centralDirectoryOffset, - zsize_t& centralDirectoryEntries - ) - { - // Read the end of central directory record. This - // record enables us to find the remainding - // records without searching for record signatures. - - // TODO does not consider the Zip64 entries. 
- - enum - { - MinRecordBytes = 22, // Minimum size with no comment - MaxCommentBytes = 65535, // 2 bytes to store comment length - Signature = 0x06054b50 - }; - - zsize_t providerSize(reader->getSize()); - if (providerSize < MinRecordBytes) - { - throw FormatException("Too small"); - } - - size_t bufSize( - std::min(zsize_t(MinRecordBytes + MaxCommentBytes), providerSize) - ); - std::vector buffer(bufSize); - reader->readData(providerSize - bufSize, bufSize, &buffer[0]); - - // Need to search for this record, as it ends in a variable-length - // comment field. Search backwards, with the assumption that the - // comment doesn't exist, or is much smaller than the maximum - // length - - bool recordFound(false); - ssize_t pos(bufSize - MinRecordBytes); - for (; pos >= 0; --pos) - { - recordFound = (read32_le(buffer, pos) == Signature); - break; - } - - if (recordFound) - { - if (read16_le(buffer, pos + 4) != 0) - { - throw UnsupportedException("Spanned disks not supported"); - } - - centralDirectoryBytes = read32_le(buffer, pos + 12); - centralDirectoryOffset = read32_le(buffer, pos + 16); - centralDirectoryEntries = read16_le(buffer, pos + 10); - } - return recordFound; - } - - std::vector - readCentralDirectory(const ReaderPtr& reader) + enum Constants { - enum Constants - { - MinRecordBytes = 46, - Signature = 0x02014b50 - }; - - zsize_t centralDirectoryBytes(0); - zsize_t centralDirectoryOffset(0); - zsize_t centralDirectoryEntries(0); - bool isZip( - readEndCentralDirectory( - reader, - centralDirectoryBytes, - centralDirectoryOffset, - centralDirectoryEntries - ) - ); - assert(isZip); - - std::vector buffer(centralDirectoryBytes); - reader->readData( - centralDirectoryOffset, - centralDirectoryBytes, - &buffer[0] - ); - - zsize_t pos(0); - std::vector entries; - while ((pos + MinRecordBytes) < buffer.size()) - { - if (read32_le(buffer, pos) != Signature) - { - // Unknown record type. 
- pos += 1; - continue; - } - - uint16_t versionNeeded(read16_le(buffer, pos + 6)); - uint16_t gpFlag(read16_le(buffer, pos + 8)); - uint16_t compressionMethod(read16_le(buffer, pos + 10)); - uint32_t crc(read32_le(buffer, pos + 16)); - uint32_t compressedSize(read32_le(buffer, pos + 20)); - uint32_t uncompressedSize(read32_le(buffer, pos + 24)); - size_t fileNameLen(read16_le(buffer, pos + 28)); - size_t extraLen(read16_le(buffer, pos + 30)); - size_t commentLen(read16_le(buffer, pos + 32)); - uint32_t localHeaderOffset(read32_le(buffer, pos + 42)); - - if ((fileNameLen + extraLen + commentLen + MinRecordBytes + pos) > - buffer.size() - ) - { - throw FormatException("File comments are too long"); - } - - std::string fileName( - &buffer[pos + MinRecordBytes], - &buffer[pos + MinRecordBytes + fileNameLen] - ); - - entries.push_back( - CompressedFilePtr( - new FileEntry( - reader, - versionNeeded, - gpFlag, - compressionMethod, - crc, - compressedSize, - uncompressedSize, - localHeaderOffset, - fileName - ) - ) - ); - - pos += MinRecordBytes + fileNameLen + extraLen + commentLen; - } - return entries; - } - + ChunkSize = 64*1024, + WindowBits = 15 + }; } - void -zipper::zip( - const std::string& filename, +zipper::deflate( const Reader& reader, const WriterPtr& writer, - ZipFileRecord& outRecord) + zsize_t& writeOffset, + zsize_t& uncompressedSize, + zsize_t& compressedSize, + uint32_t& crc) { - enum Constants - { - ChunkSize = 64*1024, - WindowBits = 15, - CRC32Pos = 14 - }; - - static uint8_t Header[] = - { - 0x50, 0x4b, 0x03, 0x04, // Header - 20, // Version (2.0) - 0, // File attributes - 0,0, // gp flag. 
- 8,0, // deflate method - 0,0, // file time - 0,0, // file date - 0,0,0,0, // CRC32 - 0,0,0,0, // Compressed size - 0,0,0,0 // Uncompressed size - }; - - zsize_t outPos(writer->getSize()); - outRecord.localHeaderOffset = outPos; - outRecord.filename = filename; - - // Write header - { - uint8_t buffer[ChunkSize]; - memcpy(buffer, Header, sizeof(Header)); - zsize_t pos(sizeof(Header)); - - std::string::size_type filenameSize(filename.size()); - if (filenameSize > (ChunkSize - pos)) - { - filenameSize = ChunkSize - pos; - } - buffer[pos++] = filenameSize & 0xff; - buffer[pos++] = (filenameSize >> 8); - buffer[pos++] = 0; // extra field len - buffer[pos++] = 0; // extra field len - memcpy(buffer + pos, filename.data(), filenameSize); - pos += filenameSize; - writer->writeData(outPos, pos, &buffer[0]); - outPos += pos; - } - - // Write compressed data - uint8_t inChunk[ChunkSize]; uint8_t outChunk[ChunkSize]; - outRecord.uncompressedSize = 0; - outRecord.compressedSize = 0; + uncompressedSize = 0; + compressedSize = 0; z_stream stream; stream.zalloc = NULL; @@ -499,7 +97,7 @@ zipper::zip( zsize_t pos(0); zsize_t end(reader.getSize()); - outRecord.crc32 = crc32(0, NULL, 0); + crc = crc32(0, NULL, 0); while (pos < end) { @@ -511,9 +109,8 @@ zipper::zip( pos, stream.avail_in, &inChunk[0]); stream.next_in = reinterpret_cast(&inChunk); pos += stream.avail_in; - outRecord.uncompressedSize += stream.avail_in; - outRecord.crc32 = - crc32(outRecord.crc32, stream.next_in, stream.avail_in); + uncompressedSize += stream.avail_in; + crc = crc32(crc, stream.next_in, stream.avail_in); } stream.next_out = reinterpret_cast(&outChunk); @@ -535,148 +132,77 @@ zipper::zip( } zsize_t bytesToWrite(sizeof(outChunk) - stream.avail_out); - writer->writeData( - outPos, - bytesToWrite, - &outChunk[0]); - outPos += bytesToWrite; - outRecord.compressedSize += bytesToWrite; + writer->writeData(writeOffset, bytesToWrite, &outChunk[0]); + writeOffset += bytesToWrite; + compressedSize += 
bytesToWrite; } - - // Go back and complete the header. - uint8_t trailer[12]; - write32_le(outRecord.crc32, &trailer[0]); - write32_le(outRecord.compressedSize, &trailer[4]); - write32_le(outRecord.uncompressedSize, &trailer[8]); - writer->writeData( - outRecord.localHeaderOffset + CRC32Pos, sizeof(trailer), &trailer[0]); } + void -zipper::zipFinalise( - const std::vector& records, - const WriterPtr& writer) +zipper::inflate( + const ReaderPtr& reader, + Writer& writer, + zsize_t& readOffset, + zsize_t readEnd, + zsize_t& writeOffset, + uint32_t& crc) { - enum Constants - { - ChunkSize = 64*1024 - }; - - static uint8_t FileHeader[] = - { - 0x50, 0x4b, 0x01, 0x02, // Header - 20, 0x00, // Version (2.0) - 20, 0x00, // Version Needed to extract (2.0) - 0,0, // gp flag. - 8,0, // deflate method - 0,0, // file time - 0,0 // file date - }; + uint8_t inChunk[ChunkSize]; + uint8_t outChunk[ChunkSize]; - zsize_t outPos(writer->getSize()); - uint32_t centralDirOffset(outPos); + z_stream stream; + stream.zalloc = NULL; + stream.zfree = NULL; + stream.opaque = NULL; + int zlibErr(inflateInit2(&stream, -WindowBits)); + assert(zlibErr == Z_OK); + InflateDeleter deleter(&stream); + stream.next_in = NULL; + stream.avail_in = 0; + bool finished(false); - for (size_t i = 0; i < records.size(); ++i) + zsize_t pos(readOffset); + crc = crc32(0, NULL, 0); + while (!finished) /* run to Z_STREAM_END: output can still be pending after the last input chunk is read */ { - uint8_t buffer[ChunkSize]; - memcpy(buffer, FileHeader, sizeof(FileHeader)); - zsize_t pos(sizeof(FileHeader)); - - write32_le(records[i].crc32, &buffer[pos]); - pos += 4; - - write32_le(records[i].compressedSize, &buffer[pos]); - pos += 4; - - write32_le(records[i].uncompressedSize, &buffer[pos]); - pos += 4; - - std::string::size_type filenameSize(records[i].filename.size()); - if (filenameSize > (ChunkSize - pos)) + if ((stream.avail_in == 0) && (pos < readEnd)) /* refill only while input remains; otherwise inflate() reports Z_BUF_ERROR on truncated data */ { - filenameSize = ChunkSize - pos; + stream.avail_in = std::min(zsize_t(ChunkSize), readEnd - pos); + reader->readData(pos, stream.avail_in, &inChunk[0]); +
stream.next_in = reinterpret_cast(&inChunk); + pos += stream.avail_in; } - buffer[pos++] = filenameSize & 0xff; - buffer[pos++] = (filenameSize >> 8); - buffer[pos++] = 0; // extra field len - buffer[pos++] = 0; // extra field len - - buffer[pos++] = 0; // file comment len - buffer[pos++] = 0; // file comment len - - buffer[pos++] = 0; // disk number - buffer[pos++] = 0; // disk number - buffer[pos++] = 0; // internal file attributes - buffer[pos++] = 0; // internal file attributes + stream.next_out = reinterpret_cast(&outChunk); + stream.avail_out = sizeof(outChunk); - buffer[pos++] = 0; // external file attributes - buffer[pos++] = 0; // external file attributes - buffer[pos++] = 0; // external file attributes - buffer[pos++] = 0; // external file attributes + zlibErr = inflate(&stream, Z_SYNC_FLUSH); - write32_le(records[i].localHeaderOffset, &buffer[pos]); - pos += 4; + finished = false; + if (zlibErr == Z_STREAM_END) + { + finished = true; + } + else if (zlibErr != Z_OK) + { + throw FormatException("Corrupt Data"); + } - memcpy(buffer + pos, records[i].filename.data(), filenameSize); - pos += filenameSize; + zsize_t bytesToWrite(sizeof(outChunk) - stream.avail_out); + writer.writeData(writeOffset, bytesToWrite, &outChunk[0]); + writeOffset += bytesToWrite; + crc = crc32(crc, &outChunk[0], bytesToWrite); - writer->writeData(outPos, pos, &buffer[0]); - outPos += pos; + if (finished) break; } - - uint32_t centralDirSize(writer->getSize() - centralDirOffset); - + if (!finished) { - // End-of-directory record.
- static uint8_t EndDirectory[] = - { - 0x50, 0x4b, 0x05, 0x06, // Header - 0x00, 0x00, // Disk num - 0x00, 0x00 // Disk with central dir - }; - uint8_t buffer[ChunkSize]; - memcpy(buffer, EndDirectory, sizeof(EndDirectory)); - zsize_t pos(sizeof(EndDirectory)); - - buffer[pos++] = records.size() & 0xff; // Entries on this disk - buffer[pos++] = records.size() >> 8; - buffer[pos++] = records.size() & 0xff; // Total entries - buffer[pos++] = records.size() >> 8; - - write32_le(centralDirSize, &buffer[pos]); - pos += 4; - write32_le(centralDirOffset, &buffer[pos]); - pos += 4; - - buffer[pos++] = 0; // Zip comment length - buffer[pos++] = 0; // Zip comment length - - writer->writeData(outPos, pos, &buffer[0]); - outPos += pos; + // Ran out of data to process + throw FormatException("Corrupt Data"); } -} -std::vector -zipper::unzip(const ReaderPtr& reader) -{ - return readCentralDirectory(reader); -} - -bool -zipper::isZip(const ReaderPtr& reader) -{ - zsize_t centralDirectoryBytes(0); - zsize_t centralDirectoryOffset(0); - zsize_t centralDirectoryEntries(0); - bool result( - readEndCentralDirectory( - reader, - centralDirectoryBytes, - centralDirectoryOffset, - centralDirectoryEntries - ) - ); - return result; + // We've read data that wasn't consumed!
+ readOffset = pos - stream.avail_in; } diff --git a/deflate.hh b/deflate.hh index 9acf552..84e21e0 100644 --- a/deflate.hh +++ b/deflate.hh @@ -21,13 +21,20 @@ namespace zipper { - bool isGzip(const ReaderPtr& reader); - - void gzip( - const std::string& filename, + void deflate( const Reader& reader, - const WriterPtr& writer); + const WriterPtr& writer, + zsize_t& writeOffset, + zsize_t& uncompressedSize, + zsize_t& compressedSize, + uint32_t& crc); - std::vector ungzip(const ReaderPtr& reader); + void inflate( + const ReaderPtr& reader, + Writer& writer, + zsize_t& readOffset, + zsize_t readEnd, + zsize_t& writeOffset, + uint32_t& crc); } diff --git a/gzip.cc b/gzip.cc index 774d6c6..c1b9dd4 100644 --- a/gzip.cc +++ b/gzip.cc @@ -16,9 +16,9 @@ // along with libzipper. If not, see . #include "zipper.hh" -#include "Ungzip.hh" - -#include +#include "gzip.hh" +#include "util.hh" +#include "deflate.hh" #include #include @@ -31,31 +31,6 @@ using namespace zipper; namespace { - uint32_t - read32(const uint8_t* zipData) - { - // Read 4 bytes in little-endian order. - // Return results in host-endian.
- return uint16_t( - zipData[pos] | - (uint16_t(zipData[pos+1]) << 8) - ); - } - - size_t findNull(const std::vector& zipData, size_t start) { @@ -75,19 +50,6 @@ namespace return start; } - struct InflateDeleter - { - public: - InflateDeleter(z_stream* stream) : m_stream(stream) {} - ~InflateDeleter() - { - inflateEnd(m_stream); - - } - private: - z_stream* m_stream; - }; - class FileEntry : public CompressedFile { public: @@ -117,85 +79,21 @@ namespace virtual void decompress(Writer& writer) { - enum - { - ChunkSize = 64*1024, - WindowBits = 15 - }; - - uint8_t inChunk[ChunkSize]; - uint8_t outChunk[ChunkSize]; zsize_t endCompressedBytes = m_reader->getSize() - 8; // CRC+ISIZE - uint32_t crc(crc32(0, NULL, 0)); - - z_stream stream; - stream.zalloc = NULL; - stream.zfree = NULL; - stream.opaque = NULL; - int zlibErr(inflateInit2(&stream, -WindowBits)); - assert(zlibErr == Z_OK); - InflateDeleter deleter(&stream); - stream.next_in = NULL; - stream.avail_in = 0; - - bool finished(false); - zsize_t pos(m_dataOffset); + zsize_t inPos(m_dataOffset); zsize_t outPos(0); - while (pos < endCompressedBytes) - { - if (stream.avail_in == 0) - { - stream.avail_in = - std::min( - zsize_t(ChunkSize), - endCompressedBytes - pos - ); - m_reader->readData( - pos, stream.avail_in, &inChunk[0] - ); - stream.next_in = reinterpret_cast(&inChunk); - pos += stream.avail_in; - } - - stream.next_out = reinterpret_cast(&outChunk); - stream.avail_out = sizeof(outChunk); - - zlibErr = inflate(&stream, Z_SYNC_FLUSH); - - finished = false; - if (zlibErr == Z_STREAM_END) - { - finished = true; - } - else if (zlibErr != Z_OK) - { - throw FormatException("Corrupt Data"); - } - - zsize_t bytesToWrite(sizeof(outChunk) - stream.avail_out); - writer.writeData(outPos, bytesToWrite, &outChunk[0]); - outPos += bytesToWrite; - crc = crc32(crc, &outChunk[0], bytesToWrite); - - if (finished) break; - } - - if (!finished) - { - // Ran out of data to process - throw FormatException("Corrupt Data"); - } + 
uint32_t crc(0); + inflate( + m_reader, + writer, + inPos, + endCompressedBytes, + outPos, + crc); uint8_t crcBuffer[4]; - ::memcpy(crcBuffer, stream.next_in, std::min(4u, stream.avail_in)); - - if (stream.avail_in < 4) - { - m_reader->readData( - pos, 4 - stream.avail_in, &crcBuffer[stream.avail_in] - ); - } - uint32_t savedCRC = read32(&crcBuffer[0]); + m_reader->readData(inPos, sizeof(crcBuffer), &crcBuffer[0]); + uint32_t savedCRC = read32_le(&crcBuffer[0]); if (savedCRC != crc) { throw FormatException("Corrupt Data (CRC Failure)"); @@ -209,101 +107,153 @@ namespace }; } -namespace zipper +std::vector +zipper::ungzip(const ReaderPtr& reader) { - std::vector - ungzip(const ReaderPtr& reader) + enum { - enum - { - MaxHeader = 64*1024 // Artifical limit to simplify code - }; + MaxHeader = 64*1024 // Artificial limit to simplify code + }; - if (!isGzip(reader)) - { - throw FormatException("Invalid gzip file"); - } + if (!isGzip(reader)) + { + throw FormatException("Invalid gzip file"); + } - std::vector header( - std::min(reader->getSize(), zsize_t(MaxHeader))); - reader->readData(0, header.size(), &header[0]); + std::vector header( + std::min(reader->getSize(), zsize_t(MaxHeader))); + reader->readData(0, header.size(), &header[0]); - if (header[2] != 8) // "deflate" method - { - throw UnsupportedException("Unknown gzip compression method"); - } + if (header[2] != 8) // "deflate" method + { + throw UnsupportedException("Unknown gzip compression method"); + } - bool fextra = (header[3] & 4) != 0; - bool fname = (header[3] & 8) != 0; - bool fcomment = (header[3] & 0x10) != 0; - bool fhcrc = (header[3] & 2) != 0; + bool fextra = (header[3] & 4) != 0; + bool fname = (header[3] & 8) != 0; + bool fcomment = (header[3] & 0x10) != 0; + bool fhcrc = (header[3] & 2) != 0; - size_t offset(10); + size_t offset(10); - if (fextra) + if (fextra) + { + if (offset + 2 > header.size()) { - if (offset + 2 > header.size()) - { - throw FormatException("Unexpected end-of-file"); - } -
uint16_t fextraBytes(read16(header, offset)); - offset += 2; - - offset += fextraBytes; + throw FormatException("Unexpected end-of-file"); } + uint16_t fextraBytes(read16_le(header, offset)); + offset += 2; - std::string embeddedName(reader->getSourceName()); - if (fname) - { - size_t nullOffset(findNull(header, offset)); - embeddedName = - std::string( - reinterpret_cast(&header[offset]), - nullOffset - offset); - offset = nullOffset + 1; - } + offset += fextraBytes; + } - if (fcomment) - { - size_t nullOffset(findNull(header, offset)); - offset = nullOffset + 1; - } + std::string embeddedName(reader->getSourceName()); + if (fname) + { + size_t nullOffset(findNull(header, offset)); + embeddedName = + std::string( + reinterpret_cast(&header[offset]), nullOffset - offset); + offset = nullOffset + 1; + } - if (fhcrc) - { - offset += 2; - } + if (fcomment) + { + size_t nullOffset(findNull(header, offset)); + offset = nullOffset + 1; + } - if (offset >= header.size()) - { - throw FormatException("Unexpected end-of-file"); - } + if (fhcrc) + { + offset += 2; + } + + if (offset >= header.size()) + { + throw FormatException("Unexpected end-of-file"); + } + + std::vector result; + result.push_back( + CompressedFilePtr(new FileEntry(reader, offset, embeddedName))); - std::vector result; - result.push_back( - CompressedFilePtr(new FileEntry(reader, offset, embeddedName))); + return result; +} + +bool +zipper::isGzip(const ReaderPtr& reader) +{ + enum Constants + { + MinFileBytes = 18, // Header + CRC + size + ID1 = 0x1f, + ID2 = 0x8b + }; - return result; + bool isGzip(false); + if (reader->getSize() >= MinFileBytes) + { + uint8_t magic[2]; + reader->readData(0, sizeof(magic), &magic[0]); + isGzip = (magic[0] == ID1) && (magic[1] == ID2); } + return isGzip; +} + +void +zipper::gzip( + const std::string& filename, + const Reader& reader, + const WriterPtr& writer) +{ + enum Constants + { + ChunkSize = 64*1024, + WindowBits = 15 + }; - bool - isGzip(const ReaderPtr& reader) + 
static uint8_t Header[] = { - enum Constants - { - MinFileBytes = 18, // Header + CRC + size - ID1 = 0x1f, - ID2 = 0x8b - }; + 0x1f, 0x8b, // ID + 0x08, // deflate + 0x8, // Flags (filename set) + 0x0, 0x0, 0x0, 0x0, // mtime + 0x0, // Extra flags + 0xff // OS + }; - bool isGzip(false); - if (reader->getSize() >= MinFileBytes) + zsize_t outPos(writer->getSize()); + + // Write header + { + uint8_t buffer[ChunkSize]; + memcpy(buffer, Header, sizeof(Header)); + zsize_t pos(sizeof(Header)); + + zsize_t filenameSize(filename.size()); + if (filenameSize > (ChunkSize - pos - 1)) { - uint8_t magic[2]; - reader->readData(0, sizeof(magic), &magic[0]); - isGzip = (magic[0] == ID1) && (magic[1] == ID2); + filenameSize = ChunkSize - pos - 1; } - return isGzip; + std::copy(&filename[0], &filename[filenameSize], &buffer[pos]); + pos += filenameSize; + buffer[pos++] = '\0'; + + writer->writeData(outPos, pos, &buffer[0]); + outPos += pos; } + + // Compress data + zsize_t uncompressedSize(0); + zsize_t compressedSize(0); + uint32_t crc(0); + deflate(reader, writer, outPos, uncompressedSize, compressedSize, crc); + + // Write trailer. + uint8_t trailer[8]; + write32_le(crc, &trailer[0]); + write32_le(reader.getSize(), &trailer[4]); + writer->writeData(outPos, sizeof(trailer), &trailer[0]); } diff --git a/gzip.hh b/gzip.hh index 95d445b..9acf552 100644 --- a/gzip.hh +++ b/gzip.hh @@ -23,6 +23,11 @@ namespace zipper { bool isGzip(const ReaderPtr& reader); + void gzip( + const std::string& filename, + const Reader& reader, + const WriterPtr& writer); + std::vector ungzip(const ReaderPtr& reader); } diff --git a/libzipper-1.0.pc.in b/libzipper-1.0.pc.in index 6e192d7..f1b40c8 100644 --- a/libzipper-1.0.pc.in +++ b/libzipper-1.0.pc.in @@ -6,6 +6,6 @@ includedir=@includedir@ Name: libzipper Description: libzipper offers a flexible C++ interface for reading compressed files in multiple formats. 
Requires: zlib -Version: @PACKAGE_VERSION@ -Libs: -L${libdir} -lzipper-1.0 +Version: @libzipper_version@ +Libs: -L${libdir} -lzipper Cflags: -I${includedir} diff --git a/util.hh b/util.hh index fbce16a..103b389 100644 --- a/util.hh +++ b/util.hh @@ -26,6 +26,72 @@ namespace zipper { void operator()(T*) {} }; + + template + uint32_t + read32_le(const T& inArray, size_t pos = 0) + { + // Read 4 bytes in little-endian order. + // Return results in host-endian. + return uint32_t( + inArray[pos] | + (uint32_t(inArray[pos+1]) << 8) | + (uint32_t(inArray[pos+2]) << 16) | + (uint32_t(inArray[pos+3]) << 24) + ); + } + + template + uint16_t + read16_le(const T& inArray, size_t pos = 0) + { + // Read 2 bytes in little-endian order. + // Return results in host-endian. + return uint16_t( + inArray[pos] | + (uint16_t(inArray[pos+1]) << 8) + ); + } + + template + void + write32_le(uint32_t value, T& outArray, size_t pos = 0) + { + // Write 4 bytes in little-endian order. + outArray[pos] = value & 0xff; + outArray[pos + 1] = (value >> 8) & 0xff; + outArray[pos + 2] = (value >> 16) & 0xff; + outArray[pos + 3] = (value >> 24) & 0xff; + } + + template + void + write32_le(uint32_t value, T* outArray, size_t pos = 0) + { + // Write 4 bytes in little-endian order. + outArray[pos] = value & 0xff; + outArray[pos + 1] = (value >> 8) & 0xff; + outArray[pos + 2] = (value >> 16) & 0xff; + outArray[pos + 3] = (value >> 24) & 0xff; + } + + template + void + write16_le(uint16_t value, T& outArray, size_t pos = 0) + { + // Write 2 bytes in little-endian order. + outArray[pos] = value & 0xff; + outArray[pos + 1] = (value >> 8); + } + + template + void + write16_le(uint16_t value, T* outArray, size_t pos = 0) + { + // Write 2 bytes in little-endian order. + outArray[pos] = value & 0xff; + outArray[pos + 1] = (value >> 8); + } } #endif diff --git a/zip.cc b/zip.cc index 8048ece..4171517 100644 --- a/zip.cc +++ b/zip.cc @@ -16,9 +16,10 @@ // along with libzipper. If not, see . 
#include "zipper.hh" -#include "Zip.hh" +#include "zip.hh" +#include "util.hh" +#include "deflate.hh" -#include #include #include @@ -30,28 +31,297 @@ using namespace zipper; namespace { - void - write32(uint32_t value, uint8_t* zipData) - { - // Write 4 bytes in little-endian order. - zipData[0] = value & 0xff; - zipData[1] = (value >> 8) & 0xff; - zipData[2] = (value >> 16) & 0xff; - zipData[3] = (value >> 24) & 0xff; - } - - struct DeflateDeleter + class FileEntry : public CompressedFile { public: - DeflateDeleter(z_stream* stream) : m_stream(stream) {} - ~DeflateDeleter() + FileEntry( + const ReaderPtr& reader, + uint16_t versionNeeded, + uint16_t gpFlag, + uint16_t compressionMethod, + uint32_t crc, + zsize_t compressedSize, + zsize_t uncompressedSize, + zsize_t localHeaderOffset, + std::string fileName + ) : + m_reader(reader), + m_versionNeeded(versionNeeded), + m_gpFlag(gpFlag), + m_compressionMethod(compressionMethod), + m_crc(crc), + m_compressedSize(compressedSize), + m_uncompressedSize(uncompressedSize), + m_localHeaderOffset(localHeaderOffset), + m_fileName(fileName) { - deflateEnd(m_stream); + } + virtual bool isDecompressSupported() const + { + return ((m_versionNeeded & 0xf) <= 20) && + ((m_gpFlag & 0x1) == 0) && // Not encrypted + ((m_compressionMethod == 0) || (m_compressionMethod == 8)); } + + virtual const std::string& getPath() const + { + return m_fileName; + } + + virtual zsize_t getCompressedSize() const { return m_compressedSize; } + virtual zsize_t getUncompressedSize() const + { + return m_uncompressedSize; + } + + virtual void decompress(Writer& writer) + { + enum + { + Signature = 0x04034b50, + MinRecordBytes = 30, + ChunkSize = 64*1024 + }; + + std::vector localRecord(MinRecordBytes); + m_reader->readData( + m_localHeaderOffset, MinRecordBytes, &localRecord[0] + ); + if (read32_le(localRecord, 0) != Signature) + { + throw FormatException("Invalid local ZIP record"); + } + + // Don't trust the lengths for filename and extra content read 
from + // the central records. At least for extra, these DO differ for + // unknown reasons + zsize_t filenameLength(read16_le(localRecord, 26)); + zsize_t extraLength(read16_le(localRecord, 28)); + + zsize_t startCompressedBytes( + m_localHeaderOffset + + MinRecordBytes + + filenameLength + + extraLength + ); + + zsize_t endCompressedBytes( + startCompressedBytes + m_compressedSize + ); + + if (endCompressedBytes > m_reader->getSize()) + { + throw FormatException("Compressed file size is too long"); + } + + switch (m_compressionMethod) + { + case 0: // No compression + { + for (zsize_t pos(startCompressedBytes); + pos < endCompressedBytes; + pos += ChunkSize + ) + { + uint8_t buf[ChunkSize]; + zsize_t bytes( + std::min(zsize_t(ChunkSize), endCompressedBytes - pos) + ); + m_reader->readData(pos, bytes, &buf[0]); + writer.writeData(pos, bytes, &buf[0]); + } + }; break; + + case 8: // Deflate + { + uint32_t crc(0); + zsize_t inPos(startCompressedBytes); + zsize_t outPos(0); + inflate( + m_reader, + writer, + inPos, + endCompressedBytes, + outPos, + crc); + + if (m_gpFlag & 0x4) // CRC is after compressed data + { + uint8_t dataDescriptor[12]; + m_reader->readData( + inPos, sizeof(dataDescriptor), &dataDescriptor[0]); + m_crc = read32_le(dataDescriptor, 0); + m_compressedSize = read32_le(dataDescriptor, 4); + m_uncompressedSize = read32_le(dataDescriptor, 8); + } + + if (crc != m_crc) + { + throw FormatException("Corrupt Data (CRC failure)"); + } + + }; break; + default: + throw UnsupportedException("Unsupported compression scheme"); + }; + } + private: - z_stream* m_stream; + ReaderPtr m_reader; + uint16_t m_versionNeeded; + uint16_t m_gpFlag; + uint16_t m_compressionMethod; + uint32_t m_crc; + zsize_t m_compressedSize; + zsize_t m_uncompressedSize; + zsize_t m_localHeaderOffset; + std::string m_fileName; }; + + bool readEndCentralDirectory( + const ReaderPtr& reader, + zsize_t& centralDirectoryBytes, + zsize_t& centralDirectoryOffset, + zsize_t& 
centralDirectoryEntries + ) + { + // Read the end of central directory record. This + // record enables us to find the remaining + // records without searching for record signatures. + + // TODO does not consider the Zip64 entries. + + enum + { + MinRecordBytes = 22, // Minimum size with no comment + MaxCommentBytes = 65535, // 2 bytes to store comment length + Signature = 0x06054b50 + }; + + zsize_t providerSize(reader->getSize()); + if (providerSize < MinRecordBytes) + { + throw FormatException("Too small"); + } + + size_t bufSize( + std::min(zsize_t(MinRecordBytes + MaxCommentBytes), providerSize) + ); + std::vector buffer(bufSize); + reader->readData(providerSize - bufSize, bufSize, &buffer[0]); + + // Need to search for this record, as it ends in a variable-length + // comment field. Search backwards, with the assumption that the + // comment doesn't exist, or is much smaller than the maximum + // length + + bool recordFound(false); + ssize_t pos(bufSize - MinRecordBytes); + for (; pos >= 0; --pos) + { + recordFound = (read32_le(buffer, pos) == Signature); + break; + } + + if (recordFound) + { + if (read16_le(buffer, pos + 4) != 0) + { + throw UnsupportedException("Spanned disks not supported"); + } + + centralDirectoryBytes = read32_le(buffer, pos + 12); + centralDirectoryOffset = read32_le(buffer, pos + 16); + centralDirectoryEntries = read16_le(buffer, pos + 10); + } + return recordFound; + } + + std::vector + readCentralDirectory(const ReaderPtr& reader) + { + enum Constants + { + MinRecordBytes = 46, + Signature = 0x02014b50 + }; + + zsize_t centralDirectoryBytes(0); + zsize_t centralDirectoryOffset(0); + zsize_t centralDirectoryEntries(0); + bool isZip( + readEndCentralDirectory( + reader, + centralDirectoryBytes, + centralDirectoryOffset, + centralDirectoryEntries + ) + ); + assert(isZip); + + std::vector buffer(centralDirectoryBytes); + reader->readData( + centralDirectoryOffset, + centralDirectoryBytes, + &buffer[0] + ); + + zsize_t pos(0); + 
std::vector entries; + while ((pos + MinRecordBytes) < buffer.size()) + { + if (read32_le(buffer, pos) != Signature) + { + // Unknown record type. + pos += 1; + continue; + } + + uint16_t versionNeeded(read16_le(buffer, pos + 6)); + uint16_t gpFlag(read16_le(buffer, pos + 8)); + uint16_t compressionMethod(read16_le(buffer, pos + 10)); + uint32_t crc(read32_le(buffer, pos + 16)); + uint32_t compressedSize(read32_le(buffer, pos + 20)); + uint32_t uncompressedSize(read32_le(buffer, pos + 24)); + size_t fileNameLen(read16_le(buffer, pos + 28)); + size_t extraLen(read16_le(buffer, pos + 30)); + size_t commentLen(read16_le(buffer, pos + 32)); + uint32_t localHeaderOffset(read32_le(buffer, pos + 42)); + + if ((fileNameLen + extraLen + commentLen + MinRecordBytes + pos) > + buffer.size() + ) + { + throw FormatException("File comments are too long"); + } + + std::string fileName( + &buffer[pos + MinRecordBytes], + &buffer[pos + MinRecordBytes + fileNameLen] + ); + + entries.push_back( + CompressedFilePtr( + new FileEntry( + reader, + versionNeeded, + gpFlag, + compressionMethod, + crc, + compressedSize, + uncompressedSize, + localHeaderOffset, + fileName + ) + ) + ); + + pos += MinRecordBytes + fileNameLen + extraLen + commentLen; + } + return entries; + } + } @@ -109,83 +379,19 @@ zipper::zip( } // Write compressed data - - uint8_t inChunk[ChunkSize]; - uint8_t outChunk[ChunkSize]; - - outRecord.uncompressedSize = 0; - outRecord.compressedSize = 0; - - z_stream stream; - stream.zalloc = NULL; - stream.zfree = NULL; - stream.opaque = NULL; - int zlibErr( - deflateInit2( - &stream, - Z_DEFAULT_COMPRESSION, - Z_DEFLATED, - -WindowBits, - MAX_MEM_LEVEL, - Z_DEFAULT_STRATEGY) - ); - - assert(zlibErr == Z_OK); - DeflateDeleter deleter(&stream); - stream.next_in = NULL; - stream.avail_in = 0; - - zsize_t pos(0); - zsize_t end(reader.getSize()); - outRecord.crc32 = crc32(0, NULL, 0); - - while (pos < end) - { - if (stream.avail_in == 0) - { - stream.avail_in = - 
std::min(zsize_t(ChunkSize), end - pos); - reader.readData( - pos, stream.avail_in, &inChunk[0]); - stream.next_in = reinterpret_cast(&inChunk); - pos += stream.avail_in; - outRecord.uncompressedSize += stream.avail_in; - outRecord.crc32 = - crc32(outRecord.crc32, stream.next_in, stream.avail_in); - } - - stream.next_out = reinterpret_cast(&outChunk); - stream.avail_out = sizeof(outChunk); - - zlibErr = deflate(&stream, (pos < end) ? Z_NO_FLUSH : Z_FINISH); - - if (zlibErr == Z_STREAM_END) - { - if (pos < end) - { - assert(!"zlib buffer unexpectedly empty"); - std::terminate(); - } - } - else if (zlibErr != Z_OK) - { - throw FormatException("Corrupt Data"); - } - - zsize_t bytesToWrite(sizeof(outChunk) - stream.avail_out); - writer->writeData( - outPos, - bytesToWrite, - &outChunk[0]); - outPos += bytesToWrite; - outRecord.compressedSize += bytesToWrite; - } + deflate( + reader, + writer, + outPos, + outRecord.uncompressedSize, + outRecord.compressedSize, + outRecord.crc32); // Go back and complete the header. 
uint8_t trailer[12]; - write32(outRecord.crc32, &trailer[0]); - write32(outRecord.compressedSize, &trailer[4]); - write32(outRecord.uncompressedSize, &trailer[8]); + write32_le(outRecord.crc32, &trailer[0]); + write32_le(outRecord.compressedSize, &trailer[4]); + write32_le(outRecord.uncompressedSize, &trailer[8]); writer->writeData( outRecord.localHeaderOffset + CRC32Pos, sizeof(trailer), &trailer[0]); } @@ -220,13 +426,13 @@ zipper::zipFinalise( memcpy(buffer, FileHeader, sizeof(FileHeader)); zsize_t pos(sizeof(FileHeader)); - write32(records[i].crc32, &buffer[pos]); + write32_le(records[i].crc32, &buffer[pos]); pos += 4; - write32(records[i].compressedSize, &buffer[pos]); + write32_le(records[i].compressedSize, &buffer[pos]); pos += 4; - write32(records[i].uncompressedSize, &buffer[pos]); + write32_le(records[i].uncompressedSize, &buffer[pos]); pos += 4; std::string::size_type filenameSize(records[i].filename.size()); @@ -234,26 +440,25 @@ zipper::zipFinalise( { filenameSize = ChunkSize - pos; } - buffer[pos++] = filenameSize & 0xff; - buffer[pos++] = (filenameSize >> 8); - buffer[pos++] = 0; // extra field len - buffer[pos++] = 0; // extra field len + write16_le(filenameSize, &buffer[pos]); + pos += 2; - buffer[pos++] = 0; // file comment len - buffer[pos++] = 0; // file comment len + write16_le(0, &buffer[pos]); // extra field len + pos += 2; - buffer[pos++] = 0; // disk number - buffer[pos++] = 0; // disk number + write16_le(0, &buffer[pos]); // file comment len + pos += 2; - buffer[pos++] = 0; // internal file attributes - buffer[pos++] = 0; // internal file attributes + write16_le(0, &buffer[pos]); // disk number + pos += 2; - buffer[pos++] = 0; // external file attributes - buffer[pos++] = 0; // external file attributes - buffer[pos++] = 0; // external file attributes - buffer[pos++] = 0; // external file attributes + write16_le(0, &buffer[pos]); // internal file attributes + pos += 2; - write32(records[i].localHeaderOffset, &buffer[pos]); + write32_le(0, 
&buffer[pos]); // external file attributes + pos += 4; + + write32_le(records[i].localHeaderOffset, &buffer[pos]); pos += 4; memcpy(buffer + pos, records[i].filename.data(), filenameSize); @@ -277,20 +482,44 @@ zipper::zipFinalise( memcpy(buffer, EndDirectory, sizeof(EndDirectory)); zsize_t pos(sizeof(EndDirectory)); - buffer[pos++] = records.size() & 0xff; // Entries on this disk - buffer[pos++] = records.size() >> 8; - buffer[pos++] = records.size() & 0xff; // Total entries - buffer[pos++] = records.size() >> 8; + write16_le(records.size(), &buffer[pos]); // Entries on this disk + pos += 2; + write16_le(records.size(), &buffer[pos]); // Total entries + pos += 2; - write32(centralDirSize, &buffer[pos]); + write32_le(centralDirSize, &buffer[pos]); pos += 4; - write32(centralDirOffset, &buffer[pos]); + write32_le(centralDirOffset, &buffer[pos]); pos += 4; - buffer[pos++] = 0; // Zip comment length - buffer[pos++] = 0; // Zip comment length + write16_le(0, &buffer[pos]); // Zip comment length + pos += 2; writer->writeData(outPos, pos, &buffer[0]); outPos += pos; } } + +std::vector +zipper::unzip(const ReaderPtr& reader) +{ + return readCentralDirectory(reader); +} + +bool +zipper::isZip(const ReaderPtr& reader) +{ + zsize_t centralDirectoryBytes(0); + zsize_t centralDirectoryOffset(0); + zsize_t centralDirectoryEntries(0); + bool result( + readEndCentralDirectory( + reader, + centralDirectoryBytes, + centralDirectoryOffset, + centralDirectoryEntries + ) + ); + return result; +} + diff --git a/zip.hh b/zip.hh index 7680a61..c6df4b0 100644 --- a/zip.hh +++ b/zip.hh @@ -26,8 +26,8 @@ namespace zipper { zsize_t localHeaderOffset; uint32_t crc32; - uint32_t compressedSize; - uint32_t uncompressedSize; + zsize_t compressedSize; + zsize_t uncompressedSize; std::string filename; }; @@ -40,5 +40,9 @@ namespace zipper void zipFinalise( const std::vector& records, const WriterPtr& writer); + + bool isZip(const ReaderPtr& reader); + + std::vector unzip(const ReaderPtr& reader); 
} diff --git a/zipper.cc b/zipper.cc index 97e0667..6570ac8 100644 --- a/zipper.cc +++ b/zipper.cc @@ -27,7 +27,20 @@ using namespace zipper; int main() { + /* + { + FileReader reader("test.gz"); + Decompressor decomp(reader); + std::vector entries(decomp.getEntries()); + for (size_t f = 0; f < entries.size(); ++f) + { + FileWriter writer(entries[f]->getPath(), 0660); + entries[f]->decompress(writer); + } + } + + { FileReader reader("test.zip"); Decompressor decomp(reader); std::vector entries(decomp.getEntries()); @@ -36,11 +49,26 @@ int main() FileWriter writer(entries[f]->getPath(), 0660); entries[f]->decompress(writer); } + } */ + + + { FileReader reader("test"); FileWriter writer("test.zip", 0660); { Compressor comp(Container_zip, writer); comp.addFile(reader); } + } + + { + FileReader reader("test"); + FileWriter writer("test.gz", 0660); + { + Compressor comp(Container_gzip, writer); + comp.addFile(reader); + } + } + } diff --git a/zipper.hh b/zipper.hh index 9964e95..53529ee 100644 --- a/zipper.hh +++ b/zipper.hh @@ -27,110 +27,111 @@ #include // For mode_t -/// \mainpage libzipper C++ (de)compression library -/// -/// \section intro Introduction -/// libzipper offers a flexible C++ interface for reading compressed files -/// in multiple formats. -/// -/// Homepage -/// -/// libzipper aims to provide applications a transparent method of accessing -/// compressed data. eg. libzipper is suited to reading XML config files that -/// are compressed to save space. -/// -/// libzipper is not a general-purpose archive management library, as it -/// does not provide access to the filesystem attributes of each file. -/// (ie. libzipper does not support the concepts of file owner, group, -/// permissions, or timestamps. -/// -/// \section formats Supported Formats -///
    -///
  • zip
  • -///
-/// -/// \section example_read Reading a compressed file into memory -/// -/// \code -/// #include -/// #include -/// #include -/// -/// class MemWriter : public zipper::Writer -/// { -/// public: -/// std::vector data; -/// virtual void writeData( -/// zsize_t offset, zsize_t bytes, const uint8_t* inData) -/// { -/// data.resize(std::max(offset + bytes, data.size())); -/// std::copy(inData, inData + bytes, &data[offset]); -/// } -/// virtual zsize_t getSize() const { return data.size(); } -/// }; -/// -/// std::vector readSavedGame(const std::string& filename) -/// { -/// // open the compressed input file. FileReader will throw an -/// // exception if an IO error occurs. -/// zipper::FileReader reader(filename); -/// -/// MemWriter writer; -/// -/// zipper::Decompressor decomp(reader); -/// -/// std::vector entries(decomp.getEntries()); -/// -/// if (!entries.empty()) -/// { -/// // Uncompress the first file. Will pass-though data as-is if the -/// // file is not compressed. -/// entries.front()->decompress(writer); -/// } -/// return writer.data; -/// } -/// -/// \endcode -/// -/// \section example_write Writing compressed files. 
-/// \code -/// #include -/// #include -/// #include -/// -/// class MemReader : public zipper::Reader -/// { -/// public: -/// MemReader(const vector& data) : m_data(data) {} -/// -/// virtual const std::string& getSourceName() const -/// { -/// static std::string Name("savedGame.dat"); -/// return Name; -/// } -/// -/// virtual zsize_t getSize() const { return m_data.size(); } -/// -/// virtual void readData( -/// zsize_t offset, zsize_t bytes, uint8_t* dest -/// ) const -/// { -/// std::copy(&m_data[offset], &m_data[offset + bytes], dest); -/// } -/// private: -/// std::vector m_data; -/// }; -/// -/// void writeSavedGame( -/// const std::string& filename, const std::vector& gameData -/// ) -/// { -/// zipper::FileWriter writer(filename); -/// zipper::Compressor comp(zipper::Container_zip, writer); -/// comp.addFile(MemReader(gameData)); -/// } -/// -/// \endcode +/** +\mainpage libzipper C++ (de)compression library + +\section intro Introduction +libzipper offers a flexible C++ interface for reading compressed files +in multiple formats. + +Homepage + +libzipper aims to provide applications a transparent method of accessing +compressed data. eg. libzipper is suited to reading XML config files that +are compressed to save space. + +libzipper is not a general-purpose archive management library, as it +does not provide access to the filesystem attributes of each file. +(ie. libzipper does not support the concepts of file owner, group, +permissions, or timestamps.) + +\section formats Supported Formats +
    +
  • gzip
  • +
  • zip
  • +
+ +\section example_read Reading a compressed file into memory +\code +#include +#include +#include + +class MemWriter : public zipper::Writer +{ +public: + std::vector data; + + virtual void writeData( + zsize_t offset, zsize_t bytes, const uint8_t* inData) + { + data.resize(std::max(offset + bytes, data.size())); + std::copy(inData, inData + bytes, &data[offset]); + } + virtual zsize_t getSize() const { return data.size(); } +}; + +std::vector readSavedGame(const std::string& filename) +{ + // open the compressed input file. FileReader will throw an + // exception if an IO error occurs. + zipper::FileReader reader(filename); + + MemWriter writer; + + zipper::Decompressor decomp(reader); + + std::vector entries(decomp.getEntries()); + + if (!entries.empty()) + { + // Uncompress the first file. Will pass-through data as-is if the + // file is not compressed. + entries.front()->decompress(writer); + } + return writer.data; +} + +\endcode + +\section example_write Writing compressed files. +\code +#include +#include +#include + +class MemReader : public zipper::Reader +{ +public: + MemReader(const vector& data) : m_data(data) {} + + virtual const std::string& getSourceName() const + { + static std::string Name("savedGame.dat"); + return Name; + } + + virtual zsize_t getSize() const { return m_data.size(); } + + virtual void readData(zsize_t offset, zsize_t bytes, uint8_t* dest) const + { + std::copy(&m_data[offset], &m_data[offset + bytes], dest); + } + +private: + std::vector m_data; +}; + +void writeSavedGame( + const std::string& filename, const std::vector& gameData) +{ + zipper::FileWriter writer(filename); + zipper::Compressor comp(zipper::Container_zip, writer); + comp.addFile(MemReader(gameData)); +} + +\endcode +*/ /// \namespace zipper /// \brief The zipper namespace contains the libzipper public API. -- 2.38.5