@@ -14,10 +14,12 @@ * LICENSE.GPLv2 or LICENSE.GPLv3 respectively included in the packaging of this * file. */ #include "config.h" + +#include #import "OFLHAArchive.h" #import "OFLHAArchiveEntry.h" #import "OFLHAArchiveEntry+Private.h" #ifdef OF_HAVE_FILES @@ -24,15 +26,48 @@ # import "OFFile.h" #endif #import "OFStream.h" #import "OFSeekableStream.h" #import "OFString.h" + +#import "huffman_tree.h" #import "OFInvalidArgumentException.h" +#import "OFInvalidFormatException.h" #import "OFNotImplementedException.h" #import "OFNotOpenException.h" #import "OFTruncatedDataException.h" + +#define LHSTREAM_BUFFER_SIZE 4096 + +@interface OFLHAArchive_LHStream: OFStream +{ +@public + OFStream *_stream; + uint8_t _distanceBits, _dictionaryBits; + unsigned char _buffer[LHSTREAM_BUFFER_SIZE]; + uint16_t _bufferIndex, _bufferLength; + uint8_t _byte; + uint8_t _bitIndex, _savedBitsLength; + uint16_t _savedBits; + unsigned char *_slidingWindow; + uint32_t _slidingWindowIndex, _slidingWindowMask; + int _state; + uint16_t _symbolsLeft; + struct of_huffman_tree *_codeLenTree, *_litLenTree, *_distTree; + struct of_huffman_tree *_treeIter; + uint16_t _codesCount, _codesReceived; + bool _currentIsExtendedLength, _skip; + uint8_t *_codesLengths; + uint16_t _length; + uint32_t _distance; +} + +- (instancetype)of_initWithStream: (OFStream *)stream + distanceBits: (uint8_t)distanceBits + dictionaryBits: (uint8_t)dictionaryBits; +@end @interface OFLHAArchive_FileReadStream: OFStream { OF_KINDOF(OFStream *) _stream; uint32_t _toRead; @@ -41,10 +76,69 @@ - (instancetype)of_initWithStream: (OFStream *)stream entry: (OFLHAArchiveEntry *)entry; - (void)of_skip; @end + +enum state { + STATE_BLOCK_HEADER, + STATE_CODE_LEN_CODES_COUNT, + STATE_CODE_LEN_TREE, + STATE_CODE_LEN_TREE_SINGLE, + STATE_LITLEN_CODES_COUNT, + STATE_LITLEN_TREE, + STATE_LITLEN_TREE_SINGLE, + STATE_DIST_CODES_COUNT, + STATE_DIST_TREE, + STATE_DIST_TREE_SINGLE, + STATE_BLOCK_LITLEN, + STATE_BLOCK_DIST_LENGTH, + STATE_BLOCK_DIST_LENGTH_EXTRA, + STATE_BLOCK_LEN_DIST_PAIR +}; + +static bool +tryReadBits(OFLHAArchive_LHStream *stream, uint16_t *bits, uint8_t count) +{ + uint16_t ret = stream->_savedBits; + + assert(stream->_savedBitsLength < count); + + for (uint_fast8_t i = stream->_savedBitsLength; i < count; i++) { + if OF_UNLIKELY (stream->_bitIndex == 8) { + if (stream->_bufferIndex < stream->_bufferLength) + stream->_byte = + stream->_buffer[stream->_bufferIndex++]; + else { + size_t length = [stream->_stream + readIntoBuffer: stream->_buffer + length: LHSTREAM_BUFFER_SIZE]; + + if OF_UNLIKELY (length < 1) { + stream->_savedBits = ret; + stream->_savedBitsLength = i; + return false; + } + + stream->_byte = stream->_buffer[0]; + stream->_bufferIndex = 1; + stream->_bufferLength = (uint16_t)length; + } + + stream->_bitIndex = 0; + } + + ret = (ret << 1) | + ((stream->_byte >> (7 - stream->_bitIndex++)) & 1); + } + + stream->_savedBits = 0; + stream->_savedBitsLength = 0; + *bits = ret; + + return true; +} @implementation OFLHAArchive @synthesize encoding = _encoding; + (instancetype)archiveWithStream: (OF_KINDOF(OFStream *))stream @@ -167,16 +261,10 @@ if (_lastReturnedStream == nil) @throw [OFInvalidArgumentException exception]; method = [_lastEntry method]; - if (![method isEqual: @"-lh0-"] && ![method isEqual: @"-lhd-"] && - ![method isEqual: @"-lz4-"]) - @throw [OFNotImplementedException - exceptionWithSelector: _cmd - object: self]; - return [[_lastReturnedStream retain] autorelease]; } - (void)close { @@ -192,25 +280,463 @@ [_stream release]; _stream = nil; } @end + +@implementation OFLHAArchive_LHStream +- (instancetype)of_initWithStream: (OFStream *)stream + distanceBits: (uint8_t)distanceBits + dictionaryBits: (uint8_t)dictionaryBits +{ + self = [super init]; + + @try { + _stream = [stream retain]; + + /* 0-7 address the bit, 8 means fetch next byte */ + _bitIndex = 8; + + _distanceBits = distanceBits; + _dictionaryBits = dictionaryBits; + + _slidingWindowMask = (1 << dictionaryBits) - 1; + _slidingWindow = [self allocMemoryWithSize: + _slidingWindowMask + 1]; + /* Avoid leaking data */ + memset(_slidingWindow, 0, _slidingWindowMask + 1); + } @catch (id e) { + [self release]; + @throw e; + } + + return self; +} + +- (void)dealloc +{ + [self close]; + + if (_codeLenTree != NULL) + of_huffman_tree_release(_codeLenTree); + if (_litLenTree != NULL) + of_huffman_tree_release(_litLenTree); + if (_distTree != NULL) + of_huffman_tree_release(_distTree); + + [super dealloc]; +} + +- (size_t)lowlevelReadIntoBuffer: (void *)buffer_ + length: (size_t)length +{ + unsigned char *buffer = buffer_; + uint16_t bits, value; + size_t bytesWritten = 0; + + if (_stream == nil) + @throw [OFNotOpenException exceptionWithObject: self]; + + if ([_stream isAtEndOfStream] && _bufferLength - _bufferIndex == 0 && + _state == STATE_BLOCK_HEADER) + return 0; + +start: + switch ((enum state)_state) { + case STATE_BLOCK_HEADER: + if OF_UNLIKELY (!tryReadBits(self, &bits, 16)) + return bytesWritten; + + _symbolsLeft = bits; + + _state = STATE_CODE_LEN_CODES_COUNT; + goto start; + case STATE_CODE_LEN_CODES_COUNT: + if OF_UNLIKELY (!tryReadBits(self, &bits, 5)) + return bytesWritten; + + if OF_UNLIKELY (bits > 20) + @throw [OFInvalidFormatException exception]; + + if OF_UNLIKELY (bits == 0) { + _state = STATE_CODE_LEN_TREE_SINGLE; + goto start; + } + + _codesCount = bits; + _codesReceived = 0; + _codesLengths = [self allocMemoryWithSize: bits]; + memset(_codesLengths, 0, bits); + _skip = true; + + _state = STATE_CODE_LEN_TREE; + goto start; + case STATE_CODE_LEN_TREE: + while (_codesReceived < _codesCount) { + if OF_UNLIKELY (_currentIsExtendedLength) { + if OF_UNLIKELY (!tryReadBits(self, &bits, 1)) + return bytesWritten; + + if OF_UNLIKELY (bits == 0) { + _codesReceived++; + _currentIsExtendedLength = false; + continue; + } + + _codesLengths[_codesReceived]++; + continue; + } + + if OF_UNLIKELY (_codesReceived == 3 && _skip) { + if OF_UNLIKELY (!tryReadBits(self, &bits, 2)) + return bytesWritten; + + if OF_UNLIKELY (_codesReceived + bits > + _codesCount) + @throw [OFInvalidFormatException + exception]; + + for (uint_fast8_t j = 0; j < bits; j++) + _codesLengths[_codesReceived++] = 0; + + _skip = false; + continue; + } + + if OF_UNLIKELY (!tryReadBits(self, &bits, 3)) + return bytesWritten; + + _codesLengths[_codesReceived] = bits; + + if OF_UNLIKELY (bits == 7) { + _currentIsExtendedLength = true; + continue; + } else + _codesReceived++; + } + + _codeLenTree = of_huffman_tree_construct(_codesLengths, + _codesCount); + [self freeMemory: _codesLengths]; + + _state = STATE_LITLEN_CODES_COUNT; + goto start; + case STATE_CODE_LEN_TREE_SINGLE: + if OF_UNLIKELY (!tryReadBits(self, &bits, 5)) + return bytesWritten; + + _codeLenTree = of_huffman_tree_construct_single(bits); + + _state = STATE_LITLEN_CODES_COUNT; + goto start; + case STATE_LITLEN_CODES_COUNT: + if OF_UNLIKELY (!tryReadBits(self, &bits, 9)) + return bytesWritten; + + if OF_UNLIKELY (bits > 510) + @throw [OFInvalidFormatException exception]; + + if OF_UNLIKELY (bits == 0) { + of_huffman_tree_release(_codeLenTree); + _codeLenTree = NULL; + + _state = STATE_LITLEN_TREE_SINGLE; + goto start; + } + + _codesCount = bits; + _codesReceived = 0; + _codesLengths = [self allocMemoryWithSize: bits]; + memset(_codesLengths, 0, bits); + _skip = false; + + _treeIter = _codeLenTree; + _state = STATE_LITLEN_TREE; + goto start; + case STATE_LITLEN_TREE: + while (_codesReceived < _codesCount) { + if OF_UNLIKELY (_skip) { + uint_fast16_t skipCount; + + switch (_codesLengths[_codesReceived]) { + case 0: + skipCount = 1; + break; + case 1: + if OF_UNLIKELY (!tryReadBits(self, + &bits, 4)) + return bytesWritten; + + skipCount = bits + 3; + break; + case 2: + if OF_UNLIKELY (!tryReadBits(self, + &bits, 9)) + return bytesWritten; + + skipCount = bits + 20; + break; + default: + assert(0); + } + + if OF_UNLIKELY (_codesReceived + skipCount > + _codesCount) + @throw [OFInvalidFormatException + exception]; + + for (uint16_t j = 0; j < skipCount; j++) + _codesLengths[_codesReceived++] = 0; + + _skip = false; + continue; + } + + if (!of_huffman_tree_walk(self, tryReadBits, + &_treeIter, &value)) + return bytesWritten; + + _treeIter = _codeLenTree; + + if (value < 3) { + _codesLengths[_codesReceived] = value; + _skip = true; + } else + _codesLengths[_codesReceived++] = value - 2; + } + + _litLenTree = of_huffman_tree_construct(_codesLengths, + _codesCount); + [self freeMemory: _codesLengths]; + + of_huffman_tree_release(_codeLenTree); + _codeLenTree = NULL; + + _state = STATE_DIST_CODES_COUNT; + goto start; + case STATE_LITLEN_TREE_SINGLE: + if OF_UNLIKELY (!tryReadBits(self, &bits, 9)) + return bytesWritten; + + _litLenTree = of_huffman_tree_construct_single(bits); + + _state = STATE_DIST_CODES_COUNT; + goto start; + case STATE_DIST_CODES_COUNT: + if OF_UNLIKELY (!tryReadBits(self, &bits, _distanceBits)) + return bytesWritten; + + if OF_UNLIKELY (bits > _dictionaryBits) + @throw [OFInvalidFormatException exception]; + + if OF_UNLIKELY (bits == 0) { + _state = STATE_DIST_TREE_SINGLE; + goto start; + } + + _codesCount = bits; + _codesReceived = 0; + _codesLengths = [self allocMemoryWithSize: bits]; + memset(_codesLengths, 0, bits); + + _treeIter = _codeLenTree; + _state = STATE_DIST_TREE; + goto start; + case STATE_DIST_TREE: + while (_codesReceived < _codesCount) { + if OF_UNLIKELY (_currentIsExtendedLength) { + if OF_UNLIKELY (!tryReadBits(self, &bits, 1)) + return bytesWritten; + + if OF_UNLIKELY (bits == 0) { + _codesReceived++; + _currentIsExtendedLength = false; + continue; + } + + _codesLengths[_codesReceived]++; + continue; + } + + if OF_UNLIKELY (!tryReadBits(self, &bits, 3)) + return bytesWritten; + + _codesLengths[_codesReceived] = bits; + + if OF_UNLIKELY (bits == 7) { + _currentIsExtendedLength = true; + continue; + } else + _codesReceived++; + } + + _distTree = of_huffman_tree_construct(_codesLengths, + _codesCount); + [self freeMemory: _codesLengths]; + + _treeIter = _litLenTree; + _state = STATE_BLOCK_LITLEN; + goto start; + case STATE_DIST_TREE_SINGLE: + if OF_UNLIKELY (!tryReadBits(self, &bits, _distanceBits)) + return bytesWritten; + + _distTree = of_huffman_tree_construct_single(bits); + + _treeIter = _litLenTree; + _state = STATE_BLOCK_LITLEN; + goto start; + case STATE_BLOCK_LITLEN: + if OF_UNLIKELY (_symbolsLeft == 0) { + of_huffman_tree_release(_litLenTree); + of_huffman_tree_release(_distTree); + _litLenTree = _distTree = NULL; + + _state = STATE_BLOCK_HEADER; + + /* + * We must return here, as there is no indication + * whether this was the last block. Whoever called this + * method needs to check if everything has been read + * already and only call read again if that is not the + * case. + * + * We must also unread the buffer, in case this was the + * last block and something else follows, e.g. another + * LHA header. + */ + [_stream unreadFromBuffer: _buffer + _bufferIndex + length: _bufferLength - + _bufferIndex]; + _bufferIndex = _bufferLength = 0; + + return bytesWritten; + } + + if OF_UNLIKELY (length == 0) + return bytesWritten; + + if OF_UNLIKELY (!of_huffman_tree_walk(self, tryReadBits, + &_treeIter, &value)) + return bytesWritten; + + if OF_LIKELY (value < 256) { + buffer[bytesWritten++] = value; + length--; + + _slidingWindow[_slidingWindowIndex] = value; + _slidingWindowIndex = (_slidingWindowIndex + 1) & + _slidingWindowMask; + + _symbolsLeft--; + _treeIter = _litLenTree; + } else { + _length = value - 253; + _treeIter = _distTree; + _state = STATE_BLOCK_DIST_LENGTH; + } + + goto start; + case STATE_BLOCK_DIST_LENGTH: + if OF_UNLIKELY (!of_huffman_tree_walk(self, tryReadBits, + &_treeIter, &value)) + return bytesWritten; + + _distance = value; + + _state = (value < 2 + ? STATE_BLOCK_LEN_DIST_PAIR + : STATE_BLOCK_DIST_LENGTH_EXTRA); + goto start; + case STATE_BLOCK_DIST_LENGTH_EXTRA: + if OF_UNLIKELY (!tryReadBits(self, &bits, _distance - 1)) + return bytesWritten; + + _distance = bits + (1 << (_distance - 1)); + + _state = STATE_BLOCK_LEN_DIST_PAIR; + goto start; + case STATE_BLOCK_LEN_DIST_PAIR: + for (uint_fast16_t i = 0; i < _length; i++) { + uint32_t idx; + + if OF_UNLIKELY (length == 0) { + _length -= i; + return bytesWritten; + } + + idx = (_slidingWindowIndex - _distance - 1) & + _slidingWindowMask; + value = _slidingWindow[idx]; + + buffer[bytesWritten++] = value; + length--; + + _slidingWindow[_slidingWindowIndex] = value; + _slidingWindowIndex = (_slidingWindowIndex + 1) & + _slidingWindowMask; + } + + _symbolsLeft--; + + _treeIter = _litLenTree; + _state = STATE_BLOCK_LITLEN; + goto start; + } +} + +- (bool)lowlevelIsAtEndOfStream +{ + if (_stream == nil) + @throw [OFNotOpenException exceptionWithObject: self]; + + return ([_stream isAtEndOfStream] && + _bufferLength - _bufferIndex == 0 && _state == STATE_BLOCK_HEADER); +} + +- (void)close +{ + [_stream release]; + _stream = nil; + + [super close]; +} +@end @implementation OFLHAArchive_FileReadStream - (instancetype)of_initWithStream: (OFStream *)stream entry: (OFLHAArchiveEntry *)entry { self = [super init]; @try { - _stream = [stream retain]; - /* - * Use the compressed size, as that is the number of bytes we - * need to skip for the next entry and is equal for - * uncompressed files, the only thing supported so far. - */ - _toRead = [entry compressedSize]; + OFString *method = [entry method]; + + if ([method isEqual: @"-lh4-"] || [method isEqual: @"-lh5-"]) + _stream = [[OFLHAArchive_LHStream alloc] + of_initWithStream: stream + distanceBits: 4 + dictionaryBits: 14]; + else if ([method isEqual: @"-lh6-"]) + _stream = [[OFLHAArchive_LHStream alloc] + of_initWithStream: stream + distanceBits: 5 + dictionaryBits: 16]; + else if ([method isEqual: @"-lh7-"]) + _stream = [[OFLHAArchive_LHStream alloc] + of_initWithStream: stream + distanceBits: 5 + dictionaryBits: 17]; + else + _stream = [stream retain]; + + if ([method isEqual: @"-lh0-"] || [method isEqual: @"-lhd-"] || + [method isEqual: @"-lz4-"] || [method isEqual: @"-lh5-"] || + [method isEqual: @"-lh6-"] || [method isEqual: @"-lh6-"]) + _toRead = [entry uncompressedSize]; + else + _toRead = [entry compressedSize]; } @catch (id e) { [self release]; @throw e; } @@ -239,15 +765,15 @@ length = _toRead; ret = [_stream readIntoBuffer: buffer length: length]; - if (ret == 0) - _atEndOfStream = true; - _toRead -= ret; + if (_toRead == 0) + _atEndOfStream = true; + return ret; } - (bool)lowlevelIsAtEndOfStream {