ObjFW  Artifact [19e9187eb0]

Artifact 19e9187eb005d44f00e3e2cf0e6a6e01d5630131ba221c64bb6130501614fbd7:

  • File src/OFLHAArchive.m — part of check-in [744956a000] at 2018-06-04 22:47:15 on branch trunk — OFLHAArchive: Prefill the sliding window with ' '

    Apparently, there are two incompatible variants of LHA: One where the
    sliding window is prefilled with zeros and one where it is prefilled
    with spaces. It seems prefilling it with spaces is the original way,
    while LHA on Amiga prefilled it with zeros.

    As it is impossible to detect which one was used, a flag will be added
    later to provide an Amiga compatibility mode. (user: js, size: 18365) [annotate] [blame] [check-ins using]


/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017,
 *               2018
 *   Jonathan Schleifer <js@heap.zone>
 *
 * All rights reserved.
 *
 * This file is part of ObjFW. It may be distributed under the terms of the
 * Q Public License 1.0, which can be found in the file LICENSE.QPL included in
 * the packaging of this file.
 *
 * Alternatively, it may be distributed under the terms of the GNU General
 * Public License, either version 2 or 3, which can be found in the file
 * LICENSE.GPLv2 or LICENSE.GPLv3 respectively included in the packaging of this
 * file.
 */

#include "config.h"

#include <assert.h>

#import "OFLHAArchive.h"
#import "OFLHAArchiveEntry.h"
#import "OFLHAArchiveEntry+Private.h"
#ifdef OF_HAVE_FILES
# import "OFFile.h"
#endif
#import "OFStream.h"
#import "OFSeekableStream.h"
#import "OFString.h"

#import "crc16.h"
#import "huffman_tree.h"

#import "OFChecksumFailedException.h"
#import "OFInvalidArgumentException.h"
#import "OFInvalidFormatException.h"
#import "OFNotImplementedException.h"
#import "OFNotOpenException.h"
#import "OFTruncatedDataException.h"

#define LHSTREAM_BUFFER_SIZE 4096

@interface OFLHAArchive_LHStream: OFStream
{
@public
	OFStream *_stream;
	uint8_t _distanceBits, _dictionaryBits;
	unsigned char _buffer[LHSTREAM_BUFFER_SIZE];
	uint16_t _bufferIndex, _bufferLength;
	uint8_t _byte;
	uint8_t _bitIndex, _savedBitsLength;
	uint16_t _savedBits;
	unsigned char *_slidingWindow;
	uint32_t _slidingWindowIndex, _slidingWindowMask;
	int _state;
	uint16_t _symbolsLeft;
	struct of_huffman_tree *_codeLenTree, *_litLenTree, *_distTree;
	struct of_huffman_tree *_treeIter;
	uint16_t _codesCount, _codesReceived;
	bool _currentIsExtendedLength, _skip;
	uint8_t *_codesLengths;
	uint16_t _length;
	uint32_t _distance;
}

- (instancetype)of_initWithStream: (OFStream *)stream
		     distanceBits: (uint8_t)distanceBits
		   dictionaryBits: (uint8_t)dictionaryBits;
@end

@interface OFLHAArchive_FileReadStream: OFStream <OFReadyForReadingObserving>
{
	OF_KINDOF(OFStream *) _stream;
	uint32_t _toRead;
	uint16_t _expectedCRC16, _CRC16;
	bool _atEndOfStream;
}

- (instancetype)of_initWithStream: (OFStream *)stream
			    entry: (OFLHAArchiveEntry *)entry;
- (void)of_skip;
@end

enum state {
	STATE_BLOCK_HEADER,
	STATE_CODE_LEN_CODES_COUNT,
	STATE_CODE_LEN_TREE,
	STATE_CODE_LEN_TREE_SINGLE,
	STATE_LITLEN_CODES_COUNT,
	STATE_LITLEN_TREE,
	STATE_LITLEN_TREE_SINGLE,
	STATE_DIST_CODES_COUNT,
	STATE_DIST_TREE,
	STATE_DIST_TREE_SINGLE,
	STATE_BLOCK_LITLEN,
	STATE_BLOCK_DIST_LENGTH,
	STATE_BLOCK_DIST_LENGTH_EXTRA,
	STATE_BLOCK_LEN_DIST_PAIR
};

static bool
tryReadBits(OFLHAArchive_LHStream *stream, uint16_t *bits, uint8_t count)
{
	uint16_t ret = stream->_savedBits;

	assert(stream->_savedBitsLength < count);

	for (uint_fast8_t i = stream->_savedBitsLength; i < count; i++) {
		if OF_UNLIKELY (stream->_bitIndex == 8) {
			if (stream->_bufferIndex < stream->_bufferLength)
				stream->_byte =
				    stream->_buffer[stream->_bufferIndex++];
			else {
				size_t length = [stream->_stream
				    readIntoBuffer: stream->_buffer
					    length: LHSTREAM_BUFFER_SIZE];

				if OF_UNLIKELY (length < 1) {
					stream->_savedBits = ret;
					stream->_savedBitsLength = i;
					return false;
				}

				stream->_byte = stream->_buffer[0];
				stream->_bufferIndex = 1;
				stream->_bufferLength = (uint16_t)length;
			}

			stream->_bitIndex = 0;
		}

		ret = (ret << 1) |
		    ((stream->_byte >> (7 - stream->_bitIndex++)) & 1);
	}

	stream->_savedBits = 0;
	stream->_savedBitsLength = 0;
	*bits = ret;

	return true;
}

@implementation OFLHAArchive
@synthesize encoding = _encoding;

+ (instancetype)archiveWithStream: (OF_KINDOF(OFStream *))stream
			     mode: (OFString *)mode
{
	return [[[self alloc] initWithStream: stream
					mode: mode] autorelease];
}

#ifdef OF_HAVE_FILES
+ (instancetype)archiveWithPath: (OFString *)path
			   mode: (OFString *)mode
{
	return [[[self alloc] initWithPath: path
				      mode: mode] autorelease];
}
#endif

- (instancetype)init
{
	OF_INVALID_INIT_METHOD
}

- (instancetype)initWithStream: (OF_KINDOF(OFStream *))stream
			  mode: (OFString *)mode
{
	self = [super init];

	@try {
		if (![mode isEqual: @"r"])
			@throw [OFNotImplementedException
			    exceptionWithSelector: _cmd
					   object: self];

		_stream = [stream retain];
		_encoding = OF_STRING_ENCODING_ISO_8859_1;
	} @catch (id e) {
		[self release];
		@throw e;
	}

	return self;
}

#ifdef OF_HAVE_FILES
- (instancetype)initWithPath: (OFString *)path
			mode: (OFString *)mode
{
	OFFile *file;

	if ([mode isEqual: @"a"])
		file = [[OFFile alloc] initWithPath: path
					       mode: @"r+"];
	else
		file = [[OFFile alloc] initWithPath: path
					       mode: mode];

	@try {
		self = [self initWithStream: file
				       mode: mode];
	} @finally {
		[file release];
	}

	return self;
}
#endif

- (void)dealloc
{
	[self close];

	[super dealloc];
}

- (OFLHAArchiveEntry *)nextEntry
{
	char header[21];
	size_t headerLen;

	[_lastEntry release];
	_lastEntry = nil;

	[_lastReturnedStream of_skip];
	[_lastReturnedStream close];
	[_lastReturnedStream release];
	_lastReturnedStream = nil;

	for (headerLen = 0; headerLen < 21;) {
		if ([_stream isAtEndOfStream]) {
			if (headerLen == 0)
				return nil;

			if (headerLen == 1 && header[0] == 0)
				return nil;

			@throw [OFTruncatedDataException exception];
		}

		headerLen += [_stream readIntoBuffer: header + headerLen
					      length: 21 - headerLen];
	}

	_lastEntry = [[OFLHAArchiveEntry alloc]
	    of_initWithHeader: header
		       stream: _stream
		     encoding: _encoding];

	_lastReturnedStream = [[OFLHAArchive_FileReadStream alloc]
	    of_initWithStream: _stream
			entry: _lastEntry];

	return [[_lastEntry copy] autorelease];
}

- (OFStream <OFReadyForReadingObserving> *)streamForReadingCurrentEntry
{
	if (_lastReturnedStream == nil)
		@throw [OFInvalidArgumentException exception];

	return [[_lastReturnedStream retain] autorelease];
}

- (void)close
{
	if (_stream == nil)
		return;

	[_lastEntry release];
	_lastEntry = nil;

	[_lastReturnedStream close];
	[_lastReturnedStream release];
	_lastReturnedStream = nil;

	[_stream release];
	_stream = nil;
}
@end

@implementation OFLHAArchive_LHStream
- (instancetype)of_initWithStream: (OFStream *)stream
		     distanceBits: (uint8_t)distanceBits
		   dictionaryBits: (uint8_t)dictionaryBits
{
	self = [super init];

	@try {
		_stream = [stream retain];

		/* 0-7 address the bit, 8 means fetch next byte */
		_bitIndex = 8;

		_distanceBits = distanceBits;
		_dictionaryBits = dictionaryBits;

		_slidingWindowMask = (1 << dictionaryBits) - 1;
		_slidingWindow = [self allocMemoryWithSize:
		    _slidingWindowMask + 1];
		memset(_slidingWindow, ' ', _slidingWindowMask + 1);
	} @catch (id e) {
		[self release];
		@throw e;
	}

	return self;
}

- (void)dealloc
{
	[self close];

	if (_codeLenTree != NULL)
		of_huffman_tree_release(_codeLenTree);
	if (_litLenTree != NULL)
		of_huffman_tree_release(_litLenTree);
	if (_distTree != NULL)
		of_huffman_tree_release(_distTree);

	[super dealloc];
}

- (size_t)lowlevelReadIntoBuffer: (void *)buffer_
			  length: (size_t)length
{
	unsigned char *buffer = buffer_;
	uint16_t bits, value;
	size_t bytesWritten = 0;

	if (_stream == nil)
		@throw [OFNotOpenException exceptionWithObject: self];

	if ([_stream isAtEndOfStream] && _bufferLength - _bufferIndex == 0 &&
	    _state == STATE_BLOCK_HEADER)
		return 0;

start:
	switch ((enum state)_state) {
	case STATE_BLOCK_HEADER:
		if OF_UNLIKELY (!tryReadBits(self, &bits, 16))
			return bytesWritten;

		_symbolsLeft = bits;

		_state = STATE_CODE_LEN_CODES_COUNT;
		goto start;
	case STATE_CODE_LEN_CODES_COUNT:
		if OF_UNLIKELY (!tryReadBits(self, &bits, 5))
			return bytesWritten;

		if OF_UNLIKELY (bits > 20)
			@throw [OFInvalidFormatException exception];

		if OF_UNLIKELY (bits == 0) {
			_state = STATE_CODE_LEN_TREE_SINGLE;
			goto start;
		}

		_codesCount = bits;
		_codesReceived = 0;
		_codesLengths = [self allocZeroedMemoryWithSize: bits];
		_skip = true;

		_state = STATE_CODE_LEN_TREE;
		goto start;
	case STATE_CODE_LEN_TREE:
		while (_codesReceived < _codesCount) {
			if OF_UNLIKELY (_currentIsExtendedLength) {
				if OF_UNLIKELY (!tryReadBits(self, &bits, 1))
					return bytesWritten;

				if OF_UNLIKELY (bits == 0) {
					_codesReceived++;
					_currentIsExtendedLength = false;
					continue;
				}

				_codesLengths[_codesReceived]++;
				continue;
			}

			if OF_UNLIKELY (_codesReceived == 3 && _skip) {
				if OF_UNLIKELY (!tryReadBits(self, &bits, 2))
					return bytesWritten;

				if OF_UNLIKELY (_codesReceived + bits >
				    _codesCount)
					@throw [OFInvalidFormatException
					    exception];

				for (uint_fast8_t j = 0; j < bits; j++)
					_codesLengths[_codesReceived++] = 0;

				_skip = false;
				continue;
			}

			if OF_UNLIKELY (!tryReadBits(self, &bits, 3))
				return bytesWritten;

			_codesLengths[_codesReceived] = bits;

			if OF_UNLIKELY (bits == 7) {
				_currentIsExtendedLength = true;
				continue;
			} else
				_codesReceived++;
		}

		_codeLenTree = of_huffman_tree_construct(_codesLengths,
		    _codesCount);
		[self freeMemory: _codesLengths];

		_state = STATE_LITLEN_CODES_COUNT;
		goto start;
	case STATE_CODE_LEN_TREE_SINGLE:
		if OF_UNLIKELY (!tryReadBits(self, &bits, 5))
			return bytesWritten;

		_codeLenTree = of_huffman_tree_construct_single(bits);

		_state = STATE_LITLEN_CODES_COUNT;
		goto start;
	case STATE_LITLEN_CODES_COUNT:
		if OF_UNLIKELY (!tryReadBits(self, &bits, 9))
			return bytesWritten;

		if OF_UNLIKELY (bits > 510)
			@throw [OFInvalidFormatException exception];

		if OF_UNLIKELY (bits == 0) {
			of_huffman_tree_release(_codeLenTree);
			_codeLenTree = NULL;

			_state = STATE_LITLEN_TREE_SINGLE;
			goto start;
		}

		_codesCount = bits;
		_codesReceived = 0;
		_codesLengths = [self allocZeroedMemoryWithSize: bits];
		_skip = false;

		_treeIter = _codeLenTree;
		_state = STATE_LITLEN_TREE;
		goto start;
	case STATE_LITLEN_TREE:
		while (_codesReceived < _codesCount) {
			if OF_UNLIKELY (_skip) {
				uint16_t skipCount;

				switch (_codesLengths[_codesReceived]) {
				case 0:
					skipCount = 1;
					break;
				case 1:
					if OF_UNLIKELY (!tryReadBits(self,
					    &bits, 4))
						return bytesWritten;

					skipCount = bits + 3;
					break;
				case 2:
					if OF_UNLIKELY (!tryReadBits(self,
					    &bits, 9))
						return bytesWritten;

					skipCount = bits + 20;
					break;
				default:
					assert(0);
				}

				if OF_UNLIKELY (_codesReceived + skipCount >
				    _codesCount)
					@throw [OFInvalidFormatException
					    exception];

				for (uint_fast16_t j = 0; j < skipCount; j++)
					_codesLengths[_codesReceived++] = 0;

				_skip = false;
				continue;
			}

			if (!of_huffman_tree_walk(self, tryReadBits,
			    &_treeIter, &value))
				return bytesWritten;

			_treeIter = _codeLenTree;

			if (value < 3) {
				_codesLengths[_codesReceived] = value;
				_skip = true;
			} else
				_codesLengths[_codesReceived++] = value - 2;
		}

		_litLenTree = of_huffman_tree_construct(_codesLengths,
		    _codesCount);
		[self freeMemory: _codesLengths];

		of_huffman_tree_release(_codeLenTree);
		_codeLenTree = NULL;

		_state = STATE_DIST_CODES_COUNT;
		goto start;
	case STATE_LITLEN_TREE_SINGLE:
		if OF_UNLIKELY (!tryReadBits(self, &bits, 9))
			return bytesWritten;

		_litLenTree = of_huffman_tree_construct_single(bits);

		_state = STATE_DIST_CODES_COUNT;
		goto start;
	case STATE_DIST_CODES_COUNT:
		if OF_UNLIKELY (!tryReadBits(self, &bits, _distanceBits))
			return bytesWritten;

		if OF_UNLIKELY (bits > _dictionaryBits)
			@throw [OFInvalidFormatException exception];

		if OF_UNLIKELY (bits == 0) {
			_state = STATE_DIST_TREE_SINGLE;
			goto start;
		}

		_codesCount = bits;
		_codesReceived = 0;
		_codesLengths = [self allocZeroedMemoryWithSize: bits];

		_treeIter = _codeLenTree;
		_state = STATE_DIST_TREE;
		goto start;
	case STATE_DIST_TREE:
		while (_codesReceived < _codesCount) {
			if OF_UNLIKELY (_currentIsExtendedLength) {
				if OF_UNLIKELY (!tryReadBits(self, &bits, 1))
					return bytesWritten;

				if OF_UNLIKELY (bits == 0) {
					_codesReceived++;
					_currentIsExtendedLength = false;
					continue;
				}

				_codesLengths[_codesReceived]++;
				continue;
			}

			if OF_UNLIKELY (!tryReadBits(self, &bits, 3))
				return bytesWritten;

			_codesLengths[_codesReceived] = bits;

			if OF_UNLIKELY (bits == 7) {
				_currentIsExtendedLength = true;
				continue;
			} else
				_codesReceived++;
		}

		_distTree = of_huffman_tree_construct(_codesLengths,
		    _codesCount);
		[self freeMemory: _codesLengths];

		_treeIter = _litLenTree;
		_state = STATE_BLOCK_LITLEN;
		goto start;
	case STATE_DIST_TREE_SINGLE:
		if OF_UNLIKELY (!tryReadBits(self, &bits, _distanceBits))
			return bytesWritten;

		_distTree = of_huffman_tree_construct_single(bits);

		_treeIter = _litLenTree;
		_state = STATE_BLOCK_LITLEN;
		goto start;
	case STATE_BLOCK_LITLEN:
		if OF_UNLIKELY (_symbolsLeft == 0) {
			of_huffman_tree_release(_litLenTree);
			of_huffman_tree_release(_distTree);
			_litLenTree = _distTree = NULL;

			_state = STATE_BLOCK_HEADER;

			/*
			 * We must return here, as there is no indication
			 * whether this was the last block. Whoever called this
			 * method needs to check if everything has been read
			 * already and only call read again if that is not the
			 * case.
			 *
			 * We must also unread the buffer, in case this was the
			 * last block and something else follows, e.g. another
			 * LHA header.
			 */
			[_stream unreadFromBuffer: _buffer + _bufferIndex
					   length: _bufferLength -
						   _bufferIndex];
			_bufferIndex = _bufferLength = 0;

			return bytesWritten;
		}

		if OF_UNLIKELY (length == 0)
			return bytesWritten;

		if OF_UNLIKELY (!of_huffman_tree_walk(self, tryReadBits,
		    &_treeIter, &value))
			return bytesWritten;

		if OF_LIKELY (value < 256) {
			buffer[bytesWritten++] = value;
			length--;

			_slidingWindow[_slidingWindowIndex] = value;
			_slidingWindowIndex = (_slidingWindowIndex + 1) &
			    _slidingWindowMask;

			_symbolsLeft--;
			_treeIter = _litLenTree;
		} else {
			_length = value - 253;
			_treeIter = _distTree;
			_state = STATE_BLOCK_DIST_LENGTH;
		}

		goto start;
	case STATE_BLOCK_DIST_LENGTH:
		if OF_UNLIKELY (!of_huffman_tree_walk(self, tryReadBits,
		    &_treeIter, &value))
			return bytesWritten;

		_distance = value;

		_state = (value < 2
		    ? STATE_BLOCK_LEN_DIST_PAIR
		    : STATE_BLOCK_DIST_LENGTH_EXTRA);
		goto start;
	case STATE_BLOCK_DIST_LENGTH_EXTRA:
		if OF_UNLIKELY (!tryReadBits(self, &bits, _distance - 1))
			return bytesWritten;

		_distance = bits + (1 << (_distance - 1));

		_state = STATE_BLOCK_LEN_DIST_PAIR;
		goto start;
	case STATE_BLOCK_LEN_DIST_PAIR:
		for (uint_fast16_t i = 0; i < _length; i++) {
			uint32_t idx;

			if OF_UNLIKELY (length == 0) {
				_length -= i;
				return bytesWritten;
			}

			idx = (_slidingWindowIndex - _distance - 1) &
			    _slidingWindowMask;
			value = _slidingWindow[idx];

			buffer[bytesWritten++] = value;
			length--;

			_slidingWindow[_slidingWindowIndex] = value;
			_slidingWindowIndex = (_slidingWindowIndex + 1) &
			    _slidingWindowMask;
		}

		_symbolsLeft--;

		_treeIter = _litLenTree;
		_state = STATE_BLOCK_LITLEN;
		goto start;
	}

	OF_UNREACHABLE
}

- (bool)lowlevelIsAtEndOfStream
{
	if (_stream == nil)
		@throw [OFNotOpenException exceptionWithObject: self];

	return ([_stream isAtEndOfStream] &&
	    _bufferLength - _bufferIndex == 0 && _state == STATE_BLOCK_HEADER);
}

- (void)close
{
	[_stream release];
	_stream = nil;

	[super close];
}
@end

@implementation OFLHAArchive_FileReadStream
- (instancetype)of_initWithStream: (OFStream *)stream
			    entry: (OFLHAArchiveEntry *)entry
{
	self = [super init];

	@try {
		OFString *method = [entry method];

		if ([method isEqual: @"-lh4-"] || [method isEqual: @"-lh5-"])
			_stream = [[OFLHAArchive_LHStream alloc]
			    of_initWithStream: stream
				 distanceBits: 4
			       dictionaryBits: 14];
		else if ([method isEqual: @"-lh6-"])
			_stream = [[OFLHAArchive_LHStream alloc]
			    of_initWithStream: stream
				 distanceBits: 5
			       dictionaryBits: 16];
		else if ([method isEqual: @"-lh7-"])
			_stream = [[OFLHAArchive_LHStream alloc]
			    of_initWithStream: stream
				 distanceBits: 5
			       dictionaryBits: 17];
		else
			_stream = [stream retain];

		if ([method isEqual: @"-lh0-"] || [method isEqual: @"-lhd-"] ||
		    [method isEqual: @"-lz4-"] || [method isEqual: @"-lh5-"] ||
		    [method isEqual: @"-lh6-"] || [method isEqual: @"-lh6-"])
			_toRead = [entry uncompressedSize];
		else
			_toRead = [entry compressedSize];

		_expectedCRC16 = [entry CRC16];
	} @catch (id e) {
		[self release];
		@throw e;
	}

	return self;
}

- (void)dealloc
{
	[self close];

	[super dealloc];
}

- (size_t)lowlevelReadIntoBuffer: (void *)buffer
			  length: (size_t)length
{
	size_t ret;

	if (_stream == nil)
		@throw [OFNotOpenException exceptionWithObject: self];

	if (_atEndOfStream)
		return 0;

	if (length > _toRead)
		length = _toRead;

	ret = [_stream readIntoBuffer: buffer
			       length: length];

	_toRead -= ret;
	_CRC16 = of_crc16(_CRC16, buffer, ret);

	if (_toRead == 0) {
		_atEndOfStream = true;

		if (_CRC16 != _expectedCRC16)
			@throw [OFChecksumFailedException exception];
	}

	return ret;
}

- (bool)lowlevelIsAtEndOfStream
{
	if (_stream == nil)
		@throw [OFNotOpenException exceptionWithObject: self];

	return _atEndOfStream;
}

- (bool)hasDataInReadBuffer
{
	return ([super hasDataInReadBuffer] || [_stream hasDataInReadBuffer]);
}

- (int)fileDescriptorForReading
{
	return [_stream fileDescriptorForReading];
}

- (void)close
{
	[self of_skip];

	[_stream release];
	_stream = nil;

	[super close];
}

- (void)of_skip
{
	if (_stream == nil || _toRead == 0)
		return;

	if ([_stream isKindOfClass: [OFSeekableStream class]] &&
	    (sizeof(of_offset_t) > 4 || _toRead < INT32_MAX)) {
		[_stream seekToOffset: (of_offset_t)_toRead
			       whence: SEEK_CUR];

		_toRead = 0;
	} else {
		while (_toRead > 0) {
			char buffer[512];
			size_t min = _toRead;

			if (min > 512)
				min = 512;

			_toRead -= [_stream readIntoBuffer: buffer
						    length: min];
		}
	}
}
@end