ObjFW  OFXMLParser.m at [ca9b843d37]

File src/OFXMLParser.m artifact e395ba83dc part of check-in ca9b843d37


/*
 * Copyright (c) 2008-2021 Jonathan Schleifer <js@nil.im>
 *
 * All rights reserved.
 *
 * This file is part of ObjFW. It may be distributed under the terms of the
 * Q Public License 1.0, which can be found in the file LICENSE.QPL included in
 * the packaging of this file.
 *
 * Alternatively, it may be distributed under the terms of the GNU General
 * Public License, either version 2 or 3, which can be found in the file
 * LICENSE.GPLv2 or LICENSE.GPLv3 respectively included in the packaging of this
 * file.
 */

#include "config.h"

#define OF_XML_PARSER_M

#include <string.h>

#import "OFXMLParser.h"
#import "OFString.h"
#import "OFArray.h"
#import "OFDictionary.h"
#import "OFData.h"
#import "OFXMLAttribute.h"
#import "OFStream.h"
#ifdef OF_HAVE_FILES
# import "OFFile.h"
#endif
#import "OFSystemInfo.h"

#import "OFInitializationFailedException.h"
#import "OFInvalidArgumentException.h"
#import "OFInvalidEncodingException.h"
#import "OFInvalidFormatException.h"
#import "OFMalformedXMLException.h"
#import "OFOutOfRangeException.h"
#import "OFUnboundPrefixException.h"

@interface OFXMLParser () <OFStringXMLUnescapingDelegate>
@end

static void inByteOrderMarkState(OFXMLParser *);
static void outsideTagState(OFXMLParser *);
static void tagOpenedState(OFXMLParser *);
static void inProcessingInstructionsState(OFXMLParser *);
static void inTagNameState(OFXMLParser *);
static void inCloseTagNameState(OFXMLParser *);
static void inTagState(OFXMLParser *);
static void inAttributeNameState(OFXMLParser *);
static void expectAttributeEqualSignState(OFXMLParser *);
static void expectAttributeDelimiterState(OFXMLParser *);
static void inAttributeValueState(OFXMLParser *);
static void expectTagCloseState(OFXMLParser *);
static void expectSpaceOrTagCloseState(OFXMLParser *);
static void inExclamationMarkState(OFXMLParser *);
static void inCDATAOpeningState(OFXMLParser *);
static void inCDATAState(OFXMLParser *);
static void inCommentOpeningState(OFXMLParser *);
static void inCommentState1(OFXMLParser *);
static void inCommentState2(OFXMLParser *);
static void inDOCTYPEState(OFXMLParser *);
typedef void (*state_function_t)(OFXMLParser *);
static state_function_t lookupTable[] = {
	[OF_XMLPARSER_IN_BYTE_ORDER_MARK] = inByteOrderMarkState,
	[OF_XMLPARSER_OUTSIDE_TAG] = outsideTagState,
	[OF_XMLPARSER_TAG_OPENED] = tagOpenedState,
	[OF_XMLPARSER_IN_PROCESSING_INSTRUCTIONS] =
	    inProcessingInstructionsState,
	[OF_XMLPARSER_IN_TAG_NAME] = inTagNameState,
	[OF_XMLPARSER_IN_CLOSE_TAG_NAME] = inCloseTagNameState,
	[OF_XMLPARSER_IN_TAG] = inTagState,
	[OF_XMLPARSER_IN_ATTRIBUTE_NAME] = inAttributeNameState,
	[OF_XMLPARSER_EXPECT_ATTRIBUTE_EQUAL_SIGN] =
	    expectAttributeEqualSignState,
	[OF_XMLPARSER_EXPECT_ATTRIBUTE_DELIMITER] =
	    expectAttributeDelimiterState,
	[OF_XMLPARSER_IN_ATTRIBUTE_VALUE] = inAttributeValueState,
	[OF_XMLPARSER_EXPECT_TAG_CLOSE] = expectTagCloseState,
	[OF_XMLPARSER_EXPECT_SPACE_OR_TAG_CLOSE] = expectSpaceOrTagCloseState,
	[OF_XMLPARSER_IN_EXCLAMATION_MARK] = inExclamationMarkState,
	[OF_XMLPARSER_IN_CDATA_OPENING] = inCDATAOpeningState,
	[OF_XMLPARSER_IN_CDATA] = inCDATAState,
	[OF_XMLPARSER_IN_COMMENT_OPENING] = inCommentOpeningState,
	[OF_XMLPARSER_IN_COMMENT_1] = inCommentState1,
	[OF_XMLPARSER_IN_COMMENT_2] = inCommentState2,
	[OF_XMLPARSER_IN_DOCTYPE] = inDOCTYPEState
};

static OF_INLINE void
appendToBuffer(OFMutableData *buffer, const char *string,
    of_string_encoding_t encoding, size_t length)
{
	if OF_LIKELY(encoding == OF_STRING_ENCODING_UTF_8)
		[buffer addItems: string count: length];
	else {
		void *pool = objc_autoreleasePoolPush();
		OFString *tmp = [OFString stringWithCString: string
						   encoding: encoding
						     length: length];
		[buffer addItems: tmp.UTF8String count: tmp.UTF8StringLength];
		objc_autoreleasePoolPop(pool);
	}
}

static OFString *
transformString(OFXMLParser *parser, OFMutableData *buffer, size_t cut,
    bool unescape)
{
	char *items = buffer.mutableItems;
	size_t length = buffer.count - cut;
	bool hasEntities = false;
	OFString *ret;

	for (size_t i = 0; i < length; i++) {
		if (items[i] == '\r') {
			if (i + 1 < length && items[i + 1] == '\n') {
				[buffer removeItemAtIndex: i];
				items = buffer.mutableItems;

				i--;
				length--;
			} else
				items[i] = '\n';
		} else if (items[i] == '&')
			hasEntities = true;
	}

	ret = [OFString stringWithUTF8String: items length: length];

	if (unescape && hasEntities) {
		@try {
			return [ret stringByXMLUnescapingWithDelegate: parser];
		} @catch (OFInvalidFormatException *e) {
			@throw [OFMalformedXMLException
			    exceptionWithParser: parser];
		}
	}

	return ret;
}

static OFString *
namespaceForPrefix(OFString *prefix, OFArray *namespaces)
{
	OFDictionary *const *objects = namespaces.objects;
	size_t count = namespaces.count;

	if (prefix == nil)
		prefix = @"";

	while (count > 0) {
		OFString *tmp;

		if ((tmp = [objects[--count] objectForKey: prefix]) != nil)
			return tmp;
	}

	return nil;
}

static OF_INLINE void
resolveAttributeNamespace(OFXMLAttribute *attribute, OFArray *namespaces,
    OFXMLParser *self)
{
	OFString *attributeNS;
	OFString *attributePrefix = attribute->_namespace;

	if (attributePrefix == nil)
		return;

	attributeNS = namespaceForPrefix(attributePrefix, namespaces);

	if ((attributePrefix != nil && attributeNS == nil))
		@throw [OFUnboundPrefixException
		    exceptionWithPrefix: attributePrefix
				 parser: self];

	[attribute->_namespace release];
	attribute->_namespace = [attributeNS retain];
}

@implementation OFXMLParser
@synthesize delegate = _delegate, depthLimit = _depthLimit;

+ (instancetype)parser
{
	return [[[self alloc] init] autorelease];
}

- (instancetype)init
{
	self = [super init];

	@try {
		void *pool;
		OFMutableDictionary *dict;

		_buffer = [[OFMutableData alloc] init];
		_previous = [[OFMutableArray alloc] init];
		_namespaces = [[OFMutableArray alloc] init];
		_attributes = [[OFMutableArray alloc] init];

		pool = objc_autoreleasePoolPush();
		dict = [OFMutableDictionary dictionaryWithKeysAndObjects:
		    @"xml", @"http://www.w3.org/XML/1998/namespace",
		    @"xmlns", @"http://www.w3.org/2000/xmlns/", nil];
		[_namespaces addObject: dict];

		_acceptProlog = true;
		_lineNumber = 1;
		_encoding = OF_STRING_ENCODING_UTF_8;
		_depthLimit = 32;

		objc_autoreleasePoolPop(pool);
	} @catch (id e) {
		[self release];
		@throw e;
	}

	return self;
}

- (void)dealloc
{
	[_buffer release];
	[_name release];
	[_prefix release];
	[_namespaces release];
	[_attributes release];
	[_attributeName release];
	[_attributePrefix release];
	[_previous release];

	[super dealloc];
}

- (void)parseBuffer: (const char *)buffer length: (size_t)length
{
	_data = buffer;

	for (_i = _last = 0; _i < length; _i++) {
		size_t j = _i;

		lookupTable[_state](self);

		/* Ensure we don't count this character twice */
		if (_i != j)
			continue;

		if (_data[_i] == '\r' || (_data[_i] == '\n' &&
		    !_lastCarriageReturn))
			_lineNumber++;

		_lastCarriageReturn = (_data[_i] == '\r');
	}

	/* In OF_XMLPARSER_IN_TAG, there can be only spaces */
	if (length - _last > 0 && _state != OF_XMLPARSER_IN_TAG)
		appendToBuffer(_buffer, _data + _last, _encoding,
		    length - _last);
}

- (void)parseString: (OFString *)string
{
	[self parseBuffer: string.UTF8String length: string.UTF8StringLength];
}

- (void)parseStream: (OFStream *)stream
{
	size_t pageSize = [OFSystemInfo pageSize];
	char *buffer = of_alloc(1, pageSize);

	@try {
		while (!stream.atEndOfStream) {
			size_t length = [stream readIntoBuffer: buffer
							length: pageSize];
			[self parseBuffer: buffer length: length];
		}
	} @finally {
		free(buffer);
	}
}

static void
inByteOrderMarkState(OFXMLParser *self)
{
	if (self->_data[self->_i] != "\xEF\xBB\xBF"[self->_level]) {
		if (self->_level == 0) {
			self->_state = OF_XMLPARSER_OUTSIDE_TAG;
			self->_i--;
			return;
		}

		@throw [OFMalformedXMLException exceptionWithParser: self];
	}

	if (self->_level++ == 2)
		self->_state = OF_XMLPARSER_OUTSIDE_TAG;

	self->_last = self->_i + 1;
}

/* Not in a tag */
static void
outsideTagState(OFXMLParser *self)
{
	size_t length;

	if ((self->_finishedParsing || self->_previous.count < 1) &&
	    self->_data[self->_i] != ' '  && self->_data[self->_i] != '\t' &&
	    self->_data[self->_i] != '\n' && self->_data[self->_i] != '\r' &&
	    self->_data[self->_i] != '<')
		@throw [OFMalformedXMLException exceptionWithParser: self];

	if (self->_data[self->_i] != '<')
		return;

	if ((length = self->_i - self->_last) > 0)
		appendToBuffer(self->_buffer, self->_data + self->_last,
		    self->_encoding, length);

	if (self->_buffer.count > 0) {
		void *pool = objc_autoreleasePoolPush();
		OFString *characters = transformString(self, self->_buffer, 0,
		    true);

		if ([self->_delegate respondsToSelector:
		    @selector(parser:foundCharacters:)])
			[self->_delegate parser: self
				foundCharacters: characters];

		objc_autoreleasePoolPop(pool);
	}

	[self->_buffer removeAllItems];

	self->_last = self->_i + 1;
	self->_state = OF_XMLPARSER_TAG_OPENED;
}

/* Tag was just opened */
static void
tagOpenedState(OFXMLParser *self)
{
	if (self->_finishedParsing && self->_data[self->_i] != '!' &&
	    self->_data[self->_i] != '?')
		@throw [OFMalformedXMLException exceptionWithParser: self];

	switch (self->_data[self->_i]) {
	case '?':
		self->_last = self->_i + 1;
		self->_state = OF_XMLPARSER_IN_PROCESSING_INSTRUCTIONS;
		self->_level = 0;
		break;
	case '/':
		self->_last = self->_i + 1;
		self->_state = OF_XMLPARSER_IN_CLOSE_TAG_NAME;
		self->_acceptProlog = false;
		break;
	case '!':
		self->_last = self->_i + 1;
		self->_state = OF_XMLPARSER_IN_EXCLAMATION_MARK;
		self->_acceptProlog = false;
		break;
	default:
		if (self->_depthLimit > 0 &&
		    self->_previous.count >= self->_depthLimit)
			@throw [OFOutOfRangeException exception];

		self->_state = OF_XMLPARSER_IN_TAG_NAME;
		self->_acceptProlog = false;
		self->_i--;
		break;
	}
}

/* <?xml […]?> */
static bool
parseXMLProcessingInstructions(OFXMLParser *self, OFString *pi)
{
	const char *cString;
	size_t length, last;
	int PIState = 0;
	OFString *attribute = nil;
	OFMutableString *value = nil;
	char piDelimiter = 0;
	bool hasVersion = false;

	if (!self->_acceptProlog)
		return false;

	self->_acceptProlog = false;

	pi = [pi substringFromIndex: 3];
	pi = pi.stringByDeletingEnclosingWhitespaces;

	cString = pi.UTF8String;
	length = pi.UTF8StringLength;

	last = 0;
	for (size_t i = 0; i < length; i++) {
		switch (PIState) {
		case 0:
			if (cString[i] == ' ' || cString[i] == '\t' ||
			    cString[i] == '\r' || cString[i] == '\n')
				continue;

			last = i;
			PIState = 1;
			i--;

			break;
		case 1:
			if (cString[i] != '=')
				continue;

			attribute = [OFString
			    stringWithCString: cString + last
				     encoding: self->_encoding
				       length: i - last];
			last = i + 1;
			PIState = 2;

			break;
		case 2:
			if (cString[i] != '\'' && cString[i] != '"')
				return false;

			piDelimiter = cString[i];
			last = i + 1;
			PIState = 3;

			break;
		case 3:
			if (cString[i] != piDelimiter)
				continue;

			value = [OFMutableString
			    stringWithCString: cString + last
				     encoding: self->_encoding
				       length: i - last];

			if ([attribute isEqual: @"version"]) {
				if (![value hasPrefix: @"1."])
					return false;

				hasVersion = true;
			}

			if ([attribute isEqual: @"encoding"]) {
				@try {
					self->_encoding =
					    of_string_parse_encoding(value);
				} @catch (OFInvalidArgumentException *e) {
					@throw [OFInvalidEncodingException
					    exception];
				}
			}

			last = i + 1;
			PIState = 0;

			break;
		}
	}

	if (PIState != 0 || !hasVersion)
		return false;

	return true;
}

/* Inside processing instructions */
static void
inProcessingInstructionsState(OFXMLParser *self)
{
	if (self->_data[self->_i] == '?')
		self->_level = 1;
	else if (self->_level == 1 && self->_data[self->_i] == '>') {
		void *pool = objc_autoreleasePoolPush();
		OFString *PI;

		appendToBuffer(self->_buffer, self->_data + self->_last,
		    self->_encoding, self->_i - self->_last);
		PI = transformString(self, self->_buffer, 1, false);

		if ([PI isEqual: @"xml"] || [PI hasPrefix: @"xml "] ||
		    [PI hasPrefix: @"xml\t"] || [PI hasPrefix: @"xml\r"] ||
		    [PI hasPrefix: @"xml\n"])
			if (!parseXMLProcessingInstructions(self, PI))
				@throw [OFMalformedXMLException
				    exceptionWithParser: self];

		if ([self->_delegate respondsToSelector:
		    @selector(parser:foundProcessingInstructions:)])
			[self->_delegate parser: self
			    foundProcessingInstructions: PI];

		objc_autoreleasePoolPop(pool);

		[self->_buffer removeAllItems];

		self->_last = self->_i + 1;
		self->_state = OF_XMLPARSER_OUTSIDE_TAG;
	} else
		self->_level = 0;
}

/* Inside a tag, no name yet */
static void
inTagNameState(OFXMLParser *self)
{
	void *pool;
	const char *bufferCString, *tmp;
	size_t length, bufferLength;
	OFString *bufferString;

	if (self->_data[self->_i] != ' '  && self->_data[self->_i] != '\t' &&
	    self->_data[self->_i] != '\n' && self->_data[self->_i] != '\r' &&
	    self->_data[self->_i] != '>'  && self->_data[self->_i] != '/')
		return;

	if ((length = self->_i - self->_last) > 0)
		appendToBuffer(self->_buffer, self->_data + self->_last,
		    self->_encoding, length);

	pool = objc_autoreleasePoolPush();

	bufferCString = self->_buffer.items;
	bufferLength = self->_buffer.count;
	bufferString = [OFString stringWithUTF8String: bufferCString
					       length: bufferLength];

	if ((tmp = memchr(bufferCString, ':', bufferLength)) != NULL) {
		self->_name = [[OFString alloc]
		    initWithUTF8String: tmp + 1
				length: bufferLength -
					(tmp - bufferCString) - 1];
		self->_prefix = [[OFString alloc]
		    initWithUTF8String: bufferCString
				length: tmp - bufferCString];
	} else {
		self->_name = [bufferString copy];
		self->_prefix = nil;
	}

	if (self->_data[self->_i] == '>' || self->_data[self->_i] == '/') {
		OFString *namespace;

		namespace = namespaceForPrefix(self->_prefix,
		    self->_namespaces);

		if (self->_prefix != nil && namespace == nil)
			@throw [OFUnboundPrefixException
			    exceptionWithPrefix: self->_prefix
					 parser: self];

		if ([self->_delegate respondsToSelector: @selector(parser:
		    didStartElement:prefix:namespace:attributes:)])
			[self->_delegate parser: self
				didStartElement: self->_name
					 prefix: self->_prefix
				      namespace: namespace
				     attributes: nil];

		if (self->_data[self->_i] == '/') {
			if ([self->_delegate respondsToSelector:
			    @selector(parser:didEndElement:prefix:namespace:)])
				[self->_delegate parser: self
					  didEndElement: self->_name
						 prefix: self->_prefix
					      namespace: namespace];

			if (self->_previous.count == 0)
				self->_finishedParsing = true;
		} else
			[self->_previous addObject: bufferString];

		[self->_name release];
		[self->_prefix release];
		self->_name = self->_prefix = nil;

		self->_state = (self->_data[self->_i] == '/'
		    ? OF_XMLPARSER_EXPECT_TAG_CLOSE
		    : OF_XMLPARSER_OUTSIDE_TAG);
	} else
		self->_state = OF_XMLPARSER_IN_TAG;

	if (self->_data[self->_i] != '/')
		[self->_namespaces addObject: [OFMutableDictionary dictionary]];

	objc_autoreleasePoolPop(pool);

	[self->_buffer removeAllItems];
	self->_last = self->_i + 1;
}

/* Inside a close tag, no name yet */
static void
inCloseTagNameState(OFXMLParser *self)
{
	void *pool;
	const char *bufferCString, *tmp;
	size_t length, bufferLength;
	OFString *bufferString, *namespace;

	if (self->_data[self->_i] != ' '  && self->_data[self->_i] != '\t' &&
	    self->_data[self->_i] != '\n' && self->_data[self->_i] != '\r' &&
	    self->_data[self->_i] != '>')
		return;

	if ((length = self->_i - self->_last) > 0)
		appendToBuffer(self->_buffer, self->_data + self->_last,
		    self->_encoding, length);

	pool = objc_autoreleasePoolPush();

	bufferCString = self->_buffer.items;
	bufferLength = self->_buffer.count;
	bufferString = [OFString stringWithUTF8String: bufferCString
					       length: bufferLength];

	if ((tmp = memchr(bufferCString, ':', bufferLength)) != NULL) {
		self->_name = [[OFString alloc]
		    initWithUTF8String: tmp + 1
				length: bufferLength -
					(tmp - bufferCString) - 1];
		self->_prefix = [[OFString alloc]
		    initWithUTF8String: bufferCString
				length: tmp - bufferCString];
	} else {
		self->_name = [bufferString copy];
		self->_prefix = nil;
	}

	if (![self->_previous.lastObject isEqual: bufferString])
		@throw [OFMalformedXMLException exceptionWithParser: self];

	[self->_previous removeLastObject];

	[self->_buffer removeAllItems];

	namespace = namespaceForPrefix(self->_prefix, self->_namespaces);
	if (self->_prefix != nil && namespace == nil)
		@throw [OFUnboundPrefixException
		    exceptionWithPrefix: self->_prefix
				 parser: self];

	if ([self->_delegate respondsToSelector:
	    @selector(parser:didEndElement:prefix:namespace:)])
		[self->_delegate parser: self
			  didEndElement: self->_name
				 prefix: self->_prefix
			      namespace: namespace];

	objc_autoreleasePoolPop(pool);

	[self->_namespaces removeLastObject];
	[self->_name release];
	[self->_prefix release];
	self->_name = self->_prefix = nil;

	self->_last = self->_i + 1;
	self->_state = (self->_data[self->_i] == '>'
	    ? OF_XMLPARSER_OUTSIDE_TAG
	    : OF_XMLPARSER_EXPECT_SPACE_OR_TAG_CLOSE);

	if (self->_previous.count == 0)
		self->_finishedParsing = true;
}

/* Inside a tag, name found */
static void
inTagState(OFXMLParser *self)
{
	void *pool;
	OFString *namespace;
	OFXMLAttribute *const *attributesObjects;
	size_t attributesCount;

	if (self->_data[self->_i] != '>' && self->_data[self->_i] != '/') {
		if (self->_data[self->_i] != ' ' &&
		    self->_data[self->_i] != '\t' &&
		    self->_data[self->_i] != '\n' &&
		    self->_data[self->_i] != '\r') {
			self->_last = self->_i;
			self->_state = OF_XMLPARSER_IN_ATTRIBUTE_NAME;
			self->_i--;
		}

		return;
	}

	attributesObjects = self->_attributes.objects;
	attributesCount = self->_attributes.count;

	namespace = namespaceForPrefix(self->_prefix, self->_namespaces);

	if (self->_prefix != nil && namespace == nil)
		@throw [OFUnboundPrefixException
		    exceptionWithPrefix: self->_prefix
				 parser: self];

	for (size_t j = 0; j < attributesCount; j++)
		resolveAttributeNamespace(attributesObjects[j],
		    self->_namespaces, self);

	pool = objc_autoreleasePoolPush();

	if ([self->_delegate respondsToSelector:
	    @selector(parser:didStartElement:prefix:namespace:attributes:)])
		[self->_delegate parser: self
			didStartElement: self->_name
				 prefix: self->_prefix
			      namespace: namespace
			     attributes: self->_attributes];

	if (self->_data[self->_i] == '/') {
		if ([self->_delegate respondsToSelector:
		    @selector(parser:didEndElement:prefix:namespace:)])
			[self->_delegate parser: self
				  didEndElement: self->_name
					 prefix: self->_prefix
				      namespace: namespace];

		if (self->_previous.count == 0)
			self->_finishedParsing = true;

		[self->_namespaces removeLastObject];
	} else if (self->_prefix != nil) {
		OFString *str = [OFString stringWithFormat:
		    @"%@:%@", self->_prefix, self->_name];
		[self->_previous addObject: str];
	} else
		[self->_previous addObject: self->_name];

	objc_autoreleasePoolPop(pool);

	[self->_name release];
	[self->_prefix release];
	[self->_attributes removeAllObjects];
	self->_name = self->_prefix = nil;

	self->_last = self->_i + 1;
	self->_state = (self->_data[self->_i] == '/'
	    ? OF_XMLPARSER_EXPECT_TAG_CLOSE
	    : OF_XMLPARSER_OUTSIDE_TAG);
}

/* Looking for attribute name */
static void
inAttributeNameState(OFXMLParser *self)
{
	void *pool;
	OFString *bufferString;
	const char *bufferCString, *tmp;
	size_t length, bufferLength;

	if (self->_data[self->_i] != '='  && self->_data[self->_i] != ' '  &&
	    self->_data[self->_i] != '\t' && self->_data[self->_i] != '\n' &&
	    self->_data[self->_i] != '\r')
		return;

	if ((length = self->_i - self->_last) > 0)
		appendToBuffer(self->_buffer, self->_data + self->_last,
		    self->_encoding, length);

	pool = objc_autoreleasePoolPush();

	bufferString = [OFString stringWithUTF8String: self->_buffer.items
					       length: self->_buffer.count];

	bufferCString = bufferString.UTF8String;
	bufferLength = bufferString.UTF8StringLength;

	if ((tmp = memchr(bufferCString, ':', bufferLength)) != NULL) {
		self->_attributeName = [[OFString alloc]
		    initWithUTF8String: tmp + 1
				length: bufferLength -
					(tmp - bufferCString) - 1];
		self->_attributePrefix = [[OFString alloc]
		    initWithUTF8String: bufferCString
				length: tmp - bufferCString];
	} else {
		self->_attributeName = [bufferString copy];
		self->_attributePrefix = nil;
	}

	objc_autoreleasePoolPop(pool);

	[self->_buffer removeAllItems];

	self->_last = self->_i + 1;
	self->_state = (self->_data[self->_i] == '='
	    ? OF_XMLPARSER_EXPECT_ATTRIBUTE_DELIMITER
	    : OF_XMLPARSER_EXPECT_ATTRIBUTE_EQUAL_SIGN);
}

/* Expecting equal sign of an attribute */
static void
expectAttributeEqualSignState(OFXMLParser *self)
{
	if (self->_data[self->_i] == '=') {
		self->_last = self->_i + 1;
		self->_state = OF_XMLPARSER_EXPECT_ATTRIBUTE_DELIMITER;
		return;
	}

	if (self->_data[self->_i] != ' '  && self->_data[self->_i] != '\t' &&
	    self->_data[self->_i] != '\n' && self->_data[self->_i] != '\r')
		@throw [OFMalformedXMLException exceptionWithParser: self];
}

/* Expecting name/value delimiter of an attribute */
static void
expectAttributeDelimiterState(OFXMLParser *self)
{
	self->_last = self->_i + 1;

	if (self->_data[self->_i] == ' '  || self->_data[self->_i] == '\t' ||
	    self->_data[self->_i] == '\n' || self->_data[self->_i] == '\r')
		return;

	if (self->_data[self->_i] != '\'' && self->_data[self->_i] != '"')
		@throw [OFMalformedXMLException exceptionWithParser: self];

	self->_delimiter = self->_data[self->_i];
	self->_state = OF_XMLPARSER_IN_ATTRIBUTE_VALUE;
}

/* Looking for attribute value */
static void
inAttributeValueState(OFXMLParser *self)
{
	void *pool;
	OFString *attributeValue;
	size_t length;
	OFXMLAttribute *attribute;

	if (self->_data[self->_i] != self->_delimiter)
		return;

	if ((length = self->_i - self->_last) > 0)
		appendToBuffer(self->_buffer, self->_data + self->_last,
		    self->_encoding, length);

	pool = objc_autoreleasePoolPush();
	attributeValue = transformString(self, self->_buffer, 0, true);

	if (self->_attributePrefix == nil &&
	    [self->_attributeName isEqual: @"xmlns"])
		[self->_namespaces.lastObject setObject: attributeValue
						 forKey: @""];
	if ([self->_attributePrefix isEqual: @"xmlns"])
		[self->_namespaces.lastObject setObject: attributeValue
						 forKey: self->_attributeName];

	attribute = [OFXMLAttribute attributeWithName: self->_attributeName
					    namespace: self->_attributePrefix
					  stringValue: attributeValue];
	attribute->_useDoubleQuotes = (self->_delimiter == '"');
	[self->_attributes addObject: attribute];

	objc_autoreleasePoolPop(pool);

	[self->_buffer removeAllItems];
	[self->_attributeName release];
	[self->_attributePrefix release];
	self->_attributeName = self->_attributePrefix = nil;

	self->_last = self->_i + 1;
	self->_state = OF_XMLPARSER_IN_TAG;
}

/* Expecting closing '>' */
static void
expectTagCloseState(OFXMLParser *self)
{
	if (self->_data[self->_i] == '>') {
		self->_last = self->_i + 1;
		self->_state = OF_XMLPARSER_OUTSIDE_TAG;
	} else
		@throw [OFMalformedXMLException exceptionWithParser: self];
}

/* Expecting closing '>' or space */
static void
expectSpaceOrTagCloseState(OFXMLParser *self)
{
	if (self->_data[self->_i] == '>') {
		self->_last = self->_i + 1;
		self->_state = OF_XMLPARSER_OUTSIDE_TAG;
	} else if (self->_data[self->_i] != ' ' &&
	    self->_data[self->_i] != '\t' && self->_data[self->_i] != '\n' &&
	    self->_data[self->_i] != '\r')
		@throw [OFMalformedXMLException exceptionWithParser: self];
}

/* In <! */
static void
inExclamationMarkState(OFXMLParser *self)
{
	if (self->_finishedParsing && self->_data[self->_i] != '-')
		@throw [OFMalformedXMLException exceptionWithParser: self];

	if (self->_data[self->_i] == '-')
		self->_state = OF_XMLPARSER_IN_COMMENT_OPENING;
	else if (self->_data[self->_i] == '[') {
		self->_state = OF_XMLPARSER_IN_CDATA_OPENING;
		self->_level = 0;
	} else if (self->_data[self->_i] == 'D') {
		self->_state = OF_XMLPARSER_IN_DOCTYPE;
		self->_level = 0;
	} else
		@throw [OFMalformedXMLException exceptionWithParser: self];

	self->_last = self->_i + 1;
}

/* CDATA */
static void
inCDATAOpeningState(OFXMLParser *self)
{
	if (self->_data[self->_i] != "CDATA["[self->_level])
		@throw [OFMalformedXMLException exceptionWithParser: self];

	if (++self->_level == 6) {
		self->_state = OF_XMLPARSER_IN_CDATA;
		self->_level = 0;
	}

	self->_last = self->_i + 1;
}

static void
inCDATAState(OFXMLParser *self)
{
	if (self->_data[self->_i] == ']')
		self->_level++;
	else if (self->_data[self->_i] == '>' && self->_level >= 2) {
		void *pool = objc_autoreleasePoolPush();
		OFString *CDATA;

		appendToBuffer(self->_buffer, self->_data + self->_last,
		    self->_encoding, self->_i - self->_last);
		CDATA = transformString(self, self->_buffer, 2, false);

		if ([self->_delegate respondsToSelector:
		    @selector(parser:foundCDATA:)])
			[self->_delegate parser: self foundCDATA: CDATA];

		objc_autoreleasePoolPop(pool);

		[self->_buffer removeAllItems];

		self->_last = self->_i + 1;
		self->_state = OF_XMLPARSER_OUTSIDE_TAG;
	} else
		self->_level = 0;
}

/* Comment */
static void
inCommentOpeningState(OFXMLParser *self)
{
	if (self->_data[self->_i] != '-')
		@throw [OFMalformedXMLException exceptionWithParser: self];

	self->_last = self->_i + 1;
	self->_state = OF_XMLPARSER_IN_COMMENT_1;
	self->_level = 0;
}

static void
inCommentState1(OFXMLParser *self)
{
	if (self->_data[self->_i] == '-')
		self->_level++;
	else
		self->_level = 0;

	if (self->_level == 2)
		self->_state = OF_XMLPARSER_IN_COMMENT_2;
}

static void
inCommentState2(OFXMLParser *self)
{
	void *pool;
	OFString *comment;

	if (self->_data[self->_i] != '>')
		@throw [OFMalformedXMLException exceptionWithParser: self];

	pool = objc_autoreleasePoolPush();

	appendToBuffer(self->_buffer, self->_data + self->_last,
	    self->_encoding, self->_i - self->_last);
	comment = transformString(self, self->_buffer, 2, false);

	if ([self->_delegate respondsToSelector:
	    @selector(parser:foundComment:)])
		[self->_delegate parser: self foundComment: comment];

	objc_autoreleasePoolPop(pool);

	[self->_buffer removeAllItems];

	self->_last = self->_i + 1;
	self->_state = OF_XMLPARSER_OUTSIDE_TAG;
}

/* In <!DOCTYPE ...> */
static void
inDOCTYPEState(OFXMLParser *self)
{
	if ((self->_level < 6 &&
	    self->_data[self->_i] != "OCTYPE"[self->_level]) ||
	    (self->_level == 6 && self->_data[self->_i] != ' ' &&
	    self->_data[self->_i] != '\t' && self->_data[self->_i] != '\n' &&
	    self->_data[self->_i] != '\r'))
		@throw [OFMalformedXMLException exceptionWithParser: self];

	self->_level++;

	if (self->_level > 6 && self->_data[self->_i] == '>')
		self->_state = OF_XMLPARSER_OUTSIDE_TAG;

	self->_last = self->_i + 1;
}

- (size_t)lineNumber
{
	return _lineNumber;
}

- (bool)hasFinishedParsing
{
	return _finishedParsing;
}

-	  (OFString *)string: (OFString *)string
  containsUnknownEntityNamed: (OFString *)entity
{
	if ([_delegate respondsToSelector:
	    @selector(parser:foundUnknownEntityNamed:)])
		return [_delegate parser: self foundUnknownEntityNamed: entity];

	return nil;
}
@end