/*
* Copyright (c) 2008-2021 Jonathan Schleifer <js@nil.im>
*
* All rights reserved.
*
* This file is part of ObjFW. It may be distributed under the terms of the
* Q Public License 1.0, which can be found in the file LICENSE.QPL included in
* the packaging of this file.
*
* Alternatively, it may be distributed under the terms of the GNU General
* Public License, either version 2 or 3, which can be found in the file
* LICENSE.GPLv2 or LICENSE.GPLv3 respectively included in the packaging of this
* file.
*/
#include "config.h"
#define OF_XML_PARSER_M
#include <string.h>
#import "OFXMLParser.h"
#import "OFString.h"
#import "OFArray.h"
#import "OFDictionary.h"
#import "OFData.h"
#import "OFXMLAttribute.h"
#import "OFStream.h"
#ifdef OF_HAVE_FILES
# import "OFFile.h"
#endif
#import "OFSystemInfo.h"
#import "OFInitializationFailedException.h"
#import "OFInvalidArgumentException.h"
#import "OFInvalidEncodingException.h"
#import "OFInvalidFormatException.h"
#import "OFMalformedXMLException.h"
#import "OFOutOfRangeException.h"
#import "OFUnboundPrefixException.h"
/*
 * Class extension: privately adopts OFStringXMLUnescapingDelegate. The parser
 * passes itself as the delegate when unescaping entities (see
 * transformString()) and implements -string:containsUnknownEntityNamed:.
 */
@interface OFXMLParser () <OFStringXMLUnescapingDelegate>
@end
/*
 * Forward declarations of the state handlers. Each handler looks at the
 * current input byte (self->_data[self->_i]) and may change the parser's
 * state; they are declared here so that lookupTable can reference them before
 * their definitions.
 */
static void inByteOrderMarkState(OFXMLParser *);
static void outsideTagState(OFXMLParser *);
static void tagOpenedState(OFXMLParser *);
static void inProcessingInstructionsState(OFXMLParser *);
static void inTagNameState(OFXMLParser *);
static void inCloseTagNameState(OFXMLParser *);
static void inTagState(OFXMLParser *);
static void inAttributeNameState(OFXMLParser *);
static void expectAttributeEqualSignState(OFXMLParser *);
static void expectAttributeDelimiterState(OFXMLParser *);
static void inAttributeValueState(OFXMLParser *);
static void expectTagCloseState(OFXMLParser *);
static void expectSpaceOrTagCloseState(OFXMLParser *);
static void inExclamationMarkState(OFXMLParser *);
static void inCDATAOpeningState(OFXMLParser *);
static void inCDATAState(OFXMLParser *);
static void inCommentOpeningState(OFXMLParser *);
static void inCommentState1(OFXMLParser *);
static void inCommentState2(OFXMLParser *);
static void inDOCTYPEState(OFXMLParser *);
/* Signature shared by all state handlers. */
typedef void (*state_function_t)(OFXMLParser *);
/*
 * Dispatch table mapping each OF_XMLPARSER_* state constant to its handler.
 * -parseBuffer:length: calls lookupTable[_state](self) once per input byte.
 */
static state_function_t lookupTable[] = {
[OF_XMLPARSER_IN_BYTE_ORDER_MARK] = inByteOrderMarkState,
[OF_XMLPARSER_OUTSIDE_TAG] = outsideTagState,
[OF_XMLPARSER_TAG_OPENED] = tagOpenedState,
[OF_XMLPARSER_IN_PROCESSING_INSTRUCTIONS] =
inProcessingInstructionsState,
[OF_XMLPARSER_IN_TAG_NAME] = inTagNameState,
[OF_XMLPARSER_IN_CLOSE_TAG_NAME] = inCloseTagNameState,
[OF_XMLPARSER_IN_TAG] = inTagState,
[OF_XMLPARSER_IN_ATTRIBUTE_NAME] = inAttributeNameState,
[OF_XMLPARSER_EXPECT_ATTRIBUTE_EQUAL_SIGN] =
expectAttributeEqualSignState,
[OF_XMLPARSER_EXPECT_ATTRIBUTE_DELIMITER] =
expectAttributeDelimiterState,
[OF_XMLPARSER_IN_ATTRIBUTE_VALUE] = inAttributeValueState,
[OF_XMLPARSER_EXPECT_TAG_CLOSE] = expectTagCloseState,
[OF_XMLPARSER_EXPECT_SPACE_OR_TAG_CLOSE] = expectSpaceOrTagCloseState,
[OF_XMLPARSER_IN_EXCLAMATION_MARK] = inExclamationMarkState,
[OF_XMLPARSER_IN_CDATA_OPENING] = inCDATAOpeningState,
[OF_XMLPARSER_IN_CDATA] = inCDATAState,
[OF_XMLPARSER_IN_COMMENT_OPENING] = inCommentOpeningState,
[OF_XMLPARSER_IN_COMMENT_1] = inCommentState1,
[OF_XMLPARSER_IN_COMMENT_2] = inCommentState2,
[OF_XMLPARSER_IN_DOCTYPE] = inDOCTYPEState
};
/*
 * Appends `length` bytes of `string` to `buffer` as UTF-8.
 *
 * Bytes already in UTF-8 are copied as they are. For any other encoding the
 * bytes are first decoded into a temporary OFString, whose UTF-8
 * representation is then appended.
 */
static OF_INLINE void
appendToBuffer(OFMutableData *buffer, const char *string,
    of_string_encoding_t encoding, size_t length)
{
	void *pool;
	OFString *decoded;

	if OF_LIKELY(encoding == OF_STRING_ENCODING_UTF_8) {
		[buffer addItems: string count: length];
		return;
	}

	/* The temporary string only needs to live until it has been copied */
	pool = objc_autoreleasePoolPush();

	decoded = [OFString stringWithCString: string
				     encoding: encoding
				       length: length];
	[buffer addItems: decoded.UTF8String count: decoded.UTF8StringLength];

	objc_autoreleasePoolPop(pool);
}
/*
 * Turns the first (buffer.count - cut) bytes of `buffer` into a string.
 *
 * Line endings are normalized in place: a CR immediately followed by LF
 * (within the considered range) is dropped and every other CR becomes LF.
 * If `unescape` is true and the text contains '&', XML entities are
 * unescaped using `parser` as delegate; an OFInvalidFormatException raised
 * while doing so is reported as OFMalformedXMLException.
 *
 * `cut` is the number of trailing bytes to ignore (e.g. the "]]" of a CDATA
 * terminator). Callers must ensure buffer.count >= cut.
 *
 * The normalization is a single compaction pass followed by at most one
 * removal, so input with many CR LF pairs is handled in linear time instead
 * of shifting the rest of the buffer once per pair. Afterwards the buffer
 * holds the normalized text followed by the `cut` trailing bytes.
 */
static OFString *
transformString(OFXMLParser *parser, OFMutableData *buffer, size_t cut,
    bool unescape)
{
	char *items = buffer.mutableItems;
	size_t length = buffer.count - cut;
	size_t written = 0;
	bool hasEntities = false;
	OFString *ret;

	for (size_t i = 0; i < length; i++) {
		char c = items[i];

		if (c == '\r') {
			/* CR LF: drop the CR, the LF is copied next round */
			if (i + 1 < length && items[i + 1] == '\n')
				continue;

			c = '\n';
		} else if (c == '&')
			hasEntities = true;

		/* written <= i, so this never overwrites unread input */
		items[written++] = c;
	}

	if (written < length) {
		/* Drop the stale gap left behind by the compaction */
		[buffer removeItemsInRange: of_range(written,
		    length - written)];

		/* The storage may have moved */
		items = buffer.mutableItems;
		length = written;
	}

	ret = [OFString stringWithUTF8String: items length: length];

	if (unescape && hasEntities) {
		@try {
			return [ret stringByXMLUnescapingWithDelegate: parser];
		} @catch (OFInvalidFormatException *e) {
			@throw [OFMalformedXMLException
			    exceptionWithParser: parser];
		}
	}

	return ret;
}
/*
 * Looks up the namespace bound to `prefix`, searching the dictionaries in
 * `namespaces` from the last one to the first so that the most recently added
 * binding wins. A nil prefix is looked up under the empty string key.
 * Returns nil if no dictionary contains the prefix.
 */
static OFString *
namespaceForPrefix(OFString *prefix, OFArray *namespaces)
{
	OFDictionary *const *scopes = namespaces.objects;
	OFString *key = (prefix != nil ? prefix : @"");

	for (size_t remaining = namespaces.count; remaining > 0; remaining--) {
		OFString *found = [scopes[remaining - 1] objectForKey: key];

		if (found != nil)
			return found;
	}

	return nil;
}
/*
 * Replaces the prefix stored in attribute->_namespace with the namespace that
 * prefix is bound to in `namespaces`. Attributes without a prefix are left
 * untouched. Raises OFUnboundPrefixException if the prefix has no binding.
 */
static OF_INLINE void
resolveAttributeNamespace(OFXMLAttribute *attribute, OFArray *namespaces,
    OFXMLParser *self)
{
	OFString *prefix = attribute->_namespace;
	OFString *resolved;

	if (prefix == nil)
		return;

	resolved = namespaceForPrefix(prefix, namespaces);

	/* prefix is known to be non-nil here */
	if (resolved == nil)
		@throw [OFUnboundPrefixException exceptionWithPrefix: prefix
							      parser: self];

	[attribute->_namespace release];
	attribute->_namespace = [resolved retain];
}
@implementation OFXMLParser
@synthesize delegate = _delegate, depthLimit = _depthLimit;
/* Convenience constructor returning a new, autoreleased parser. */
+ (instancetype)parser
{
	id parser = [[self alloc] init];

	return [parser autorelease];
}
/*
 * Initializes the parser: UTF-8 input encoding, line number 1, a depth limit
 * of 32, prolog accepted, and a namespace stack pre-populated with the
 * "xml" and "xmlns" bindings. If anything raises, the half-initialized
 * object is released and the exception is rethrown.
 */
- (instancetype)init
{
	self = [super init];

	@try {
		void *pool;
		OFMutableDictionary *builtinNamespaces;

		_encoding = OF_STRING_ENCODING_UTF_8;
		_lineNumber = 1;
		_depthLimit = 32;
		_acceptProlog = true;

		_buffer = [[OFMutableData alloc] init];
		_previous = [[OFMutableArray alloc] init];
		_namespaces = [[OFMutableArray alloc] init];
		_attributes = [[OFMutableArray alloc] init];

		pool = objc_autoreleasePoolPush();

		/* Bottom of the namespace stack: the predefined bindings */
		builtinNamespaces = [OFMutableDictionary
		    dictionaryWithKeysAndObjects:
		    @"xml", @"http://www.w3.org/XML/1998/namespace",
		    @"xmlns", @"http://www.w3.org/2000/xmlns/", nil];
		[_namespaces addObject: builtinNamespaces];

		objc_autoreleasePoolPop(pool);
	} @catch (id e) {
		[self release];
		@throw e;
	}

	return self;
}
/* Releases everything the parser owns. */
- (void)dealloc
{
	/* Containers created in -init */
	[_buffer release];
	[_previous release];
	[_namespaces release];
	[_attributes release];

	/* Strings that are only set while an element is being parsed */
	[_name release];
	[_prefix release];
	[_attributeName release];
	[_attributePrefix release];

	[super dealloc];
}
/*
 * Feeds `length` bytes from `buffer` through the state machine, one byte at a
 * time, and keeps the line counter up to date.
 *
 * Bytes after _last that no handler has consumed by the end of the buffer are
 * saved to _buffer so that parsing can continue with the next call.
 */
- (void)parseBuffer: (const char *)buffer length: (size_t)length
{
	size_t unconsumed;

	_data = buffer;
	_i = _last = 0;

	while (_i < length) {
		const size_t position = _i;

		lookupTable[_state](self);

		/*
		 * A handler that moved _i back wants this byte to be
		 * processed again; only count it once it is not re-run.
		 */
		if (_i == position) {
			const char c = _data[_i];

			/* CR, LF and CR LF each count as one line break */
			if (c == '\r' || (c == '\n' && !_lastCarriageReturn))
				_lineNumber++;

			_lastCarriageReturn = (c == '\r');
		}

		_i++;
	}

	/* In OF_XMLPARSER_IN_TAG, there can be only spaces */
	unconsumed = length - _last;
	if (unconsumed > 0 && _state != OF_XMLPARSER_IN_TAG)
		appendToBuffer(_buffer, _data + _last, _encoding, unconsumed);
}
/* Parses the UTF-8 representation of `string`. */
- (void)parseString: (OFString *)string
{
	const char *UTF8String = string.UTF8String;
	size_t UTF8StringLength = string.UTF8StringLength;

	[self parseBuffer: UTF8String length: UTF8StringLength];
}
/*
 * Reads `stream` until it reports end of stream, parsing each chunk as it
 * arrives. Chunks are at most one memory page in size. The temporary chunk
 * buffer is freed even if reading or parsing raises.
 */
- (void)parseStream: (OFStream *)stream
{
	const size_t chunkSize = [OFSystemInfo pageSize];
	char *chunk = of_alloc(1, chunkSize);

	@try {
		for (;;) {
			size_t received;

			if (stream.atEndOfStream)
				break;

			received = [stream readIntoBuffer: chunk
						   length: chunkSize];
			[self parseBuffer: chunk length: received];
		}
	} @finally {
		free(chunk);
	}
}
/*
 * Skips an optional UTF-8 byte order mark (EF BB BF). _level is the index of
 * the BOM byte expected next. If the very first byte is not part of a BOM,
 * it is handed to the outside-tag state unchanged; a BOM that starts but
 * does not complete is malformed.
 */
static void
inByteOrderMarkState(OFXMLParser *self)
{
	static const char byteOrderMark[] = "\xEF\xBB\xBF";
	const char c = self->_data[self->_i];

	if (c == byteOrderMark[self->_level]) {
		/* The third matching byte completes the BOM */
		if (self->_level++ == 2)
			self->_state = OF_XMLPARSER_OUTSIDE_TAG;

		self->_last = self->_i + 1;
		return;
	}

	if (self->_level != 0)
		@throw [OFMalformedXMLException exceptionWithParser: self];

	/* No BOM at all: re-run this byte in the new state */
	self->_state = OF_XMLPARSER_OUTSIDE_TAG;
	self->_i--;
}
/*
 * Not in a tag.
 *
 * Collects character data until the next '<'. Outside of the root element
 * (no open element, or parsing already finished) only whitespace is allowed.
 * On '<', any collected characters are reported to the delegate and the
 * parser moves on to the tag-opened state.
 */
static void
outsideTagState(OFXMLParser *self)
{
	const char c = self->_data[self->_i];
	size_t pending;

	if (c != '<') {
		const bool isWhitespace =
		    (c == ' ' || c == '\t' || c == '\n' || c == '\r');

		if (!isWhitespace &&
		    (self->_finishedParsing || self->_previous.count < 1))
			@throw [OFMalformedXMLException
			    exceptionWithParser: self];

		return;
	}

	pending = self->_i - self->_last;
	if (pending > 0)
		appendToBuffer(self->_buffer, self->_data + self->_last,
		    self->_encoding, pending);

	if (self->_buffer.count > 0) {
		void *pool = objc_autoreleasePoolPush();
		OFString *text = transformString(self, self->_buffer, 0, true);

		if ([self->_delegate respondsToSelector:
		    @selector(parser:foundCharacters:)])
			[self->_delegate parser: self foundCharacters: text];

		objc_autoreleasePoolPop(pool);
	}

	[self->_buffer removeAllItems];

	self->_state = OF_XMLPARSER_TAG_OPENED;
	self->_last = self->_i + 1;
}
/*
 * Tag was just opened.
 *
 * Decides from the byte after '<' what kind of markup follows. Once parsing
 * has finished only "<!" and "<?" are accepted. Opening a new element raises
 * OFOutOfRangeException if a non-zero depth limit has been reached.
 */
static void
tagOpenedState(OFXMLParser *self)
{
	const char c = self->_data[self->_i];

	if (self->_finishedParsing && c != '!' && c != '?')
		@throw [OFMalformedXMLException exceptionWithParser: self];

	if (c == '?') {
		self->_last = self->_i + 1;
		self->_state = OF_XMLPARSER_IN_PROCESSING_INSTRUCTIONS;
		self->_level = 0;
	} else if (c == '/') {
		self->_last = self->_i + 1;
		self->_state = OF_XMLPARSER_IN_CLOSE_TAG_NAME;
		self->_acceptProlog = false;
	} else if (c == '!') {
		self->_last = self->_i + 1;
		self->_state = OF_XMLPARSER_IN_EXCLAMATION_MARK;
		self->_acceptProlog = false;
	} else {
		if (self->_depthLimit > 0 &&
		    self->_previous.count >= self->_depthLimit)
			@throw [OFOutOfRangeException exception];

		self->_state = OF_XMLPARSER_IN_TAG_NAME;
		self->_acceptProlog = false;

		/* This byte is the first one of the name: process it again */
		self->_i--;
	}
}
/*
 * <?xml […]?>
 *
 * Parses the pseudo-attributes of an XML declaration. `pi` is the text of
 * the processing instruction, starting with "xml".
 *
 * Returns false if a prolog is not accepted at this point, if an attribute
 * value is not quoted with ' or ", if the version does not start with "1.",
 * if the text ends in the middle of an attribute or if there is no version
 * attribute. Returns true otherwise.
 *
 * An "encoding" attribute changes the parser's input encoding. If
 * of_string_parse_encoding() rejects the name with
 * OFInvalidArgumentException, OFInvalidEncodingException is raised instead.
 *
 * Names and values are decoded as UTF-8 because `cString` is the UTF-8
 * representation of `pi`, regardless of the document's encoding. Decoding
 * them with self->_encoding would use the wrong codec for every attribute
 * that follows "encoding", as that attribute changes self->_encoding.
 */
static bool
parseXMLProcessingInstructions(OFXMLParser *self, OFString *pi)
{
	const char *cString;
	size_t length, last;
	/*
	 * 0: skipping whitespace before a name, 1: in a name (up to '='),
	 * 2: expecting the opening quote, 3: in a value (up to the quote)
	 */
	int PIState = 0;
	OFString *attribute = nil;
	OFString *value = nil;
	char piDelimiter = 0;
	bool hasVersion = false;

	/* A prolog is only accepted once */
	if (!self->_acceptProlog)
		return false;

	self->_acceptProlog = false;

	/* Strip the leading "xml" */
	pi = [pi substringFromIndex: 3];
	pi = pi.stringByDeletingEnclosingWhitespaces;

	cString = pi.UTF8String;
	length = pi.UTF8StringLength;

	last = 0;
	for (size_t i = 0; i < length; i++) {
		switch (PIState) {
		case 0:
			if (cString[i] == ' ' || cString[i] == '\t' ||
			    cString[i] == '\r' || cString[i] == '\n')
				continue;

			last = i;
			PIState = 1;
			/* Process this byte again as part of the name */
			i--;

			break;
		case 1:
			if (cString[i] != '=')
				continue;

			attribute = [OFString
			    stringWithUTF8String: cString + last
					  length: i - last];
			last = i + 1;
			PIState = 2;

			break;
		case 2:
			if (cString[i] != '\'' && cString[i] != '"')
				return false;

			piDelimiter = cString[i];
			last = i + 1;
			PIState = 3;

			break;
		case 3:
			if (cString[i] != piDelimiter)
				continue;

			value = [OFString
			    stringWithUTF8String: cString + last
					  length: i - last];

			if ([attribute isEqual: @"version"]) {
				if (![value hasPrefix: @"1."])
					return false;

				hasVersion = true;
			}

			if ([attribute isEqual: @"encoding"]) {
				@try {
					self->_encoding =
					    of_string_parse_encoding(value);
				} @catch (OFInvalidArgumentException *e) {
					@throw [OFInvalidEncodingException
					    exception];
				}
			}

			last = i + 1;
			PIState = 0;

			break;
		}
	}

	/* Must end between attributes and must have declared a version */
	if (PIState != 0 || !hasVersion)
		return false;

	return true;
}
/*
 * Inside processing instructions.
 *
 * Waits for the terminating "?>"; _level is 1 while the previous byte was
 * '?'. On termination the text is reported to the delegate. An instruction
 * that is "xml" or starts with "xml" followed by whitespace is treated as
 * XML declaration and must parse as one.
 */
static void
inProcessingInstructionsState(OFXMLParser *self)
{
	const char c = self->_data[self->_i];
	void *pool;
	OFString *PI;
	bool isDeclaration;

	if (c == '?') {
		self->_level = 1;
		return;
	}

	if (c != '>' || self->_level != 1) {
		self->_level = 0;
		return;
	}

	pool = objc_autoreleasePoolPush();

	appendToBuffer(self->_buffer, self->_data + self->_last,
	    self->_encoding, self->_i - self->_last);

	/* Cut the trailing '?' */
	PI = transformString(self, self->_buffer, 1, false);

	isDeclaration = ([PI isEqual: @"xml"] || [PI hasPrefix: @"xml "] ||
	    [PI hasPrefix: @"xml\t"] || [PI hasPrefix: @"xml\r"] ||
	    [PI hasPrefix: @"xml\n"]);

	if (isDeclaration && !parseXMLProcessingInstructions(self, PI))
		@throw [OFMalformedXMLException exceptionWithParser: self];

	if ([self->_delegate respondsToSelector:
	    @selector(parser:foundProcessingInstructions:)])
		[self->_delegate parser: self
		    foundProcessingInstructions: PI];

	objc_autoreleasePoolPop(pool);

	[self->_buffer removeAllItems];

	self->_state = OF_XMLPARSER_OUTSIDE_TAG;
	self->_last = self->_i + 1;
}
/*
 * Inside a tag, no name yet.
 *
 * Collects the element name up to whitespace, '>' or '/' and splits it into
 * prefix and local name at the first ':'. If the tag ends right after the
 * name ('>' or '/'), the element has no attributes and the delegate is
 * notified immediately; otherwise attribute parsing follows. Unless the
 * element is self-closing, a new (empty) namespace scope is pushed.
 */
static void
inTagNameState(OFXMLParser *self)
{
	const char c = self->_data[self->_i];
	const bool selfClosing = (c == '/');
	void *pool;
	const char *qualifiedName, *colon;
	size_t pending, qualifiedNameLength;
	OFString *qualifiedNameString;

	switch (c) {
	case ' ':
	case '\t':
	case '\n':
	case '\r':
	case '>':
	case '/':
		break;
	default:
		/* Still inside the name */
		return;
	}

	pending = self->_i - self->_last;
	if (pending > 0)
		appendToBuffer(self->_buffer, self->_data + self->_last,
		    self->_encoding, pending);

	pool = objc_autoreleasePoolPush();

	qualifiedName = self->_buffer.items;
	qualifiedNameLength = self->_buffer.count;
	qualifiedNameString = [OFString
	    stringWithUTF8String: qualifiedName
			  length: qualifiedNameLength];

	colon = memchr(qualifiedName, ':', qualifiedNameLength);
	if (colon == NULL) {
		self->_name = [qualifiedNameString copy];
		self->_prefix = nil;
	} else {
		self->_name = [[OFString alloc]
		    initWithUTF8String: colon + 1
				length: qualifiedNameLength -
					(colon - qualifiedName) - 1];
		self->_prefix = [[OFString alloc]
		    initWithUTF8String: qualifiedName
				length: colon - qualifiedName];
	}

	if (c != '>' && !selfClosing)
		/* Whitespace: attributes may follow */
		self->_state = OF_XMLPARSER_IN_TAG;
	else {
		OFString *elementNS = namespaceForPrefix(self->_prefix,
		    self->_namespaces);

		if (self->_prefix != nil && elementNS == nil)
			@throw [OFUnboundPrefixException
			    exceptionWithPrefix: self->_prefix
					 parser: self];

		if ([self->_delegate respondsToSelector: @selector(parser:
		    didStartElement:prefix:namespace:attributes:)])
			[self->_delegate parser: self
				didStartElement: self->_name
					 prefix: self->_prefix
				      namespace: elementNS
				     attributes: nil];

		if (!selfClosing)
			/* Remember the name for the matching close tag */
			[self->_previous addObject: qualifiedNameString];
		else {
			if ([self->_delegate respondsToSelector:
			    @selector(parser:didEndElement:prefix:namespace:)])
				[self->_delegate parser: self
					  didEndElement: self->_name
						 prefix: self->_prefix
					      namespace: elementNS];

			/* A self-closing root element ends the document */
			if (self->_previous.count == 0)
				self->_finishedParsing = true;
		}

		[self->_name release];
		[self->_prefix release];
		self->_name = self->_prefix = nil;

		self->_state = (selfClosing
		    ? OF_XMLPARSER_EXPECT_TAG_CLOSE
		    : OF_XMLPARSER_OUTSIDE_TAG);
	}

	if (!selfClosing)
		[self->_namespaces addObject: [OFMutableDictionary dictionary]];

	objc_autoreleasePoolPop(pool);

	[self->_buffer removeAllItems];
	self->_last = self->_i + 1;
}
/*
 * Inside a close tag, no name yet.
 *
 * Collects the name of a close tag up to whitespace or '>'. The name must
 * equal that of the innermost open element, otherwise the document is
 * malformed. The delegate is notified, the element's namespace scope is
 * popped and parsing is marked finished once no open element remains.
 */
static void
inCloseTagNameState(OFXMLParser *self)
{
	const char c = self->_data[self->_i];
	void *pool;
	const char *qualifiedName, *colon;
	size_t pending, qualifiedNameLength;
	OFString *qualifiedNameString, *elementNS;

	switch (c) {
	case ' ':
	case '\t':
	case '\n':
	case '\r':
	case '>':
		break;
	default:
		/* Still inside the name */
		return;
	}

	pending = self->_i - self->_last;
	if (pending > 0)
		appendToBuffer(self->_buffer, self->_data + self->_last,
		    self->_encoding, pending);

	pool = objc_autoreleasePoolPush();

	qualifiedName = self->_buffer.items;
	qualifiedNameLength = self->_buffer.count;
	qualifiedNameString = [OFString
	    stringWithUTF8String: qualifiedName
			  length: qualifiedNameLength];

	colon = memchr(qualifiedName, ':', qualifiedNameLength);
	if (colon == NULL) {
		self->_name = [qualifiedNameString copy];
		self->_prefix = nil;
	} else {
		self->_name = [[OFString alloc]
		    initWithUTF8String: colon + 1
				length: qualifiedNameLength -
					(colon - qualifiedName) - 1];
		self->_prefix = [[OFString alloc]
		    initWithUTF8String: qualifiedName
				length: colon - qualifiedName];
	}

	/* Close tags must match the most recently opened element */
	if (![self->_previous.lastObject isEqual: qualifiedNameString])
		@throw [OFMalformedXMLException exceptionWithParser: self];

	[self->_previous removeLastObject];

	[self->_buffer removeAllItems];

	elementNS = namespaceForPrefix(self->_prefix, self->_namespaces);
	if (self->_prefix != nil && elementNS == nil)
		@throw [OFUnboundPrefixException
		    exceptionWithPrefix: self->_prefix
				 parser: self];

	if ([self->_delegate respondsToSelector:
	    @selector(parser:didEndElement:prefix:namespace:)])
		[self->_delegate parser: self
			  didEndElement: self->_name
				 prefix: self->_prefix
			      namespace: elementNS];

	objc_autoreleasePoolPop(pool);

	/* The element's namespace bindings go out of scope */
	[self->_namespaces removeLastObject];

	[self->_name release];
	[self->_prefix release];
	self->_name = self->_prefix = nil;

	self->_last = self->_i + 1;
	self->_state = (c == '>'
	    ? OF_XMLPARSER_OUTSIDE_TAG
	    : OF_XMLPARSER_EXPECT_SPACE_OR_TAG_CLOSE);

	if (self->_previous.count == 0)
		self->_finishedParsing = true;
}
/*
 * Inside a tag, name found.
 *
 * Skips whitespace between attributes. Any other byte except '>' and '/'
 * starts an attribute name. On '>' or '/' the element is complete: the
 * prefixes of the element and of all collected attributes are resolved to
 * namespaces and the delegate is notified.
 */
static void
inTagState(OFXMLParser *self)
{
	const char c = self->_data[self->_i];
	const bool selfClosing = (c == '/');
	void *pool;
	OFString *elementNS;
	OFXMLAttribute *const *attributes;
	size_t attributeCount;

	if (c != '>' && !selfClosing) {
		if (c != ' ' && c != '\t' && c != '\n' && c != '\r') {
			/* First byte of an attribute name: process again */
			self->_last = self->_i;
			self->_state = OF_XMLPARSER_IN_ATTRIBUTE_NAME;
			self->_i--;
		}

		return;
	}

	attributes = self->_attributes.objects;
	attributeCount = self->_attributes.count;

	elementNS = namespaceForPrefix(self->_prefix, self->_namespaces);
	if (self->_prefix != nil && elementNS == nil)
		@throw [OFUnboundPrefixException
		    exceptionWithPrefix: self->_prefix
				 parser: self];

	for (size_t idx = 0; idx < attributeCount; idx++)
		resolveAttributeNamespace(attributes[idx], self->_namespaces,
		    self);

	pool = objc_autoreleasePoolPush();

	if ([self->_delegate respondsToSelector:
	    @selector(parser:didStartElement:prefix:namespace:attributes:)])
		[self->_delegate parser: self
			didStartElement: self->_name
				 prefix: self->_prefix
			      namespace: elementNS
			     attributes: self->_attributes];

	if (selfClosing) {
		if ([self->_delegate respondsToSelector:
		    @selector(parser:didEndElement:prefix:namespace:)])
			[self->_delegate parser: self
				  didEndElement: self->_name
					 prefix: self->_prefix
				      namespace: elementNS];

		/* A self-closing root element ends the document */
		if (self->_previous.count == 0)
			self->_finishedParsing = true;

		[self->_namespaces removeLastObject];
	} else if (self->_prefix == nil)
		[self->_previous addObject: self->_name];
	else {
		/* Remember the qualified name for the matching close tag */
		OFString *qualifiedName = [OFString stringWithFormat:
		    @"%@:%@", self->_prefix, self->_name];

		[self->_previous addObject: qualifiedName];
	}

	objc_autoreleasePoolPop(pool);

	[self->_name release];
	[self->_prefix release];
	[self->_attributes removeAllObjects];
	self->_name = self->_prefix = nil;

	self->_last = self->_i + 1;
	self->_state = (selfClosing
	    ? OF_XMLPARSER_EXPECT_TAG_CLOSE
	    : OF_XMLPARSER_OUTSIDE_TAG);
}
/*
 * Looking for attribute name.
 *
 * Collects the attribute name up to '=' or whitespace and splits it into
 * prefix and local name at the first ':'. After '=' the value's quote is
 * expected next; after whitespace the '=' is.
 */
static void
inAttributeNameState(OFXMLParser *self)
{
	const char c = self->_data[self->_i];
	void *pool;
	OFString *qualifiedNameString;
	const char *qualifiedName, *colon;
	size_t pending, qualifiedNameLength;

	switch (c) {
	case '=':
	case ' ':
	case '\t':
	case '\n':
	case '\r':
		break;
	default:
		/* Still inside the name */
		return;
	}

	pending = self->_i - self->_last;
	if (pending > 0)
		appendToBuffer(self->_buffer, self->_data + self->_last,
		    self->_encoding, pending);

	pool = objc_autoreleasePoolPush();

	qualifiedNameString = [OFString
	    stringWithUTF8String: self->_buffer.items
			  length: self->_buffer.count];
	qualifiedName = qualifiedNameString.UTF8String;
	qualifiedNameLength = qualifiedNameString.UTF8StringLength;

	colon = memchr(qualifiedName, ':', qualifiedNameLength);
	if (colon == NULL) {
		self->_attributeName = [qualifiedNameString copy];
		self->_attributePrefix = nil;
	} else {
		self->_attributeName = [[OFString alloc]
		    initWithUTF8String: colon + 1
				length: qualifiedNameLength -
					(colon - qualifiedName) - 1];
		self->_attributePrefix = [[OFString alloc]
		    initWithUTF8String: qualifiedName
				length: colon - qualifiedName];
	}

	objc_autoreleasePoolPop(pool);

	[self->_buffer removeAllItems];

	self->_last = self->_i + 1;
	self->_state = (c == '='
	    ? OF_XMLPARSER_EXPECT_ATTRIBUTE_DELIMITER
	    : OF_XMLPARSER_EXPECT_ATTRIBUTE_EQUAL_SIGN);
}
/*
 * Expecting equal sign of an attribute.
 *
 * Whitespace is skipped; anything other than whitespace or '=' is malformed.
 */
static void
expectAttributeEqualSignState(OFXMLParser *self)
{
	switch (self->_data[self->_i]) {
	case '=':
		self->_last = self->_i + 1;
		self->_state = OF_XMLPARSER_EXPECT_ATTRIBUTE_DELIMITER;
		break;
	case ' ':
	case '\t':
	case '\n':
	case '\r':
		break;
	default:
		@throw [OFMalformedXMLException exceptionWithParser: self];
	}
}
/*
 * Expecting name/value delimiter of an attribute.
 *
 * Whitespace is skipped. The first non-whitespace byte must be ' or " and is
 * remembered as the delimiter that ends the value.
 */
static void
expectAttributeDelimiterState(OFXMLParser *self)
{
	const char c = self->_data[self->_i];

	/* Whatever this byte is, the value can only start after it */
	self->_last = self->_i + 1;

	switch (c) {
	case ' ':
	case '\t':
	case '\n':
	case '\r':
		return;
	case '\'':
	case '"':
		self->_delimiter = c;
		self->_state = OF_XMLPARSER_IN_ATTRIBUTE_VALUE;
		return;
	default:
		@throw [OFMalformedXMLException exceptionWithParser: self];
	}
}
/*
 * Looking for attribute value.
 *
 * Collects the value up to the closing delimiter and unescapes it. An
 * attribute named "xmlns" without prefix binds the default namespace (empty
 * key) in the current scope; an attribute with prefix "xmlns" binds its
 * local name. In every case the attribute is added to the element's
 * attribute list, with the prefix stored in place of the namespace until it
 * is resolved in inTagState().
 */
static void
inAttributeValueState(OFXMLParser *self)
{
	void *pool;
	OFString *value;
	size_t pending;
	OFXMLAttribute *attribute;

	if (self->_data[self->_i] != self->_delimiter)
		return;

	pending = self->_i - self->_last;
	if (pending > 0)
		appendToBuffer(self->_buffer, self->_data + self->_last,
		    self->_encoding, pending);

	pool = objc_autoreleasePoolPush();

	value = transformString(self, self->_buffer, 0, true);

	if (self->_attributePrefix == nil) {
		if ([self->_attributeName isEqual: @"xmlns"])
			[self->_namespaces.lastObject setObject: value
							 forKey: @""];
	} else if ([self->_attributePrefix isEqual: @"xmlns"])
		[self->_namespaces.lastObject
		    setObject: value
		       forKey: self->_attributeName];

	attribute = [OFXMLAttribute attributeWithName: self->_attributeName
					    namespace: self->_attributePrefix
					  stringValue: value];
	attribute->_useDoubleQuotes = (self->_delimiter == '"');
	[self->_attributes addObject: attribute];

	objc_autoreleasePoolPop(pool);

	[self->_buffer removeAllItems];

	[self->_attributeName release];
	[self->_attributePrefix release];
	self->_attributeName = self->_attributePrefix = nil;

	self->_state = OF_XMLPARSER_IN_TAG;
	self->_last = self->_i + 1;
}
/*
 * Expecting closing '>'.
 *
 * Entered after the '/' of a self-closing tag; nothing but '>' may follow.
 */
static void
expectTagCloseState(OFXMLParser *self)
{
	if (self->_data[self->_i] != '>')
		@throw [OFMalformedXMLException exceptionWithParser: self];

	self->_last = self->_i + 1;
	self->_state = OF_XMLPARSER_OUTSIDE_TAG;
}
/*
 * Expecting closing '>' or space.
 *
 * Entered after the name of a close tag that was followed by whitespace;
 * more whitespace is skipped and anything other than '>' is malformed.
 */
static void
expectSpaceOrTagCloseState(OFXMLParser *self)
{
	switch (self->_data[self->_i]) {
	case '>':
		self->_last = self->_i + 1;
		self->_state = OF_XMLPARSER_OUTSIDE_TAG;
		break;
	case ' ':
	case '\t':
	case '\n':
	case '\r':
		break;
	default:
		@throw [OFMalformedXMLException exceptionWithParser: self];
	}
}
/*
 * In <!
 *
 * '-' starts a comment, '[' a CDATA section and 'D' a DOCTYPE; everything
 * else is malformed. Once parsing has finished only comments are accepted.
 */
static void
inExclamationMarkState(OFXMLParser *self)
{
	const char c = self->_data[self->_i];

	if (self->_finishedParsing && c != '-')
		@throw [OFMalformedXMLException exceptionWithParser: self];

	switch (c) {
	case '-':
		self->_state = OF_XMLPARSER_IN_COMMENT_OPENING;
		break;
	case '[':
		self->_state = OF_XMLPARSER_IN_CDATA_OPENING;
		self->_level = 0;
		break;
	case 'D':
		self->_state = OF_XMLPARSER_IN_DOCTYPE;
		self->_level = 0;
		break;
	default:
		@throw [OFMalformedXMLException exceptionWithParser: self];
	}

	self->_last = self->_i + 1;
}
/*
 * CDATA
 *
 * Matches the "CDATA[" that has to follow "<![", one byte per call; _level
 * is the index of the byte expected next. After the last byte the CDATA
 * content begins.
 */
static void
inCDATAOpeningState(OFXMLParser *self)
{
	static const char opening[] = "CDATA[";

	if (self->_data[self->_i] != opening[self->_level])
		@throw [OFMalformedXMLException exceptionWithParser: self];

	self->_level++;

	if (self->_level == 6) {
		self->_state = OF_XMLPARSER_IN_CDATA;
		self->_level = 0;
	}

	self->_last = self->_i + 1;
}
/*
 * Inside a CDATA section. _level counts the ']' seen in a row; a '>' after
 * at least two of them ends the section. The content, without the final
 * "]]" and without unescaping, is reported to the delegate.
 */
static void
inCDATAState(OFXMLParser *self)
{
	const char c = self->_data[self->_i];
	void *pool;
	OFString *CDATA;

	if (c == ']') {
		self->_level++;
		return;
	}

	if (c != '>' || self->_level < 2) {
		self->_level = 0;
		return;
	}

	pool = objc_autoreleasePoolPush();

	appendToBuffer(self->_buffer, self->_data + self->_last,
	    self->_encoding, self->_i - self->_last);

	/* Cut the trailing "]]" */
	CDATA = transformString(self, self->_buffer, 2, false);

	if ([self->_delegate respondsToSelector: @selector(parser:foundCDATA:)])
		[self->_delegate parser: self foundCDATA: CDATA];

	objc_autoreleasePoolPop(pool);

	[self->_buffer removeAllItems];

	self->_state = OF_XMLPARSER_OUTSIDE_TAG;
	self->_last = self->_i + 1;
}
/*
 * Comment
 *
 * Entered after "<!-"; the next byte has to be the second '-' of the comment
 * opening.
 */
static void
inCommentOpeningState(OFXMLParser *self)
{
	const char c = self->_data[self->_i];

	if (c != '-')
		@throw [OFMalformedXMLException exceptionWithParser: self];

	self->_level = 0;
	self->_state = OF_XMLPARSER_IN_COMMENT_1;
	self->_last = self->_i + 1;
}
/*
 * Inside a comment. _level counts the '-' seen in a row; after two of them
 * the comment has to end, which is checked by inCommentState2().
 */
static void
inCommentState1(OFXMLParser *self)
{
	if (self->_data[self->_i] != '-')
		self->_level = 0;
	else
		self->_level++;

	if (self->_level == 2)
		self->_state = OF_XMLPARSER_IN_COMMENT_2;
}
/*
 * Inside a comment, directly after "--". Only '>' may follow. The comment
 * text, without the final "--" and without unescaping, is reported to the
 * delegate.
 */
static void
inCommentState2(OFXMLParser *self)
{
	void *pool;
	OFString *text;

	if (self->_data[self->_i] != '>')
		@throw [OFMalformedXMLException exceptionWithParser: self];

	pool = objc_autoreleasePoolPush();

	appendToBuffer(self->_buffer, self->_data + self->_last,
	    self->_encoding, self->_i - self->_last);

	/* Cut the trailing "--" */
	text = transformString(self, self->_buffer, 2, false);

	if ([self->_delegate respondsToSelector:
	    @selector(parser:foundComment:)])
		[self->_delegate parser: self foundComment: text];

	objc_autoreleasePoolPop(pool);

	[self->_buffer removeAllItems];

	self->_state = OF_XMLPARSER_OUTSIDE_TAG;
	self->_last = self->_i + 1;
}
/*
 * In <!DOCTYPE ...>
 *
 * _level is the number of bytes seen since the 'D'. The first six have to
 * spell "OCTYPE" and the seventh has to be whitespace. Everything after that
 * is skipped up to the first '>'.
 */
static void
inDOCTYPEState(OFXMLParser *self)
{
	const char c = self->_data[self->_i];
	bool malformed = false;

	if (self->_level < 6)
		malformed = (c != "OCTYPE"[self->_level]);
	else if (self->_level == 6)
		malformed = (c != ' ' && c != '\t' && c != '\n' && c != '\r');

	if (malformed)
		@throw [OFMalformedXMLException exceptionWithParser: self];

	self->_level++;

	if (self->_level > 6 && c == '>')
		self->_state = OF_XMLPARSER_OUTSIDE_TAG;

	self->_last = self->_i + 1;
}
/*
 * The current line number. It starts at 1 and is incremented by
 * -parseBuffer:length: for every line break it processes.
 */
- (size_t)lineNumber
{
	return self->_lineNumber;
}
/* Whether the element that was opened first has been closed again. */
- (bool)hasFinishedParsing
{
	return self->_finishedParsing;
}
/*
 * Callback used while unescaping: forwards an unknown entity to the parser's
 * delegate if it implements -parser:foundUnknownEntityNamed: and returns the
 * delegate's result. Returns nil if the delegate does not implement it.
 */
- (OFString *)string: (OFString *)string
    containsUnknownEntityNamed: (OFString *)entity
{
	if (![_delegate respondsToSelector:
	    @selector(parser:foundUnknownEntityNamed:)])
		return nil;

	return [_delegate parser: self foundUnknownEntityNamed: entity];
}
@end