Index: src/OFXMLParser.h ================================================================== --- src/OFXMLParser.h +++ src/OFXMLParser.h @@ -147,10 +147,12 @@ OF_XMLPARSER_IN_COMMENT_1, OF_XMLPARSER_IN_COMMENT_2, OF_XMLPARSER_IN_DOCTYPE, OF_XMLPARSER_NUM_STATES } _state; + size_t _i, _last; + const char *_data; OFDataArray *_buffer; OFString *_name, *_prefix; OFMutableArray *_namespaces, *_attributes; OFString *_attributeName, *_attributePrefix; char _delimiter; Index: src/OFXMLParser.m ================================================================== --- src/OFXMLParser.m +++ src/OFXMLParser.m @@ -35,11 +35,11 @@ #import "OFUnboundNamespaceException.h" #import "autorelease.h" #import "macros.h" -typedef void (*state_function)(id, SEL, const char*, size_t*, size_t*); +typedef void (*state_function)(id, SEL); static SEL selectors[OF_XMLPARSER_NUM_STATES]; static state_function lookupTable[OF_XMLPARSER_NUM_STATES]; static OF_INLINE void buffer_append(OFDataArray *buffer, const char *string, @@ -138,30 +138,30 @@ + (void)initialize { size_t i; const SEL selectors_[OF_XMLPARSER_NUM_STATES] = { - @selector(OF_parseInByteOrderMarkWithBuffer:i:last:), - @selector(OF_parseOutsideTagWithBuffer:i:last:), - @selector(OF_parseTagOpenedWithBuffer:i:last:), - @selector(OF_parseInProcessingInstructionsWithBuffer:i:last:), - @selector(OF_parseInTagNameWithBuffer:i:last:), - @selector(OF_parseInCloseTagNameWithBuffer:i:last:), - @selector(OF_parseInTagWithBuffer:i:last:), - @selector(OF_parseInAttributeNameWithBuffer:i:last:), - @selector(OF_parseExpectDelimiterWithBuffer:i:last:), - @selector(OF_parseInAttributeValueWithBuffer:i:last:), - @selector(OF_parseExpectCloseWithBuffer:i:last:), - @selector(OF_parseExpectSpaceOrCloseWithBuffer:i:last:), - @selector(OF_parseInExclamationMarkWithBuffer:i:last:), - @selector(OF_parseInCDATAOpeningWithBuffer:i:last:), - @selector(OF_parseInCDATA1WithBuffer:i:last:), - @selector(OF_parseInCDATA2WithBuffer:i:last:), - @selector(OF_parseInCommentOpeningWithBuffer:i:last:), - @selector(OF_parseInComment1WithBuffer:i:last:), - @selector(OF_parseInComment2WithBuffer:i:last:), - @selector(OF_parseInDoctypeWithBuffer:i:last:), + @selector(OF_inByteOrderMarkState), + @selector(OF_outsideTagState), + @selector(OF_tagOpenedState), + @selector(OF_inProcessingInstructionsState), + @selector(OF_inTagNameState), + @selector(OF_inCloseTagNameState), + @selector(OF_inTagState), + @selector(OF_inAttributeNameState), + @selector(OF_expectDelimiterState), + @selector(OF_inAttributeValueState), + @selector(OF_expectCloseState), + @selector(OF_expectSpaceOrCloseState), + @selector(OF_inExclamationMarkState), + @selector(OF_inCDATAOpeningState), + @selector(OF_inCDATAState1), + @selector(OF_inCDATAState2), + @selector(OF_inCommentOpeningState), + @selector(OF_inCommentState1), + @selector(OF_inCommentState2), + @selector(OF_inDOCTYPEState) }; memcpy(selectors, selectors_, sizeof(selectors_)); for (i = 0; i < OF_XMLPARSER_NUM_STATES; i++) { if (![self instancesRespondToSelector: selectors[i]]) @@ -246,31 +246,32 @@ } - (void)parseBuffer: (const char*)buffer length: (size_t)length { - size_t i, last = 0; + _data = buffer; - for (i = 0; i < length; i++) { - size_t j = i; + for (_i = _last = 0; _i < length; _i++) { + size_t j = _i; - lookupTable[_state](self, selectors[_state], buffer, &i, &last); + lookupTable[_state](self, selectors[_state]); /* Ensure we don't count this character twice */ - if (i != j) + if (_i != j) continue; - if (buffer[i] == '\r' || (buffer[i] == '\n' && + if (_data[_i] == '\r' || (_data[_i] == '\n' && !_lastCarriageReturn)) _lineNumber++; - _lastCarriageReturn = (buffer[i] == '\r'); + _lastCarriageReturn = (_data[_i] == '\r'); } /* In OF_XMLPARSER_IN_TAG, there can be only spaces */ - if (length - last > 0 && _state != OF_XMLPARSER_IN_TAG) - buffer_append(_buffer, buffer + last, _encoding, length - last); + if (length - _last > 0 && _state != OF_XMLPARSER_IN_TAG) + buffer_append(_buffer, _data + _last, _encoding, + length - _last); } - (void)parseString: (OFString*)string { [self parseBuffer: [string UTF8String] @@ -311,18 +312,16 @@ * The following methods handle the different states of the parser. They are * looked up in +[initialize] and put in a lookup table to speed things up. * One dispatch for every character would be way too slow! */ -- (void)OF_parseInByteOrderMarkWithBuffer: (const char*)buffer - i: (size_t*)i - last: (size_t*)last +- (void)OF_inByteOrderMarkState { - if (buffer[*i] != "\xEF\xBB\xBF"[_level]) { + if (_data[_i] != "\xEF\xBB\xBF"[_level]) { if (_level == 0) { _state = OF_XMLPARSER_OUTSIDE_TAG; - (*i)--; + _i--; return; } @throw [OFMalformedXMLException exceptionWithClass: [self class] parser: self]; @@ -329,31 +328,29 @@ } if (_level++ == 2) _state = OF_XMLPARSER_OUTSIDE_TAG; - *last = *i + 1; + _last = _i + 1; } /* Not in a tag */ -- (void)OF_parseOutsideTagWithBuffer: (const char*)buffer - i: (size_t*)i - last: (size_t*)last +- (void)OF_outsideTagState { size_t length; - if ((_finishedParsing || [_previous count] < 1) && buffer[*i] != ' ' && - buffer[*i] != '\t' && buffer[*i] != '\n' && buffer[*i] != '\r' && - buffer[*i] != '<') + if ((_finishedParsing || [_previous count] < 1) && _data[_i] != ' ' && + _data[_i] != '\t' && _data[_i] != '\n' && _data[_i] != '\r' && + _data[_i] != '<') @throw [OFMalformedXMLException exceptionWithClass: [self class] parser: self]; - if (buffer[*i] != '<') + if (_data[_i] != '<') return; - if ((length = *i - *last) > 0) - buffer_append(_buffer, buffer + *last, _encoding, length); + if ((length = _i - _last) > 0) + buffer_append(_buffer, _data + _last, _encoding, length); if ([_buffer count] > 0) { void *pool = objc_autoreleasePoolPush(); OFString *characters = transform_string(_buffer, 0, true, self); @@ -365,36 +362,34 @@ objc_autoreleasePoolPop(pool); } [_buffer removeAllItems]; - *last = *i + 1; + _last = _i + 1; _state = OF_XMLPARSER_TAG_OPENED; } /* Tag was just opened */ -- (void)OF_parseTagOpenedWithBuffer: (const char*)buffer - i: (size_t*)i - last: (size_t*)last +- (void)OF_tagOpenedState { - if (_finishedParsing && buffer[*i] != '!' && buffer[*i] != '?') + if (_finishedParsing && _data[_i] != '!' && _data[_i] != '?') @throw [OFMalformedXMLException exceptionWithClass: [self class] parser: self]; - switch (buffer[*i]) { + switch (_data[_i]) { case '?': - *last = *i + 1; + _last = _i + 1; _state = OF_XMLPARSER_IN_PROCESSING_INSTRUCTIONS; _level = 0; break; case '/': - *last = *i + 1; + _last = _i + 1; _state = OF_XMLPARSER_IN_CLOSE_TAG_NAME; _acceptProlog = false; break; case '!': - *last = *i + 1; + _last = _i + 1; _state = OF_XMLPARSER_IN_EXCLAMATIONMARK; _acceptProlog = false; break; default: if (_depthLimit > 0 && [_previous count] >= _depthLimit) @@ -402,11 +397,11 @@ exceptionWithClass: [self class] parser: self]; _state = OF_XMLPARSER_IN_TAG_NAME; _acceptProlog = false; - (*i)--; + _i--; break; } } /* */ @@ -446,12 +441,13 @@ case 1: if (cString[i] != '=') continue; attribute = [OFString - stringWithUTF8String: cString + last - length: i - last]; + stringWithCString: cString + last + encoding: _encoding + length: i - last]; last = i + 1; PIState = 2; break; case 2: @@ -466,12 +462,13 @@ case 3: if (cString[i] != piDelimiter) continue; value = [OFMutableString - stringWithUTF8String: cString + last - length: i - last]; + stringWithCString: cString + last + encoding: _encoding + length: i - last]; if ([attribute isEqual: @"version"]) { if (![value hasPrefix: @"1."]) return false; @@ -508,21 +505,19 @@ return true; } /* Inside processing instructions */ -- (void)OF_parseInProcessingInstructionsWithBuffer: (const char*)buffer - i: (size_t*)i - last: (size_t*)last +- (void)OF_inProcessingInstructionsState { - if (buffer[*i] == '?') + if (_data[_i] == '?') _level = 1; - else if (_level == 1 && buffer[*i] == '>') { + else if (_level == 1 && _data[_i] == '>') { void *pool = objc_autoreleasePoolPush(); OFString *PI; - buffer_append(_buffer, buffer + *last, _encoding, *i - *last); + buffer_append(_buffer, _data + _last, _encoding, _i - _last); PI = transform_string(_buffer, 1, false, nil); if ([PI isEqual: @"xml"] || [PI hasPrefix: @"xml "] || [PI hasPrefix: @"xml\t"] || [PI hasPrefix: @"xml\r"] || [PI hasPrefix: @"xml\n"]) @@ -538,32 +533,30 @@ objc_autoreleasePoolPop(pool); [_buffer removeAllItems]; - *last = *i + 1; + _last = _i + 1; _state = OF_XMLPARSER_OUTSIDE_TAG; } else _level = 0; } /* Inside a tag, no name yet */ -- (void)OF_parseInTagNameWithBuffer: (const char*)buffer - i: (size_t*)i - last: (size_t*)last +- (void)OF_inTagNameState { void *pool; const char *bufferCString, *tmp; size_t length, bufferLength; OFString *bufferString; - if (buffer[*i] != ' ' && buffer[*i] != '\t' && buffer[*i] != '\n' && - buffer[*i] != '\r' && buffer[*i] != '>' && buffer[*i] != '/') + if (_data[_i] != ' ' && _data[_i] != '\t' && _data[_i] != '\n' && + _data[_i] != '\r' && _data[_i] != '>' && _data[_i] != '/') return; - if ((length = *i - *last) > 0) - buffer_append(_buffer, buffer + *last, _encoding, length); + if ((length = _i - _last) > 0) + buffer_append(_buffer, _data + _last, _encoding, length); pool = objc_autoreleasePoolPush(); bufferCString = [_buffer items]; bufferLength = [_buffer count]; @@ -581,11 +574,11 @@ } else { _name = [bufferString copy]; _prefix = nil; } - if (buffer[*i] == '>' || buffer[*i] == '/') { + if (_data[_i] == '>' || _data[_i] == '/') { OFString *namespace; namespace = namespace_for_prefix(_prefix, _namespaces); if (_prefix != nil && namespace == nil) @@ -599,11 +592,11 @@ didStartElement: _name prefix: _prefix namespace: namespace attributes: nil]; - if (buffer[*i] == '/') { + if (_data[_i] == '/') { if ([_delegate respondsToSelector: @selector(parser:didEndElement:prefix:namespace:)]) [_delegate parser: self didEndElement: _name prefix: _prefix @@ -616,41 +609,39 @@ [_name release]; [_prefix release]; _name = _prefix = nil; - _state = (buffer[*i] == '/' + _state = (_data[_i] == '/' ? OF_XMLPARSER_EXPECT_CLOSE : OF_XMLPARSER_OUTSIDE_TAG); } else _state = OF_XMLPARSER_IN_TAG; - if (buffer[*i] != '/') + if (_data[_i] != '/') [_namespaces addObject: [OFMutableDictionary dictionary]]; objc_autoreleasePoolPop(pool); [_buffer removeAllItems]; - *last = *i + 1; + _last = _i + 1; } /* Inside a close tag, no name yet */ -- (void)OF_parseInCloseTagNameWithBuffer: (const char*)buffer - i: (size_t*)i - last: (size_t*)last +- (void)OF_inCloseTagNameState { void *pool; const char *bufferCString, *tmp; size_t length, bufferLength; OFString *bufferString, *namespace; - if (buffer[*i] != ' ' && buffer[*i] != '\t' && buffer[*i] != '\n' && - buffer[*i] != '\r' && buffer[*i] != '>') + if (_data[_i] != ' ' && _data[_i] != '\t' && _data[_i] != '\n' && + _data[_i] != '\r' && _data[_i] != '>') return; - if ((length = *i - *last) > 0) - buffer_append(_buffer, buffer + *last, _encoding, length); + if ((length = _i - _last) > 0) + buffer_append(_buffer, _data + _last, _encoding, length); pool = objc_autoreleasePoolPush(); bufferCString = [_buffer items]; bufferLength = [_buffer count]; @@ -696,35 +687,33 @@ [_namespaces removeLastObject]; [_name release]; [_prefix release]; _name = _prefix = nil; - *last = *i + 1; - _state = (buffer[*i] == '>' + _last = _i + 1; + _state = (_data[_i] == '>' ? OF_XMLPARSER_OUTSIDE_TAG : OF_XMLPARSER_EXPECT_SPACE_OR_CLOSE); if ([_previous count] == 0) _finishedParsing = true; } /* Inside a tag, name found */ -- (void)OF_parseInTagWithBuffer: (const char*)buffer - i: (size_t*)i - last: (size_t*)last +- (void)OF_inTagState { void *pool; OFString *namespace; OFXMLAttribute **attributesObjects; size_t j, attributesCount; - if (buffer[*i] != '>' && buffer[*i] != '/') { - if (buffer[*i] != ' ' && buffer[*i] != '\t' && - buffer[*i] != '\n' && buffer[*i] != '\r') { - *last = *i; + if (_data[_i] != '>' && _data[_i] != '/') { + if (_data[_i] != ' ' && _data[_i] != '\t' && + _data[_i] != '\n' && _data[_i] != '\r') { + _last = _i; _state = OF_XMLPARSER_IN_ATTR_NAME; - (*i)--; + _i--; } return; } @@ -750,11 +739,11 @@ didStartElement: _name prefix: _prefix namespace: namespace attributes: _attributes]; - if (buffer[*i] == '/') { + if (_data[_i] == '/') { if ([_delegate respondsToSelector: @selector(parser:didEndElement:prefix:namespace:)]) [_delegate parser: self didEndElement: _name prefix: _prefix @@ -776,31 +765,29 @@ [_name release]; [_prefix release]; [_attributes removeAllObjects]; _name = _prefix = nil; - *last = *i + 1; - _state = (buffer[*i] == '/' + _last = _i + 1; + _state = (_data[_i] == '/' ? OF_XMLPARSER_EXPECT_CLOSE : OF_XMLPARSER_OUTSIDE_TAG); } /* Looking for attribute name */ -- (void)OF_parseInAttributeNameWithBuffer: (const char*)buffer - i: (size_t*)i - last: (size_t*)last +- (void)OF_inAttributeNameState { void *pool; OFMutableString *bufferString; const char *bufferCString, *tmp; size_t length, bufferLength; - if (buffer[*i] != '=') + if (_data[_i] != '=') return; - if ((length = *i - *last) > 0) - buffer_append(_buffer, buffer + *last, _encoding, length); + if ((length = _i - _last) > 0) + buffer_append(_buffer, _data + _last, _encoding, length); pool = objc_autoreleasePoolPush(); bufferString = [OFMutableString stringWithUTF8String: [_buffer items] length: [_buffer count]]; @@ -826,47 +813,43 @@ objc_autoreleasePoolPop(pool); [_buffer removeAllItems]; - *last = *i + 1; + _last = _i + 1; _state = OF_XMLPARSER_EXPECT_DELIMITER; } /* Expecting delimiter */ -- (void)OF_parseExpectDelimiterWithBuffer: (const char*)buffer - i: (size_t*)i - last: (size_t*)last -{ - *last = *i + 1; - - if (buffer[*i] == ' ' || buffer[*i] == '\t' || buffer[*i] == '\n' || - buffer[*i] == '\r') +- (void)OF_expectDelimiterState +{ + _last = _i + 1; + + if (_data[_i] == ' ' || _data[_i] == '\t' || _data[_i] == '\n' || + _data[_i] == '\r') return; - if (buffer[*i] != '\'' && buffer[*i] != '"') + if (_data[_i] != '\'' && _data[_i] != '"') @throw [OFMalformedXMLException exceptionWithClass: [self class] parser: self]; - _delimiter = buffer[*i]; + _delimiter = _data[_i]; _state = OF_XMLPARSER_IN_ATTR_VALUE; } /* Looking for attribute value */ -- (void)OF_parseInAttributeValueWithBuffer: (const char*)buffer - i: (size_t*)i - last: (size_t*)last +- (void)OF_inAttributeValueState { void *pool; OFString *attributeValue; size_t length; - if (buffer[*i] != _delimiter) + if (_data[_i] != _delimiter) return; - if ((length = *i - *last) > 0) - buffer_append(_buffer, buffer + *last, _encoding, length); + if ((length = _i - _last) > 0) + buffer_append(_buffer, _data + _last, _encoding, length); pool = objc_autoreleasePoolPush(); attributeValue = transform_string(_buffer, 0, true, self); if (_attributePrefix == nil && [_attributeName isEqual: @"xmlns"]) @@ -886,112 +869,100 @@ [_buffer removeAllItems]; [_attributeName release]; [_attributePrefix release]; _attributeName = _attributePrefix = nil; - *last = *i + 1; + _last = _i + 1; _state = OF_XMLPARSER_IN_TAG; } /* Expecting closing '>' */ -- (void)OF_parseExpectCloseWithBuffer: (const char*)buffer - i: (size_t*)i - last: (size_t*)last +- (void)OF_expectCloseState { - if (buffer[*i] == '>') { - *last = *i + 1; + if (_data[_i] == '>') { + _last = _i + 1; _state = OF_XMLPARSER_OUTSIDE_TAG; } else @throw [OFMalformedXMLException exceptionWithClass: [self class] parser: self]; } /* Expecting closing '>' or space */ -- (void)OF_parseExpectSpaceOrCloseWithBuffer: (const char*)buffer - i: (size_t*)i - last: (size_t*)last +- (void)OF_expectSpaceOrCloseState { - if (buffer[*i] == '>') { - *last = *i + 1; + if (_data[_i] == '>') { + _last = _i + 1; _state = OF_XMLPARSER_OUTSIDE_TAG; - } else if (buffer[*i] != ' ' && buffer[*i] != '\t' && - buffer[*i] != '\n' && buffer[*i] != '\r') + } else if (_data[_i] != ' ' && _data[_i] != '\t' && + _data[_i] != '\n' && _data[_i] != '\r') @throw [OFMalformedXMLException exceptionWithClass: [self class] parser: self]; } /* In ') { + if (_data[_i] != '>') { _state = OF_XMLPARSER_IN_CDATA_1; - _level = (buffer[*i] == ']' ? 1 : 0); + _level = (_data[_i] == ']' ? 1 : 0); return; } pool = objc_autoreleasePoolPush(); - buffer_append(_buffer, buffer + *last, _encoding, *i - *last); + buffer_append(_buffer, _data + _last, _encoding, _i - _last); CDATA = transform_string(_buffer, 2, false, nil); if ([_delegate respondsToSelector: @selector(parser:foundCDATA:)]) [_delegate parser: self foundCDATA: CDATA]; @@ -998,55 +969,49 @@ objc_autoreleasePoolPop(pool); [_buffer removeAllItems]; - *last = *i + 1; + _last = _i + 1; _state = OF_XMLPARSER_OUTSIDE_TAG; } /* Comment */ -- (void)OF_parseInCommentOpeningWithBuffer: (const char*)buffer - i: (size_t*)i - last: (size_t*)last +- (void)OF_inCommentOpeningState { - if (buffer[*i] != '-') + if (_data[_i] != '-') @throw [OFMalformedXMLException exceptionWithClass: [self class] parser: self]; - *last = *i + 1; + _last = _i + 1; _state = OF_XMLPARSER_IN_COMMENT_1; _level = 0; } -- (void)OF_parseInComment1WithBuffer: (const char*)buffer - i: (size_t*)i - last: (size_t*)last +- (void)OF_inCommentState1 { - if (buffer[*i] == '-') + if (_data[_i] == '-') _level++; else _level = 0; if (_level == 2) _state = OF_XMLPARSER_IN_COMMENT_2; } -- (void)OF_parseInComment2WithBuffer: (const char*)buffer - i: (size_t*)i - last: (size_t*)last +- (void)OF_inCommentState2 { void *pool; OFString *comment; - if (buffer[*i] != '>') + if (_data[_i] != '>') @throw [OFMalformedXMLException exceptionWithClass: [self class] parser: self]; pool = objc_autoreleasePoolPush(); - buffer_append(_buffer, buffer + *last, _encoding, *i - *last); + buffer_append(_buffer, _data + _last, _encoding, _i - _last); comment = transform_string(_buffer, 2, false, nil); if ([_delegate respondsToSelector: @selector(parser:foundComment:)]) [_delegate parser: self foundComment: comment]; @@ -1053,36 +1018,29 @@ objc_autoreleasePoolPop(pool); [_buffer removeAllItems]; - *last = *i + 1; + _last = _i + 1; _state = OF_XMLPARSER_OUTSIDE_TAG; } /* In */ -- (void)OF_parseInDoctypeWithBuffer: (const char*)buffer - i: (size_t*)i - last: (size_t*)last +- (void)OF_inDOCTYPEState { - if ((_level < 6 && buffer[*i] != "OCTYPE"[_level]) || - (_level == 6 && buffer[*i] != ' ' && buffer[*i] != '\t' && - buffer[*i] != '\n' && buffer[*i] != '\r')) + if ((_level < 6 && _data[_i] != "OCTYPE"[_level]) || + (_level == 6 && _data[_i] != ' ' && _data[_i] != '\t' && + _data[_i] != '\n' && _data[_i] != '\r')) @throw [OFMalformedXMLException exceptionWithClass: [self class] parser: self]; - if (_level < 7 || buffer[*i] == '<') - _level++; - - if (buffer[*i] == '>') { - if (_level == 7) - _state = OF_XMLPARSER_OUTSIDE_TAG; - else - _level--; - } - - *last = *i + 1; + _level++; + + if (_level > 6 && _data[_i] == '>') + _state = OF_XMLPARSER_OUTSIDE_TAG; + + _last = _i + 1; } - (size_t)lineNumber { return _lineNumber; Index: tests/OFXMLParserTests.m ================================================================== --- tests/OFXMLParserTests.m +++ tests/OFXMLParserTests.m @@ -322,11 +322,11 @@ - (void)XMLParserTests { OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init]; const char *str = "\xEF\xBB\xBF" - "<<>>>>\r\r" + "\r\r" " \n" " \r\n" " \n" " \n" " \n"