@@ -18,36 +18,60 @@ #define OF_XML_PARSER_M #include #import "OFXMLParser.h" -#import "OFString.h" #import "OFArray.h" -#import "OFDictionary.h" +#import "OFCharacterSet.h" #import "OFData.h" -#import "OFXMLAttribute.h" -#import "OFStream.h" +#import "OFDictionary.h" #ifdef OF_HAVE_FILES # import "OFFile.h" #endif +#import "OFStream.h" +#import "OFString.h" #import "OFSystemInfo.h" +#import "OFXMLAttribute.h" #import "OFInitializationFailedException.h" #import "OFInvalidArgumentException.h" #import "OFInvalidEncodingException.h" #import "OFInvalidFormatException.h" #import "OFMalformedXMLException.h" #import "OFOutOfRangeException.h" #import "OFUnboundPrefixException.h" + +enum { + stateInByteOrderMark, + stateOutsideTag, + stateTagOpened, + stateInProcessingInstruction, + stateInTagName, + stateInCloseTagName, + stateInTag, + stateInAttributeName, + stateExpectAttributeEqualSign, + stateExpectAttributeDelimiter, + stateInAttributeValue, + stateExpectTagClose, + stateExpectSpaceOrTagClose, + stateInExclamationMark, + stateInCDATAOpening, + stateInCDATA, + stateInCommentOpening, + stateInComment1, + stateInComment2, + stateInDOCTYPE +}; @interface OFXMLParser () @end static void inByteOrderMarkState(OFXMLParser *); static void outsideTagState(OFXMLParser *); static void tagOpenedState(OFXMLParser *); -static void inProcessingInstructionsState(OFXMLParser *); +static void inProcessingInstructionState(OFXMLParser *); static void inTagNameState(OFXMLParser *); static void inCloseTagNameState(OFXMLParser *); static void inTagState(OFXMLParser *); static void inAttributeNameState(OFXMLParser *); static void expectAttributeEqualSignState(OFXMLParser *); @@ -60,42 +84,39 @@ static void inCDATAState(OFXMLParser *); static void inCommentOpeningState(OFXMLParser *); static void inCommentState1(OFXMLParser *); static void inCommentState2(OFXMLParser *); static void inDOCTYPEState(OFXMLParser *); -typedef void (*state_function_t)(OFXMLParser *); -static state_function_t 
lookupTable[] = { - [OF_XMLPARSER_IN_BYTE_ORDER_MARK] = inByteOrderMarkState, - [OF_XMLPARSER_OUTSIDE_TAG] = outsideTagState, - [OF_XMLPARSER_TAG_OPENED] = tagOpenedState, - [OF_XMLPARSER_IN_PROCESSING_INSTRUCTIONS] = - inProcessingInstructionsState, - [OF_XMLPARSER_IN_TAG_NAME] = inTagNameState, - [OF_XMLPARSER_IN_CLOSE_TAG_NAME] = inCloseTagNameState, - [OF_XMLPARSER_IN_TAG] = inTagState, - [OF_XMLPARSER_IN_ATTRIBUTE_NAME] = inAttributeNameState, - [OF_XMLPARSER_EXPECT_ATTRIBUTE_EQUAL_SIGN] = - expectAttributeEqualSignState, - [OF_XMLPARSER_EXPECT_ATTRIBUTE_DELIMITER] = - expectAttributeDelimiterState, - [OF_XMLPARSER_IN_ATTRIBUTE_VALUE] = inAttributeValueState, - [OF_XMLPARSER_EXPECT_TAG_CLOSE] = expectTagCloseState, - [OF_XMLPARSER_EXPECT_SPACE_OR_TAG_CLOSE] = expectSpaceOrTagCloseState, - [OF_XMLPARSER_IN_EXCLAMATION_MARK] = inExclamationMarkState, - [OF_XMLPARSER_IN_CDATA_OPENING] = inCDATAOpeningState, - [OF_XMLPARSER_IN_CDATA] = inCDATAState, - [OF_XMLPARSER_IN_COMMENT_OPENING] = inCommentOpeningState, - [OF_XMLPARSER_IN_COMMENT_1] = inCommentState1, - [OF_XMLPARSER_IN_COMMENT_2] = inCommentState2, - [OF_XMLPARSER_IN_DOCTYPE] = inDOCTYPEState +typedef void (*StateFunction)(OFXMLParser *); +static StateFunction lookupTable[] = { + [stateInByteOrderMark] = inByteOrderMarkState, + [stateOutsideTag] = outsideTagState, + [stateTagOpened] = tagOpenedState, + [stateInProcessingInstruction] = inProcessingInstructionState, + [stateInTagName] = inTagNameState, + [stateInCloseTagName] = inCloseTagNameState, + [stateInTag] = inTagState, + [stateInAttributeName] = inAttributeNameState, + [stateExpectAttributeEqualSign] = expectAttributeEqualSignState, + [stateExpectAttributeDelimiter] = expectAttributeDelimiterState, + [stateInAttributeValue] = inAttributeValueState, + [stateExpectTagClose] = expectTagCloseState, + [stateExpectSpaceOrTagClose] = expectSpaceOrTagCloseState, + [stateInExclamationMark] = inExclamationMarkState, + [stateInCDATAOpening] = 
inCDATAOpeningState, + [stateInCDATA] = inCDATAState, + [stateInCommentOpening] = inCommentOpeningState, + [stateInComment1] = inCommentState1, + [stateInComment2] = inCommentState2, + [stateInDOCTYPE] = inDOCTYPEState }; static OF_INLINE void appendToBuffer(OFMutableData *buffer, const char *string, - of_string_encoding_t encoding, size_t length) + OFStringEncoding encoding, size_t length) { - if OF_LIKELY(encoding == OF_STRING_ENCODING_UTF_8) + if OF_LIKELY(encoding == OFStringEncodingUTF8) [buffer addItems: string count: length]; else { void *pool = objc_autoreleasePoolPush(); OFString *tmp = [OFString stringWithCString: string encoding: encoding @@ -209,11 +230,11 @@ @"xmlns", @"http://www.w3.org/2000/xmlns/", nil]; [_namespaces addObject: dict]; _acceptProlog = true; _lineNumber = 1; - _encoding = OF_STRING_ENCODING_UTF_8; + _encoding = OFStringEncodingUTF8; _depthLimit = 32; objc_autoreleasePoolPop(pool); } @catch (id e) { [self release]; @@ -255,12 +276,12 @@ _lineNumber++; _lastCarriageReturn = (_data[_i] == '\r'); } - /* In OF_XMLPARSER_IN_TAG, there can be only spaces */ - if (length - _last > 0 && _state != OF_XMLPARSER_IN_TAG) + /* In stateInTag, there can be only spaces */ + if (length - _last > 0 && _state != stateInTag) appendToBuffer(_buffer, _data + _last, _encoding, length - _last); } - (void)parseString: (OFString *)string @@ -269,38 +290,38 @@ } - (void)parseStream: (OFStream *)stream { size_t pageSize = [OFSystemInfo pageSize]; - char *buffer = of_alloc(1, pageSize); + char *buffer = OFAllocMemory(1, pageSize); @try { while (!stream.atEndOfStream) { size_t length = [stream readIntoBuffer: buffer length: pageSize]; [self parseBuffer: buffer length: length]; } } @finally { - free(buffer); + OFFreeMemory(buffer); } } static void inByteOrderMarkState(OFXMLParser *self) { if (self->_data[self->_i] != "\xEF\xBB\xBF"[self->_level]) { if (self->_level == 0) { - self->_state = OF_XMLPARSER_OUTSIDE_TAG; + self->_state = stateOutsideTag; self->_i--; 
return; } @throw [OFMalformedXMLException exceptionWithParser: self]; } if (self->_level++ == 2) - self->_state = OF_XMLPARSER_OUTSIDE_TAG; + self->_state = stateOutsideTag; self->_last = self->_i + 1; } /* Not in a tag */ @@ -336,11 +357,11 @@ } [self->_buffer removeAllItems]; self->_last = self->_i + 1; - self->_state = OF_XMLPARSER_TAG_OPENED; + self->_state = stateTagOpened; } /* Tag was just opened */ static void tagOpenedState(OFXMLParser *self) @@ -350,38 +371,38 @@ @throw [OFMalformedXMLException exceptionWithParser: self]; switch (self->_data[self->_i]) { case '?': self->_last = self->_i + 1; - self->_state = OF_XMLPARSER_IN_PROCESSING_INSTRUCTIONS; + self->_state = stateInProcessingInstruction; self->_level = 0; break; case '/': self->_last = self->_i + 1; - self->_state = OF_XMLPARSER_IN_CLOSE_TAG_NAME; + self->_state = stateInCloseTagName; self->_acceptProlog = false; break; case '!': self->_last = self->_i + 1; - self->_state = OF_XMLPARSER_IN_EXCLAMATION_MARK; + self->_state = stateInExclamationMark; self->_acceptProlog = false; break; default: if (self->_depthLimit > 0 && self->_previous.count >= self->_depthLimit) @throw [OFOutOfRangeException exception]; - self->_state = OF_XMLPARSER_IN_TAG_NAME; + self->_state = stateInTagName; self->_acceptProlog = false; self->_i--; break; } } /* <?xml [...]?> */ static bool -parseXMLProcessingInstructions(OFXMLParser *self, OFString *pi) +parseXMLProcessingInstruction(OFXMLParser *self, OFString *data) { const char *cString; size_t length, last; int PIState = 0; OFString *attribute = nil; @@ -392,15 +413,12 @@ if (!self->_acceptProlog) return false; self->_acceptProlog = false; - pi = [pi substringFromIndex: 3]; - pi = pi.stringByDeletingEnclosingWhitespaces; - - cString = pi.UTF8String; - length = pi.UTF8StringLength; + cString = data.UTF8String; + length = data.UTF8StringLength; last = 0; for (size_t i = 0; i < length; i++) { switch (PIState) { case 0: @@ -451,11 +469,11 @@ } if ([attribute isEqual: @"encoding"]) { @try { 
self->_encoding = - of_string_parse_encoding(value); + OFStringEncodingParseName(value); } @catch (OFInvalidArgumentException *e) { @throw [OFInvalidEncodingException exception]; } } @@ -471,42 +489,56 @@ return false; return true; } -/* Inside processing instructions */ +/* Inside processing instruction */ static void -inProcessingInstructionsState(OFXMLParser *self) +inProcessingInstructionState(OFXMLParser *self) { if (self->_data[self->_i] == '?') self->_level = 1; else if (self->_level == 1 && self->_data[self->_i] == '>') { void *pool = objc_autoreleasePoolPush(); - OFString *PI; + OFString *PI, *target, *data = nil; + OFCharacterSet *whitespaceCS; + size_t pos; appendToBuffer(self->_buffer, self->_data + self->_last, self->_encoding, self->_i - self->_last); PI = transformString(self, self->_buffer, 1, false); - if ([PI isEqual: @"xml"] || [PI hasPrefix: @"xml "] || - [PI hasPrefix: @"xml\t"] || [PI hasPrefix: @"xml\r"] || - [PI hasPrefix: @"xml\n"]) - if (!parseXMLProcessingInstructions(self, PI)) + whitespaceCS = [OFCharacterSet + characterSetWithCharactersInString: @" \t\n\r"]; + pos = [PI indexOfCharacterFromSet: whitespaceCS]; + if (pos != OFNotFound) { + target = [PI substringToIndex: pos]; + data = [[PI substringFromIndex: pos + 1] + stringByDeletingEnclosingWhitespaces]; + + if (data.length == 0) + data = nil; + } else + target = PI; + + if ([target caseInsensitiveCompare: @"xml"] == OFOrderedSame) + if (!parseXMLProcessingInstruction(self, data)) @throw [OFMalformedXMLException exceptionWithParser: self]; - if ([self->_delegate respondsToSelector: - @selector(parser:foundProcessingInstructions:)]) + if ([self->_delegate respondsToSelector: @selector( + parser:foundProcessingInstructionWithTarget:data:)]) [self->_delegate parser: self - foundProcessingInstructions: PI]; + foundProcessingInstructionWithTarget: target + data: data]; objc_autoreleasePoolPop(pool); [self->_buffer removeAllItems]; self->_last = self->_i + 1; - self->_state = 
OF_XMLPARSER_OUTSIDE_TAG; + self->_state = stateOutsideTag; } else self->_level = 0; } /* Inside a tag, no name yet */ @@ -546,13 +578,11 @@ self->_name = [bufferString copy]; self->_prefix = nil; } if (self->_data[self->_i] == '>' || self->_data[self->_i] == '/') { - OFString *namespace; - - namespace = namespaceForPrefix(self->_prefix, + OFString *namespace = namespaceForPrefix(self->_prefix, self->_namespaces); if (self->_prefix != nil && namespace == nil) @throw [OFUnboundPrefixException exceptionWithPrefix: self->_prefix @@ -582,14 +612,13 @@ [self->_name release]; [self->_prefix release]; self->_name = self->_prefix = nil; self->_state = (self->_data[self->_i] == '/' - ? OF_XMLPARSER_EXPECT_TAG_CLOSE - : OF_XMLPARSER_OUTSIDE_TAG); + ? stateExpectTagClose : stateOutsideTag); } else - self->_state = OF_XMLPARSER_IN_TAG; + self->_state = stateInTag; if (self->_data[self->_i] != '/') [self->_namespaces addObject: [OFMutableDictionary dictionary]]; objc_autoreleasePoolPop(pool); @@ -663,12 +692,11 @@ [self->_prefix release]; self->_name = self->_prefix = nil; self->_last = self->_i + 1; self->_state = (self->_data[self->_i] == '>' - ? OF_XMLPARSER_OUTSIDE_TAG - : OF_XMLPARSER_EXPECT_SPACE_OR_TAG_CLOSE); + ? stateOutsideTag : stateExpectSpaceOrTagClose); if (self->_previous.count == 0) self->_finishedParsing = true; } @@ -685,11 +713,11 @@ if (self->_data[self->_i] != ' ' && self->_data[self->_i] != '\t' && self->_data[self->_i] != '\n' && self->_data[self->_i] != '\r') { self->_last = self->_i; - self->_state = OF_XMLPARSER_IN_ATTRIBUTE_NAME; + self->_state = stateInAttributeName; self->_i--; } return; } @@ -744,12 +772,11 @@ [self->_attributes removeAllObjects]; self->_name = self->_prefix = nil; self->_last = self->_i + 1; self->_state = (self->_data[self->_i] == '/' - ? OF_XMLPARSER_EXPECT_TAG_CLOSE - : OF_XMLPARSER_OUTSIDE_TAG); + ? 
stateExpectTagClose : stateOutsideTag); } /* Looking for attribute name */ static void inAttributeNameState(OFXMLParser *self) @@ -793,21 +820,20 @@ [self->_buffer removeAllItems]; self->_last = self->_i + 1; self->_state = (self->_data[self->_i] == '=' - ? OF_XMLPARSER_EXPECT_ATTRIBUTE_DELIMITER - : OF_XMLPARSER_EXPECT_ATTRIBUTE_EQUAL_SIGN); + ? stateExpectAttributeDelimiter : stateExpectAttributeEqualSign); } /* Expecting equal sign of an attribute */ static void expectAttributeEqualSignState(OFXMLParser *self) { if (self->_data[self->_i] == '=') { self->_last = self->_i + 1; - self->_state = OF_XMLPARSER_EXPECT_ATTRIBUTE_DELIMITER; + self->_state = stateExpectAttributeDelimiter; return; } if (self->_data[self->_i] != ' ' && self->_data[self->_i] != '\t' && self->_data[self->_i] != '\n' && self->_data[self->_i] != '\r') @@ -826,11 +852,11 @@ if (self->_data[self->_i] != '\'' && self->_data[self->_i] != '"') @throw [OFMalformedXMLException exceptionWithParser: self]; self->_delimiter = self->_data[self->_i]; - self->_state = OF_XMLPARSER_IN_ATTRIBUTE_VALUE; + self->_state = stateInAttributeValue; } /* Looking for attribute value */ static void inAttributeValueState(OFXMLParser *self) @@ -870,20 +896,20 @@ [self->_attributeName release]; [self->_attributePrefix release]; self->_attributeName = self->_attributePrefix = nil; self->_last = self->_i + 1; - self->_state = OF_XMLPARSER_IN_TAG; + self->_state = stateInTag; } /* Expecting closing '>' */ static void expectTagCloseState(OFXMLParser *self) { if (self->_data[self->_i] == '>') { self->_last = self->_i + 1; - self->_state = OF_XMLPARSER_OUTSIDE_TAG; + self->_state = stateOutsideTag; } else @throw [OFMalformedXMLException exceptionWithParser: self]; } /* Expecting closing '>' or space */ @@ -890,11 +916,11 @@ static void expectSpaceOrTagCloseState(OFXMLParser *self) { if (self->_data[self->_i] == '>') { self->_last = self->_i + 1; - self->_state = OF_XMLPARSER_OUTSIDE_TAG; + self->_state = stateOutsideTag; } else 
if (self->_data[self->_i] != ' ' && self->_data[self->_i] != '\t' && self->_data[self->_i] != '\n' && self->_data[self->_i] != '\r') @throw [OFMalformedXMLException exceptionWithParser: self]; } @@ -905,16 +931,16 @@ { if (self->_finishedParsing && self->_data[self->_i] != '-') @throw [OFMalformedXMLException exceptionWithParser: self]; if (self->_data[self->_i] == '-') - self->_state = OF_XMLPARSER_IN_COMMENT_OPENING; + self->_state = stateInCommentOpening; else if (self->_data[self->_i] == '[') { - self->_state = OF_XMLPARSER_IN_CDATA_OPENING; + self->_state = stateInCDATAOpening; self->_level = 0; } else if (self->_data[self->_i] == 'D') { - self->_state = OF_XMLPARSER_IN_DOCTYPE; + self->_state = stateInDOCTYPE; self->_level = 0; } else @throw [OFMalformedXMLException exceptionWithParser: self]; self->_last = self->_i + 1; @@ -926,11 +952,11 @@ { if (self->_data[self->_i] != "CDATA["[self->_level]) @throw [OFMalformedXMLException exceptionWithParser: self]; if (++self->_level == 6) { - self->_state = OF_XMLPARSER_IN_CDATA; + self->_state = stateInCDATA; self->_level = 0; } self->_last = self->_i + 1; } @@ -955,11 +981,11 @@ objc_autoreleasePoolPop(pool); [self->_buffer removeAllItems]; self->_last = self->_i + 1; - self->_state = OF_XMLPARSER_OUTSIDE_TAG; + self->_state = stateOutsideTag; } else self->_level = 0; } /* Comment */ @@ -968,11 +994,11 @@ { if (self->_data[self->_i] != '-') @throw [OFMalformedXMLException exceptionWithParser: self]; self->_last = self->_i + 1; - self->_state = OF_XMLPARSER_IN_COMMENT_1; + self->_state = stateInComment1; self->_level = 0; } static void inCommentState1(OFXMLParser *self) @@ -981,11 +1007,11 @@ self->_level++; else self->_level = 0; if (self->_level == 2) - self->_state = OF_XMLPARSER_IN_COMMENT_2; + self->_state = stateInComment2; } static void inCommentState2(OFXMLParser *self) { @@ -1008,11 +1034,11 @@ objc_autoreleasePoolPop(pool); [self->_buffer removeAllItems]; self->_last = self->_i + 1; - self->_state = 
OF_XMLPARSER_OUTSIDE_TAG; + self->_state = stateOutsideTag; } /* In <!DOCTYPE ...> */ static void inDOCTYPEState(OFXMLParser *self) @@ -1025,11 +1051,11 @@ @throw [OFMalformedXMLException exceptionWithParser: self]; self->_level++; if (self->_level > 6 && self->_data[self->_i] == '>') - self->_state = OF_XMLPARSER_OUTSIDE_TAG; + self->_state = stateOutsideTag; self->_last = self->_i + 1; } - (size_t)lineNumber