Index: src/OFXMLParser.h ================================================================== --- src/OFXMLParser.h +++ src/OFXMLParser.h @@ -50,25 +50,28 @@ prefix: (OFString*)prefix namespace: (OFString*)ns; /** * This callback is called when the XML parser found a string. + * + * In case there are comments or CDATA, it is possible that this callback is + * called multiple times in a row. * * \param parser The parser which found a string * \param string The string the XML parser found */ - (void)xmlParser: (OFXMLParser*)parser - foundString: (OFString*)string; + didFindString: (OFString*)string; /** * This callback is called when the XML parser found a comment. * * \param parser The parser which found a comment * \param comment The comment the XML parser found */ - (void)xmlParser: (OFXMLParser*)parser - foundComment: (OFString*)comment; + didFindComment: (OFString*)comment; /** * This callback is called when the XML parser found an entity it doesn't know. * The callback is supposed to return a substitution for the entity or nil if * it is not known to the callback as well, in which case an exception will be @@ -76,12 +79,12 @@ * * \param parser The parser which found an unknown entity * \param entity The name of the entity the XML parser didn't know * \return A substitution for the entity or nil */ -- (OFString*)xmlParser: (OFXMLParser*)parser - foundUnknownEntityNamed: (OFString*)entity; +- (OFString*)xmlParser: (OFXMLParser*)parser + didFindUnknownEntityNamed: (OFString*)entity; @end /** * \brief A protocol that needs to be implemented by delegates for * -[stringByXMLUnescapingWithHandler:]. @@ -94,11 +97,11 @@ * exception will be thrown. * * \param entity The name of the entity that is unknown * \return A substitution for the entity or nil */ -- (OFString*)foundUnknownEntityNamed: (OFString*)entity; +- (OFString*)didFindUnknownEntityNamed: (OFString*)entity; @end /** * \brief An event-based XML parser. * @@ -117,14 +120,24 @@ OF_XMLPARSER_IN_ATTR_NAME, OF_XMLPARSER_EXPECT_DELIM, OF_XMLPARSER_IN_ATTR_VALUE, OF_XMLPARSER_EXPECT_CLOSE, OF_XMLPARSER_EXPECT_SPACE_OR_CLOSE, + OF_XMLPARSER_IN_CDATA_OR_COMMENT, + OF_XMLPARSER_IN_CDATA_OPENING_1, + OF_XMLPARSER_IN_CDATA_OPENING_2, + OF_XMLPARSER_IN_CDATA_OPENING_3, + OF_XMLPARSER_IN_CDATA_OPENING_4, + OF_XMLPARSER_IN_CDATA_OPENING_5, + OF_XMLPARSER_IN_CDATA_OPENING_6, + OF_XMLPARSER_IN_CDATA_1, + OF_XMLPARSER_IN_CDATA_2, + OF_XMLPARSER_IN_CDATA_3, + OF_XMLPARSER_IN_COMMENT_OPENING, OF_XMLPARSER_IN_COMMENT_1, OF_XMLPARSER_IN_COMMENT_2, - OF_XMLPARSER_IN_COMMENT_3, - OF_XMLPARSER_IN_COMMENT_4 + OF_XMLPARSER_IN_COMMENT_3 } state; OFMutableString *cache; OFString *name; OFString *prefix; OFMutableArray *namespaces; Index: src/OFXMLParser.m ================================================================== --- src/OFXMLParser.m +++ src/OFXMLParser.m @@ -186,11 +186,11 @@ OFString *str; pool = [[OFAutoreleasePool alloc] init]; str = transform_string(cache, self); [delegate xmlParser: self - foundString: str]; + didFindString: str]; [pool release]; } [cache setToCString: ""]; @@ -200,15 +200,15 @@ break; /* Tag was just opened */ case OF_XMLPARSER_TAG_OPENED: if (buf[i] == '/') { - last = i + 1; state = OF_XMLPARSER_IN_CLOSE_TAG_NAME; + last = i + 1; } else if(buf[i] == '!') { + state = OF_XMLPARSER_IN_CDATA_OR_COMMENT; last = i + 1; - state = OF_XMLPARSER_IN_COMMENT_1; } else { state = OF_XMLPARSER_IN_TAG_NAME; i--; } break; @@ -527,51 +527,152 @@ } else if (buf[i] != ' ' && buf[i] != '\n' && buf[i] != '\r') @throw [OFMalformedXMLException newWithClass: isa]; break; + + /* CDATA or comment */ + case OF_XMLPARSER_IN_CDATA_OR_COMMENT: + if (buf[i] == '-') + state = OF_XMLPARSER_IN_COMMENT_OPENING; + else if (buf[i] == '[') + state = OF_XMLPARSER_IN_CDATA_OPENING_1; + else + @throw [OFMalformedXMLException + newWithClass: isa]; + + last = i + 1; + break; + + /* CDATA */ + case OF_XMLPARSER_IN_CDATA_OPENING_1: + if (buf[i] == 'C') + state = OF_XMLPARSER_IN_CDATA_OPENING_2; + else + @throw [OFMalformedXMLException + newWithClass: isa]; + last = i + 1; + break; + case OF_XMLPARSER_IN_CDATA_OPENING_2: + if (buf[i] == 'D') + state = OF_XMLPARSER_IN_CDATA_OPENING_3; + else + @throw [OFMalformedXMLException + newWithClass: isa]; + last = i + 1; + break; + case OF_XMLPARSER_IN_CDATA_OPENING_3: + if (buf[i] == 'A') + state = OF_XMLPARSER_IN_CDATA_OPENING_4; + else + @throw [OFMalformedXMLException + newWithClass: isa]; + last = i + 1; + break; + case OF_XMLPARSER_IN_CDATA_OPENING_4: + if (buf[i] == 'T') + state = OF_XMLPARSER_IN_CDATA_OPENING_5; + else + @throw [OFMalformedXMLException + newWithClass: isa]; + last = i + 1; + break; + case OF_XMLPARSER_IN_CDATA_OPENING_5: + if (buf[i] == 'A') + state = OF_XMLPARSER_IN_CDATA_OPENING_6; + else + @throw [OFMalformedXMLException + newWithClass: isa]; + last = i + 1; + break; + case OF_XMLPARSER_IN_CDATA_OPENING_6: + if (buf[i] == '[') + state = OF_XMLPARSER_IN_CDATA_1; + else + @throw [OFMalformedXMLException + newWithClass: isa]; + last = i + 1; + break; + case OF_XMLPARSER_IN_CDATA_1: + if (buf[i] == ']') + state = OF_XMLPARSER_IN_CDATA_2; + break; + case OF_XMLPARSER_IN_CDATA_2: + if (buf[i] == ']') + state = OF_XMLPARSER_IN_CDATA_3; + else + state = OF_XMLPARSER_IN_CDATA_1; + break; + case OF_XMLPARSER_IN_CDATA_3: + if (buf[i] == '>') { + OFMutableString *cdata; + size_t len; + + pool = [[OFAutoreleasePool alloc] init]; + + [cache + appendCStringWithoutUTF8Checking: buf + last + length: i - last]; + cdata = [[cache mutableCopy] autorelease]; + len = [cdata length]; + + [cdata removeCharactersFromIndex: len - 2 + toIndex: len]; + [delegate xmlParser: self + didFindString: cdata]; + [pool release]; + + [cache setToCString: ""]; + + last = i + 1; + state = OF_XMLPARSER_OUTSIDE_TAG; + } else if (buf[i] != ']') + state = OF_XMLPARSER_IN_CDATA_1; + break; /* Comment */ - case OF_XMLPARSER_IN_COMMENT_1: - case OF_XMLPARSER_IN_COMMENT_2: + case OF_XMLPARSER_IN_COMMENT_OPENING: if (buf[i] != '-') @throw [OFMalformedXMLException newWithClass: isa]; last = i + 1; - state++; + state = OF_XMLPARSER_IN_COMMENT_1; + break; + case OF_XMLPARSER_IN_COMMENT_1: + if (buf[i] == '-') + state = OF_XMLPARSER_IN_COMMENT_2; + break; + case OF_XMLPARSER_IN_COMMENT_2: + state = (buf[i] == '-' ? OF_XMLPARSER_IN_COMMENT_3 : + OF_XMLPARSER_IN_COMMENT_1); break; case OF_XMLPARSER_IN_COMMENT_3: - if (buf[i] == '-') - state = OF_XMLPARSER_IN_COMMENT_4; - break; - case OF_XMLPARSER_IN_COMMENT_4: - if (buf[i] == '-') { + if (buf[i] == '>') { OFMutableString *comment; size_t len; pool = [[OFAutoreleasePool alloc] init]; [cache appendCStringWithoutUTF8Checking: buf + last length: i - last]; - comment = [[cache mutableCopy] autorelease]; len = [comment length]; - [comment removeCharactersFromIndex: len - 1 + [comment removeCharactersFromIndex: len - 2 toIndex: len]; - [comment removeLeadingAndTrailingWhitespaces]; [delegate xmlParser: self - foundComment: comment]; + didFindComment: comment]; [pool release]; [cache setToCString: ""]; last = i + 1; - state = OF_XMLPARSER_EXPECT_CLOSE; + state = OF_XMLPARSER_OUTSIDE_TAG; } else - state = OF_XMLPARSER_IN_COMMENT_3; + @throw [OFMalformedXMLException + newWithClass: isa]; break; } } @@ -580,14 +681,14 @@ if (len > 0 && state != OF_XMLPARSER_IN_TAG) [cache appendCStringWithoutUTF8Checking: buf + last length: len]; } -- (OFString*)foundUnknownEntityNamed: (OFString*)entity +- (OFString*)didFindUnknownEntityNamed: (OFString*)entity { return [delegate xmlParser: self - foundUnknownEntityNamed: entity]; + didFindUnknownEntityNamed: entity]; } @end @implementation OFString (OFXMLUnescaping) - (OFString*)stringByXMLUnescaping @@ -659,11 +760,11 @@ pool = [[OFAutoreleasePool alloc] init]; n = [OFString stringWithCString: entity length: len]; - tmp = [h foundUnknownEntityNamed: n]; + tmp = [h didFindUnknownEntityNamed: n]; if (tmp == nil) @throw [OFInvalidEncodingException newWithClass: isa]; @@ -703,20 +804,20 @@ namespace: (OFString*)ns { } - (void)xmlParser: (OFXMLParser*)parser - foundString: (OFString*)string + didFindString: (OFString*)string { } - (void)xmlParser: (OFXMLParser*)parser - foundComment: (OFString*)comment + didFindComment: (OFString*)comment { } -- (OFString*)xmlParser: (OFXMLParser*)parser - foundUnknownEntityNamed: (OFString*)entity +- (OFString*)xmlParser: (OFXMLParser*)parser + didFindUnknownEntityNamed: (OFString*)entity { return nil; } @end Index: tests/OFStringTests.m ================================================================== --- tests/OFStringTests.m +++ tests/OFStringTests.m @@ -26,11 +26,11 @@ @interface EntityHandler: OFObject @end @implementation EntityHandler -- (OFString*)foundUnknownEntityNamed: (OFString*)entity +- (OFString*)didFindUnknownEntityNamed: (OFString*)entity { if ([entity isEqual: @"foo"]) return @"bar"; return nil; Index: tests/OFXMLParserTests.m ================================================================== --- tests/OFXMLParserTests.m +++ tests/OFXMLParserTests.m @@ -46,37 +46,44 @@ i++; msg = [OFString stringWithFormat: @"Parsing part #%d", i]; switch (i) { case 1: - TEST(msg, et == STRING && [string isEqual: @"foo"]) + TEST(msg, et == TAG_START && [name isEqual: @"root"] && + prefix == nil && ns == nil && [attrs count] == 0) break; case 2: + TEST(msg, et == STRING && [string isEqual: @"\n "]) + break; + case 3: + TEST(msg, et == STRING && [string isEqual: @"f\n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - ""; + const char *str = "\n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + ""; size_t j, len; TEST(@"+[xmlParser]", (parser = [OFXMLParser xmlParser])) TEST(@"-[setDelegate:]", R([parser setDelegate: self])) @@ -307,10 +323,10 @@ else [parser parseBuffer: str + j withSize: 2]; } - TEST(@"Checking if everything was parsed", i == 26) + TEST(@"Checking if everything was parsed", i == 30) [pool drain]; } @end