@@ -38,19 +38,51 @@ typedef void (*state_function)(id, SEL, const char*, size_t*, size_t*); static SEL selectors[OF_XMLPARSER_NUM_STATES]; static state_function lookupTable[OF_XMLPARSER_NUM_STATES]; -static OF_INLINE OFString* -transform_string(OFMutableString *cache, +static OFString* +transform_string(OFMutableString *cache, size_t cut, BOOL unescape, OFObject *delegate) { [cache replaceOccurrencesOfString: @"\r\n" withString: @"\n"]; [cache replaceOccurrencesOfString: @"\r" withString: @"\n"]; - return [cache stringByXMLUnescapingWithDelegate: delegate]; + + if (cut > 0) { + /* + * We need to create a mutable copy in order to detect possible + * UTF-8, as we never checked for UTF-8 when appending to the + * cache for performance reasons. + */ + OFMutableString *ret = [[cache mutableCopy] autorelease]; + size_t length; + + length = [ret length]; + [ret deleteCharactersFromIndex: length - cut + toIndex: length]; + + if (unescape) + return [ret stringByXMLUnescapingWithDelegate: + delegate]; + + /* + * Class swizzle the string to be immutable. We pass it as + * OFString*, so it can't be modified anyway. But not swizzling + * it would create a real copy each time -[copy] is called. + */ + ret->isa = [OFString class]; + + return ret; + } else { + if (unescape) + return [cache stringByXMLUnescapingWithDelegate: + delegate]; + else + return [[cache copy] autorelease]; + } } static OFString* namespace_for_prefix(OFString *prefix, OFArray *namespaces) { @@ -158,10 +190,11 @@ @"xmlns", @"http://www.w3.org/2000/xmlns/", nil]; [namespaces addObject: dict]; acceptProlog = YES; lineNumber = 1; + encoding = OF_STRING_ENCODING_UTF_8; [pool release]; } @catch (id e) { [self release]; @throw e; @@ -225,10 +258,11 @@ } /* In OF_XMLPARSER_IN_TAG, there can be only spaces */ if (length - last > 0 && state != OF_XMLPARSER_IN_TAG) [cache appendCStringWithoutUTF8Checking: buf + last + encoding: encoding length: length - last]; } - (void)parseString: (OFString*)string { @@ -287,18 +321,19 @@ if (buffer[*i] != '<') return; if ((length = *i - *last) > 0) [cache appendCStringWithoutUTF8Checking: buffer + *last + encoding: encoding length: length]; if ([cache cStringLength] > 0) { OFString *characters; OFAutoreleasePool *pool; pool = [[OFAutoreleasePool alloc] init]; - characters = transform_string(cache, self); + characters = transform_string(cache, 0, YES, self); #if defined(OF_HAVE_PROPERTIES) && defined(OF_HAVE_BLOCKS) if (charactersHandler != NULL) charactersHandler(self, characters); else @@ -353,11 +388,11 @@ { const char *cString; size_t i, last, length; int piState = 0; OFString *attribute = nil; - OFString *value = nil; + OFMutableString *value = nil; char piDelimiter = 0; if (!acceptProlog) return NO; @@ -403,21 +438,35 @@ break; case 3: if (cString[i] != piDelimiter) continue; - value = [OFString stringWithCString: cString + last - length: i - last]; + value = [OFMutableString + stringWithCString: cString + last + length: i - last]; if ([attribute isEqual: @"version"]) if (![value hasPrefix: @"1."]) return NO; - if ([attribute isEqual: @"encoding"]) - if ([value caseInsensitiveCompare: @"utf-8"] != - OF_ORDERED_SAME) + if ([attribute isEqual: @"encoding"]) { + [value lower]; + + if ([value isEqual: @"utf-8"]) + encoding = OF_STRING_ENCODING_UTF_8; + else if ([value isEqual: @"iso-8859-1"]) + encoding = + OF_STRING_ENCODING_ISO_8859_1; + else if ([value isEqual: @"iso-8859-15"]) + encoding = + OF_STRING_ENCODING_ISO_8859_15; + else if ([value isEqual: @"windows-1252"]) + encoding = + OF_STRING_ENCODING_WINDOWS_1252; + else return NO; + } last = i + 1; piState = 0; break; @@ -437,27 +486,16 @@ { if (buffer[*i] == '?') level = 1; else if (level == 1 && buffer[*i] == '>') { OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init]; - OFMutableString *pi; - size_t len; + OFString *pi; [cache appendCStringWithoutUTF8Checking: buffer + *last + encoding: encoding length: *i - *last]; - pi = [[cache mutableCopy] autorelease]; - len = [pi length]; - - [pi deleteCharactersFromIndex: len - 1 - toIndex: len]; - - /* - * Class swizzle the string to be immutable. We pass it as - * OFString*, so it can't be modified anyway. But not swizzling - * it would create a real copy each time -[copy] is called. - */ - pi->isa = [OFString class]; + pi = transform_string(cache, 1, NO, nil); if ([pi isEqual: @"xml"] || [pi hasPrefix: @"xml "] || [pi hasPrefix: @"xml\t"] || [pi hasPrefix: @"xml\r"] || [pi hasPrefix: @"xml\n"]) if (![self _parseXMLProcessingInstructions: pi]) @@ -490,10 +528,11 @@ buffer[*i] != '\r' && buffer[*i] != '>' && buffer[*i] != '/') return; if ((length = *i - *last) > 0) [cache appendCStringWithoutUTF8Checking: buffer + *last + encoding: encoding length: length]; cacheCString = [cache cString]; cacheLength = [cache cStringLength]; @@ -584,11 +623,13 @@ buffer[*i] != '\r' && buffer[*i] != '>') return; if ((length = *i - *last) > 0) [cache appendCStringWithoutUTF8Checking: buffer + *last + encoding: encoding length: length]; + cacheCString = [cache cString]; cacheLength = [cache cStringLength]; if ((tmp = memchr(cacheCString, ':', cacheLength)) != NULL) { name = [[OFString alloc] initWithCString: tmp + 1 @@ -734,13 +775,15 @@ if (buffer[*i] != '=') return; if ((length = *i - *last) > 0) [cache appendCStringWithoutUTF8Checking: buffer + *last + encoding: encoding length: length]; [cache deleteLeadingAndTrailingWhitespaces]; + cacheCString = [cache cString]; cacheLength = [cache cStringLength]; if ((tmp = memchr(cacheCString, ':', cacheLength)) != NULL) { attributeName = [[OFString alloc] @@ -791,14 +834,15 @@ if (buffer[*i] != delimiter) return; if ((length = *i - *last) > 0) [cache appendCStringWithoutUTF8Checking: buffer + *last + encoding: encoding length: length]; pool = [[OFAutoreleasePool alloc] init]; - attributeValue = transform_string(cache, self); + attributeValue = transform_string(cache, 0, YES, self); if (attributePrefix == nil && [attributeName isEqual: @"xmlns"]) [[namespaces lastObject] setObject: attributeValue forKey: @""]; if ([attributePrefix isEqual: @"xmlns"]) @@ -908,12 +952,11 @@ - (void)_parseInCDATA2WithBuffer: (const char*)buffer i: (size_t*)i last: (size_t*)last { OFAutoreleasePool *pool; - OFMutableString *CDATA; - size_t length; + OFString *CDATA; if (buffer[*i] != '>') { state = OF_XMLPARSER_IN_CDATA_1; level = (buffer[*i] == ']' ? 1 : 0); @@ -921,23 +964,13 @@ } pool = [[OFAutoreleasePool alloc] init]; [cache appendCStringWithoutUTF8Checking: buffer + *last + encoding: encoding length: *i - *last]; - CDATA = [[cache mutableCopy] autorelease]; - length = [CDATA length]; - - [CDATA deleteCharactersFromIndex: length - 2 - toIndex: length]; - - /* - * Class swizzle the string to be immutable. We pass it as OFString*, so - * it can't be modified anyway. But not swizzling it would create a - * real copy each time -[copy] is called. - */ - CDATA->isa = [OFString class]; + CDATA = transform_string(cache, 2, NO, nil); #if defined(OF_HAVE_PROPERTIES) && defined(OF_HAVE_BLOCKS) if (CDATAHandler != NULL) CDATAHandler(self, CDATA); else @@ -983,33 +1016,22 @@ - (void)_parseInComment2WithBuffer: (const char*)buffer i: (size_t*)i last: (size_t*)last { OFAutoreleasePool *pool; - OFMutableString *comment; - size_t length; + OFString *comment; if (buffer[*i] != '>') @throw [OFMalformedXMLException newWithClass: isa parser: self]; pool = [[OFAutoreleasePool alloc] init]; [cache appendCStringWithoutUTF8Checking: buffer + *last + encoding: encoding length: *i - *last]; - comment = [[cache mutableCopy] autorelease]; - length = [comment length]; - - [comment deleteCharactersFromIndex: length - 2 - toIndex: length]; - - /* - * Class swizzle the string to be immutable. We pass it as OFString*, so - * it can't be modified anyway. But not swizzling it would create a - * real copy each time -[copy] is called. - */ - comment->isa = [OFString class]; + comment = transform_string(cache, 2, NO, nil); #if defined(OF_HAVE_PROPERTIES) && defined(OF_HAVE_BLOCKS) if (commentHandler != NULL) commentHandler(self, comment); else