Index: src/OFDataArray.m ================================================================== --- src/OFDataArray.m +++ src/OFDataArray.m @@ -39,11 +39,11 @@ /* References for static linking */ void _references_to_categories_of_OFDataArray(void) { _OFDataArray_Hashing_reference = 1; -}; +} @implementation OFDataArray + dataArray { return [[[self alloc] init] autorelease]; Index: src/OFMutableString.h ================================================================== --- src/OFMutableString.h +++ src/OFMutableString.h @@ -22,46 +22,59 @@ /** * \brief A class for storing and modifying strings. */ @interface OFMutableString: OFString /** - * Sets the OFString to the specified UTF-8 encoded C string. + * Sets the OFMutableString to the specified UTF-8 encoded C string. * - * \param string A UTF-8 encoded C string to set the OFString to. + * \param string A UTF-8 encoded C string to set the OFMutableString to. */ - (void)setToCString: (const char*)string; /** - * Appends a UTF-8 encoded C string to the OFString. + * Appends a UTF-8 encoded C string to the OFMutableString. * * \param string A UTF-8 encoded C string to append */ - (void)appendCString: (const char*)string; /** - * Appends a UTF-8 encoded C string with the specified length to the OFString. + * Appends a UTF-8 encoded C string with the specified length to the + * OFMutableString. * * \param string A UTF-8 encoded C string to append * \param length The length of the UTF-8 encoded C string */ - (void)appendCString: (const char*)string withLength: (size_t)length; /** - * Appends a UTF-8 encoded C string to the OFString without checking whether it - * is valid UTF-8. + * Appends a C string with the specified encoding and length to the + * OFMutableString. 
+ * + * \param string A C string to append + * \param encoding The encoding of the C string + * \param length The length of the C string + */ +- (void)appendCString: (const char*)string + withEncoding: (of_string_encoding_t)encoding + length: (size_t)length; + +/** + * Appends a UTF-8 encoded C string to the OFMutableString without checking + * whether it is valid UTF-8. * * Only use this if you are 100% sure the string you append is either ASCII or * UTF-8! * * \param string A UTF-8 encoded C string to append */ - (void)appendCStringWithoutUTF8Checking: (const char*)string; /** - * Appends a UTF-8 encoded C string with the specified length to the OFString - * without checking whether it is valid UTF-8. + * Appends a UTF-8 encoded C string with the specified length to the + * OFMutableString without checking whether it is valid UTF-8. * * Only use this if you are 100% sure the string you append is either ASCII or * UTF-8! * * \param string A UTF-8 encoded C string to append @@ -69,53 +82,69 @@ */ - (void)appendCStringWithoutUTF8Checking: (const char*)string length: (size_t)length; /** - * Appends another OFString to the OFString. + * Appends a C string with the specified encoding and length to the + * OFMutableString without checking whether it is valid UTF-8 if the specified + * encoding is UTF-8. + * + * Only use this if you are 100% sure the string you append is either ASCII or + * UTF-8 if you specified UTF-8 as encoding! + * + * \param string A C string to append + * \param encoding The encoding of the C string + * \param length The length of the C string + */ +- (void)appendCStringWithoutUTF8Checking: (const char*)string + encoding: (of_string_encoding_t)encoding + length: (size_t)length; + +/** + * Appends another OFString to the OFMutableString. * * \param string An OFString to append */ - (void)appendString: (OFString*)string; /** - * Appends a formatted UTF-8 encoded C string to the OFString. 
+ * Appends a formatted UTF-8 encoded C string to the OFMutableString. * See printf for the format syntax. * * \param format A format string which generates the string to append */ - (void)appendFormat: (OFString*)format, ...; /** - * Appends a formatted UTF-8 encoded C string to the OFString. + * Appends a formatted UTF-8 encoded C string to the OFMutableString. * See printf for the format syntax. * * \param format A format string which generates the string to append * \param arguments The arguments used in the format string */ - (void)appendFormat: (OFString*)format withArguments: (va_list)arguments; /** - * Prepends another OFString to the OFString. + * Prepends another OFString to the OFMutableString. * * \param string An OFString to prepend */ - (void)prependString: (OFString*)string; /** - * Reverse the OFString. + * Reverse the OFMutableString. */ - (void)reverse; /** - * Upper the OFString. + * Upper the OFMutableString. */ - (void)upper; /** - * Lower the OFString. + * Lower the OFMutableString. */ - (void)lower; /** * Inserts a string at the specified index. 
Index: src/OFMutableString.m ================================================================== --- src/OFMutableString.m +++ src/OFMutableString.m @@ -27,10 +27,11 @@ #else # define madvise(addr, len, advise) #endif #import "OFString.h" +#import "OFAutoreleasePool.h" #import "OFInvalidArgumentException.h" #import "OFInvalidEncodingException.h" #import "OFInvalidFormatException.h" #import "OFOutOfMemoryException.h" @@ -194,10 +195,26 @@ toSize: length + length_ + 1]; memcpy(string + length, string_, length_); length += length_; string[length] = 0; } + +- (void)appendCString: (const char*)string_ + withEncoding: (of_string_encoding_t)encoding + length: (size_t)length_ +{ + if (encoding == OF_STRING_ENCODING_UTF_8) + [self appendCString: string_ + withLength: length_]; + else { + OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init]; + [self appendString: [OFString stringWithCString: string_ + encoding: encoding + length: length_]]; + [pool release]; + } +} - (void)appendCStringWithoutUTF8Checking: (const char*)string_ { size_t len; @@ -215,10 +232,26 @@ toSize: length + length_ + 1]; memcpy(string + length, string_, length_); length += length_; string[length] = 0; } + +- (void)appendCStringWithoutUTF8Checking: (const char*)string_ + encoding: (of_string_encoding_t)encoding + length: (size_t)length_ +{ + if (encoding == OF_STRING_ENCODING_UTF_8) + [self appendCStringWithoutUTF8Checking: string_ + length: length_]; + else { + OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init]; + [self appendString: [OFString stringWithCString: string_ + encoding: encoding + length: length_]]; + [pool release]; + } +} - (void)appendString: (OFString*)string_ { if (string_ == nil) @throw [OFInvalidArgumentException newWithClass: isa Index: src/OFXMLParser.h ================================================================== --- src/OFXMLParser.h +++ src/OFXMLParser.h @@ -182,10 +182,11 @@ size_t level; BOOL acceptProlog; size_t lineNumber; BOOL lastCarriageReturn; BOOL 
finishedParsing; + of_string_encoding_t encoding; } #ifdef OF_HAVE_PROPERTIES @property (retain) id delegate; # ifdef OF_HAVE_BLOCKS Index: src/OFXMLParser.m ================================================================== --- src/OFXMLParser.m +++ src/OFXMLParser.m @@ -38,19 +38,51 @@ typedef void (*state_function)(id, SEL, const char*, size_t*, size_t*); static SEL selectors[OF_XMLPARSER_NUM_STATES]; static state_function lookupTable[OF_XMLPARSER_NUM_STATES]; -static OF_INLINE OFString* -transform_string(OFMutableString *cache, +static OFString* +transform_string(OFMutableString *cache, size_t cut, BOOL unescape, OFObject *delegate) { [cache replaceOccurrencesOfString: @"\r\n" withString: @"\n"]; [cache replaceOccurrencesOfString: @"\r" withString: @"\n"]; - return [cache stringByXMLUnescapingWithDelegate: delegate]; + + if (cut > 0) { + /* + * We need to create a mutable copy in order to detect possible + * UTF-8, as we never checked for UTF-8 when appending to the + * cache for performance reasons. + */ + OFMutableString *ret = [[cache mutableCopy] autorelease]; + size_t length; + + length = [ret length]; + [ret deleteCharactersFromIndex: length - cut + toIndex: length]; + + if (unescape) + return [ret stringByXMLUnescapingWithDelegate: + delegate]; + + /* + * Class swizzle the string to be immutable. We pass it as + * OFString*, so it can't be modified anyway. But not swizzling + * it would create a real copy each time -[copy] is called. 
+ */ + ret->isa = [OFString class]; + + return ret; + } else { + if (unescape) + return [cache stringByXMLUnescapingWithDelegate: + delegate]; + else + return [[cache copy] autorelease]; + } } static OFString* namespace_for_prefix(OFString *prefix, OFArray *namespaces) { @@ -158,10 +190,11 @@ @"xmlns", @"http://www.w3.org/2000/xmlns/", nil]; [namespaces addObject: dict]; acceptProlog = YES; lineNumber = 1; + encoding = OF_STRING_ENCODING_UTF_8; [pool release]; } @catch (id e) { [self release]; @throw e; @@ -225,10 +258,11 @@ } /* In OF_XMLPARSER_IN_TAG, there can be only spaces */ if (length - last > 0 && state != OF_XMLPARSER_IN_TAG) [cache appendCStringWithoutUTF8Checking: buf + last + encoding: encoding length: length - last]; } - (void)parseString: (OFString*)string { @@ -287,18 +321,19 @@ if (buffer[*i] != '<') return; if ((length = *i - *last) > 0) [cache appendCStringWithoutUTF8Checking: buffer + *last + encoding: encoding length: length]; if ([cache cStringLength] > 0) { OFString *characters; OFAutoreleasePool *pool; pool = [[OFAutoreleasePool alloc] init]; - characters = transform_string(cache, self); + characters = transform_string(cache, 0, YES, self); #if defined(OF_HAVE_PROPERTIES) && defined(OF_HAVE_BLOCKS) if (charactersHandler != NULL) charactersHandler(self, characters); else @@ -353,11 +388,11 @@ { const char *cString; size_t i, last, length; int piState = 0; OFString *attribute = nil; - OFString *value = nil; + OFMutableString *value = nil; char piDelimiter = 0; if (!acceptProlog) return NO; @@ -403,21 +438,35 @@ break; case 3: if (cString[i] != piDelimiter) continue; - value = [OFString stringWithCString: cString + last - length: i - last]; + value = [OFMutableString + stringWithCString: cString + last + length: i - last]; if ([attribute isEqual: @"version"]) if (![value hasPrefix: @"1."]) return NO; - if ([attribute isEqual: @"encoding"]) - if ([value caseInsensitiveCompare: @"utf-8"] != - OF_ORDERED_SAME) + if ([attribute isEqual: 
@"encoding"]) { + [value lower]; + + if ([value isEqual: @"utf-8"]) + encoding = OF_STRING_ENCODING_UTF_8; + else if ([value isEqual: @"iso-8859-1"]) + encoding = + OF_STRING_ENCODING_ISO_8859_1; + else if ([value isEqual: @"iso-8859-15"]) + encoding = + OF_STRING_ENCODING_ISO_8859_15; + else if ([value isEqual: @"windows-1252"]) + encoding = + OF_STRING_ENCODING_WINDOWS_1252; + else return NO; + } last = i + 1; piState = 0; break; @@ -437,27 +486,16 @@ { if (buffer[*i] == '?') level = 1; else if (level == 1 && buffer[*i] == '>') { OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init]; - OFMutableString *pi; - size_t len; + OFString *pi; [cache appendCStringWithoutUTF8Checking: buffer + *last + encoding: encoding length: *i - *last]; - pi = [[cache mutableCopy] autorelease]; - len = [pi length]; - - [pi deleteCharactersFromIndex: len - 1 - toIndex: len]; - - /* - * Class swizzle the string to be immutable. We pass it as - * OFString*, so it can't be modified anyway. But not swizzling - * it would create a real copy each time -[copy] is called. 
- */ - pi->isa = [OFString class]; + pi = transform_string(cache, 1, NO, nil); if ([pi isEqual: @"xml"] || [pi hasPrefix: @"xml "] || [pi hasPrefix: @"xml\t"] || [pi hasPrefix: @"xml\r"] || [pi hasPrefix: @"xml\n"]) if (![self _parseXMLProcessingInstructions: pi]) @@ -490,10 +528,11 @@ buffer[*i] != '\r' && buffer[*i] != '>' && buffer[*i] != '/') return; if ((length = *i - *last) > 0) [cache appendCStringWithoutUTF8Checking: buffer + *last + encoding: encoding length: length]; cacheCString = [cache cString]; cacheLength = [cache cStringLength]; @@ -584,11 +623,13 @@ buffer[*i] != '\r' && buffer[*i] != '>') return; if ((length = *i - *last) > 0) [cache appendCStringWithoutUTF8Checking: buffer + *last + encoding: encoding length: length]; + cacheCString = [cache cString]; cacheLength = [cache cStringLength]; if ((tmp = memchr(cacheCString, ':', cacheLength)) != NULL) { name = [[OFString alloc] initWithCString: tmp + 1 @@ -734,13 +775,15 @@ if (buffer[*i] != '=') return; if ((length = *i - *last) > 0) [cache appendCStringWithoutUTF8Checking: buffer + *last + encoding: encoding length: length]; [cache deleteLeadingAndTrailingWhitespaces]; + cacheCString = [cache cString]; cacheLength = [cache cStringLength]; if ((tmp = memchr(cacheCString, ':', cacheLength)) != NULL) { attributeName = [[OFString alloc] @@ -791,14 +834,15 @@ if (buffer[*i] != delimiter) return; if ((length = *i - *last) > 0) [cache appendCStringWithoutUTF8Checking: buffer + *last + encoding: encoding length: length]; pool = [[OFAutoreleasePool alloc] init]; - attributeValue = transform_string(cache, self); + attributeValue = transform_string(cache, 0, YES, self); if (attributePrefix == nil && [attributeName isEqual: @"xmlns"]) [[namespaces lastObject] setObject: attributeValue forKey: @""]; if ([attributePrefix isEqual: @"xmlns"]) @@ -908,12 +952,11 @@ - (void)_parseInCDATA2WithBuffer: (const char*)buffer i: (size_t*)i last: (size_t*)last { OFAutoreleasePool *pool; - OFMutableString *CDATA; - size_t 
length; + OFString *CDATA; if (buffer[*i] != '>') { state = OF_XMLPARSER_IN_CDATA_1; level = (buffer[*i] == ']' ? 1 : 0); @@ -921,23 +964,13 @@ } pool = [[OFAutoreleasePool alloc] init]; [cache appendCStringWithoutUTF8Checking: buffer + *last + encoding: encoding length: *i - *last]; - CDATA = [[cache mutableCopy] autorelease]; - length = [CDATA length]; - - [CDATA deleteCharactersFromIndex: length - 2 - toIndex: length]; - - /* - * Class swizzle the string to be immutable. We pass it as OFString*, so - * it can't be modified anyway. But not swizzling it would create a - * real copy each time -[copy] is called. - */ - CDATA->isa = [OFString class]; + CDATA = transform_string(cache, 2, NO, nil); #if defined(OF_HAVE_PROPERTIES) && defined(OF_HAVE_BLOCKS) if (CDATAHandler != NULL) CDATAHandler(self, CDATA); else @@ -983,33 +1016,22 @@ - (void)_parseInComment2WithBuffer: (const char*)buffer i: (size_t*)i last: (size_t*)last { OFAutoreleasePool *pool; - OFMutableString *comment; - size_t length; + OFString *comment; if (buffer[*i] != '>') @throw [OFMalformedXMLException newWithClass: isa parser: self]; pool = [[OFAutoreleasePool alloc] init]; [cache appendCStringWithoutUTF8Checking: buffer + *last + encoding: encoding length: *i - *last]; - comment = [[cache mutableCopy] autorelease]; - length = [comment length]; - - [comment deleteCharactersFromIndex: length - 2 - toIndex: length]; - - /* - * Class swizzle the string to be immutable. We pass it as OFString*, so - * it can't be modified anyway. But not swizzling it would create a - * real copy each time -[copy] is called. - */ - comment->isa = [OFString class]; + comment = transform_string(cache, 2, NO, nil); #if defined(OF_HAVE_PROPERTIES) && defined(OF_HAVE_BLOCKS) if (commentHandler != NULL) commentHandler(self, comment); else